# Imports

In [58]:
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Embedding, LSTM
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math

import tensorflow as tf

# Importing matplotlib to plot images.
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

# Importing SK-learn to calculate precision and recall
import sklearn
import sklearn.metrics
from sklearn.model_selection import train_test_split

# Used for graph export
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import graph_io
from keras import backend as K

import pickle as pkl

from pathlib import Path
import os.path
import sys

# Participant Infos

In [2]:
# later create here list with `list(range(1, 21))`
PID = 42
# later have set like: smartphones = {"N6", "N5X", "S4", "S3Mini"}
smartphone = "N5X"
# if with 0 nothing is found try one higher until maybe 10
extension = 0

sensors = {
    "acc",
    "gyro",
    "ori",
    "grav",
    "mag",
    "rot"
}

tasks = {
    "points",
    "fitts"
}

file_names = sensors.union(tasks)

# Read Files

In [3]:
raw_data = dict()

for file in file_names:
    file_path = str(Path.home()) + "/data/fapra_imu-" +  str(PID) + "-" + file + "-" + smartphone + "-" + str(extension) + ".csv"
    if not os.path.isfile(file_path):
        print(file_path + "not found")
        continue
    raw_data[file] = pd.read_csv(file_path, ";")

# split by time in seperate lists

In [4]:
time_filtered_data = dict()
for name in file_names:
    tmp = []
    # remove [1:] if both lenght are equal (first press is removed)
    for k, end in enumerate(raw_data["points"].time[1:]):
        start = int(raw_data["fitts"].time[k])
        # create mask for time interval
        mask = (raw_data[name]["time"] > start) & (raw_data[name]["time"] <= end)
        # only return items matching to mask
        tmp.append(raw_data[name].loc[mask])
    time_filtered_data[name] = tmp
# list to dataframe
time_filtered_data["points"] = pd.concat(time_filtered_data["points"])
time_filtered_data["fitts"] = pd.concat(time_filtered_data["fitts"])

# set screen resolution

In [5]:
if smartphone == "N5X":
    pixels = {"width": 1080, "height": 1920}
elif smartphone == "S3Mini":
    pixels = {"width": 480, "height": 800}
elif smartphone == "S4":
    pixels = {"width": 1080, "height": 1920}
elif smartphone == "N6":
    pixels = {"width": 1440, "height": 2560}

# scale screen

In [6]:
time_filtered_data["points"]["x-press"] = time_filtered_data["points"]["x-press"].div(pixels["width"])
time_filtered_data["points"]["x-circle"] = time_filtered_data["points"]["x-circle"].div(pixels["width"])
time_filtered_data["points"]["y-press"] = time_filtered_data["points"]["y-press"].div(pixels["height"])
time_filtered_data["points"]["y-circle"] = time_filtered_data["points"]["y-circle"].div(pixels["height"])

# filter unique timestamps

In [7]:
for k, item in enumerate(time_filtered_data["points"]):
    for sensor in sensors:
        time_filtered_data[sensor][k] = time_filtered_data[sensor][k].drop_duplicates(subset="time", keep="last")

# lenght of output (debug)

In [8]:
for k, point in time_filtered_data["points"].iterrows():
    for sensor in sensors:
        print(k, sensor)
        print(len(time_filtered_data[sensor][k - 1]))

1 grav
50
1 ori
224
1 rot
44
1 mag
35
1 acc
53
1 gyro
183
2 grav
45
2 ori
180
2 rot
41
2 mag
30
2 acc
45
2 gyro
147
3 grav
40
3 ori
175
3 rot
38
3 mag
29
3 acc
37
3 gyro
135
4 grav
34
4 ori
153
4 rot
37
4 mag
26
4 acc
39
4 gyro
123
5 grav
42
5 ori
176
5 rot
38
5 mag
31
5 acc
49
5 gyro
136
6 grav
136
6 ori
311
6 rot
136
6 mag
34
6 acc
136
6 gyro
278
7 grav
128
7 ori
293
7 rot
128
7 mag
32
7 acc
128
7 gyro
260
8 grav
204
8 ori
460
8 rot
204
8 mag
51
8 acc
204
8 gyro
409
9 grav
112
9 ori
254
9 rot
112
9 mag
28
9 acc
112
9 gyro
226
10 grav
116
10 ori
255
10 rot
116
10 mag
29
10 acc
116
10 gyro
226
11 grav
112
11 ori
251
11 rot
112
11 mag
28
11 acc
112
11 gyro
222
12 grav
104
12 ori
235
12 rot
104
12 mag
26
12 acc
104
12 gyro
209
13 grav
152
13 ori
339
13 rot
152
13 mag
38
13 acc
152
13 gyro
301
14 grav
148
14 ori
331
14 rot
148
14 mag
37
14 acc
148
14 gyro
295
15 grav
220
15 ori
484
15 rot
220
15 mag
54
15 acc
220
15 gyro
430
16 grav
104
16 ori
237
16 rot
104
16 mag
26
16 acc
104
16 gyro
2

168 gyro
219
169 grav
132
169 ori
299
169 rot
132
169 mag
33
169 acc
132
169 gyro
267
170 grav
112
170 ori
254
170 rot
112
170 mag
29
170 acc
112
170 gyro
225
171 grav
116
171 ori
263
171 rot
116
171 mag
29
171 acc
116
171 gyro
234
172 grav
296
172 ori
670
172 rot
297
172 mag
74
172 acc
297
172 gyro
597
173 grav
132
173 ori
302
173 rot
132
173 mag
33
173 acc
132
173 gyro
268
174 grav
112
174 ori
246
174 rot
112
174 mag
28
174 acc
112
174 gyro
218
175 grav
128
175 ori
291
175 rot
128
175 mag
32
175 acc
128
175 gyro
259
176 grav
92
176 ori
208
176 rot
92
176 mag
23
176 acc
92
176 gyro
185
177 grav
116
177 ori
255
177 rot
116
177 mag
29
177 acc
116
177 gyro
225
178 grav
116
178 ori
260
178 rot
116
178 mag
29
178 acc
116
178 gyro
229
179 grav
296
179 ori
652
179 rot
296
179 mag
72
179 acc
296
179 gyro
579
180 grav
224
180 ori
498
180 rot
224
180 mag
55
180 acc
224
180 gyro
443
181 grav
108
181 ori
238
181 rot
108
181 mag
27
181 acc
108
181 gyro
211
182 grav
112
182 ori
253
182 rot
112
182 

# Create Array

In [9]:
%%time
result_interval = []
for k, point in time_filtered_data["points"].iterrows():
    print(k)
    one_interval = []
    interval = dict()
    for sensor in sensors:
        interval[sensor] = time_filtered_data[sensor][k - 1]

    # keeps index of each sensor
    position = dict()
    # keeps value of last sensor event
    last_values = dict()
    for sensor in sensors:
        position[sensor] = 0
        last_values[sensor] = 0

    # find maximum first timestamp in all sensors
    current_time = -1
    for sensor in sensors:
        if interval[sensor]["time"].iloc[position[sensor]] > current_time:
            current_time = interval[sensor]["time"].iloc[position[sensor]]
        last_values[sensor] = interval[sensor][:].iloc[position[sensor]]
    
    # first result all values smaller then current_time
    while True:
        all_valid = True
        for sensor in sensors:
            if interval[sensor]["time"].iloc[position[sensor] + 1] <= current_time:
                position[sensor] += 1
                last_values[sensor] = interval[sensor][:].iloc[position[sensor]]
                all_valid = False
        if all_valid:
            break
    one_interval.append(last_values)
    result_interval.append(one_interval)
    
    # do until end of all sensor values
    while True:
        all_done = True
        # find minimum next timestamp in all sensor
        minimum = sys.maxsize
        for sensor in sensors:
            if position[sensor] + 1 >= len(interval[sensor]["time"]):
                continue
            if interval[sensor]["time"].iloc[position[sensor] + 1] < minimum:
                minimum = interval[sensor]["time"].iloc[position[sensor] + 1]
        current_time = minimum
        # now assign all sensor that have minimum timestamp
        for sensor in sensors:
            if position[sensor] + 1 >= len(interval[sensor]["time"]):
                continue
            if interval[sensor]["time"].iloc[position[sensor] + 1] <= current_time:
                position[sensor] += 1
                last_values[sensor] = interval[sensor][:].iloc[position[sensor]]
                all_done = False
        one_interval.append(last_values)
        if all_done:
            break
    result_interval.append(one_interval)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277


In [10]:
for sensor in sensors:
    print(position[sensor] + 1)

156
348
156
39
156
310


In [None]:
print(len(result_interval))

574


# make numpy arrays (without dicts)

In [68]:
final_result = []
for interval in result_interval:
    one_interval = []
    for i in interval:
        event = []
        for sensor in i.values():
            # except timestamp
            for value in sensor[1:]:
                event.append(value)
        np_event = np.asarray(event)
        one_interval.append(np_event)
    # make list to array, transpose and make 2d matrix
    final_result.append(np.array(np.asarray(one_interval).transpose()))

In [69]:
print(final_result[0].shape)
print(len(final_result))

(18, 300)
574


# split data in test and train (TODO with multiple participants)

In [70]:
x_train, x_test = train_test_split(final_result, test_size=1/3)

In [71]:
print(len(x_train), len(x_test))

382 192


# save dump pickles

In [72]:
x_train_path = str(Path.home()) + "/data/x_train-" +  str(PID) + "-" + smartphone + "-" + str(extension) + ".p"
x_test_path = str(Path.home()) + "/data/x_test-" +  str(PID) + "-" + smartphone + "-" + str(extension) + ".p"

In [73]:
pkl.dump(x_train, open( x_train_path, "wb" ))
pkl.dump(x_test, open( x_test_path, "wb" ))