# Imports

In [1]:
import pandas as pd
import math

# Importing matplotlib to plot images.
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

# for file-management
import pickle as pkl
from pathlib import Path
import os.path
import sys

Using TensorFlow backend.


# Participant Info

In [4]:
# Full study configuration: participant IDs 1-20 and four smartphone models.
# Lists (not sets) are used wherever iteration order is observable downstream.
participants = list(range(1, 21))
smartphones = ["N6", "N5X", "S4", "S3Mini"]
# TODO remove: temporary override restricting the run to a single recording.
participants = list(range(42, 43))
smartphones = ["N5X"]
# Numeric suffix that is part of every raw CSV file name.
extension = 0

# Sensor streams, in a FIXED order.
# The processing cell lays out the feature rows of its output arrays by
# iterating this collection. A set of strings iterates in a hash-dependent
# order that changes between kernel sessions, which would silently permute the
# features of the pickled data; a list keeps the layout reproducible.
sensors = [
    "acc",
    "gyro",
    "ori",
    "grav",
    "mag",
    "rot",
]

# Touch/task logs recorded alongside the sensor streams.
tasks = {
    "points",
    "fitts"
}

# Every CSV stem to load for one recording (order is irrelevant here).
file_names = set(sensors).union(tasks)

In [36]:
%%time
# Screen resolution (pixels) per supported device, used to normalise the touch
# coordinates to the 0..1 range. An unknown device raises KeyError instead of
# silently reusing the resolution of the previously processed device.
SCREEN_RESOLUTIONS = {
    "N5X": {"width": 1080, "height": 1920},
    "S3Mini": {"width": 480, "height": 800},
    "S4": {"width": 1080, "height": 1920},
    "N6": {"width": 1440, "height": 2560},
}


def _merge_sensor_streams(interval, sensor_order):
    """Merge the per-sensor samples of one interval into a common event list.

    Parameters
    ----------
    interval : dict
        Maps sensor name -> DataFrame with a "time" column holding the samples
        of that sensor for one interval (expected to be sorted by time).
    sensor_order : list of str
        Sensor names; fixes the key order of every returned snapshot.

    Returns
    -------
    list of dict
        One snapshot per distinct event time. Each snapshot maps sensor name ->
        the most recent sample row (pandas Series) of that sensor at that time.

    Raises
    ------
    ValueError
        If a sensor has no sample at all in the interval.
    """
    for sensor in sensor_order:
        if len(interval[sensor]) == 0:
            raise ValueError(f"sensor {sensor!r} has no samples in this interval")

    times = {sensor: interval[sensor]["time"] for sensor in sensor_order}
    position = {sensor: 0 for sensor in sensor_order}

    # Start at the latest "first timestamp": the earliest moment at which every
    # sensor has delivered at least one sample.
    current_time = max(times[sensor].iloc[0] for sensor in sensor_order)

    # Fast-forward each sensor to its last sample at or before the start time.
    # The bounds check matters: a sensor may have only a single sample.
    for sensor in sensor_order:
        while (position[sensor] + 1 < len(times[sensor])
               and times[sensor].iloc[position[sensor] + 1] <= current_time):
            position[sensor] += 1

    last_values = {sensor: interval[sensor].iloc[position[sensor]] for sensor in sensor_order}
    # Store a COPY: appending `last_values` itself would make every event alias
    # the same dict, so all events would end up showing the final state.
    one_interval = [dict(last_values)]

    # Step through the remaining samples in global time order.
    while True:
        pending = [
            times[sensor].iloc[position[sensor] + 1]
            for sensor in sensor_order
            if position[sensor] + 1 < len(times[sensor])
        ]
        if not pending:
            # All sensors exhausted; stop WITHOUT emitting a duplicate event.
            break
        current_time = min(pending)
        # Advance every sensor whose next sample carries that timestamp.
        for sensor in sensor_order:
            nxt = position[sensor] + 1
            if nxt < len(times[sensor]) and times[sensor].iloc[nxt] <= current_time:
                position[sensor] = nxt
                last_values[sensor] = interval[sensor].iloc[nxt]
        one_interval.append(dict(last_values))
    return one_interval


# The iteration order of the sensors defines the feature-row order of the
# output arrays. If `sensors` is an unordered set, sort it so that the layout
# does not depend on per-process string hashing; an ordered collection is
# honoured as declared.
sensor_order = sorted(sensors) if isinstance(sensors, (set, frozenset)) else list(sensors)

for PID in participants:
    for smartphone in smartphones:
        print("processing:", str(PID), smartphone)

        # set screen resolution
        pixels = SCREEN_RESOLUTIONS[smartphone]

        # Read Files
        raw_data = dict()
        missing_files = []
        for file in file_names:
            file_path = f"{Path.home()}/data/raw/fapra_imu-{PID}-{file}-{smartphone}-{extension}.csv"
            if not os.path.isfile(file_path):
                print(file_path + " not found")
                missing_files.append(file)
                continue
            # `sep` must be passed by keyword; newer pandas rejects it positionally.
            raw_data[file] = pd.read_csv(file_path, sep=";")
        if missing_files:
            # Every stream is needed below; skip this recording instead of
            # failing later with an unrelated KeyError.
            print("skipping:", str(PID), smartphone, "- missing files:", sorted(missing_files))
            continue

        # Interval k spans (fitts.time[k], points.time[k + 1]].
        # remove [1:] if both lengths are equal (first press is removed)
        bounds = [
            (int(raw_data["fitts"]["time"][k]), end)
            for k, end in enumerate(raw_data["points"]["time"][1:])
        ]

        # split by time into separate lists (one DataFrame per interval)
        time_filtered_data = dict()
        for name in file_names:
            frame = raw_data[name]
            time_filtered_data[name] = [
                frame.loc[(frame["time"] > start) & (frame["time"] <= end)]
                for start, end in bounds
            ]
        # list to dataframe
        time_filtered_data["points"] = pd.concat(time_filtered_data["points"])
        time_filtered_data["fitts"] = pd.concat(time_filtered_data["fitts"])

        # scale screen coordinates to fractions of the display size
        for column, side in (("x-press", "width"), ("x-circle", "width"),
                             ("y-press", "height"), ("y-circle", "height")):
            time_filtered_data["points"][column] = time_filtered_data["points"][column].div(pixels[side])

        # filter unique timestamps in EVERY interval of every sensor.
        # (Enumerating the points DataFrame would iterate its column names and
        # therefore only touch the first few intervals.)
        for sensor in sensor_order:
            time_filtered_data[sensor] = [
                chunk.drop_duplicates(subset="time", keep="last")
                for chunk in time_filtered_data[sensor]
            ]

        # Create Array
        result_interval = []
        for k, point in time_filtered_data["points"].iterrows():
            print("create-interval", k)
            # `k` is the row label from the raw points file; the row labelled k
            # closes interval k - 1 because the first press was dropped above.
            # NOTE(review): assumes the points CSV has a default 0..n-1 index.
            interval = {sensor: time_filtered_data[sensor][k - 1] for sensor in sensor_order}
            result_interval.append(_merge_sensor_streams(interval, sensor_order))

        # make numpy arrays (without dicts)
        print("make numpy-array interval")
        final_intervals = []
        for k, interval in enumerate(result_interval):
            print("transpose-interval", k)
            current_interval = [
                # all values of all sensors except each row's first column
                # (the timestamp)
                np.asarray([value for row in snapshot.values() for value in row.iloc[1:]])
                for snapshot in interval
            ]
            # make list to array, transpose and make 2d matrix (features x events)
            final_intervals.append(np.array(np.asarray(current_interval).transpose()))

        print(len(final_intervals))

        print("make numpy-array points")
        final_points = time_filtered_data["points"][['x-press','x-circle','y-press','y-circle']].values
        final_result = [final_points, final_intervals]

        # save dump pickles; the context manager guarantees the file is closed
        data_path = f"{Path.home()}/data/pickles/fapra_imu-processed-{PID}-{smartphone}.pkl"
        with open(data_path, "wb") as pickle_file:
            pkl.dump(final_result, pickle_file)

processing: 42 N5X
interval 1
interval 2
interval 3
interval 4
interval 5
interval 6
interval 7
interval 8
interval 9
interval 10
interval 11
interval 12
interval 13
interval 14
interval 15
interval 16
interval 17
interval 18
interval 19
interval 20
interval 21
interval 22
interval 23
interval 24
interval 25
interval 26
interval 27
interval 28
interval 29
interval 30
interval 31
interval 32
interval 33
interval 34
interval 35
interval 36
interval 37
interval 38
interval 39
interval 40
interval 41
interval 42
interval 43
interval 44
interval 45
interval 46
interval 47
interval 48
interval 49
interval 50
interval 51
interval 52
interval 53
interval 54
interval 55
interval 56
interval 57
interval 58
interval 59
interval 60
interval 61
interval 62
interval 63
interval 64
interval 65
interval 66
interval 67
interval 68
interval 69
interval 70
interval 71
interval 72
interval 73
interval 74
interval 75
interval 76
interval 77
interval 78
interval 79
interval 80
interval 81
interval 82
interv

In [70]:
# Split `final_result` into two parts, requesting one third for the second part.
# NOTE(review): `train_test_split` is not imported in any visible cell - presumably
# sklearn.model_selection.train_test_split; confirm and add it to the imports cell.
# NOTE(review): the processing cell builds `final_result` as the two-element list
# [final_points, final_intervals], so this call splits that pair rather than the
# individual samples - verify this is what is intended.
holdout_fraction = 1/3
x_train, x_test = train_test_split(final_result, test_size=holdout_fraction)

In [71]:
print(len(x_train), len(x_test))

382 192
