In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
import pickle
import time

In [2]:
# Name of the data-set variant this notebook is configured for.
DATA_SET = 'upper_indep'
# load data
# data = np.load(f'preprocessed/IMWUT_OnHW-chars_dataset_2020-09-22/{DATA_SET}.pkl', allow_pickle=True)
# set global variables
LOG = True  # when True, log() prints and plot() draws
SAMPLE_LENGTH = 64 # resampled to 64 in preprocessing.ipynb
N_CHANNELS = 13
# NOTE(review): two 26-letter alphabets would give 52 classes — confirm that 54 is intended.
CLASSES = 54 # in the case of 'both' lower and upper case.
# Use the `in` operator rather than calling the __contains__ dunder directly.
if 'lower' in DATA_SET or 'upper' in DATA_SET:
    CLASSES = 26 # in the case of 'lower' or 'upper' case.


In [3]:
# Per-sensor scale constants; normalize() divides the 13 channels by these
# (gyroscope, magnetometer and force use half of their constant).
# NOTE(review): presumably the raw full-scale magnitude of each sensor — TODO confirm.
MAX_ACC_FRONT = 2 ** 15  # 32768
MAX_GYR = 2 ** 15        # 32768
MAX_ACC_BACK = 2 ** 13   # 8192
MAX_MAG = 2 ** 13        # 8192
MAX_FORCE = 2 ** 12      # 4096

# Number of timesteps every sample is resampled to; mirrors SAMPLE_LENGTH.
DESIRED_SAMPLE_LENGTH = SAMPLE_LENGTH

def log(o):
    """Print ``o`` when the module-level ``LOG`` flag is truthy; otherwise do nothing."""
    if not LOG:
        return
    print(o)

################# util functions #############

def remove_empty(xs, ys):
    """Drop zero-length samples from ``xs`` together with their labels in ``ys``.

    Args:
        xs: sequence of samples; a sample counts as empty when ``len(sample) == 0``.
        ys: labels aligned index-by-index with ``xs``.

    Returns:
        ``(xs, ys)``. When no sample is empty the inputs are returned unchanged.
        Otherwise both are NumPy arrays holding only the kept entries, in their
        original order.
    """
    # One pass to decide what to keep; the previous np.delete-per-item loop
    # copied both arrays for every removed sample.
    keep = np.array([len(x) != 0 for x in xs], dtype=bool)
    if keep.all():
        return xs, ys

    if not isinstance(xs, np.ndarray):
        # Box each sample individually so that ragged samples end up in a 1-D
        # object array instead of being broadcast or rejected by np.asarray.
        boxed = np.empty(len(xs), dtype=object)
        for idx, x in enumerate(xs):
            boxed[idx] = x
        xs = boxed

    return xs[keep], np.asarray(ys)[keep]

def reshape(xs):
    """Transpose every element of ``xs`` in place and return ``xs``.

    Each ``xs[i]`` is replaced by ``np.transpose(xs[i])``; the container itself
    is mutated and handed back, not copied.
    """
    for idx, sample in enumerate(xs):
        xs[idx] = np.transpose(sample)
    return xs
        # log(xs[i].shape)
    # new_xs = []
    # for i in range(len(xs)):
    #     channels = []
    #     try:
    #         xs[i][0]
    #     except:
    #         log(xs[i])
    #     for k in range(len(xs[i][0])):
    #       channels.append([])
    #     for j in range(len(xs[i])):
    #       for k in range(len(xs[i][j])):
    #         channels[k].append(xs[i][j][k])
    #     new_xs.append([])
    #     new_xs[i] = channels
    # return new_xs


def unreshape(xs):
    """Undo ``reshape`` by applying it a second time.

    ``reshape`` transposes every element, so calling it again restores the
    axis order of 2-D samples. ``xs`` is mutated in place and returned.
    """
    restored = reshape(xs)
    return restored
    # old_xs = []
    # for i in range(len(xs)):
    #     sensors = []
    #     for j in range(len(xs[i][0])):
    #         sensors.append([])
    #     for k in range(len(xs[i])):
    #         for j in range(len(xs[i][k])):
    #             sensors[j].append(xs[i][k][j])
    #     old_xs.append(sensors)
    # return old_xs

def resample(xs, desired_sample_length):
    """Linearly resample every row of every sample to ``desired_sample_length`` points.

    Each ``xs[i]`` is indexed as ``xs[i][row][position]``. The new sample
    positions are ``desired_sample_length`` evenly spaced values starting at 0
    with step ``len(xs[i][0]) / desired_sample_length``; each row is
    interpolated at those positions with ``np.interp``.

    ``xs`` is modified in place (each element is replaced by a new 2-D array)
    and returned.

    Raises:
        ValueError: if an interpolated row does not have ``desired_sample_length`` entries.
    """
    for idx, sample in enumerate(xs):
        n_positions = len(sample[0])
        # The trailing slice guards against np.arange yielding one extra
        # element when the float step rounds unfavourably.
        query = np.arange(0, n_positions, n_positions / desired_sample_length)[:desired_sample_length]
        rows = []
        for row in sample:
            known = np.arange(0, len(row))
            interpolated = np.interp(query, known, row)
            if len(interpolated) != desired_sample_length:
                raise ValueError()
            rows.append(interpolated)
        xs[idx] = np.array(rows)
    return xs

def reshape_resample_unreshape(xss, desired_sample_length):
    """Transpose each sample, resample it, then transpose it back.

    Chains ``reshape`` -> ``resample`` -> ``unreshape``. All three mutate
    their argument in place, so ``xss`` itself is modified and returned.
    """
    return unreshape(resample(reshape(xss), desired_sample_length))

def normalize(xs, scale=None):
    """Scale the 13 channels of every sample in place and shift the last one by -1.

    Args:
        xs: sequence of samples indexed as ``xs[i][timestep][channel]`` with
            13 channels. Samples are divided in place, so they are expected
            to be float arrays.
        scale: optional sequence of 13 divisors, one per channel. Defaults to
            the module constants: ``MAX_ACC_FRONT`` x3, ``MAX_ACC_BACK`` x3,
            ``MAX_GYR/2`` x3, ``MAX_MAG/2`` x3, ``MAX_FORCE/2``.

    Returns:
        ``xs`` itself, after modification.
    """
    if scale is None:
        scale = ([MAX_ACC_FRONT] * 3
                 + [MAX_ACC_BACK] * 3
                 + [MAX_GYR / 2] * 3
                 + [MAX_MAG / 2] * 3
                 + [MAX_FORCE / 2])
    # Built once: the divisor does not depend on the sample.
    divisor = np.asarray(scale, dtype=float).reshape(1, 13)

    for i in range(len(xs)):
        xs[i] /= divisor
        # Channel 12 was divided by half of its constant; subtracting 1
        # shifts it down by one unit for every timestep at once.
        xs[i][:, 12] -= 1
    return xs

def remove_hover(xs):
    """Trim leading and trailing timesteps whose channel 12 is below 0.2.

    For each sample, the kept slice runs from the first timestep whose
    channel-12 value is not below 0.2 up to and including the last such
    timestep. A sample with no such timestep yields an empty slice.

    Returns:
        A new list of slices; ``xs`` itself is not modified.
    """
    def is_hover(step):
        return step[12] < 0.2

    trimmed = []
    for sample in xs:
        n_steps = len(sample)
        # First non-hover index, or n_steps when every timestep is hover.
        first = next((t for t in range(n_steps) if not is_hover(sample[t])), n_steps)
        # Last non-hover index, searched from the end down to index 1; falls back to 0.
        last = next((t for t in range(n_steps - 1, 0, -1) if not is_hover(sample[t])), 0)
        trimmed.append(sample[first:last + 1])
    return trimmed



def resample_normalize_fold(fold):
    """Clean, resample and normalize one ``(xtrain, ytrain, xtest, ytest)`` fold.

    Steps, applied to the train and test parts alike: drop empty samples with
    ``remove_empty``, resample to ``DESIRED_SAMPLE_LENGTH`` with
    ``reshape_resample_unreshape``, then scale with ``normalize``.

    Returns:
        ``(xtrain, ytrain, xtest, ytest)`` as NumPy arrays.
    """
    xtrain, ytrain, xtest, ytest = fold
    xtrain, ytrain = remove_empty(xtrain, ytrain)
    xtest, ytest = remove_empty(xtest, ytest)

    xtrain = normalize(reshape_resample_unreshape(xtrain, DESIRED_SAMPLE_LENGTH))
    xtest = normalize(reshape_resample_unreshape(xtest, DESIRED_SAMPLE_LENGTH))

    # The array -> nested list -> array round trip presumably converts an
    # object array of equally shaped samples into one dense array — TODO confirm.
    return (
        np.array(np.array(xtrain).tolist()),
        np.array(ytrain),
        np.array(np.array(xtest).tolist()),
        np.array(ytest),
    )

def resample_normalize_hoverless_fold(fold):
    """Like ``resample_normalize_fold`` but trims hover timesteps first.

    Steps, applied to the train and test parts alike: drop empty samples with
    ``remove_empty``, trim with ``remove_hover``, drop samples that the trim
    reduced to zero timesteps (together with their labels), resample to
    ``DESIRED_SAMPLE_LENGTH`` and scale with ``normalize``.

    Returns:
        ``(xtrain, ytrain, xtest, ytest)`` as NumPy arrays.
    """
    def _drop_emptied(xs, ys):
        # remove_hover returns an empty slice for a sample with no timestep at
        # or above its threshold; resample cannot handle a zero-length sample,
        # so discard those while keeping samples and labels aligned.
        keep = [k for k, x in enumerate(xs) if len(x) > 0]
        if len(keep) == len(xs):
            return xs, ys
        return [xs[k] for k in keep], np.asarray(ys)[keep]

    xtrain, ytrain, xtest, ytest = fold
    xtrain, ytrain = remove_empty(xtrain, ytrain)
    xtest, ytest = remove_empty(xtest, ytest)

    xtrain, ytrain = _drop_emptied(remove_hover(xtrain), ytrain)
    xtest, ytest = _drop_emptied(remove_hover(xtest), ytest)

    xtrain = reshape_resample_unreshape(xtrain, DESIRED_SAMPLE_LENGTH)
    xtest = reshape_resample_unreshape(xtest, DESIRED_SAMPLE_LENGTH)
    xtrain = normalize(xtrain)
    xtest = normalize(xtest)

    # The array -> nested list -> array round trip presumably converts an
    # object array of equally shaped samples into one dense array — TODO confirm.
    xtrain = np.array(np.array(xtrain).tolist())
    xtest = np.array(np.array(xtest).tolist())
    ytrain = np.array(ytrain)
    ytest = np.array(ytest)
    return xtrain, ytrain, xtest, ytest


def bounds(xs, n_channels=13):
    """Return the per-channel minimum and maximum over all samples and timesteps.

    Args:
        xs: sequence of samples indexed as ``xs[i][timestep][channel]``.
        n_channels: number of leading channels to inspect (default 13).

    Returns:
        ``(mins, maxs)``: two lists of length ``n_channels``. A channel for
        which no value was seen reports 0 for both.
    """
    # Start from "nothing seen" rather than 0: seeding with 0 reported a
    # minimum of 0 for all-positive channels and a maximum of 0 for
    # all-negative ones.
    mins = [None] * n_channels
    maxs = [None] * n_channels
    for sample in xs:
        # per item
        for step in sample:
            # per timestamp
            for k in range(n_channels):
                value = step[k]
                if value != value:
                    # NaN never compares greater or smaller; skip it.
                    continue
                if maxs[k] is None or value > maxs[k]:
                    maxs[k] = value
                if mins[k] is None or value < mins[k]:
                    mins[k] = value
    mins = [0 if m is None else m for m in mins]
    maxs = [0 if m is None else m for m in maxs]
    return mins, maxs

def log_bounds(xs):
    """Log the per-channel ``(min, max)`` pairs of ``xs`` when ``LOG`` is truthy."""
    if not LOG:
        return
    mins, maxs = bounds(xs)
    log(f'min and max per channel: {list(zip(mins, maxs))}')




def find(l, condition):
    """Return the indices of the elements of ``l`` for which ``condition`` is truthy.

    Args:
        l: iterable of elements to test.
        condition: callable taking one element and returning a truthy or falsy value.

    Returns:
        List of matching indices in ascending order (empty when nothing matches).
    """
    return [index for index, element in enumerate(l) if condition(element)]

def resample_normalize(data_set):
    """Load the pickled folds of ``data_set`` and run ``resample_normalize_fold`` on each.

    Reads ``../IMWUT_OnHW-chars_dataset_2020-09-22/{data_set}.pkl`` and returns
    the loaded container with every fold replaced by its processed version.
    """
    # NOTE(review): pickle.load can execute arbitrary code from the file; only
    # point this at dataset files from a trusted source.
    with open(f'../IMWUT_OnHW-chars_dataset_2020-09-22/{data_set}.pkl', 'rb') as file:
        folds = pickle.load(file)
    for idx, fold in enumerate(folds):
        folds[idx] = resample_normalize_fold(fold)
    return folds

def resample_normalize_hoverless(data_set):
    """Load the pickled folds of ``data_set`` and run ``resample_normalize_hoverless_fold`` on each.

    Reads ``../IMWUT_OnHW-chars_dataset_2020-09-22/{data_set}.pkl`` and returns
    the loaded container with every fold replaced by its processed version.
    """
    # NOTE(review): pickle.load can execute arbitrary code from the file; only
    # point this at dataset files from a trusted source.
    with open(f'../IMWUT_OnHW-chars_dataset_2020-09-22/{data_set}.pkl', 'rb') as file:
        folds = pickle.load(file)
    for idx, fold in enumerate(folds):
        folds[idx] = resample_normalize_hoverless_fold(fold)
    return folds

def plot(data, labels, n_ex, n_channels):
    """Plot the first ``n_channels`` channels of the first ``n_ex`` samples.

    Does nothing unless ``LOG`` is truthy. The figure is a grid with one
    column per sample and one row per channel; ``data`` is indexed as
    ``data[sample][timestep][channel]``.

    ``plt.title(labels[i])`` is issued after a sample's channels are drawn, so
    it presumably lands on the last subplot of that column — TODO confirm.
    """
    if not LOG:
        return

    # dimensions of the plot in inches
    plt.figure(figsize=(n_ex*5, 20*n_channels/N_CHANNELS))
    for col in range(n_ex):
        # One list of values over time per sensor channel.
        series = [[step[ch] for step in data[col]] for ch in range(n_channels)]
        for row, values in enumerate(series):
            plt.subplot(n_channels, n_ex, n_ex*row + col + 1)
            plt.plot(values)
        plt.title(labels[col])
    plt.show()


def dump_data(data, data_set):
    """Pickle ``data`` to ``preprocessed/{data_set}.pkl``.

    The target directory is created when it does not exist yet, so the call
    no longer fails with FileNotFoundError on a fresh checkout.
    """
    import os  # local import: the notebook's import cell does not bring in os

    path = f'preprocessed/{data_set}.pkl'
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as file:
        pickle.dump(data, file)

def dump_data_hoverless(data, data_set):
    """Pickle ``data`` to ``preprocessed/hoverless/{data_set}.pkl``.

    The target directory is created when it does not exist yet, so the call
    no longer fails with FileNotFoundError on a fresh checkout.
    """
    import os  # local import: the notebook's import cell does not bring in os

    path = f'preprocessed/hoverless/{data_set}.pkl'
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as file:
        pickle.dump(data, file)

def preprocess(data_set):
    """Resample and normalize ``data_set``, then pickle the result via ``dump_data``."""
    dump_data(resample_normalize(data_set), data_set)

def preprocess_hoverless(data_set):
    """Run the hover-trimming pipeline on ``data_set`` and pickle the result via ``dump_data_hoverless``."""
    dump_data_hoverless(resample_normalize_hoverless(data_set), data_set)

def preprocess_all():
    """Run ``preprocess`` on each of the six data-set variants, in order."""
    # Narrow this tuple (e.g. to ('both_indep',)) for a quick single-set run.
    names = ('both_dep', 'both_indep', 'lower_dep', 'lower_indep', 'upper_dep', 'upper_indep')
    for name in names:
        preprocess(name)

def preprocess_all_hoverless():
    """Run ``preprocess_hoverless`` on each of the six data-set variants, in order."""
    # Narrow this tuple (e.g. to ('both_indep',)) for a quick single-set run.
    names = ('both_dep', 'both_indep', 'lower_dep', 'lower_indep', 'upper_dep', 'upper_indep')
    for name in names:
        preprocess_hoverless(name)


In [5]:
# Run the standard pipeline for every data-set variant; each result is pickled
# under preprocessed/. The hover-trimming variant below is currently disabled.
preprocess_all()
# preprocess_all_hoverless()




  return array(a, dtype, copy=False, order=order)
