In [13]:
import sys
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from scipy import signal
import csv

In [14]:
# Activity labels; the position of a label in this list is the integer class id
# written to the feature file.
classes = [
    'run', 'walk', 'stand', 'sit',
    'sit-to-stand', 'stand-to-sit',
    'stair-up', 'stair-down',
    'jump-one-leg', 'jump-two-leg',
    'curve-left-step', 'curve-right-step',
    'curve-left-spin-Lfirst', 'curve-left-spin-Rfirst',
    'curve-right-spin-Lfirst', 'curve-right-spin-Rfirst',
    'lateral-shuffle-left', 'lateral-shuffle-right',
    'v-cut-left-Lfirst', 'v-cut-left-Rfirst',
    'v-cut-right-Lfirst', 'v-cut-right-Rfirst',
]

# Sensor channel names; feature_extractor pairs sensors[i] with column i of a recording.
sensors = [
    'EMG1', 'EMG2', 'EMG3', 'EMG4',
    'Microphone',
    'ACC upper X', 'ACC upper Y', 'ACC upper Z',
    'Goniometer X',
    'ACC lower X', 'ACC lower Y', 'ACC lower Z',
    'Goniometer Y',
    'Gyro upper X', 'Gyro upper Y', 'Gyro upper Z',
    'Gyro lower X', 'Gyro lower Y', 'Gyro lower Z',
]

# NOTE(review): machine-specific absolute path; must be adjusted on any other machine.
data_path = "/Users/thomasklein/Projects/BremenBigDataChallenge2019/bbdc_2019_Bewegungsdaten/"

# Per-sensor settings, unpacked by variance_feature_extractor as
# (windowsize, h_thresh, w_thresh, n).
variance_analysis_params = {
    'EMG1':         (100, 0.5, 20, 6),
    'EMG2':         (200, 0.5, 20, 10),
    'EMG3':         (200, 0.2, 20, 6),
    'EMG4':         (200, 0.5, 20, 6),
    'Microphone':   (200, 0.6, 20, 4),
    'ACC upper X':  (200, 0.2, 20, 6),
    'ACC upper Y':  (200, 0.5, 20, 5),
    'ACC upper Z':  (200, 0.5, 20, 5),
    'Goniometer X': (200, 0.5, 20, 6),
    'ACC lower X':  (200, 0.2, 20, 6),
    'ACC lower Y':  (200, 0.2, 20, 6),
    'ACC lower Z':  (200, 0.2, 20, 6),
    'Goniometer Y': (200, 0.5, 20, 6),
    'Gyro upper X': (200, 0.5, 20, 6),
    'Gyro upper Y': (200, 0.5, 20, 6),
    'Gyro upper Z': (200, 0.5, 20, 6),
    'Gyro lower X': (200, 0.5, 20, 6),
    'Gyro lower Y': (200, 0.5, 20, 6),
    'Gyro lower Z': (200, 0.5, 20, 6),
}


In [15]:
def variance_filter(data, windowsize):
    """
    Filters a signal by sliding a window over it, under which the spread of the signal is measured.

    Despite the name, the spread measure is the standard deviation (np.std), not the
    variance. Output element k is the standard deviation of data[k : k + 2*half] with
    half = windowsize // 2, i.e. of a window centred on time point t = k + half. The
    result is divided by its maximum so that the largest value is 1.

    arguments:
        data = array of samples; the window slides along the first axis
        windowsize = nominal window length in samples; the effective window always has
                     the even length 2 * (windowsize // 2)

    returns:
        1d-array of length len(data) - 2 * (windowsize // 2). If the signal is constant
        in every window (maximum spread 0), all zeros are returned instead of dividing
        by zero.

    raises:
        ValueError if windowsize < 2 or if data is not longer than the effective window.
    """
    data = np.asarray(data)
    half = windowsize // 2
    width = 2 * half
    if half < 1:
        raise ValueError("windowsize must be at least 2, got {}".format(windowsize))

    # Size the output from the effective window, not from windowsize itself:
    # for odd windowsize the two differ by one and the old buffer was one element short.
    n_out = data.shape[0] - width
    if n_out < 1:
        raise ValueError(
            "data has {} samples but more than {} are needed for windowsize {}".format(
                data.shape[0], width, windowsize))

    # All windows at once as a read-only strided view (no Python-level loop).
    # The view holds len(data) - width + 1 windows; the last one is dropped to keep
    # the same output length as the per-sample loop this replaces.
    windows = np.lib.stride_tricks.sliding_window_view(data, width, axis=0)[:n_out]
    # Reduce over every axis except the window index so extra trailing axes of
    # `data` are pooled into the window.
    res = windows.std(axis=tuple(range(1, windows.ndim)))

    peak = res.max()
    if peak == 0:
        # flat signal: nothing to normalise, avoid 0/0 -> NaN
        return res
    return res / peak

def peakfinder(data, h_thresh, w_thresh, n):
    """
    Finds peaks in a 1d signal and returns the n highest ones as an array of shape (3, n):

    location1, location2, ...  location_n
    height1,   height2,   ...  height_n
    width1,    width2,    ...  width_n

    Columns are ordered by descending peak height (ties keep their order of occurrence).
    Locations are expressed relative to the earliest detected peak, so that peak has
    location zero (it only appears in the output if it is among the n highest).
    All values are rounded to 3 decimal places.

    arguments:
        data = 1d-array (of variance intensity values)
        h_thresh = the threshold for the minimum height a peak needs to have
        w_thresh = the threshold for the minimum width a peak needs to have
        n = the number of peaks to extract

    returns:
        float array of shape (3, n); if fewer than n peaks are found the remaining
        columns are zero, and if no peak is found the whole array is zero.
    """
    locations, properties = signal.find_peaks(data, height=h_thresh, width=w_thresh)

    # if no peaks were found, return an all-zero block
    if locations.size == 0:
        return np.zeros((3, n))

    heights = properties['peak_heights']
    # 'widths' is the peak width in samples; 'width_heights' (used previously) is the
    # signal level at which that width is measured, not a width.
    widths = properties['widths']

    # shift locations so the earliest peak sits at zero (find_peaks returns
    # locations in increasing order)
    shifted = locations - locations[0]

    # stacking upcasts the integer locations to float
    results = np.stack((shifted, heights, widths))

    # sort by height, descending: negate the heights, not the argsort indices
    order = np.argsort(-heights, kind='stable')

    # keep the n highest peaks
    results = results[:, order[:n]]

    # rounding to 3 decimal places
    results = np.round(results, 3)

    # if not enough peaks found, fill up with zeros
    missing = n - results.shape[1]
    if missing > 0:
        results = np.pad(results, ((0, 0), (0, missing)), 'constant', constant_values=0)

    return results

In [16]:
def variance_analysis(data, windowsize, h_thresh, w_thresh, n):
    """
    Runs the sliding-window filter on one channel and extracts its peak features.

    data = 1d-numpy array of shape [timesteps]
    windowsize is handed to variance_filter; h_thresh, w_thresh and n are handed
    to peakfinder.

    Returns the peakfinder result flattened to one dimension.
    """
    filtered = variance_filter(data, windowsize)
    peak_table = peakfinder(filtered, h_thresh, w_thresh, n)
    return peak_table.flatten()


def variance_feature_extractor(data, sensor):
    """
    Calls variance_analysis with the parameters configured for the given sensor.

    data = the samples of one sensor channel
    sensor = key into variance_analysis_params; a missing key raises KeyError

    Returns whatever variance_analysis returns for these settings.
    """
    settings = variance_analysis_params[sensor]
    window, min_height, min_width, n_peaks = settings
    return variance_analysis(data, window, min_height, min_width, n_peaks)

In [17]:
def feature_extractor(data):
    """
    Builds the flat feature list for one recording.

    data = 2d-numpy array of shape [timesteps, sensors]; column i is processed
    with the settings of sensors[i].

    Returns a Python list holding the per-sensor features one after another,
    in the order of `sensors`.
    """
    collected = []
    for column, name in enumerate(sensors):
        channel = data[:, column]
        collected.extend(variance_feature_extractor(channel, name))
    return collected

In [18]:
def dataset_creator(file, out_file="features.csv"):
    """
    Extracts features for every recording listed in an index csv and appends them to a csv file.

    arguments:
        file = path of an index csv with the columns 'Datafile' (recording path,
               relative to data_path) and 'Label' (one of the entries in `classes`)
        out_file = csv file the feature rows are written to (default "features.csv"
                   in the working directory)

    Each output row holds the features from feature_extractor followed by the integer
    class id (the position of the label in `classes`). The output file is opened in
    append mode, so rows accumulate across calls; remove the file first for a clean
    rebuild. Rows are written one by one, so an interrupted run leaves the rows
    finished so far in the file.

    raises:
        ValueError if a label is not contained in `classes`.
    """
    df = pd.read_csv(file)
    with open(out_file, "a", newline='') as f:
        # one writer for the whole run instead of a new one per row
        writer = csv.writer(f)
        for path, label in zip(df['Datafile'], df['Label']):
            # NOTE(review): pd.read_csv treats the first line as a header by default;
            # if the recording files have no header row the first sample is dropped
            # here - confirm against the data files.
            data = pd.read_csv(data_path + path).values
            features = feature_extractor(data)
            features.append(classes.index(label))
            writer.writerow(features)

In [19]:
# Build the feature table for the training index (train.csv under data_path).
# Rows are appended to features.csv in the working directory, so re-running this
# cell adds to whatever an earlier (possibly interrupted) run already wrote.
dataset_creator(data_path+"train.csv")

KeyboardInterrupt: 