# Figh Hive Project

In [1]:
import keras.layers
import pandas as pd
import numpy as np
import scipy
import scipy.signal
import matplotlib.pyplot as plt

In [2]:
''' If you have pyqt installed, this command will pop out interactive windows for graphs'''
%matplotlib qt

In [3]:
# Lets read in our data
import os
directory = 'Data/'
path = 'Figh_Data/Fist/Aden/1.csv'
myo_df = pd.read_csv(directory + path)
myo_df.columns = myo_df.columns.str.replace(' ', '')
# myo_df = myo_df.groupby('Arm').get_group('left') # This only needs to be done if you have two Myos running at the same time
display(myo_df)

Unnamed: 0,Unnamed:0,Time,DeviceID,DeviceID_Unit,Warm?,Sync,Arm,Arm_Unit,Timestamp,Timestamp_Unit,...,session_id,hostname,v4address,v4data_port,v4service_port,v6address,v6data_port,v6service_port,stream_id,effective_srate
0,0,4627.134319,2.172860e+12,Number,0.0,1.0,0.0,Arm,4627.1387,Time,...,default,GLE-1000-PC07,,16572,16572,,16572,16572,1,199.471698
1,1,4627.139332,2.172860e+12,Number,0.0,1.0,0.0,Arm,4627.1387,Time,...,default,GLE-1000-PC07,,16572,16572,,16572,16572,1,199.471698
2,2,4627.144345,2.172860e+12,Number,0.0,1.0,0.0,Arm,4627.1396,Time,...,default,GLE-1000-PC07,,16572,16572,,16572,16572,1,199.471698
3,3,4627.149359,2.172860e+12,Number,0.0,1.0,0.0,Arm,4627.1396,Time,...,default,GLE-1000-PC07,,16572,16572,,16572,16572,1,199.471698
4,4,4627.154372,2.172860e+12,Number,0.0,1.0,0.0,Arm,4627.1610,Time,...,default,GLE-1000-PC07,,16572,16572,,16572,16572,1,199.471698
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1059,1059,4632.443356,2.172860e+12,Number,0.0,1.0,0.0,Arm,4632.4360,Time,...,default,GLE-1000-PC07,,16572,16572,,16572,16572,1,199.471698
1060,1060,4632.448369,2.172860e+12,Number,0.0,1.0,0.0,Arm,4632.4570,Time,...,default,GLE-1000-PC07,,16572,16572,,16572,16572,1,199.471698
1061,1061,4632.453382,2.172860e+12,Number,0.0,1.0,0.0,Arm,4632.4570,Time,...,default,GLE-1000-PC07,,16572,16572,,16572,16572,1,199.471698
1062,1062,4632.458396,2.172860e+12,Number,0.0,1.0,0.0,Arm,4632.4585,Time,...,default,GLE-1000-PC07,,16572,16572,,16572,16572,1,199.471698


In [4]:
'''These are the columns I want to keep to do training on'''
Good_cols = ["Orientation_W",
             "Orientation_X",
             "Orientation_Y",
             "Orientation_Z",
             "Acc_X",
             "Acc_Y",
             "Acc_Z",
             "Gyro_X",
             "Gyro_Y",
             "Gyro_Z",
             "EMG_1",
             "EMG_2",
             "EMG_3",
             "EMG_4",
             "EMG_5",
             "EMG_6",
             "EMG_7",
             "EMG_8",
             "Roll",
             "Pitch",
             "Yaw"]

'''These are the columns I want to keep to make the training set'''
Better_cols = ["Orientation_W",
             "Orientation_X",
             "Orientation_Y",
             "Orientation_Z",
             "Acc_X",
             "Acc_Y",
             "Acc_Z",
             "Gyro_X",
             "Gyro_Y",
             "Gyro_Z",
             "EMG_1",
             "EMG_2",
             "EMG_3",
             "EMG_4",
             "EMG_5",
             "EMG_6",
             "EMG_7",
             "EMG_8",
             "Roll",
             "Pitch",
             "Yaw",
             "fist",
             "high",
             "gun",
             "rest" ]

Y_cols = ["fist",
         "high",
         "gun",
         "rest"]

In [5]:
''' Imports and useful functions'''
import pyxdf

def xdf_to_dataframe(xdf_data):
    ''' Xdf Data should be a list of streams (dictionaries)
        Function returns a dictionary of dataframes, one dataframe per stream'''
    dataframes = {}
    for stream in xdf_data:
        df = pd.DataFrame()
        data = stream['time_series']
        timestamps = stream['time_stamps']
        df['Time'] = timestamps
        chan_names, units = get_channel_names(stream['info'])
        counts = data.shape[0]
        for series, name, unit in zip(range(data.shape[1]), chan_names, units):
            df[name[0]]  = data[:, series]
            if unit:
                df[name[0] + '_Unit'] = np.repeat(unit, counts)

        for item in stream['info']:
            if item not in ['name', 'desc', 'data']:
                try:
                    df[item] = np.repeat(stream['info'][item], counts)
                except:
                    continue
        dataframes[stream['info']['name'][0]] = df

    return dataframes

def get_channel_names(info):
    channels = info['desc'][0]['channels'][0]['channel']
    names = [chan['label'] for chan in channels ]
    units = [chan['unit'] for chan in channels ]
    return names, units

# Myo Data Processing
The following cells are for EMG data processing from the sample file

In [6]:
 # Lets read in our data
import os
directory = 'Data/Figh_Data/'
for move in ["Fist/", "Gun/", "High/"]:
    for partner in ["Aden/", "Evan/", "Will/"]:
        for num in range(1, 21):
            data, header = pyxdf.load_xdf(directory + move + partner + str(num) + ".xdf")
            dfs = xdf_to_dataframe(data)
            ''' Save new dataframe'''
            dfs['Thalmic Labs MyoMyo'].to_csv(directory + move + partner + str(num) + ".csv")

In [7]:
'''
Preprocessing functions
'''
import scipy as sp
import scipy.signal

def filteremg_BP(emg, low_pass=3, sfreq=250, high_band=20, low_band=95):
    """
    emg: EMG data
    high: high-pass cut off frequency
    low: low-pass cut off frequency
    sfreq: sampling frequency
    """
    # Zero mean emg signal
    emg = emg - emg.mean()

    # normalise cut-off frequencies to sampling frequency
    high_band = high_band/(sfreq/2)
    low_band = low_band/(sfreq/2)


    # create bandpass filter for EMG
    b1, a1 = sp.signal.butter(4, [high_band,low_band], btype='bandpass', analog=True)

    # process EMG signal: filter EMG
    emg_filtered = sp.signal.filtfilt(b1, a1, emg)

    # process EMG signal: rectify
    emg_rectified = abs(emg_filtered)

    # create notch filter and apply it
    b1, a1 = sp.signal.iirnotch(60, 30,sfreq)
    emg_finished = sp.signal.filtfilt(b1, a1, emg_rectified)

    return emg_finished

In [8]:
import math

'''Do all preprocessing for the different datas'''

'''
Key:
    Fist = 1
    Gun = 2
    High Five = 3
    Resting = 0
'''

directory = 'Data/Figh_Data/'
for move in ["Fist/", "Gun/", "High/"]:
    for partner in ["Aden/", "Evan/", "Will/"]:
        for num in range(1, 21):
            myo_df = pd.read_csv(directory + move + partner + str(num) + ".csv")
            myo_df.columns = myo_df.columns.str.replace(' ', '')

            '''rectify emg data'''
            for col in ['EMG_' + str(i) for i in range(1, 9)]:
                myo_df[col] = myo_df[col].apply(abs)

            '''Rolling average'''
            myo_df = myo_df.rolling(400).mean()

            # """Apply Filter"""
            # emg_keys = ['EMG_' + str(i) for i in range(1, 9)]
            # myo_df[emg_keys] = myo_df[emg_keys].apply(filteremg_BP,raw=True)

            '''Keep only the good columns and fix index/nan issues'''
            myo_df = myo_df[Good_cols]
            myo_df = myo_df.dropna()
            myo_df = myo_df.reset_index()
            '''Get rid of first and last ~15% for resting stuff'''
            maxIndex = len(myo_df)
            minCut = math.floor(maxIndex*.15)
            maxCut = math.floor(maxIndex*.85)
            myo_df = myo_df.iloc[minCut:maxCut+1, :]

            myo_df = myo_df.reset_index()
            myo_df = myo_df[Good_cols]
            if (move == "Fist/"):
                myo_df['fist'] = 1
                myo_df['gun'] = 0
                myo_df['high'] = 0
                myo_df['rest'] = 0
            elif (move == "Gun/"):
                myo_df['fist'] = 0
                myo_df['gun'] = 1
                myo_df['high'] = 0
                myo_df['rest'] = 0
            elif (move == "High/"):
                myo_df['fist'] = 0
                myo_df['gun'] = 0
                myo_df['high'] = 1
                myo_df['rest'] = 0
            else:
                myo_df['fist'] = 0
                myo_df['gun'] = 0
                myo_df['high'] = 0
                myo_df['rest'] = 1

            myo_df.to_csv(directory + move + partner + str(num) + "_r.csv")

myo_df = pd.read_csv("Data/Figh_Data/Fist/Aden/1_r.csv")

for channel in range(1,9):
    plt.figure()
    ax = myo_df['EMG_' + str(channel)].plot()
    plt.title('EMG_' + str(channel))
    plt.ylabel('mVolts')
    plt.xlabel('Time')

for channel in ['X', 'Y', 'Z']:
    plt.figure()
    myo_df['Acc_' + channel].plot()
    plt.title('Acc_' + str(channel))
    plt.ylabel('g')
    plt.xlabel('Time')



In [9]:
'''Generate one big sample-set'''
directory = 'Data/Figh_Data/'
bigData = pd.DataFrame()
for move in ["Fist/", "Gun/", "High/"]:
    for partner in ["Aden/", "Evan/", "Will/"]:
        for num in range(1, 21):
            myo_df = pd.read_csv(directory + move + partner + str(num) + "_r.csv")
            bigData = pd.concat([bigData, myo_df])
bigData = bigData[Better_cols]
bigData.to_csv(directory + "bigData.csv")



In [10]:
'''
Model functions
'''
def build_timeseries(mat, y_col_index, TIME_STEPS):
    # y_col_index is the index of column that would act as output column
    # total number of time-series samples would be len(mat) - TIME_STEPS
    dim_0 = mat.shape[0] - TIME_STEPS
    dim_1 = mat.shape[1]
    x = np.zeros((dim_0, TIME_STEPS, dim_1))
    y = np.zeros((dim_0,))

    for i in range(dim_0-4):
        x[i] = mat[i:TIME_STEPS+i]
        y[i] = mat[TIME_STEPS+i+4, y_col_index]
    print("length of time-series i/o",x.shape,y.shape)
    return x, y

#If data set is 1000
#1000-365 days in the future = 635
#Fix by making it x = np.zeroes(dim0-365)

def trim_dataset(mat, batch_size):
    """
    trims dataset to a size that's divisible by BATCH_SIZE
    """
    no_of_rows_drop = mat.shape[0]%batch_size
    if(no_of_rows_drop > 0):
        return mat[:-no_of_rows_drop]
    else:
        return mat


In [11]:
'''
Getting training and test sets
'''
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

myo_df = pd.read_csv(directory + "bigData.csv")
myo_df = myo_df[Better_cols]

df_train, df_test = train_test_split(myo_df, train_size=0.7, test_size=0.3, shuffle=True)
df_val, df_test = train_test_split(df_test, train_size=0.6, test_size=0.4, shuffle=True)
df_test.describe()
df_tr_x = df_train[Good_cols]
df_tr_y = df_train[Y_cols]
df_te_x = df_val[Good_cols]
df_te_y = df_val[Y_cols]

df_tester_x = df_test[Good_cols]
df_tester_y = df_test[Y_cols]

print("Train and Test size", len(df_train), len(df_test))
# scale the feature MinMax, build array
# x = df_train.loc[:,Good_cols].values
# min_max_scaler = MinMaxScaler()
# x_train = min_max_scaler.fit_transform(x)
# x_test = min_max_scaler.transform(df_test.loc[:,Good_cols])


INPUT_WIDTH = df_tr_x.shape[1]
CLASSES = 4

print(str(INPUT_WIDTH) + "!!!")

Train and Test size 50325 8628
21!!!


In [12]:
import tensorflow

'''
Building the model
'''

# Import functions from Keras
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Embedding, Dropout, Input


figh_model = Sequential()
figh_model.add(Dense(400, activation='relu', input_dim=INPUT_WIDTH))
figh_model.add(Dropout(.1))
figh_model.add(Dense(200, activation='relu'))
figh_model.add(Dropout(.1))
figh_model.add(Dense(100, activation='relu'))
figh_model.add(Dropout(.1))
figh_model.add(Dense(50, activation='relu'))
figh_model.add(Dense(CLASSES, activation='softmax')) # softmax makes it so that the outputs sum to 1 for each of the 4 output classes
figh_model.compile(loss='mean_squared_error',
                   optimizer='adam',
                   metrics=['accuracy'])
figh_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 400)               8800      
                                                                 
 dropout (Dropout)           (None, 400)               0         
                                                                 
 dense_1 (Dense)             (None, 200)               80200     
                                                                 
 dropout_1 (Dropout)         (None, 200)               0         
                                                                 
 dense_2 (Dense)             (None, 100)               20100     
                                                                 
 dropout_2 (Dropout)         (None, 100)               0         
                                                                 
 dense_3 (Dense)             (None, 50)                5

In [13]:
history = figh_model.fit(df_tr_x, df_tr_y, epochs=55, verbose=1,
          validation_data=(df_te_x,
                           df_te_y))

# Plot training & validation loss values
plt.figure()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')

plt.figure()
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')

score = figh_model.evaluate(df_tester_x, df_tester_y, verbose=1,)
print('Test score:', score[0])
print('Test accuracy:', score[1] )

Epoch 1/55
Epoch 2/55
Epoch 3/55
Epoch 4/55
Epoch 5/55
Epoch 6/55
Epoch 7/55
Epoch 8/55
Epoch 9/55
Epoch 10/55
Epoch 11/55
Epoch 12/55
Epoch 13/55
Epoch 14/55
Epoch 15/55
Epoch 16/55
Epoch 17/55
Epoch 18/55
Epoch 19/55
Epoch 20/55
Epoch 21/55
Epoch 22/55
Epoch 23/55
Epoch 24/55
Epoch 25/55
Epoch 26/55
Epoch 27/55
Epoch 28/55
Epoch 29/55
Epoch 30/55
Epoch 31/55
Epoch 32/55
Epoch 33/55
Epoch 34/55
Epoch 35/55
Epoch 36/55
Epoch 37/55
Epoch 38/55
Epoch 39/55
Epoch 40/55
Epoch 41/55
Epoch 42/55
Epoch 43/55
Epoch 44/55
Epoch 45/55
Epoch 46/55
Epoch 47/55
Epoch 48/55
Epoch 49/55
Epoch 50/55
Epoch 51/55
Epoch 52/55
Epoch 53/55
Epoch 54/55
Epoch 55/55
Test score: 0.0004636068479157984
Test accuracy: 0.999072790145874
