In [7]:
# imports
import pickle
import sys
from pathlib import Path

import numpy as np
import pandas as pd

sys.path.append('..')
from utils.features import *
from utils.signal_processing import *

print("Packages Imported")

Packages Imported


In [12]:
# settings
# Directory that holds the session recordings and their metadata files,
# given relative to this notebook.
data_in = "../data/emg_recordings"

In [13]:
# function to add labels from metadata to data

def add_labels(df, m):
    """Attach an action label to every sample of a recording.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording whose column ``0`` is compared against the metadata start
        times. A ``'label'`` column is added to this frame in place.
    m : pandas.DataFrame
        Metadata with one row per cue: column ``0`` is the cue start time and
        column ``1`` the cue text. Surrounding whitespace in the text is
        stripped.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now carrying the ``'label'`` column. Samples
        earlier than the first cue keep a NaN label.

    Raises
    ------
    ValueError
        If a ``HOLD`` cue has no preceding row, or a ``STOP`` cue has fewer
        than two preceding rows, so the action it refers to cannot be found.
    """
    # Start from an all-NaN column with object dtype so string labels can be
    # written into it without a dtype change.
    df['label'] = pd.Series(np.nan, index=df.index, dtype=object)

    # Work on plain lists so look-backs are positional and do not depend on
    # the metadata frame's index labels.
    start_times = m[0].tolist()
    categories = m[1].astype(str).str.strip().tolist()

    # HOLD refers to the cue one row earlier, STOP to the cue two rows earlier.
    lookback = {"HOLD": 1, "STOP": 2}

    for pos, (t, cat) in enumerate(zip(start_times, categories)):
        offset = lookback.get(cat, 0)
        if offset > pos:
            # A negative position would silently wrap around to the end of
            # the metadata and pick an unrelated cue.
            raise ValueError(
                f"metadata row {pos} is {cat!r} but has no action "
                f"{offset} row(s) before it"
            )
        label = categories[pos - offset]

        # Every sample at or after this cue takes the label; later cues
        # overwrite the tail again on subsequent iterations.
        df.loc[df[0] >= t, 'label'] = label
    return df

In [14]:
# Session 3: read the recording and its cue metadata. header=None makes pandas
# number the columns 0, 1, 2, ... instead of using the first row as names.
ses3 = pd.read_csv(f"{data_in}/test_sess3_data.txt", header=None)
m3 = pd.read_csv(f"{data_in}/test_sess3_metadata.txt", header=None)

# add_labels writes the 'label' column into ses3 itself and returns that same
# frame, so df3 and ses3 refer to one object.
df3 = add_labels(ses3, m3)

In [15]:
# Session 4: read the recording and its cue metadata. header=None makes pandas
# number the columns 0, 1, 2, ... instead of using the first row as names.
ses4 = pd.read_csv(f"{data_in}/test_sess4_data.txt", header=None)
m4 = pd.read_csv(f"{data_in}/test_sess4_metadata.txt", header=None)

# add_labels writes the 'label' column into ses4 itself and returns that same
# frame, so df4 and ses4 refer to one object.
df4 = add_labels(ses4, m4)

In [16]:
# Session 5: read the recording and its cue metadata. header=None makes pandas
# number the columns 0, 1, 2, ... instead of using the first row as names.
ses5 = pd.read_csv(f"{data_in}/test_sess5_data.txt", header=None)
m5 = pd.read_csv(f"{data_in}/test_sess5_metadata.txt", header=None)

# add_labels writes the 'label' column into ses5 itself and returns that same
# frame, so df5 and ses5 refer to one object.
df5 = add_labels(ses5, m5)

In [17]:
# Stack the three labelled sessions into a single frame
df = (
    pd.concat([df3, df4, df5])
    # drop every row containing a NaN; this includes samples recorded before
    # the first cue, whose label was never set
    .dropna()
    # renumber rows 0..n-1 so positional windows line up with the index
    .reset_index(drop=True)
)

In [18]:
# Preview the first rows: column 0 is the timestamp add_labels compares against,
# columns 1-8 are the signal columns paired up later, 'label' is the action.
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,label
0,122155.864712,-77062.46875,-77093.445312,-73484.914062,-73730.742188,-103410.390625,-103418.390625,-173520.75,-173397.546875,REST
1,122155.868712,-76983.476562,-77014.007812,-73467.03125,-73712.40625,-103027.210938,-103034.4375,-173118.4375,-172994.953125,REST
2,122155.872712,-76935.195312,-76965.304688,-73453.265625,-73698.507812,-102904.882812,-102912.507812,-173069.765625,-172946.484375,REST
3,122155.876712,-77000.6875,-77031.289062,-73466.515625,-73712.5,-103259.335938,-103266.984375,-173449.359375,-173326.40625,REST
4,122155.880712,-77053.59375,-77084.703125,-73480.265625,-73726.382812,-103445.171875,-103453.0625,-173564.53125,-173441.359375,REST


In [19]:
# List the distinct action labels present in the combined data
df['label'].unique()

array(['REST', 'WRIST DOWN', 'SNAP', 'CLENCH FIST', 'WRIST UP'],
      dtype=object)

In [20]:
# metric calculations

def compute_metrics(data, fs=250, include_freq=False):
    """Compute the feature values for one filtered signal window.

    Parameters
    ----------
    data : array-like
        The signal window; passed unchanged to every metric function.
    fs : int, optional
        Sampling rate handed to ``to_fdomain``. Only used when
        ``include_freq`` is true. Defaults to 250, the value that was
        previously hard-coded.
    include_freq : bool, optional
        When true, append the median and mean frequency after the time-domain
        metrics. Defaults to False, which returns the time-domain metrics
        only, exactly as before.

    Returns
    -------
    list
        ``[mean_absolute_value, slope_sign_changes, root_mean_square]`` of
        ``data``, optionally followed by ``[median_frequency, mean_frequency]``.
    """
    # time domain
    tfuncs = [mean_absolute_value, slope_sign_changes, root_mean_square]
    metrics = [f(data) for f in tfuncs]

    # freq domain: only transformed on request, so the default path no longer
    # pays for a spectrum whose result was thrown away
    if include_freq:
        # NOTE(review): the call shapes below mirror the code that used to be
        # commented out; to_fdomain and the frequency metrics are defined in
        # utils and their signatures are not visible here - verify before use.
        power, freq = to_fdomain(data, fs)
        ffuncs = [median_frequency, mean_frequency]
        metrics.extend(f(power, freq) for f in ffuncs)

    return metrics

In [21]:
## THIS CELL IS FOR TESTING COMPUTE_METRICS
# Runs the per-window pipeline once, on the first 50 rows, as a smoke test.

raw_data = df.iloc[0:50]

# if the data is between rest and a motion, the feature loop would skip it.
# Only report it here: the loop's counters are defined in a later cell, so
# touching them would fail on a fresh kernel.
if len(raw_data['label'].unique()) > 1:
    print("Warning: the test window spans more than one label")

# loop through all pairs of channels and transform, filter, and compute metrics
all_channel_metrics = []
for e in range(1,9,2):
    sig = tripolar_laplacian(raw_data[e], raw_data[e+1])
    filtered_sig = filter_emg(np.array(sig), 250)

    metrics = compute_metrics(filtered_sig)
    all_channel_metrics.extend(metrics)

# display the metrics of the last channel pair processed (columns 7 and 8)
metrics

[103.21974907407392, 23, 116.01697863878724]

In [22]:
# create features based on 0.2 seconds (50 rows) of data points

window_size = 50
feats = []
# Step through the data in non-overlapping windows of window_size rows. The
# "+ 1" in the stop value keeps the final window when len(df) is an exact
# multiple of window_size; a trailing partial window is still ignored.
for i in range(0, len(df) - window_size + 1, window_size):
    # select data of interest
    raw_data = df.iloc[i:i + window_size]

    # if the data is between rest and a motion, don't include it
    window_labels = raw_data['label'].unique()
    if len(window_labels) > 1:
        continue

    # loop through all pairs of channels and transform, filter, and compute metrics
    all_channel_metrics = []
    for e in range(1, 9, 2):
        sig = tripolar_laplacian(raw_data[e], raw_data[e + 1])
        filtered_sig = filter_emg(np.array(sig), 250)

        all_channel_metrics.extend(compute_metrics(filtered_sig))

    # add metrics plus the window's single label as one feature row
    feats.append(all_channel_metrics + [window_labels[0]])

In [23]:
# One row per kept window: the metrics of the 4 channel pairs in order,
# followed by the window's label in the last column.
feature_df = pd.DataFrame(feats)
feature_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,23.054868,23,26.879249,5.092542,23,5.949945,100.545968,23,112.540689,103.219749,23,116.016979,REST
1,20.062343,23,22.540444,6.796910,23,8.351373,97.515596,23,109.450591,100.765259,23,113.423511,REST
2,15.540885,23,17.822811,3.516999,23,4.172749,90.467274,23,101.667903,94.108229,23,106.311052,REST
3,12.998037,24,14.932618,5.141250,23,6.221360,68.654617,23,76.694255,73.497742,23,82.659126,REST
4,13.582772,24,15.559570,10.066973,23,11.405492,65.496634,23,73.336346,70.924455,23,79.760386,REST
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1403,1212.371725,23,1342.899112,18.340115,23,20.513414,1010.647343,23,1123.103268,559.697951,23,625.203057,CLENCH FIST
1404,1208.833108,23,1340.400620,18.367587,23,21.286912,1009.983750,23,1124.214785,557.586899,23,621.912292,CLENCH FIST
1405,1208.159727,23,1339.882094,18.461581,23,21.112132,1009.411446,23,1123.136257,558.443133,23,622.274518,CLENCH FIST
1406,1222.780625,23,1356.351442,17.943304,24,19.967945,1028.985873,24,1134.201384,561.552460,24,623.135654,CLENCH FIST


In [24]:
# Number of windows per class (the label is the last column)
feature_df.iloc[:, -1].value_counts()

REST           548
WRIST DOWN     218
CLENCH FIST    215
WRIST UP       214
SNAP           213
Name: 12, dtype: int64

In [25]:
# Machine learning: train and evaluate an SVM classifier on the window features

In [26]:
# imports
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

In [27]:
# Features are every column except the last; the last column is the class label.
# Both are copies, so feature_df itself is left untouched.
X = feature_df.iloc[:, :-1].copy()
y = feature_df.iloc[:, -1].copy()

In [28]:
# Hold out a test set using train_test_split's default proportions; the fixed
# random_state makes the split reproducible.
# NOTE(review): the split is random over windows and not stratified by label,
# so neighbouring windows of one recording can land on both sides - confirm
# this is the intended evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=100)

In [29]:
# SVM
# Baseline: an SVC with scikit-learn's default hyper-parameters, fitted on the
# features as they are (no scaling step is applied before this point).
model = SVC()
model.fit(X_train, y_train)

In [30]:
# Mean accuracy of the current model: training set first, then held-out set
print(model.score(X_train, y_train), model.score(X_test, y_test), sep="\n")

0.4337121212121212
0.4090909090909091


In [31]:
# Candidate hyper-parameters for the RBF-kernel SVC: 3 values of C x 3 of gamma
params = {
    'C': [100, 1000, 10000],
    'gamma': [0.01, 0.001, 0.0001],
    'kernel': ['rbf'],
}

# Exhaustive search with 5-fold cross-validation on the training split only;
# n_jobs=-1 uses every available core, verbose=1 prints the fit count.
clf = GridSearchCV(estimator=SVC(), param_grid=params, cv=5, n_jobs=-1, verbose=1)
clf.fit(X_train, y_train)

# Keep the best combination for the cells that follow
best_params = clf.best_params_

Fitting 5 folds for each of 9 candidates, totalling 45 fits


In [32]:
# Show the parameter combination selected by the grid search
best_params

{'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}

In [33]:
# Fit a fresh SVC with the grid-search winner on the training split
# (fit returns the estimator itself, so it can be assigned directly)
model = SVC(**best_params).fit(X_train, y_train)

# Mean accuracy on the training split, then on the held-out split
print(model.score(X_train, y_train))
print(model.score(X_test, y_test))

0.9081439393939394
0.6619318181818182


In [34]:
# train final model on full dataset
# (all windows, train and test alike, using the tuned hyper-parameters)

final_model = SVC(**best_params)
final_model.fit(X, y)

# save trained model
model_path = Path('../data/models/SVC.pkl')
# create the output directory first so a missing folder does not discard the
# freshly trained model with a FileNotFoundError
model_path.parent.mkdir(parents=True, exist_ok=True)
with model_path.open('wb') as f:
    pickle.dump(final_model, f)