In [126]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
from collections import defaultdict

import math

path = 'Z:\\Stroke MC10\\LabeledData\\'

In [170]:
def getFeatures(EMGSignal):
    
    EPS = .0001
    
    Feat = {}
    Feat['MeanAbsValue'] = np.mean(np.abs(EMGSignal))
    
    PosInds = EMGSignal>0
    Feat['ZeroCrossings'] = sum((PosInds[1:len(PosInds)] != PosInds[0:(len(PosInds)-1)]) & (np.abs(np.diff(EMGSignal))>EPS))
    
    DiffEMGSignal = np.diff(EMGSignal)
    PosInds = DiffEMGSignal>0
    Feat['SlopeZeroCrossings'] = sum((PosInds[1:len(PosInds)] != PosInds[0:(len(PosInds)-1)]) & 
                                     (np.abs(np.diff(DiffEMGSignal))>EPS))
    
    Feat['WaveformLength'] = np.mean(np.abs(EMGSignal[1:len(EMGSignal)-1]-EMGSignal[2:len(EMGSignal)]))
    
    Feat['WillisonAmplitude'] = sum(np.abs(EMGSignal[1:len(EMGSignal)-1]-EMGSignal[2:len(EMGSignal)])>EPS)
    
    Feat['RMS'] = math.sqrt(np.mean(np.square(EMGSignal)))
    
    Feat['Variance'] = np.var(EMGSignal)
    
    
    FFTPow = np.square(np.abs(np.fft.rfft(EMGSignal)))
    for i, key in enumerate(['FFT:0-20Hz', 'FFT:20-40Hz', 'FFT:40-60Hz', 'FFT:60-80Hz', 'FFT:80-100Hz', 'FFT:100-120Hz']):
        Feat[key] = np.sum(FFTPow[1+int(len(FFTPow)/6 * i):int(len(FFTPow)/6 * (i-1))])
    # TODO: Sample Entropy and FFT
    
    Feat['FFT Sum'] = np.sum(FFTPow)
    
    return Feat

In [171]:
def getClips(EMGSignal, winsize, overlap):
    Clips = []
    Features = defaultdict(list)
    for indStart in range(0,int(len(EMGSignal)-winsize),int(winsize*(1-overlap))):
        Clip = np.asarray(EMGSignal[indStart:indStart+125])
        Feat = getFeatures(Clip)
        Clips.append(Clip)
        for key in Feat:
            Features[key].append(Feat[key])
        
    return Clips, Features

In [172]:
winsize = .5 * 250 # Test different values later (*250 to convert sec to samples)
overlap = 0 # 0-1; use value such that winsize*overlap is integer for now

RawDataClips = []
Features = defaultdict(list)
ClipLabels = []
SubjID = []
Location = []

for x in itertools.product(range(1,31),['Lab Day1','Lab Day2'],['Gastrocnemius', 'Hamstring'],['MAS', 'MVC', 'VCM']):
    if x[2]=='Hamstring':
        Act = 'KF'
    else:
        Act = 'PF'
    
    oldLen = len(RawDataClips)
    
    try:
        Data = pd.read_csv(path + 'CS' + str(x[0]).zfill(3) + '\\' + x[1] + '\\' + x[2] + '_' + x[3] + ' ' + Act + '_labeled.csv',
                          header = None)
        if x[2]=='Hamstring':
            Labind = 8
        else:
            Labind = 5
            
        EMGData = Data[4]
        Label = Data[Labind]
        
        for l in zip(['Spastic Activity', 'Non-Spastic Activity', 'Inactive'],['SA','HA','IA']):
            lab_old = l[0]
            lab_new = l[1]
            
            lab_inds = Label==lab_old
            lab_inds_diff = lab_inds[1:len(lab_inds)].values != lab_inds[0:len(lab_inds)-1].values
            
            # Get starts/ends of continuous labels
            cont_label_startend = [i+1 for i,x in enumerate(lab_inds_diff) if x]
            
            # Note: this assumes that the data stream begins and ends with unlabeled data
            # Even indices are starts, odds are ends
            for nLabel in range(0,len(cont_label_startend),2): 
                LabeledData = EMGData[cont_label_startend[nLabel]:cont_label_startend[nLabel+1]]
                
                newClips, newFeatures = getClips(LabeledData, winsize, overlap)
                
                RawDataClips += newClips
                for key in newFeatures:
                    Features[key] += newFeatures[key]
                SubjID += [x[0]] * len(newClips)
                ClipLabels += [lab_new]*len(newClips)
                Location += [x[2]]*len(newClips)
            
    except(FileNotFoundError):
        print(path + 'CS' + str(x[0]).zfill(3) + '\\' + x[1] + '\\' + x[2] + '_' + x[3] + ' ' + Act + '_labeled.csv')
        

Z:\Stroke MC10\LabeledData\CS001\Lab Day2\Gastrocnemius_MAS PF_labeled.csv
Z:\Stroke MC10\LabeledData\CS001\Lab Day2\Gastrocnemius_MVC PF_labeled.csv
Z:\Stroke MC10\LabeledData\CS001\Lab Day2\Gastrocnemius_VCM PF_labeled.csv
Z:\Stroke MC10\LabeledData\CS001\Lab Day2\Hamstring_MAS KF_labeled.csv
Z:\Stroke MC10\LabeledData\CS001\Lab Day2\Hamstring_MVC KF_labeled.csv
Z:\Stroke MC10\LabeledData\CS001\Lab Day2\Hamstring_VCM KF_labeled.csv
Z:\Stroke MC10\LabeledData\CS002\Lab Day1\Gastrocnemius_MVC PF_labeled.csv
Z:\Stroke MC10\LabeledData\CS002\Lab Day1\Hamstring_VCM KF_labeled.csv
Z:\Stroke MC10\LabeledData\CS003\Lab Day2\Hamstring_VCM KF_labeled.csv
Z:\Stroke MC10\LabeledData\CS004\Lab Day1\Gastrocnemius_MVC PF_labeled.csv
Z:\Stroke MC10\LabeledData\CS004\Lab Day1\Gastrocnemius_VCM PF_labeled.csv
Z:\Stroke MC10\LabeledData\CS005\Lab Day2\Gastrocnemius_VCM PF_labeled.csv
Z:\Stroke MC10\LabeledData\CS006\Lab Day1\Hamstring_MVC KF_labeled.csv
Z:\Stroke MC10\LabeledData\CS006\Lab Day2\Hamstri

In [173]:
FullData = pd.DataFrame(Features).assign(RawData=pd.Series(RawDataClips), SubjID=pd.Series(SubjID), Label=pd.Series(ClipLabels), Location=pd.Series(Location))

In [174]:
FullData

Unnamed: 0,FFT Sum,FFT:0-20Hz,FFT:100-120Hz,FFT:20-40Hz,FFT:40-60Hz,FFT:60-80Hz,FFT:80-100Hz,MeanAbsValue,RMS,SlopeZeroCrossings,Variance,WaveformLength,WillisonAmplitude,ZeroCrossings,Label,Location,RawData,SubjID
0,6.591425e-05,6.545479e-05,0.0,0.0,0.0,0.0,0.0,0.000043,0.000092,15,8.436106e-09,0.000043,16,9,SA,Gastrocnemius,"[-1.25376354234e-05, 0.000256419017388, 0.0004...",1
1,2.937566e-05,2.854022e-05,0.0,0.0,0.0,0.0,0.0,0.000035,0.000061,22,3.759755e-09,0.000041,18,12,SA,Gastrocnemius,"[-1.54166153598e-05, -2.58201167855e-05, -2.52...",1
2,2.050793e-05,1.908770e-05,0.0,0.0,0.0,0.0,0.0,0.000029,0.000051,17,2.623056e-09,0.000035,11,6,SA,Gastrocnemius,"[1.65414212235e-06, 1.16601537833e-05, 9.49498...",1
3,2.214623e-05,2.092626e-05,0.0,0.0,0.0,0.0,0.0,0.000030,0.000053,16,2.834230e-09,0.000035,16,12,SA,Gastrocnemius,"[-8.83690111516e-06, -1.39761029783e-06, 4.586...",1
4,7.703489e-06,7.374743e-06,0.0,0.0,0.0,0.0,0.0,0.000025,0.000031,15,9.858629e-10,0.000030,1,1,HA,Gastrocnemius,"[1.23416107599e-05, -1.96147267426e-05, -9.001...",1
5,8.130242e-06,7.903574e-06,0.0,0.0,0.0,0.0,0.0,0.000027,0.000032,12,1.040651e-09,0.000034,0,0,HA,Gastrocnemius,"[-2.24706359465e-05, 1.10109352551e-05, -2.817...",1
6,1.024028e-05,9.763181e-06,0.0,0.0,0.0,0.0,0.0,0.000028,0.000036,9,1.310335e-09,0.000033,4,4,HA,Gastrocnemius,"[-2.33031173085e-05, -3.17387412667e-06, -1.63...",1
7,8.322934e-06,7.658138e-06,0.0,0.0,0.0,0.0,0.0,0.000026,0.000033,18,1.064839e-09,0.000035,4,4,HA,Gastrocnemius,"[4.52581818988e-05, 4.66920502156e-05, 1.57840...",1
8,7.298265e-06,6.721589e-06,0.0,0.0,0.0,0.0,0.0,0.000022,0.000031,20,9.341753e-10,0.000034,4,3,HA,Gastrocnemius,"[-4.54703491651e-06, -2.61460899681e-06, 1.992...",1
9,1.120278e-05,1.104285e-05,0.0,0.0,0.0,0.0,0.0,0.000029,0.000038,17,1.433953e-09,0.000036,6,3,HA,Gastrocnemius,"[3.4171715202e-06, -3.72436117115e-05, -8.9238...",1


In [152]:
[sum(FullData.SubjID[FullData.Location=='Hamstring']==s) for s in FullData.SubjID[FullData.Location=='Hamstring'].unique()]

[18, 88, 46, 91, 115, 23, 24, 151, 95, 49, 19, 133, 38, 33, 4]

In [153]:
[sum(FullData.SubjID[FullData.Location=='Gastrocnemius']==s) for s in FullData.SubjID[FullData.Location=='Gastrocnemius'].unique()]

[26,
 36,
 74,
 43,
 81,
 85,
 37,
 89,
 61,
 64,
 7,
 80,
 2,
 4,
 17,
 59,
 58,
 29,
 30,
 64,
 14]