## Combining information for individual subjects

In [337]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sb

import time

In [338]:
# Build the column labels for the twelve sensor columns

# Eight joint sensors, labelled 'Joint 1' through 'Joint 8'
jointIndex = ['Joint ' + str(number) for number in range(1, 9)]

# The four muscle-specific sensors come after the joint sensors
sensorIndex = jointIndex + ['Flexor Digit', 'Extensor Digit', 'Biceps', 'Triceps']

In [339]:
# Joining EMG, movement, and repetition results into one file for each subject and exercise

def ConcatSubjects(dataset):
    """Join the EMG, movement and repetition files of each subject into one CSV.

    For every selected subject and for exercises 1 and 2, three files are read
    from the `DB<dataset>/` folder (`..._EMG.csv`, `..._restimulus.csv` and
    `..._rerepetition.csv`), joined column-wise on their row index and written
    to `dataset/unprocessed/DB<dataset>_S<subject>_E<exercise>.csv`.

    Parameters
    ----------
    dataset : int
        Database number; only 2 and 3 are supported.

    Raises
    ------
    ValueError
        If `dataset` is neither 2 nor 3.
    """
    # Check which dataset is being used
    if dataset == 2:
        # Dataset 2 contained the non-disabled subjects, the subject numbers below were chosen to match clinical
        # parameters between both datasets as closely as possible and so that both datasets contribute the
        # same number of subjects (eleven).
        dB_subjects = [2, 6, 8, 10, 12, 13, 16, 21, 23, 26, 31]
        dB = 'DB2'
    elif dataset == 3:
        dB_subjects = np.arange(1,12)
        dB = 'DB3'
    else:
        # Without this guard the subject list stays undefined and the loop below
        # fails with an unrelated-looking unbound-variable error.
        raise ValueError('Unsupported dataset ' + repr(dataset) + ': expected 2 or 3')

    # Cycle through every subject of the chosen dataset
    for subject in dB_subjects:
        print('Starting Database ' + str(dataset) + '\n  Subject ' + str(subject))
        # Cycle through both exercises
        for exercise in range(1,3):
            # Load files then join into one
            print('   Exercise ' + str(exercise))
            fileStem = dB + '_S' + str(subject) + '_E' + str(exercise)
            filepath = dB + '/' + fileStem
            sensor = pd.read_csv((filepath + '_EMG.csv'), names = sensorIndex)
            print('    Sensor csv read')
            movement = pd.read_csv((filepath + '_restimulus.csv'), names = ['Movement'])
            print('    Movement csv read')
            subjectDf = sensor.join(movement)
            rep = pd.read_csv((filepath + '_rerepetition.csv'), names = ['Repetition'])
            print('    Repetition csv read')
            subjectDf = subjectDf.join(rep)
            print('    Dataframe successfully joined')

            subjectDf.to_csv(('dataset/unprocessed/' + fileStem + '.csv'))
            print('   File successfully written\n')

    print('All files successfully written')

In [4]:
# Create individual subject dataframes for each exercise

# Database 2 first, then database 3
for dbNumber in (2, 3):
    ConcatSubjects(dbNumber)

Starting Database 2
  Subject 2
   Exercise 1
    Sensor csv read
    Movement csv read
    Repetition csv read
    Dataframe successfully joined
   File successfully written
   Exercise 2
    Sensor csv read
    Movement csv read
    Repetition csv read
    Dataframe successfully joined
   File successfully written
Starting Database 2
  Subject 6
   Exercise 1
    Sensor csv read
    Movement csv read
    Repetition csv read
    Dataframe successfully joined
   File successfully written
   Exercise 2
    Sensor csv read
    Movement csv read
    Repetition csv read
    Dataframe successfully joined
   File successfully written
Starting Database 2
  Subject 8
   Exercise 1
    Sensor csv read
    Movement csv read
    Repetition csv read
    Dataframe successfully joined
   File successfully written
   Exercise 2
    Sensor csv read
    Movement csv read
    Repetition csv read
    Dataframe successfully joined
   File successfully written
Starting Database 2
  Subject 10
   Exercise 1

## Preprocessing, Windowing, and Feature Extraction
Each sensor channel is standardised to a mean of 0 and a standard deviation of 1.

Windows of 250ms length with 20ms increment

$$ N = \text{window length} $$
$$ x_{n} = \text{EMG signal in a segment}$$

### Root Mean Square
$$\sqrt{\frac{1}{N} \sum_{n=1}^{N} x_{n}^{2}}$$

### Mean Absolute Value
$$\frac{1}{N} \sum_{n=1}^{N}|x_{n}|$$

### Waveform Length
$$\sum_{n=1}^{N-1} |x_{n+1} - x_{n}|$$

In [340]:
# Standardising subjects around a mean of zero
def StandardiseSubject(subject, exercise):
    """Load one unprocessed subject/exercise file and standardise its sensor columns.

    Reads `dataset/unprocessed/<subject>_<exercise>.csv` and rescales each
    sensor column to (value - column mean) / column standard deviation.
    All other columns are returned unchanged.

    Parameters
    ----------
    subject : str
        File prefix such as 'DB3_S6'.
    exercise : str
        Exercise label used in the file name.

    Returns
    -------
    pandas.DataFrame
        Copy of the loaded frame with the standardised sensor columns.
    """
    rawDf = pd.read_csv('dataset/unprocessed/' + subject + '_' + exercise + '.csv', index_col = 0)
    standardisedDf = rawDf.copy()

    # DB3 subjects 6 and 7 have no digit-sensor data, so only the joint,
    # biceps and triceps columns are standardised for them
    if subject == 'DB3_S6' or subject == 'DB3_S7':
        sensorsUsed = jointIndex + ['Biceps', 'Triceps']
    else:
        sensorsUsed = sensorIndex

    # Rescale one sensor column at a time
    for sensorName in sensorsUsed:
        channel = rawDf[sensorName]
        channelMean = channel.mean()
        channelStd = channel.std()
        standardisedDf[sensorName] = (channel - channelMean)/channelStd

    return standardisedDf

# Function to save dataframe for later use
def SaveDF(dfToSave, dataset, exercise, feature):
    """Write a dataframe to `dataset/DB<dataset>_<exercise>_<feature>.csv`.

    Parameters
    ----------
    dfToSave : pandas.DataFrame
        Frame to write.
    dataset : int or str
        Database number used in the file name.
    exercise : str
        Exercise label, e.g. 'E1'.
    feature : str
        Feature label, e.g. 'RMS'.
    """
    # A forward slash is used as the separator: the previous 'dataset\DB' literal
    # relied on an invalid escape sequence and, outside Windows, produced a file
    # named 'dataset\DB...' in the working directory instead of inside 'dataset/'.
    dfToSave.to_csv('dataset/DB' + str(dataset) + '_' + exercise + '_' + feature + '.csv')


In [341]:
def ProcessDataset(dataset, exercise, feature, supress_comments = False, window = 250, incr = 20):
    """Compute one windowed feature for every subject of a dataset.

    Each subject is loaded through `StandardiseSubject`; for every movement
    other than 0 and every repetition 1-6, a window is slid over the first
    twelve columns of the frame and the requested feature is computed per
    column for each window.

    Parameters
    ----------
    dataset : int
        Database number; only 2 and 3 are supported.
    exercise : str
        Exercise label passed on to `StandardiseSubject`, e.g. 'E1'.
    feature : str
        'RMS' (root mean square), 'MAV' (mean absolute value) or
        'WL' (waveform length); case-insensitive. 'SC' is accepted but not
        implemented and yields an empty frame. Any other value prints
        "Feature not found" and yields an empty frame.
    supress_comments : bool
        When True, the per-repetition progress messages are not printed.
    window : int
        Window length in rows.
    incr : int
        Step between consecutive window starts, in rows.
        NOTE(review): both values are row counts; they correspond to
        milliseconds only if the recordings are sampled at 1 kHz - confirm.

    Returns
    -------
    pandas.DataFrame
        One row per window with the twelve feature columns plus 'Movement',
        'Repetition' and 'Subject'. Columns are sorted alphabetically, which is
        the layout the previous append-based implementation produced.

    Raises
    ------
    ValueError
        If `dataset` is not 2 or 3, or if `window` or `incr` is smaller than 1.
    """
    # Root Mean Square of each column over one window
    def WindowRMS(win_df):
        return (win_df ** 2).mean().apply(np.sqrt)

    # Mean Absolute Value of each column over one window
    def WindowMAV(win_df):
        return win_df.abs().mean()

    # Waveform Length of each column over one window. diff() leaves NaN in the
    # first row, which sum() skips, so this adds the absolute differences
    # between consecutive rows inside the window only.
    def WindowWL(win_df):
        return win_df.diff().abs().sum()

    calculators = {'RMS': WindowRMS, 'MAV': WindowMAV, 'WL': WindowWL}

    # Function to add columns
    def AddInfoCol(dataf, movement, repetition, subjectNum):
        dataf['Movement'] = movement
        dataf['Repetition'] = repetition
        dataf['Subject'] = subjectNum

        return(dataf)

    # Function for windowing calculation; returns one Series per window
    def CalculateWindow(standardSubject, subject, movement, calculate):
        windowRows = []
        # Filter dataframe down to single movement
        movement_df = standardSubject[standardSubject['Movement'] == movement]

        # Cycle through each repetition
        for repetition in range(1,7):
            rep = movement_df[movement_df['Repetition'] == repetition]
            if not supress_comments:
                print('   Repetition ' + str(repetition) + ' started')

            sensors = rep.iloc[:,0:12]
            # Integer division keeps only windows that fit completely; the
            # previous float comparison added a truncated window at the end
            # whenever (rows - window) was not a multiple of incr.
            windowCount = (len(sensors) - window) // incr + 1
            for i in range(max(windowCount, 0)):
                start = i * incr
                end = start + window
                calc_df = calculate(sensors.iloc[start:end])
                windowRows.append(AddInfoCol(calc_df, movement, repetition, subject))

        print('  Window Calculations Finished.')
        return(windowRows)

    if dataset == 2: subjects = [2, 6, 8, 10, 12, 13, 16, 21, 23, 26, 31]
    elif dataset == 3: subjects = np.arange(1,12)
    else: raise ValueError('Unsupported dataset ' + repr(dataset) + ': expected 2 or 3')

    if window < 1 or incr < 1:
        raise ValueError('window and incr must both be at least 1')

    featureKey = feature.upper()
    if featureKey not in calculators:
        # 'SC' was already a silent no-op; any other name is reported once
        # instead of once per window.
        if featureKey != 'SC':
            print("Feature not found")
        return(pd.DataFrame())
    calculate = calculators[featureKey]

    start_time = time.time()

    # Rows are collected in a list and turned into a dataframe once at the end;
    # growing a dataframe row by row copies it on every step.
    featureRows = []
    # Cycle through subjects and perform windowing and feature extraction calculations
    for subject in subjects:
        print("Starting subject " + str(subject))
        # Get file path name to send into standardSubject function for loading
        subjectPath = 'DB' + str(dataset) + '_S' + str(subject)
        standardSubject = StandardiseSubject(subjectPath, exercise)
        for movement in standardSubject['Movement'].unique():
            if movement != 0:
                print(" Starting Movement " + str(movement))
                featureRows.extend(CalculateWindow(standardSubject, subject, movement, calculate))

    df_feature = pd.DataFrame(featureRows)
    # Keep the alphabetical column layout of the earlier output files
    df_feature = df_feature.reindex(columns=sorted(df_feature.columns))

    print('Finished dataset ' + str(dataset) + ', feature ' + feature)
    end_time = time.time()
    print('Finished in ' + str(end_time - start_time))
    return(df_feature)

In [124]:
# Create preprocessed files for each of the three feature extraction methods on both dataset
# Limiting to exercise 1 due to time constraints
# Exercise 1 only; its file label is built once and reused for every call
exercise = 1
exerciseLabel = 'E' + str(exercise)

for dataset in range(2,4):
    datasetLabel = str(dataset)

    # Root Mean Square
    rms_df = ProcessDataset(dataset, exerciseLabel, 'RMS')
    SaveDF(rms_df, datasetLabel, exerciseLabel, 'RMS')

    # Mean Absolute Value
    mav_df = ProcessDataset(dataset, exerciseLabel, 'MAV')
    SaveDF(mav_df, datasetLabel, exerciseLabel, 'MAV')

    # Waveform Length
    wl_df = ProcessDataset(dataset, exerciseLabel, 'WL')
    SaveDF(wl_df, datasetLabel, exerciseLabel, 'WL')
        

Starting subject 2


  mask |= (ar1 == a)


 Starting Movement 18
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 19
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 20
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 21
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 22
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting 

   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 37
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 38
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 39
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 40
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
Starting subject 8
 Starting Movement 18
   Repetition 1 started
   Repetition 2

   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 32
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 33
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 34
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 35
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 36
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetit

  Window Calculations Finished.
 Starting Movement 27
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 28
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 29
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 30
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 31
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window C

   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 23
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 24
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 25
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 26
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 27
   Repetition 1 started
   Repetit

   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
Starting subject 26
 Starting Movement 18
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 19
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 20
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 21
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 22
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 

  Window Calculations Finished.
 Starting Movement 36
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 37
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 38
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 39
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window Calculations Finished.
 Starting Movement 40
   Repetition 1 started
   Repetition 2 started
   Repetition 3 started
   Repetition 4 started
   Repetition 5 started
   Repetition 6 started
  Window C

KeyboardInterrupt: 

### Deal with missing values
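Subjects 6 and 7 from DB3 have no data for the two digit sensors (Flexor Digit and Extensor Digit), so those channels are filled in with the average of the DB3 subjects that have complete recordings.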

In [347]:
# Function to unify length of repetition across subjects that weren't missing sensors or had reduced number of repetitions
def IndMovRep(dataset, movement, repetition, subjects = (1, 2, 4, 5, 10, 11), maxLength = 1000):
    """Average the digit-sensor values of the reference subjects for one repetition.

    Every subject's rows are truncated to the shortest subject's row count
    (capped at `maxLength`), then the 'Extensor Digit' and 'Flexor Digit'
    columns are averaged row by row across subjects.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Rows of a single movement and repetition; must contain the columns
        'Extensor Digit', 'Flexor Digit' and 'Subject'.
    movement, repetition
        Values written into the 'Movement' and 'Repetition' columns of the result.
    subjects : sequence of int
        Subjects to average over. Defaults to the subjects with full data records.
    maxLength : int
        Upper bound on the number of rows returned.

    Returns
    -------
    pandas.DataFrame
        Columns 'Extensor Digit', 'Flexor Digit', 'Movement', 'Repetition'.

    Raises
    ------
    ValueError
        If `subjects` is empty.
    """
    subjects = list(subjects)
    if not subjects:
        raise ValueError('At least one subject is required to compute an average')

    digitCols = ['Extensor Digit', 'Flexor Digit']

    # Pick the digit columns by name rather than by position, so the result
    # stays correctly labelled whatever the column order of `dataset` is.
    perSubject = [dataset.loc[dataset['Subject'] == subject, digitCols].values
                  for subject in subjects]

    # Find the shortest repetition duration
    minLength = min([maxLength] + [len(values) for values in perSubject])

    # Sum the truncated recordings, then divide by the number of subjects
    total = np.zeros((minLength, len(digitCols)))
    for values in perSubject:
        total = total + values[0:minLength]

    subsBlank = pd.DataFrame(total/len(subjects), columns=digitCols)
    subsBlank['Movement'] = movement
    subsBlank['Repetition'] = repetition
    return(subsBlank)

In [348]:
def replaceNaN(feature):
    """Fill the digit-sensor columns of DB3 subjects 6 and 7 for one feature file.

    Reads `dataset/DB3_E1_<feature>.csv`, replaces zeros with NaN, and for
    subjects 6 and 7 substitutes the 'Extensor Digit' and 'Flexor Digit' values
    with the per-movement, per-repetition averages returned by `IndMovRep`.
    Rows of subjects 6 and 7 that still contain NaN afterwards are dropped.

    Files written:
      * `dataset/DB3_E1_<feature>_NaN.csv` - the frame with zeros replaced by NaN.
      * `dataset/DB3_E1_<feature>.csv` - overwritten with the filled data.

    NOTE(review): the input file is overwritten, so running this twice makes the
    second run (and its `_NaN.csv` copy) start from already-filled data.

    Parameters
    ----------
    feature : str
        Feature label used in the file names, e.g. 'RMS'.
    """
    featurePath = 'dataset/DB3_E1_' + feature + '.csv'
    digitCols = ['Extensor Digit', 'Flexor Digit']

    complete_df = pd.read_csv(featurePath, index_col=0)
    complete_df = complete_df.replace(0, np.nan)

    # Keep only the digit sensors and the info columns of the other subjects
    noNaN_df = complete_df.drop((jointIndex + ['Biceps', 'Triceps']), axis=1)
    noNaN_df = noNaN_df[~noNaN_df.Subject.isin([6,7])]

    # Average digit-sensor values for every movement/repetition pair
    meanReps = []
    for movement in range(1,18):
        subs_df = noNaN_df[noNaN_df['Movement'] == movement]
        for rep in range(1,7):
            subsRep_df = subs_df[subs_df['Repetition'] == rep]
            meanReps.append(IndMovRep(subsRep_df, movement, rep))
    filled_df = pd.concat(meanReps, ignore_index=True)

    solvedParts = []

    for sub in [6,7]:
        missing_df = complete_df[complete_df.Subject.isin([sub])].drop(digitCols, axis=1)
        print(len(missing_df), len(complete_df), len(filled_df))

        for movement in range (1,18):
            miss_mov_df = missing_df[missing_df['Movement'] == movement]
            filled_mov_df = filled_df[filled_df['Movement'] == movement]
            for rep in range(1,7):
                # Both frames are re-indexed from 0 so the averaged values line
                # up with the subject's rows by position.
                miss_rep_df = miss_mov_df[miss_mov_df['Repetition'] == rep].reset_index(drop=True)
                filled_rep_df = filled_mov_df[filled_mov_df['Repetition'] == rep].reset_index(drop=True)

                # Rows beyond the length of the averaged data receive NaN here
                # and are removed by dropna() below.
                miss_rep_df['Extensor Digit'] = filled_rep_df['Extensor Digit']
                miss_rep_df['Flexor Digit'] = filled_rep_df['Flexor Digit']

                solvedParts.append(miss_rep_df)

    solved_df = pd.concat(solvedParts, ignore_index=True).dropna().reset_index(drop=True)

    # Add to DB3
    complete_df.to_csv('dataset/DB3_E1_' + feature + '_NaN.csv')
    no67_df = complete_df[~complete_df.Subject.isin([6,7])]
    fixed_df = pd.concat([no67_df, solved_df], ignore_index=True, sort=False)
    fixed_df.to_csv(featurePath)

In [349]:
# Fill the missing digit-sensor values in each of the three feature files
featureLabels = ['RMS', 'MAV', 'WL']
for feature in featureLabels:
    replaceNaN(feature)

65071 572281 31139
63509 572281 31139


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


65071 572281 31139
63509 572281 31139


In [366]:
# Load the RMS features of both databases and stack them into one frame.
# pd.concat is used because DataFrame.append no longer exists in pandas 2.x.
sdf = pd.read_csv('dataset/DB3_E1_RMS.csv', index_col=0)
sdf2 = pd.read_csv('dataset/DB2_E1_RMS.csv', index_col=0)
sdf = pd.concat([sdf, sdf2], ignore_index=True)

In [367]:
# Number of missing values in each column
sdf.isna().sum()

Biceps            0
Extensor Digit    0
Flexor Digit      0
Joint 1           0
Joint 2           0
Joint 3           0
Joint 4           0
Joint 5           0
Joint 6           0
Joint 7           0
Joint 8           0
Movement          0
Repetition        0
Subject           0
Triceps           0
dtype: int64

In [359]:
# Remove every row that has at least one missing value, then recount
sdf = sdf.dropna(axis=0, how='any')
sdf.isna().sum()

In [None]:
# NOTE(review): `sdf` is built in an earlier cell from the DB3 and DB2 *RMS* files,
# but this writes it to the DB3 *WL* path, which would overwrite the waveform-length
# features with combined RMS data - confirm the intended target before running.
sdf.to_csv('dataset/DB3_E1_WL.csv')