# Terrain Classification - Combined User Data
### Created by Keenan McConkey 2019.08.01

In [1]:
from __future__ import absolute_import, division, print_function

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import pymrmr
import sklearn

## Part 1 - Importing Preprocessed Data

### Part (a) - Functions for Data Import

In [2]:
# All the terrains, placements, vectors, power-assistance, users in the study

# Terrain names; a terrain's position in this list is the integer id returned by get_terrain_num
terrains = ['Concrete', 'Carpet', 'Linoleum', 'Asphalt', 'Sidewalk', 'Grass', 'Gravel']
# Power-assistance types iterated by the data import loop
powers = ['Manual'] # TODO: Fix power PSD data and add back in
# Placements available for each power-assistance type
placements_manual = ['Middle', 'Left', 'Right', 'Synthesis']
placements_power = ['Middle']
# Feature vector types that are read from .csv; a later cell appends 'AllFeats' and 'Combined'
vectors = ['TimeFeats', 'FreqFeats', 'FFTs', 'PSDLogs']
# User keys of the innermost dictionary level; also part of each .csv file name
users = ['All']
# Axis names, matched as substrings of column names when axes are evaluated separately
axes = ['X Accel', 'Y Accel', 'Z Accel', 'X Gyro', 'Y Gyro', 'Z Gyro']

In [3]:
def get_terrain_num(_label):
    '''Return the integer id of the first terrain whose name occurs in _label.

    The id is the terrain's position in the module-level `terrains` list. The
    match is a substring test, so _label may contain extra text around the name.
    Raises Exception('Unknown terrain') when no terrain name is found.
    '''
    terrain_num = next((idx for idx, name in enumerate(terrains) if name in _label), None)
    if terrain_num is None:
        raise Exception('Unknown terrain')

    return terrain_num

def get_terrain_name(terrain_num):
    '''Return the terrain name stored at position terrain_num of the module-level `terrains` list.

    Raises IndexError (from the list lookup) when terrain_num is out of range.
    '''
    return terrains[terrain_num]

def get_placement(_label):
    '''Return the first placement name that occurs as a substring of _label.

    Candidates come from the global `placements`, which is not a constant: it is
    assigned inside the data import loop, so this function only works after that
    cell has run and sees whichever placement list the last pass left behind.
    Raises Exception('Unknown placement') when nothing matches.
    '''
    found = next((candidate for candidate in placements if candidate in _label), None)
    if found is None:
        raise Exception('Unknown placement')

    return found

def get_transform(_label):
    '''Return the first transform name that occurs as a substring of _label.

    Candidates come from a global named `transforms`.
    NOTE(review): no visible cell of this notebook defines `transforms`, so a call
    presumably fails with NameError unless it is defined elsewhere - confirm.
    Raises Exception('Unknown transform') when nothing matches.
    '''
    found = next((candidate for candidate in transforms if candidate in _label), None)
    if found is None:
        raise Exception('Unknown transform')

    return found

### Part (b) - Import Processed Data from Each User

In [4]:
def combine_datasets(datasets):
    '''Stack every dataframe held in a {label: dataframe} dictionary into one dataframe.

    Rows are appended in dictionary order and renumbered from 0. Columns are the
    union of all input columns, kept in order of first appearance (not sorted);
    cells for columns a given input lacks are filled with NaN.
    '''
    frames = [frame for frame in datasets.values()]
    stacked = pd.concat(frames, ignore_index=True, sort=False)

    return stacked

In [5]:
# Build power_dict by reading one preprocessed .csv per power / placement / vector / user
# and then adding two derived vectors per placement:
#   'AllFeats' - TimeFeats + FreqFeats columns side by side
#   'Combined' - TimeFeats + FreqFeats + FFTs + PSDLogs columns side by side
# NOTE: this cell iterates the global `vectors` list, which a later cell extends with
# 'AllFeats' and 'Combined'. Re-running this cell after that one would also try to read
# .csv files for those two names - restart the kernel before re-running.
path = 'processed_data/new_setup/new_normalization/' 

# Nested dictionary of processed data:
# - Power assistance type
# -- Placement
# --- Feature Vector
# ---- User
power_dict = {}

# Create each nesting of the dictionary
for power in powers:
    placement_dict = {}
    
    # Power datasets only have middle placement (for now)
    # NOTE: `placements` stays defined as a global after this loop and is read by
    # get_placement and by the top mRMR feature cell further down
    if power == 'Power':
        placements = placements_power.copy()
    else:
        placements = placements_manual.copy()
    
    for placement in placements:
        vector_dict = {}

        for vector in vectors:
            user_dict = {}

            for user in users:
                # File name based on above parameters,
                # i.e. <power>/<placement>_<vector>_<user>[_Power].csv relative to path
                filename = power.lower() + '/' + placement + '_' + vector + '_' + user 
                if power == 'Power':
                    filename += '_Power'
                filename += '.csv'
                
                # Read data and update current user dictionary
                data = pd.read_csv(path + filename)
                user_dict.update({user: data})

            # Combine users to form a new entry of user dictionary, save to .csv
            # NaNs arise when you combine Synthesis feature vectors
            #combined_data = combine_datasets(user_dict).dropna(axis='columns')
            #user_dict.update({'All': combined_data})

            vector_dict.update({vector: user_dict})
        
        # Create a dictionary of the combined feature vectors for each user
        combined_vector_user_dict = {}
        combined_extracted_vector_user_dict = {}
        
        # user_dict is the one left over from the last vector read above; only its keys are used
        for user in user_dict.keys():
            # Collect the extracted vectors / all vectors for current user without their label column
            user_extracted_vectors = []
            user_all_vectors = []
            
            # Extracted vectors only
            for extracted_vector in ['TimeFeats', 'FreqFeats']:
                user_vector = vector_dict[extracted_vector][user]
                # Append everything except the label column, since the label column is
                # expected to be the same for all vectors
                labels = user_vector['Label']
                user_extracted_vectors.append(user_vector.loc[:, user_vector.columns != 'Label'])
            
            # Add the extracted vectors to the fully combined vector
            user_all_vectors.extend(user_extracted_vectors)
            
            # Add in the transform vectors
            for transform_vector in ['FFTs', 'PSDLogs']:
                user_vector = vector_dict[transform_vector][user]
                # Append everything except the label column, since the label column is
                # expected to be the same for all vectors
                labels = user_vector['Label']
                user_all_vectors.append(user_vector.loc[:, user_vector.columns != 'Label'])
            
            # Combine extracted vectors and add back label column
            # (at this point `labels` is the Label column of the last vector read, 'PSDLogs')
            combined_extracted_vector = pd.concat(user_extracted_vectors, axis='columns')
            combined_extracted_vector.insert(loc=0, column='Label', value=labels)
            combined_extracted_vector_user_dict.update({user: combined_extracted_vector})
            
            # Combine vectors and add back label column
            combined_vector = pd.concat(user_all_vectors, axis='columns')
            combined_vector.insert(loc=0, column='Label', value=labels)
            combined_vector_user_dict.update({user: combined_vector})
        
        # Add the combined feature vector to the vector dictionary
        vector_dict.update({'AllFeats': combined_extracted_vector_user_dict})
        vector_dict.update({'Combined': combined_vector_user_dict})
        
        placement_dict.update({placement: vector_dict})
    
    power_dict.update({power: placement_dict})

In [6]:
# Update vectors to reflect new combined vectors.
# Only names that are not already present are appended, so running this cell
# more than once does not leave duplicate entries in `vectors`.
vectors.extend([name for name in ['AllFeats', 'Combined'] if name not in vectors])

In [7]:
# Check some data
power_dict['Manual']['Middle']['AllFeats']['All'].tail()

Unnamed: 0,Label,Mean X Accel Middle,Std X Accel Middle,Norm X Accel Middle,AC X Accel Middle,Max X Accel Middle,Min X Accel Middle,RMS X Accel Middle,ZCR X Accel Middle,Skew X Accel Middle,...,MSF Y Gyro Middle,RMSF Y Gyro Middle,FC Y Gyro Middle,VF Y Gyro Middle,RVF Y Gyro Middle,MSF Z Gyro Middle,RMSF Z Gyro Middle,FC Z Gyro Middle,VF Z Gyro Middle,RVF Z Gyro Middle
16065,4,-2.99349,0.058487,0.409874,0.094989,-0.334863,-0.373391,0.409337,0.086221,0.14274,...,0.201499,0.868215,1.188687,-1.518781,0.868215,0.617916,0.930225,-0.285041,0.168249,0.930225
16066,4,-5.150751,-0.42082,0.824175,0.433643,-0.860113,-0.373391,0.823611,-1.417281,-0.250344,...,-0.071406,0.332945,1.279011,-1.683046,0.332945,6.661553,4.234812,-0.09153,0.137974,4.234812
16067,4,-2.704405,-0.045126,0.249784,-0.019371,-0.793978,-0.047606,0.249256,-0.414946,-0.222345,...,-0.276171,-0.438779,0.745796,-0.775163,-0.438779,-0.055719,0.219836,-0.403381,0.182817,0.219836
16068,4,2.020397,-0.459843,-0.191738,-0.287109,-0.497873,0.697808,-0.192238,-0.359261,-0.638358,...,-0.267376,-0.372804,0.803745,-0.866864,-0.372804,1.136647,1.362874,-0.324626,0.173604,1.362874
16069,4,2.414125,0.033483,0.31222,0.024136,-0.173717,0.378144,0.311688,-0.971799,-0.985338,...,-0.276833,-0.444297,0.375796,-0.229739,-0.444297,1.937146,1.927521,-0.04182,0.127822,1.927521


In [8]:
# Check some data
power_dict['Manual']['Synthesis']['Combined']['All'].tail()

Unnamed: 0,Label,Mean Calc X Vel Synthesis,Std Calc X Vel Synthesis,Norm Calc X Vel Synthesis,AC Calc X Vel Synthesis,Max Calc X Vel Synthesis,Min Calc X Vel Synthesis,RMS Calc X Vel Synthesis,ZCR Calc X Vel Synthesis,Skew Calc X Vel Synthesis,...,PSDLog 54 Hz Right Z Gyro Synthesis,PSDLog 55 Hz Right Z Gyro Synthesis,PSDLog 56 Hz Right Z Gyro Synthesis,PSDLog 57 Hz Right Z Gyro Synthesis,PSDLog 58 Hz Right Z Gyro Synthesis,PSDLog 59 Hz Right Z Gyro Synthesis,PSDLog 60 Hz Right Z Gyro Synthesis,PSDLog 61 Hz Right Z Gyro Synthesis,PSDLog 62 Hz Right Z Gyro Synthesis,PSDLog 63 Hz Right Z Gyro Synthesis
17889,4,-0.793959,1.06555,-0.775904,-0.72222,-0.172572,-0.817171,-0.777825,-0.153511,1.123875,...,1.466184,1.508826,1.498868,1.499795,1.48731,1.45792,1.448934,1.489991,1.482408,1.457884
17890,4,0.033568,1.112526,0.049535,-0.137233,0.166666,-0.425848,0.048307,-0.153511,-0.798948,...,1.578223,1.563516,1.572959,1.573727,1.574875,1.576434,1.569256,1.565444,1.565786,1.560195
17891,4,-0.105755,2.305017,-0.03171,-0.205648,0.166666,-1.095395,-0.033006,-0.153511,-1.220858,...,-0.002977,0.003201,-0.023648,0.028786,0.044417,0.046217,0.042515,0.054399,0.038207,0.077722
17892,4,-1.251162,2.456792,-1.083621,-0.877803,-0.182611,-1.430997,-1.082005,-0.153511,1.513885,...,-0.661928,-0.652665,-0.628604,-0.644654,-0.625274,-0.576349,-0.573515,-0.598476,-0.600726,-0.544136
17893,4,-1.848859,-1.202419,-2.05455,-1.146205,-2.072623,-1.430997,-1.937256,-0.153511,2.309932,...,-1.161979,-1.159322,-1.131727,-1.110872,-1.096042,-1.076248,-1.07304,-1.049085,-1.042129,-1.024189


## Part 2 - mRMR (minimum Redundancy Maximum Relevance)

mRMR selects the features that share the most information with the class label (here, the terrain) while sharing the least information with the other selected features.

### Part (a) - Middle Frame Placement

#### Part (i) - Manual Wheelchair

In [9]:
pymrmr.mRMR(data=power_dict['Manual']['Middle']['TimeFeats']['All'], method='MID', nfeats=5)

['ZCR Y Gyro Middle',
 'ZCR Y Accel Middle',
 'ZCR Z Accel Middle',
 'ZCR X Accel Middle',
 'Std Y Accel Middle']

In [10]:
pymrmr.mRMR(data=power_dict['Manual']['Middle']['FreqFeats']['All'], method='MID', nfeats=5)

['FC Y Gyro Middle',
 'FC Y Accel Middle',
 'VF Z Accel Middle',
 'FC X Gyro Middle',
 'VF Y Gyro Middle']

In [11]:
pymrmr.mRMR(data=power_dict['Manual']['Middle']['AllFeats']['All'], method='MID', nfeats=5)

['FC Y Gyro Middle',
 'ZCR Y Accel Middle',
 'ZCR Z Accel Middle',
 'VF X Gyro Middle',
 'VF Y Gyro Middle']

In [12]:
pymrmr.mRMR(data=power_dict['Manual']['Middle']['FFTs']['All'], method='MID', nfeats=5)

['FFT 14 Hz Y Accel Middle',
 'FFT 1 Hz Y Gyro Middle',
 'FFT 2 Hz X Gyro Middle',
 'FFT 17 Hz Z Accel Middle',
 'FFT 21 Hz Y Accel Middle']

In [13]:
pymrmr.mRMR(data=power_dict['Manual']['Middle']['PSDLogs']['All'], method='MID', nfeats=5)

['PSDLog 33 Hz Y Gyro Middle',
 'PSDLog 0 Hz Y Gyro Middle',
 'PSDLog 56 Hz Y Accel Middle',
 'PSDLog 1 Hz X Gyro Middle',
 'PSDLog 59 Hz Y Accel Middle']

In [14]:
pymrmr.mRMR(data=power_dict['Manual']['Middle']['Combined']['All'], method='MID', nfeats=5)

['FC Y Gyro Middle',
 'PSDLog 12 Hz Z Accel Middle',
 'VF X Gyro Middle',
 'ZCR Z Accel Middle',
 'ZCR Y Accel Middle']

#### Part (ii) - Power Assist Wheelchair

In [15]:
#pymrmr.mRMR(data=power_dict['Power']['Middle']['Features']['All'], method='MID', nfeats=5)

In [16]:
#pymrmr.mRMR(data=power_dict['Power']['Middle']['FFTs']['All'], method='MID', nfeats=5)

In [17]:
#pymrmr.mRMR(data=power_dict['Power']['Middle']['PSDLogs']['All'], method='MID', nfeats=5)

In [18]:
#pymrmr.mRMR(data=power_dict['Power']['Middle']['Combined']['All'], method='MID', nfeats=5)

### Part (b) - Left Wheel Placement

In [19]:
pymrmr.mRMR(data=power_dict['Manual']['Left']['TimeFeats']['All'], method='MID', nfeats=5)

['Std Z Accel Left',
 'ZCR Z Accel Left',
 'Min Z Gyro Left',
 'ZCR Y Accel Left',
 'ZCR Y Gyro Left']

In [20]:
pymrmr.mRMR(data=power_dict['Manual']['Left']['FreqFeats']['All'], method='MID', nfeats=5)

['FC Z Accel Left',
 'RVF Y Accel Left',
 'FC X Accel Left',
 'FC Y Accel Left',
 'RVF Z Accel Left']

In [21]:
pymrmr.mRMR(data=power_dict['Manual']['Left']['AllFeats']['All'], method='MID', nfeats=5)

['Std Z Accel Left',
 'FC Z Accel Left',
 'ZCR Y Gyro Left',
 'ZCR Z Accel Left',
 'Min Z Gyro Left']

In [22]:
pymrmr.mRMR(data=power_dict['Manual']['Left']['FFTs']['All'], method='MID', nfeats=5)

['FFT 20 Hz Z Accel Left',
 'FFT 43 Hz Z Accel Left',
 'FFT 14 Hz Z Accel Left',
 'FFT 63 Hz Z Accel Left',
 'FFT 38 Hz Z Accel Left']

In [23]:
pymrmr.mRMR(data=power_dict['Manual']['Left']['PSDLogs']['All'], method='MID', nfeats=5)

['PSDLog 27 Hz Z Accel Left',
 'PSDLog 62 Hz Z Accel Left',
 'PSDLog 17 Hz Z Accel Left',
 'PSDLog 40 Hz Z Accel Left',
 'PSDLog 48 Hz X Accel Left']

In [24]:
pymrmr.mRMR(data=power_dict['Manual']['Left']['Combined']['All'], method='MID', nfeats=5)

['PSDLog 27 Hz Z Accel Left',
 'FC Z Accel Left',
 'PSDLog 17 Hz Z Accel Left',
 'PSDLog 63 Hz Z Accel Left',
 'PSDLog 48 Hz X Accel Left']

### Part (c) - Right Wheel Placement

In [25]:
pymrmr.mRMR(data=power_dict['Manual']['Right']['TimeFeats']['All'], method='MID', nfeats=5)

['ZCR Z Accel Right',
 'RMS Z Accel Right',
 'Max Z Gyro Right',
 'ZCR Y Gyro Right',
 'Max Z Accel Right']

In [26]:
pymrmr.mRMR(data=power_dict['Manual']['Right']['FreqFeats']['All'], method='MID', nfeats=5)

['VF Z Accel Right',
 'RVF X Accel Right',
 'FC Y Accel Right',
 'RVF Z Accel Right',
 'FC X Accel Right']

In [27]:
pymrmr.mRMR(data=power_dict['Manual']['Right']['AllFeats']['All'], method='MID', nfeats=5)

['ZCR Z Accel Right',
 'RMS Z Accel Right',
 'VF Z Accel Right',
 'Max Z Gyro Right',
 'FC X Accel Right']

In [28]:
pymrmr.mRMR(data=power_dict['Manual']['Right']['FFTs']['All'], method='MID', nfeats=5)

['FFT 24 Hz Z Accel Right',
 'FFT 63 Hz Z Accel Right',
 'FFT 18 Hz Z Accel Right',
 'FFT 14 Hz Z Accel Right',
 'FFT 53 Hz Z Accel Right']

In [29]:
pymrmr.mRMR(data=power_dict['Manual']['Right']['PSDLogs']['All'], method='MID', nfeats=5)

['PSDLog 62 Hz Z Accel Right',
 'PSDLog 15 Hz Z Accel Right',
 'PSDLog 55 Hz Z Accel Right',
 'PSDLog 25 Hz Z Accel Right',
 'PSDLog 63 Hz Z Accel Right']

In [30]:
pymrmr.mRMR(data=power_dict['Manual']['Right']['Combined']['All'], method='MID', nfeats=5)

['ZCR Z Accel Right',
 'PSDLog 16 Hz Z Accel Right',
 'VF Z Accel Right',
 'PSDLog 26 Hz Z Accel Right',
 'PSDLog 62 Hz Z Accel Right']

### Part (d) - Synthesis "Placement"

In [31]:
pymrmr.mRMR(data=power_dict['Manual']['Synthesis']['TimeFeats']['All'], method='MID', nfeats=5)

['ZCR Right Z Accel Synthesis',
 'Min Left XY Accel Synthesis',
 'Min Calc X Vel Synthesis',
 'Min Right XY Accel Synthesis',
 'RMS Right Z Accel Synthesis']

In [32]:
pymrmr.mRMR(data=power_dict['Manual']['Synthesis']['FreqFeats']['All'], method='MID', nfeats=5)

['VF Left XY Accel Synthesis',
 'RMSF Left XY Accel Synthesis',
 'VF Right Z Accel Synthesis',
 'FC Right XY Accel Synthesis',
 'FC Left Z Accel Synthesis']

In [33]:
pymrmr.mRMR(data=power_dict['Manual']['Synthesis']['AllFeats']['All'], method='MID', nfeats=5)

['VF Left XY Accel Synthesis',
 'Min Left XY Accel Synthesis',
 'ZCR Right Z Accel Synthesis',
 'FC Right XY Accel Synthesis',
 'VF Right Z Accel Synthesis']

In [34]:
pymrmr.mRMR(data=power_dict['Manual']['Synthesis']['FFTs']['All'], method='MID', nfeats=5)

['FFT 25 Hz Left XY Accel Synthesis',
 'FFT 0 Hz Calc X Vel Synthesis',
 'FFT 17 Hz Right Z Accel Synthesis',
 'FFT 28 Hz Right XY Accel Synthesis',
 'FFT 58 Hz Left XY Accel Synthesis']

In [35]:
pymrmr.mRMR(data=power_dict['Manual']['Synthesis']['PSDLogs']['All'], method='MID', nfeats=5)

['PSDLog 35 Hz Left XY Accel Synthesis',
 'PSDLog 16 Hz Right Z Accel Synthesis',
 'PSDLog 49 Hz Right XY Accel Synthesis',
 'PSDLog 50 Hz Left XY Accel Synthesis',
 'PSDLog 18 Hz Left XY Accel Synthesis']

In [36]:
pymrmr.mRMR(data=power_dict['Manual']['Synthesis']['Combined']['All'], method='MID', nfeats=5)

['VF Left XY Accel Synthesis',
 'Min Left XY Accel Synthesis',
 'ZCR Right Z Accel Synthesis',
 'FC Right XY Accel Synthesis',
 'VF Right Z Accel Synthesis']

### Part (e) - Nested Dictionary of Top Features

In [None]:
# Create dictionary of the top mRMR features (up to 50 per feature vector) so later cells
# can look the selection up instead of re-running mRMR.
# Same nesting as power_dict: power -> placement -> vector -> user -> list of feature names.
# The placements and vectors are taken from power_dict itself rather than from the global
# `placements` / `vectors` lists: `placements` is only whatever the last pass of the import
# loop left behind, and `vectors` can be changed by other cells.
power_dict_features = {}

for power, power_placements in power_dict.items():
    placement_dict_features = {}

    for placement, placement_vectors in power_placements.items():
        vector_dict_features = {}

        for vector, vector_users in placement_vectors.items():
            # Only calculate for all users for now
            vector_dict_features[vector] = {
                user: pymrmr.mRMR(data=vector_users[user], method='MID', nfeats=50)
                for user in ['All']
            }

        placement_dict_features[placement] = vector_dict_features

    power_dict_features[power] = placement_dict_features

In [None]:
# Pickle the dictionary
import pickle

mRMR_dict_filename = '/home/caris/Wheelchair-Terrain-Classification/dicts/mRMR_Top50_Dictionary.pkl'

# The context manager closes the file even if pickling fails part-way
with open(mRMR_dict_filename, 'wb') as outfile:
    pickle.dump(power_dict_features, outfile)

In [None]:
# Unpickle the dictionary
#infile = open(mRMR_dict_filename, 'rb')
#power_dict_features = pickle.load(infile)
#infile.close()

## Part 3 - PCA (Principal Component Analysis)

In [None]:
from sklearn.decomposition import PCA

def get_pca_df(combined_data, n_components=2):
    '''Project the feature columns of a labelled dataframe onto principal components.

    combined_data - dataframe with a 'Label' column plus feature columns; not modified
    n_components  - passed straight to sklearn's PCA

    Returns a dataframe with the original 'Label' column followed by one column per
    component named 'PC 1', 'PC 2', ... and the same row index as combined_data.
    '''
    # Split labels from features without touching the caller's frame
    labels = combined_data['Label']
    features = combined_data.drop(columns='Label')

    # Fit PCA and project the features
    pca = PCA(n_components=n_components)
    pc = pca.fit_transform(features)

    # Reuse the label index so the column-wise concat below pairs every row with its
    # own label even when the input index is not 0..n-1, and take the number of
    # columns from the result so a non-integer n_components (e.g. None) also works
    pc_df = pd.DataFrame(data=pc, index=labels.index,
                         columns=['PC {}'.format(i + 1) for i in range(pc.shape[1])])

    # Uncomment to inspect how much variance the components explain
    #print('Explained Variance of Each PC: {}'.format(pca.explained_variance_ratio_))
    #print('Total Explained Variance: {}'.format(np.sum(pca.explained_variance_ratio_)))

    return pd.concat([labels, pc_df], axis='columns')

In [None]:
get_pca_df(power_dict['Manual']['Synthesis']['Combined']['All']).tail()

In [None]:
get_pca_df(power_dict['Manual']['Synthesis']['Combined']['All'], n_components=5).tail()

In [None]:
def visualize_2d_pca(pca_2d_df, figsize=(8, 8)):
    '''Scatter plot the first two principal components, one series per terrain.

    pca_2d_df - dataframe with 'Label', 'PC 1' and 'PC 2' columns, e.g. from get_pca_df
    figsize   - figure size handed to matplotlib

    Shows the figure and returns nothing.
    '''
    # A dedicated figure/axes pair; no plt.clf(), which acted on whatever figure
    # happened to be current and could leave an extra blank figure in the output
    fig, ax = plt.subplots(figsize=figsize)
    ax.set_xlabel('Principal Component 1')
    ax.set_ylabel('Principal Component 2')

    # Scatter plot of each terrain, labelled directly so the legend cannot get out of step
    for terrain in terrains:
        terrain_rows = pca_2d_df['Label'] == get_terrain_num(terrain)
        ax.scatter(pca_2d_df.loc[terrain_rows, 'PC 1'],
                   pca_2d_df.loc[terrain_rows, 'PC 2'],
                   label=terrain)

    ax.legend()
    plt.show()

In [None]:
visualize_2d_pca(get_pca_df(power_dict['Manual']['Right']['FreqFeats']['All']))

## Part 4 - Comparing Classifier Accuracy

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Dictionary of classifiers: display name -> estimator instance.
# The display names become the column names of the accuracy tables and, with spaces
# removed, the prefix of the pickled model file names.
# NOTE(review): no random_state is passed to any estimator, so results of the
# randomized ones presumably differ between runs - confirm whether that is intended.
classifiers = {'Naive Bayes': GaussianNB(),
               'k Nearest': KNeighborsClassifier(),
               'Decision Tree': DecisionTreeClassifier(), 
               'Random Forest': RandomForestClassifier(n_estimators=100),
               'AdaBoost': AdaBoostClassifier(),
               'Support Vector Machine': SVC(gamma='scale')}

In [None]:
from sklearn.model_selection import KFold

def train_test_k_fold(combined_data, n_splits, model, random_state=None):
    '''Run shuffled k-fold cross-validation of a model on a labelled feature dataframe.

    combined_data - dataframe with a 'Label' column plus feature columns; it is only
                    read, never modified
    n_splits      - number of folds
    model         - estimator exposing fit(X, y) and predict(X); it is refit on every
                    fold, so on return it is fitted on the last fold's training rows
    random_state  - optional seed for the fold shuffle; None keeps the split unseeded

    Returns a tuple (actual, predicted) of two lists with one entry per fold: the true
    labels of that fold's test rows and whatever model.predict returned for them.
    '''
    # Shuffle ensures we get a mix of terrains
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # Separate labels from features without stripping the column off the caller's frame,
    # so an exception during fitting cannot leave that frame without its 'Label' column
    labels = combined_data['Label']
    data = combined_data.drop(columns='Label')

    # Actual and predicted labels for each k fold
    test_k_fold = []
    predict_k_fold = []

    # Split into n splits
    for train_index, test_index in kf.split(data):
        # KFold yields row positions, not index labels, hence iloc rather than loc
        train, test = data.iloc[train_index], data.iloc[test_index]
        train_labels, test_labels = labels.iloc[train_index], labels.iloc[test_index]

        # Actual labels
        test_k_fold.append(test_labels)

        # Train and test model
        model.fit(train, train_labels)
        predict_k_fold.append(model.predict(test))

    return (test_k_fold, predict_k_fold)

In [None]:
def train_model_on_all(combined_data, model):
    '''Fit a model on every row of a labelled feature dataframe and return it.

    combined_data - dataframe with a 'Label' column plus feature columns; it is only
                    read, never modified
    model         - estimator exposing fit(X, y); fitted in place

    Returns the same model object that was passed in, now fitted.
    '''
    # Separate labels from features without stripping the column off the caller's frame,
    # so a failing fit cannot leave that frame without its 'Label' column
    labels = combined_data['Label']
    features = combined_data.drop(columns='Label')

    # Train the model
    model.fit(features, labels)

    # Return trained model
    return model

In [None]:
def create_accuracy_table(n_splits, power_type='Manual', user_name='All', separate_axes=False,
                          feat_selection='None', n_feats=None, verbose=False):
    '''Create a table of accuracies and associated trained models for each feature vector and classifier.

    n_splits       - number of k-fold splits used to estimate accuracy
    power_type     - key into power_dict; 'Manual' uses placements_manual, anything else
                     uses placements_power
    user_name      - user key into power_dict
    separate_axes  - if True, evaluate each entry of `axes` separately, keeping only the
                     columns whose name contains that axis
    feat_selection - 'mRMR' keeps the top features listed in power_dict_features,
                     'PCA' replaces the features with principal components,
                     any other value uses every feature
    n_feats        - number of mRMR features / PCA components; None keeps every listed
                     mRMR feature and is passed through unchanged to get_pca_df
    verbose        - print a line after each classifier / vector combination

    Returns (accuracy_table, model_dict):
      accuracy_table - dataframe with a 'Vector' column ('<placement> <vector>[ <axis>]')
                       and one column of mean k-fold accuracy per classifier
      model_dict     - {vector name: {classifier name: model fitted on all of that data}}

    NOTE(review): mRMR / PCA are computed on the full data before it is split into
    folds, so the reported accuracies are presumably optimistic - confirm if that matters.
    '''
    # Imported here so this cell does not depend on another cell's imports
    from sklearn.base import clone

    # Power type affects which placements are available
    if power_type == 'Manual':
        placements = placements_manual
    else:
        placements = placements_power

    # Iterate through axes if we want them separate, else a single pass over everything
    itr_axes = axes if separate_axes else ['All']

    # Row names of the accuracy table, one per placement / vector (/ axis)
    if separate_axes:
        vector_indices = [p + ' ' + v + ' ' + a for p in placements for v in vectors for a in axes]
    else:
        vector_indices = [p + ' ' + v for p in placements for v in vectors]

    # Vector column contains each vector for each placement and axes
    accuracy_table = pd.DataFrame({'Vector': vector_indices})

    # Nested dictionary of trained models:
    ## Vector type
    ### Classifier name
    model_dict = {vector_index: {} for vector_index in vector_indices}

    # Calculate accuracy for each placement for each feature vector and classifier
    for classifier_name, classifier in classifiers.items():
        # Row dictionary for given classifier
        accuracy_rows = {}

        for placement in placements:
            for vector in vectors:
                # Extract data for above parameters; copied so nothing done below can
                # alter the frames stored in power_dict
                data = power_dict[power_type][placement][vector][user_name].copy()

                for axis in itr_axes:
                    index_name = placement + ' ' + vector
                    data_temp = data

                    # If separate axes, get data with only columns matching current axes
                    if separate_axes:
                        index_name += ' ' + axis
                        matching_columns = [column for column in data.columns if axis in column]
                        matching_columns.insert(0, 'Label')
                        data_temp = data_temp[matching_columns]

                    # Use only the top features using mRMR feature selection
                    if feat_selection == 'mRMR':
                        top_feats = list(power_dict_features[power_type][placement][vector][user_name])
                        # n_feats=None means "every stored feature" instead of a TypeError
                        if n_feats is not None:
                            top_feats = top_feats[:n_feats]
                        data_temp = data_temp[['Label'] + top_feats]

                    # Run PCA on the data
                    elif feat_selection == 'PCA':
                        data_temp = get_pca_df(data_temp, n_components=n_feats)

                    # Cross-validate on a fresh, unfitted copy of the classifier
                    actual, predict = train_test_k_fold(data_temp, n_splits, clone(classifier))

                    # Take mean accuracy of k fold testing
                    accuracies = [accuracy_score(fold_actual, fold_predict)
                                  for fold_actual, fold_predict in zip(actual, predict)]

                    # Update row dictionary
                    accuracy_rows.update({index_name: np.mean(accuracies)})

                    # Train a model on the entire feature vector data and keep it for pickling.
                    # A separate clone per entry is essential: reusing the single instance from
                    # `classifiers` would make every entry of model_dict point at one object
                    # that ends up fitted only on the last placement / vector processed.
                    trained_model = train_model_on_all(data_temp, clone(classifier))
                    model_dict[index_name].update({classifier_name: trained_model})

                    if verbose:
                        # index_name also carries the axis when axes are evaluated separately
                        print('Finished training {} on {}'.format(classifier_name, index_name))

        # Update accuracy table with classifier column by mapping row names to indices
        accuracy_table[classifier_name] = accuracy_table['Vector'].map(accuracy_rows)

    return accuracy_table, model_dict

In [None]:
import pickle
models_path = '/home/caris/Wheelchair-Terrain-Classification/models/'

def pickle_all_model_dict(model_dict, file_tag=None, models_dir=None):
    '''Pickle all the elements of a model dict to individual files.

    model_dict - {feature type: {classifier name: trained model}}
    file_tag   - optional suffix appended to every file name
    models_dir - directory prefix the files are written under, joined by plain string
                 concatenation so it must end with a path separator; defaults to the
                 module-level models_path

    Files are named <classifier name without spaces>_<feature type with '_' for
    spaces>[_<file_tag>].pkl. Nothing is returned.
    '''
    if models_dir is None:
        models_dir = models_path

    for feature_type, trained_by_classifier in model_dict.items():
        for classifier_name, trained_model in trained_by_classifier.items():
            # Get file name from parameters
            model_filename = models_dir + classifier_name.replace(' ', '')
            model_filename += '_' + feature_type.replace(' ', '_')

            # Add file tag if given
            if file_tag is not None:
                model_filename += '_' + file_tag
            model_filename += '.pkl'

            # Pickle the model; the context manager closes the file even if dumping fails
            with open(model_filename, 'wb') as outfile:
                pickle.dump(trained_model, outfile)

### Part (a) - Combined Axes Accuracy Table
#### Part (i) - No Feature Selection

In [None]:
# Create accuracy table for 5 k-fold splits without any feature selection
accuracy_table, model_dict = create_accuracy_table(n_splits=5, power_type='Manual', user_name='All')

In [None]:
accuracy_table

In [None]:
accuracy_table.to_csv('tables/Accuracy_Table.csv', index=False)

In [None]:
# Pickle all the models
pickle_all_model_dict(model_dict)

#### Part (ii) - mRMR Feature Selection

In [None]:
# Create accuracy table for 5 k-fold splits with mRMR feature selection
accuracy_table_mRMR, model_dict_mRMR = create_accuracy_table(n_splits=5, 
                                                             power_type='Manual', user_name='All',
                                                             feat_selection='mRMR', n_feats=20, 
                                                             verbose=True)

In [None]:
accuracy_table_mRMR

In [None]:
accuracy_table_mRMR.to_csv('tables/Accuracy_Table_mRMR.csv', index=False)

#### Part (iii) - PCA Feature Selection

In [None]:
# Create accuracy table for 5 k-fold splits with PCA feature selection (20 components)
accuracy_table_PCA, model_dict_PCA = create_accuracy_table(n_splits=5, 
                                                           power_type='Manual', user_name='All',
                                                           feat_selection='PCA', n_feats=20)

In [None]:
accuracy_table_PCA

In [None]:
accuracy_table_PCA.to_csv('tables/Accuracy_Table_PCA.csv', index=False)

In [None]:
# Create accuracy table for 5 k-fold splits with PCA feature selection (20 components).
# This is the same call as the earlier PCA cell and overwrites accuracy_table_PCA and
# model_dict_PCA; model_dict_PCA is what gets pickled in the next cell.
accuracy_table_PCA, model_dict_PCA = create_accuracy_table(n_splits=5, 
                                                           power_type='Manual', user_name='All',
                                                           feat_selection='PCA', n_feats=20)

In [None]:
# Pickle all the models
pickle_all_model_dict(model_dict_PCA, file_tag='PCA')

In [None]:
# Try loading pickled model.
# NOTE: pickle.load can execute arbitrary code, so only load files this notebook wrote.
# The context manager closes the file even if loading fails.
with open(models_path + 'RandomForest_Middle_TimeFeats_PCA.pkl', 'rb') as infile:
    pickled_model = pickle.load(infile)

pickled_model.get_params()

### Part (b) - Separate Axes Accuracy Table

#### Part (i) - PCA Feature Selection

In [None]:
# Create accuracy table for 5 k-fold splits with PCA feature selection (20 components),
# evaluating each sensor axis separately
accuracy_table_sep_PCA, model_dict_sep_PCA = create_accuracy_table(n_splits=5, 
                                                                   power_type='Manual', user_name='All',
                                                                   separate_axes=True, 
                                                                   feat_selection='PCA', n_feats=20)

In [None]:
accuracy_table_sep_PCA

In [None]:
accuracy_table_sep_PCA.to_csv('tables/Separated_Accuracy_Table_PCA.csv', index=False)

### Glossary

`Dataset` - Batch of data recorded on one terrain type

`Data Window` - Split up portion of a `Dataset`

`Direction / Axes` - Linear acceleration or gyroscope in $x,y$ or $z$

`Feature Vector` - Any feature of the data that can be used to classify terrain, e.g. Z Accel Mean, Y Accel FFT, etc

`Extracted Feature Vector` - Features that aren't from transforms, e.g. Z Accel Min, Y Accel Autocorrelation, etc

`Placement` - One of three IMU placements on the wheelchair, i.e. Middle, Left, or Right