# Configuration Model

In this notebook, I train a model to classify postures based on skeletal information.

In [6]:
# Paths: directory holding the raw recordings, and the merged feature table built from them
DATASET_BASE_DIR = 'trainingdata/'
MERGED_DF = DATASET_BASE_DIR + 'posture_features_and_labels.tsv'

In [8]:
from os import listdir
from os.path import isfile, join
import numpy as np
import pandas as pd

In [94]:
# Prepare training data
# Merge the raw posture recordings (one value per line) into a single
# tab-separated table with one row per sample.

# (substring looked for in the file name, numeric class label).
# Checked in this order, so a name matching several keywords gets the first label.
POSTURE_LABELS = (('standing', 0), ('sitting', 1), ('lying', 2))

# A sample occupies 10 consecutive lines of a raw file; only its first 7 lines
# are kept as features. NOTE(review): the meaning of the 3 dropped lines is not
# visible in this notebook - confirm against the dataset readme.
LINES_PER_RECORD = 10
FEATURES_PER_RECORD = 7


def _label_for(filename):
    """Return the class label for `filename`, or None if it names no known posture."""
    for keyword, label in POSTURE_LABELS:
        if keyword in filename:
            return label
    return None


def _read_labelled_rows(path, label):
    """Read one raw recording and return its samples as rows.

    Parameters
    ----------
    path : str
        Path of the raw text file (one value per line).
    label : int
        Class label appended to every row read from this file.

    Returns
    -------
    list of list
        One `[v1, ..., v7, label]` row per sample. The seven values are kept
        as the strings read from the file (newline removed), not parsed.
    """
    with open(path) as f:
        values = [line.replace('\n', '') for line in f]

    rows = []
    for start in range(0, len(values), LINES_PER_RECORD):
        sample = values[start:start + FEATURES_PER_RECORD]
        # A trailing fragment shorter than 7 lines cannot form a row and is dropped.
        if len(sample) == FEATURES_PER_RECORD:
            rows.append(sample + [label])
    return rows


files = [f for f in listdir(DATASET_BASE_DIR)
         if 'readme' not in f and isfile(join(DATASET_BASE_DIR, f))]

features_and_labels = []
for file in files:
    label = _label_for(file)
    if label is None:
        # File name mentions none of the known postures: ignore it.
        continue
    features_and_labels.extend(_read_labelled_rows(join(DATASET_BASE_DIR, file), label))

#Create Pandas dataframe now.
df = pd.DataFrame(columns=['Height', 'Angle1','Angle2','Angle3','Angle4','Angle5','Angle6','label'], data=features_and_labels)
print (df)

#Export it as a tab-separated file
df.to_csv(MERGED_DF, sep='\t', index=False)

        Height   Angle1   Angle2   Angle3   Angle4   Angle5   Angle6  label
0      1449.21  93.7099  89.9651      180  179.966  163.338  175.972      2
1       1475.8  110.801  104.773      180  179.972  165.175  172.429      2
2      1469.94  110.749  104.663      180      180  166.125  172.454      2
3      1499.01  116.487  108.382      180      180  171.196  175.473      2
4      1504.16  119.547  107.736      180      180  171.875  176.082      2
5      1487.45  112.784   103.15      180  179.972  171.467  175.775      2
6      1477.01    109.1  102.169  179.966  179.966  171.057   175.55      2
7      1462.61  107.283  99.6069  179.972      180  172.425  175.098      2
8      1469.86  110.012  104.006      180  179.966  175.406  176.261      2
9      1467.14  110.796  103.773  179.966  179.966  176.485  175.208      2
10     1452.27  111.067  104.776      180      180  175.591  174.016      2
11     1450.77  110.433  105.472      180  179.966  175.091  174.212      2
12     1453.

In [9]:
# Read the merged tab-separated feature table back from disk and preview the first rows
df = pd.read_csv(filepath_or_buffer=MERGED_DF, sep='\t')
df.head()

Unnamed: 0,Height,Angle1,Angle2,Angle3,Angle4,Angle5,Angle6,label
0,1449.21,93.7099,89.9651,180.0,179.966,163.338,175.972,2
1,1475.8,110.801,104.773,180.0,179.972,165.175,172.429,2
2,1469.94,110.749,104.663,180.0,180.0,166.125,172.454,2
3,1499.01,116.487,108.382,180.0,180.0,171.196,175.473,2
4,1504.16,119.547,107.736,180.0,180.0,171.875,176.082,2


In [10]:
# Import dem libz
from json import load
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import precision_recall_fscore_support as score


## Model building

We build a random forest classifier to classify posture configurations.

In [11]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.metrics import recall_score, precision_score, accuracy_score, f1_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import LeaveOneOut
# `sklearn.externals.joblib` was removed in scikit-learn 0.23. Prefer it when it
# exists (unchanged behaviour on old installations) and fall back to the
# standalone `joblib` package otherwise, so this cell imports on either.
try:
    from sklearn.externals import joblib
except ImportError:
    import joblib
from json import dumps
from sklearn import tree
# NOTE(review): the alias `pd0` looks like a typo for `pd`; it is kept as-is in
# case another cell refers to it. `pd` itself is imported in an earlier cell.
import pandas as pd0
import numpy as np
import os
import graphviz

# Path to the dataset
MERGED_DF = 'trainingdata/posture_features_and_labels.tsv'

# Create a seed for reproducibility
# NOTE(review): no visible cell uses SEED yet (the train/test split passes
# random_state=0 and the forest is unseeded) - confirm whether it should be wired in.
SEED = 42

Let's define the training/testing pipeline for a single (flat) configuration. We will then be able to loop through the different configurations to train and test a model for each.

In [12]:
from sklearn.ensemble import RandomForestClassifier
def tune_hyperparameters(X, y, num_folds=3, random_state=None):
    """Grid-search a random forest and return the best model by macro F1.

    Parameters
    ----------
    X : feature table passed unchanged to `GridSearchCV.fit`.
    y : target labels passed unchanged to `GridSearchCV.fit`.
    num_folds : int, default 3
        Number of cross-validation folds used for every grid point.
    random_state : int or None, default None
        Seed forwarded to the random forest. The default keeps the forest
        unseeded; pass an integer to make the search reproducible.

    Returns
    -------
    The estimator refitted on all of `X`, `y` with the hyperparameters that
    maximised the cross-validated `f1_macro` score.
    """
    # Base model; only the hyperparameters listed in the grid vary.
    rfc = RandomForestClassifier(n_jobs=-1, oob_score=True, criterion='entropy',
                                 random_state=random_state)
    param_grid = {
        'bootstrap': [True],
        'max_depth': [1, 2, 4, 5, 7, 100],
        'max_features': [0.33, 1, 3, 4, 5],
        'n_estimators': [60, 100, 300],
    }

    # Several metrics are recorded for each grid point, but only macro F1
    # decides which configuration is refitted and returned.
    CV_rfc = GridSearchCV(estimator=rfc, param_grid=param_grid, cv=num_folds,
                          scoring=['precision_macro', 'recall_macro', 'f1_macro', 'accuracy'],
                          refit='f1_macro')
    CV_rfc.fit(X, y)

    return CV_rfc.best_estimator_

In [13]:
def plot_tree(clf, features, output_file, class_names=None):
    """Render a fitted decision tree to `output_file` with Graphviz.

    Parameters
    ----------
    clf : fitted decision tree, or a fitted ensemble exposing `estimators_`
        (e.g. a random forest), in which case its first tree is drawn.
    features : list of str
        Feature names, in the column order used for training.
    output_file : str
        Path handed to `graphviz.Source.render`.
    class_names : list of str or None, default None
        Display names of the classes in ascending label order. When None,
        ['false', 'true'] is used for classifiers with at most two classes
        (the previous hard-coded behaviour); otherwise the classifier's own
        labels are used so that every class has a name.
    """
    # export_graphviz draws a single tree, so unwrap ensembles.
    # NOTE(review): assumes `estimators_` is a flat list of trees, as in a
    # random forest - other ensemble types may need different handling.
    estimator = clf.estimators_[0] if hasattr(clf, 'estimators_') else clf

    if class_names is None:
        classes = getattr(clf, 'classes_', None)
        if classes is not None and len(classes) > 2:
            class_names = [str(c) for c in classes]
        else:
            class_names = ['false', 'true']

    data = tree.export_graphviz(estimator, out_file=None,
                                feature_names=features,
                                class_names=class_names,
                                filled=True, rounded=True,
                                special_characters=True)

    # Build the Graphviz source object from the DOT description
    graph = graphviz.Source(data)

    # Write the rendered tree to disk
    graph.render(output_file)

In [14]:
def save_model(clf, output_file):
    """Persist the fitted model `clf` to `output_file` using joblib."""
    joblib.dump(value=clf, filename=output_file)

In [19]:
# Feature columns fed to the classifier; the target is the posture label
features = ['Height'] + ['Angle%d' % n for n in range(1, 7)]
X, y = df[features], df['label']

# One [accuracy, precision, recall, fscore] row per evaluated model
model_performances = []

# Hold out 20% of the samples for the final evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
print(len(X_train))
print(len(X_test))

# Grid-search the hyperparameters on the training split only
print('Tuning')
tuned_model = tune_hyperparameters(X_train, y_train)

# Evaluate the selected model on the held-out split
print('Testing')
y_true = y_test
y_pred = tuned_model.predict(X_test)
accuracy = tuned_model.score(X_test, y_test)
precision, recall, fscore, support = score(y_true, y_pred, average='weighted')
model_performances.append([accuracy, precision, recall, fscore])

# Output locations for the serialized model and the (optional) tree drawing
model_output = 'posture_model.pkl'
graph_output = 'details/posture_model_tree_structure'

# Persist the tuned model
save_model(tuned_model, model_output)

# Tree plotting is currently disabled:
#plot_tree(tuned_model, features, graph_output)

12187
3047
Tuning


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

Testing


In [20]:
# Tabulate the collected evaluation metrics, one row per evaluated model
metric_names = ['Accuracy', 'Precision', 'Recall', 'F1score']
performance = pd.DataFrame(model_performances)
performance.columns = metric_names
performance


Unnamed: 0,Accuracy,Precision,Recall,F1score
0,0.923203,0.924029,0.923203,0.923522


In [21]:
# Display the estimator returned by the grid search; its repr lists the selected hyperparameters
tuned_model

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=100, max_features=3, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=300, n_jobs=-1,
            oob_score=True, random_state=None, verbose=0, warm_start=False)

In [5]:
# Reload the persisted classifier and classify a single sample.
# `sklearn.externals.joblib` was removed in scikit-learn 0.23, so fall back to
# the standalone `joblib` package when it is unavailable.
try:
    from sklearn.externals import joblib
except ImportError:
    import joblib

# NOTE: joblib files are pickles - only load model files from a trusted source.
loaded_model = joblib.load('posture_model.pkl')

# Exactly one value per training feature, in training order:
# Height, Angle1, Angle2, Angle3, Angle4, Angle5, Angle6.
# (The row previously carried an 8th value, which does not match the
# 7 features the saved model is trained on.)
inputVals = [[1449.21,93.7099,89.9651,180,179.966,163.338,175.972]]
prediction = loaded_model.predict(inputVals)
print(prediction)

[2]
