# Train Custom Model Using Scikit Learn

## 1 Read in Collected Data and Process

In [41]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [42]:
df = pd.read_csv(r"C:\Users\hagar\OneDrive - mail.tau.ac.il\Desktop\Stage\LPC_2022\Hand decoder\Position\data\coords_position_automatic.csv")

In [43]:
df['d_x'] = df['x1'] - df['x17']
df['d_y'] = df['y1'] - df['y17']
df['d_z'] = df['z1'] - df['z17']

df_distance = df.filter(['class','d_x','d_y','d_z'], axis=1)
df_distance

Unnamed: 0,class,d_x,d_y,d_z
0,0,0.076945,-0.254911,0.483969
1,0,0.077011,-0.254582,0.466995
2,0,0.078072,-0.246687,0.442041
3,0,0.078010,-0.245539,0.410314
4,0,0.078374,-0.244910,0.383155
...,...,...,...,...
8496,4,0.074933,-0.306161,0.331149
8497,4,0.075176,-0.307166,0.310239
8498,4,0.075323,-0.307534,0.320078
8499,4,0.075438,-0.308516,0.322512


In [3]:
df.head()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z31,v31,x32,y32,z32,v32,x33,y33,z33,v33
0,0,0.479635,0.520285,-0.443096,0.999986,0.493242,0.491135,-0.414232,0.999964,0.500506,...,0.47098,1e-05,0.53014,1.980473,-0.008393,2.5e-05,0.447276,1.983681,0.163186,4e-05
1,0,0.480094,0.520171,-0.484116,0.999972,0.493659,0.490495,-0.448987,0.999933,0.501458,...,0.136555,1.8e-05,0.530131,1.996417,-0.045417,4.4e-05,0.445889,2.003625,-0.17214,5.8e-05
2,0,0.481186,0.520148,-0.506814,0.99996,0.494331,0.490121,-0.469188,0.999909,0.50228,...,0.130876,3.1e-05,0.532269,1.995225,-0.042667,8.6e-05,0.446423,2.00173,-0.161329,0.000106
3,0,0.48132,0.520231,-0.522004,0.99995,0.494372,0.490017,-0.487148,0.999888,0.502411,...,0.130204,3.9e-05,0.532246,1.995441,-0.046167,0.000111,0.444958,2.001297,-0.163311,0.000132
4,0,0.481643,0.520238,-0.549485,0.999938,0.494514,0.489836,-0.514656,0.999862,0.502614,...,0.110434,6.3e-05,0.530908,1.990157,-0.046453,0.000184,0.439531,1.995314,-0.174502,0.000232


In [44]:
# separate features fro target
X = df_distance.drop('class', axis=1) # features
y = df_distance['class'] # target value

In [45]:
# split train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## 2 Train Machine Learning Classification Model

In [46]:
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 

from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [47]:
pipelines = {
    'lr':make_pipeline(StandardScaler(), LogisticRegression()),
    'rc':make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf':make_pipeline(StandardScaler(), RandomForestClassifier()),
    'gb':make_pipeline(StandardScaler(), GradientBoostingClassifier()),
}

In [48]:
fit_models = {}
for algo, pipeline in pipelines.items():
    model = pipeline.fit(X_train, y_train)
    fit_models[algo] = model

In [49]:
fit_models

{'lr': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression', LogisticRegression())]),
 'rc': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier())]),
 'rf': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier', RandomForestClassifier())]),
 'gb': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('gradientboostingclassifier', GradientBoostingClassifier())])}

In [50]:
fit_models['rc'].predict(X_test)

array([3, 4, 4, ..., 4, 3, 3], dtype=int64)

## 3 Evaluate and Serialize Model 

In [51]:
from sklearn.metrics import accuracy_score # Accuracy metrics 
import pickle 

In [52]:
for algo, model in fit_models.items():
    yhat = model.predict(X_test)
    print(algo, accuracy_score(y_test, yhat))

lr 0.9905919247353979
rc 0.7424539396315171
rf 0.9992159937279498
gb 0.9980399843198745


In [53]:
#put a specific model (rc/lr/rf/gb) in a pickle file for future use 
with open(r'C:\Users\hagar\OneDrive - mail.tau.ac.il\Desktop\Stage\LPC_2022\Hand decoder\Position\code\distance_position.pkl', 'wb') as f:
    pickle.dump(fit_models['rf'], f) # for some reason this is not writing the model to the file