In [2]:
# Read in Collected Data and Process

import pandas as pd
from sklearn.model_selection import train_test_split

# Load the collected data; the 'class' column is the label and every
# other column is used as a feature.
df = pd.read_csv('../data/coords.csv')
y = df['class']               # target value
X = df.drop(columns='class')  # features

# Hold out 20% of the rows for evaluation. The fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

In [3]:
# Train Machine Learning Classification Model

from sklearn.pipeline import make_pipeline  # build a ML pipeline
from sklearn.preprocessing import StandardScaler    # use to standardize the data

# Train these four algorithms and pick the one with the best result
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# Seed for the estimators that use randomness. Same value as the one passed
# to train_test_split, so a fresh "Restart & Run All" reproduces the models.
RANDOM_STATE = 1234

# One pipeline per candidate algorithm. Each standardizes the features and
# then fits the classifier. Keys are the short names used to look models up.
pipelines = {
    'lr': make_pipeline(StandardScaler(), LogisticRegression()),
    'rc': make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf': make_pipeline(
        StandardScaler(),
        RandomForestClassifier(random_state=RANDOM_STATE),
    ),
    'gb': make_pipeline(
        StandardScaler(),
        GradientBoostingClassifier(random_state=RANDOM_STATE),
    ),
}

# Fit every pipeline on the training split. fit() returns the fitted pipeline
# itself, so fit_models maps each short name to its trained pipeline.
fit_models = {}
for algo, pipeline in pipelines.items():
    fit_models[algo] = pipeline.fit(X_train, y_train)

In [4]:
# Evaluate and Serialize Model

from sklearn.metrics import accuracy_score # Accuracy metrics 
import pickle

# Score every fitted pipeline on the held-out split, printing one
# "<name> <accuracy>" line per model and keeping the scores for selection.
scores = {}
for algo, model in fit_models.items():
    yhat = model.predict(X_test)
    scores[algo] = accuracy_score(y_test, yhat)
    print(algo, scores[algo])

# Serialize the highest-scoring pipeline instead of a hard-coded one.
# max() returns the first maximal key and dicts keep insertion order, so a
# tie resolves to the earliest-inserted entry of fit_models.
# NOTE(review): if code loading body_model.pkl calls predict_proba, be aware
# that RidgeClassifier does not provide it -- confirm against the consumer.
best_algo = max(scores, key=scores.get)

with open('body_model.pkl', 'wb') as f:
    pickle.dump(fit_models[best_algo], f)

lr 1.0
rc 1.0
rf 1.0
gb 1.0
