# Adquisición de datos

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [None]:
# Load the dataset from the CSV file in the working directory.
csv_path = 'cords_balanced.csv'
df = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows of the loaded frame.
df.head()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z31,v31,x32,y32,z32,v32,x33,y33,z33,v33
0,down,0.491681,0.146931,-0.629843,0.999992,0.498723,0.122416,-0.628143,0.999986,0.504355,...,0.011614,0.851506,0.54894,0.685142,-0.138369,0.922728,0.441276,0.679099,-0.124184,0.962703
1,down,0.491223,0.112327,-0.658368,0.999987,0.4983,0.086046,-0.655343,0.99998,0.503906,...,-0.014931,0.827303,0.548316,0.694312,-0.17468,0.915961,0.442682,0.689076,-0.157204,0.957617
2,down,0.490951,0.09413,-0.700939,0.999986,0.498258,0.067399,-0.695552,0.999978,0.503845,...,-0.015446,0.820611,0.548311,0.694243,-0.180046,0.913556,0.442863,0.68999,-0.165039,0.956413
3,down,0.490965,0.092406,-0.70539,0.999986,0.498464,0.065602,-0.700312,0.999978,0.503954,...,-0.014119,0.81546,0.548465,0.694522,-0.177772,0.912143,0.442891,0.690374,-0.164462,0.955755
4,up,0.502778,-0.088485,-0.580676,0.999289,0.511298,-0.102615,-0.564815,0.997112,0.516612,...,0.433625,0.898592,0.556493,0.998807,0.295826,0.983766,0.455504,0.999895,0.262883,0.986771


In [None]:
# Separate the features from the target:
#   X - every column except 'class'
#   Y - the 'class' column, used as the label to predict
X = df.drop(columns='class')
Y = df['class']

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across reruns.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Print the shape of each split, in the order: train features, train labels,
# test features, test labels.
for split_part in (X_train, Y_train, X_test, Y_test):
    print(split_part.shape)

(417, 132)
(417,)
(105, 132)
(105,)


# Entrenamiento con sklearn

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [None]:
# Candidate classifiers, each wrapped in a pipeline that standardizes the
# features before fitting. The keys are short aliases used to look the models
# up later ('lr', 'rc', 'rf', 'gb').
#
# The two tree ensembles are randomized estimators, so they are given a fixed
# seed (the same value the train/test split uses) to make reruns produce the
# same fitted models and the same exported pickle.
SEED = 42
pipelines = {
    'lr': make_pipeline(StandardScaler(), LogisticRegression()),
    'rc': make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf': make_pipeline(StandardScaler(), RandomForestClassifier(random_state=SEED)),
    'gb': make_pipeline(StandardScaler(), GradientBoostingClassifier(random_state=SEED)),
}

In [None]:
# Fit every pipeline on the training split and keep the fitted estimators
# under the same aliases (Pipeline.fit returns the fitted pipeline itself).
fit_models = {
    name: pipe.fit(X_train, Y_train)
    for name, pipe in pipelines.items()
}

In [None]:
# Display the dictionary of fitted pipelines, keyed by model alias.
fit_models

{'lr': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression', LogisticRegression())]),
 'rc': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier())]),
 'rf': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier', RandomForestClassifier())]),
 'gb': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('gradientboostingclassifier', GradientBoostingClassifier())])}

In [None]:
# Spot-check: labels predicted by the logistic-regression pipeline for the held-out features.
fit_models['lr'].predict(X_test)

array(['up', 'up', 'up', 'down', 'down', 'up', 'down', 'down', 'down',
       'up', 'up', 'down', 'down', 'up', 'up', 'up', 'down', 'up', 'down',
       'down', 'up', 'up', 'up', 'up', 'up', 'up', 'up', 'down', 'up',
       'down', 'down', 'up', 'down', 'up', 'up', 'down', 'up', 'down',
       'up', 'down', 'down', 'down', 'down', 'up', 'down', 'up', 'down',
       'up', 'up', 'down', 'down', 'down', 'down', 'up', 'down', 'down',
       'down', 'down', 'up', 'up', 'up', 'down', 'up', 'up', 'down', 'up',
       'up', 'down', 'down', 'up', 'down', 'down', 'up', 'up', 'down',
       'down', 'down', 'up', 'down', 'up', 'up', 'down', 'up', 'up', 'up',
       'down', 'up', 'down', 'up', 'up', 'up', 'up', 'up', 'up', 'down',
       'up', 'down', 'down', 'up', 'up', 'up', 'down', 'up', 'down',
       'down'], dtype=object)

In [None]:
# True labels of the held-out set, for eyeballing against the predictions.
Y_test

Unnamed: 0,class
434,up
440,up
6,up
184,down
78,down
...,...
342,up
412,down
301,up
428,down


# Evaluación y exportado a pickle

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score
import pickle

In [None]:
# Metrics for each fitted model on the held-out set: accuracy, precision and
# recall, with 'up' treated as the positive class for precision/recall.
y_true = Y_test.values
for name, fitted in fit_models.items():
    yhat = fitted.predict(X_test)
    accuracy = accuracy_score(y_true, yhat)
    precision = precision_score(y_true, yhat, average="binary", pos_label="up")
    recall = recall_score(y_true, yhat, average="binary", pos_label="up")
    print(name, accuracy, precision, recall)

lr 1.0 1.0 1.0
rc 1.0 1.0 1.0
rf 1.0 1.0 1.0
gb 1.0 1.0 1.0


In [None]:
# Re-score the random-forest pipeline on its own; `yhat` is kept as a
# notebook-level name because a later cell slices it.
yhat = fit_models['rf'].predict(X_test)
rf_accuracy = accuracy_score(Y_test, yhat)
print(rf_accuracy)

1.0


In [None]:
# First ten entries of the current predictions array.
yhat[:10]

array(['up', 'up', 'up', 'down', 'down', 'up', 'down', 'down', 'down',
       'up'], dtype=object)

In [None]:
# True held-out labels, shown for comparison with the predictions.
Y_test

Unnamed: 0,class
434,up
440,up
6,up
184,down
78,down
...,...
342,up
412,down
301,up
428,down


In [None]:
# Serialize the fitted random-forest pipeline to 'count.pkl' so it can be
# loaded outside this notebook.
with open('count.pkl', 'wb') as model_file:
    pickle.dump(fit_models['rf'], model_file)