# Modelos

En este notebook ejecutamos todos los modelos entrenados para el trabajo y comparamos sus resultados.

In [2]:
from sklearn.neighbors import KNeighborsClassifier  
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Perceptron
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier
from scipy import stats

from sklearn import metrics
from os import path
from sklearn.svm import SVC
import _pickle as pickle
import gc
import numpy as np
from time import time
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV

from auxiliar_functions import data_from_filesMCA
from auxiliar_functions import report
from auxiliar_functions import save_model
from auxiliar_functions import load_model

In [3]:
(X_train, X_test, X_val, y_train, y_test, y_val) = data_from_filesMCA()

loading data...

Loaded data:
Train shape:  (3673816, 36)
Train shape Y:  (3673816,)
Test shape:  (1836908, 36)
Test shape Y:  (1836908,)
Val shape:  (1836909, 36)
Test shape:  (1836909,)


## Linear discriminant Analysis

In [4]:
lda = LinearDiscriminantAnalysis()
lda.fit(X_train, y_train.values.ravel())
predicted = lda.predict(X_test)
acc = metrics.accuracy_score(y_test, predicted)
print("Accuracy:", acc)
print('Full report: \n', metrics.classification_report(y_test, predicted))
save_model("lda",lda)
del lda
gc.collect()

Accuracy: 0.624782514965
Full report: 
              precision    recall  f1-score   support

          0       0.63      0.60      0.61    911941
          1       0.62      0.65      0.63    924967

avg / total       0.62      0.62      0.62   1836908



7

## Quadratic discriminant Analysis

In [5]:
qda = QuadraticDiscriminantAnalysis()
qda.fit(X_train, y_train.values.ravel())
predicted = qda.predict(X_test)
acc = metrics.accuracy_score(y_test, predicted)
print("Accuracy:", acc)
print('Full report: \n', metrics.classification_report(y_test, predicted))
save_model("qda",qda)
del qda
gc.collect()

Accuracy: 0.585420717858
Full report: 
              precision    recall  f1-score   support

          0       0.64      0.37      0.47    911941
          1       0.56      0.79      0.66    924967

avg / total       0.60      0.59      0.57   1836908



0

## Regularized Discriminant analissi

In [6]:
parameter_distributions = {'reg_param': stats.uniform(0,1)}
rda = QuadraticDiscriminantAnalysis(priors=2)
random_search = RandomizedSearchCV(rda,param_distributions=parameter_distributions,n_iter=10,pre_dispatch=2, n_jobs=-1)

random_search.fit(X_train, y_train.values.ravel())

report(random_search.cv_results_)
predicted = random_search.predict(X_test)
acc = metrics.accuracy_score(y_test, predicted)
print("Accuracy:", acc)
print('Full report: \n', metrics.classification_report(y_test, predicted))

save_model("rdacv", random_search)
del random_search
gc.collect()

Model with rank: 1
Mean validation score: 0.612 (std: 0.000)
Parameters: {'reg_param': 0.63102792167103838}

Model with rank: 2
Mean validation score: 0.612 (std: 0.000)
Parameters: {'reg_param': 0.62336797560890123}

Model with rank: 3
Mean validation score: 0.612 (std: 0.000)
Parameters: {'reg_param': 0.68024006182280483}

Accuracy: 0.61210523336
Full report: 
              precision    recall  f1-score   support

          0       0.62      0.55      0.58    911941
          1       0.60      0.67      0.64    924967

avg / total       0.61      0.61      0.61   1836908



6

## Naive Bayes

In [7]:
gaussianNaiveBayes = GaussianNB()
gaussianNaiveBayes.fit(X_train, y_train.values.ravel())
predicted = gaussianNaiveBayes.predict(X_test)
acc = metrics.accuracy_score(y_test, predicted)
print("Accuracy:", acc)
print('Full report: \n', metrics.classification_report(y_test, predicted))
save_model("naiveBayes", gaussianNaiveBayes)
del gaussianNaiveBayes
gc.collect()

Accuracy: 0.581354645959
Full report: 
              precision    recall  f1-score   support

          0       0.57      0.61      0.59    911941
          1       0.59      0.55      0.57    924967

avg / total       0.58      0.58      0.58   1836908



0

# Logistic Regression

In [8]:
parameter_distributions = {'penalty':['l1','l2'], 'C':stats.expon(scale=100),
                           'fit_intercept':[True,False]}
lr = LogisticRegression(solver='saga')
random_search = RandomizedSearchCV(lr,param_distributions=parameter_distributions,n_iter=10,pre_dispatch=2, n_jobs=-1)

random_search.fit(X_train, y_train.values.ravel())

report(random_search.cv_results_)
predicted = random_search.predict(X_test)
acc = metrics.accuracy_score(y_test, predicted)
print("Accuracy:", acc)
print('Full report: \n', metrics.classification_report(y_test, predicted))

save_model("lrcv", random_search)
del random_search
gc.collect()

Model with rank: 1
Mean validation score: 0.496 (std: 0.000)
Parameters: {'fit_intercept': False, 'C': 0.56988715681253321, 'penalty': 'l1'}

Model with rank: 1
Mean validation score: 0.496 (std: 0.000)
Parameters: {'fit_intercept': False, 'C': 86.261195909917191, 'penalty': 'l1'}

Model with rank: 1
Mean validation score: 0.496 (std: 0.000)
Parameters: {'fit_intercept': False, 'C': 321.87386434939702, 'penalty': 'l1'}

Accuracy: 0.496465255745
Full report: 
              precision    recall  f1-score   support

          0       0.50      1.00      0.66    911941
          1       1.00      0.00      0.00    924967

avg / total       0.75      0.50      0.33   1836908



72

## KNN

In [9]:
params = {'n_neighbors':stats.randint(1, 30), 'weights':['distance','uniform']}
knc = KNeighborsClassifier(n_jobs=-1)
random_search = RandomizedSearchCV(lr,param_distributions=parameter_distributions,n_iter=10,pre_dispatch=2, n_jobs=-1)

random_search.fit(X_train, y_train.values.ravel())

report(random_search.cv_results_)
predicted = random_search.predict(X_test)
acc = metrics.accuracy_score(y_test, predicted)
print("Accuracy:", acc)
print('Full report: \n', metrics.classification_report(y_test, predicted))

save_model("knncv", random_search)
del random_search
gc.collect()

Model with rank: 1
Mean validation score: 0.496 (std: 0.000)
Parameters: {'fit_intercept': False, 'C': 36.408592509492713, 'penalty': 'l1'}

Model with rank: 1
Mean validation score: 0.496 (std: 0.000)
Parameters: {'fit_intercept': False, 'C': 72.01333885899524, 'penalty': 'l1'}

Model with rank: 1
Mean validation score: 0.496 (std: 0.000)
Parameters: {'fit_intercept': False, 'C': 73.855165492808084, 'penalty': 'l1'}

Model with rank: 1
Mean validation score: 0.496 (std: 0.000)
Parameters: {'fit_intercept': False, 'C': 128.25781336608739, 'penalty': 'l1'}

Model with rank: 1
Mean validation score: 0.496 (std: 0.000)
Parameters: {'fit_intercept': False, 'C': 683.04306293626951, 'penalty': 'l1'}

Accuracy: 0.496465255745
Full report: 
              precision    recall  f1-score   support

          0       0.50      1.00      0.66    911941
          1       1.00      0.00      0.00    924967

avg / total       0.75      0.50      0.33   1836908



72

## Random Forest

In [None]:
param_dist = {"max_depth": [3, None],
              "max_features": stats.randint(1, 11),
              "min_samples_split": stats.randint(2, 11),
              "min_samples_leaf": stats.randint(1, 11),
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

clf = RandomForestClassifier(n_estimators=20)

n_iter_search = 20
random_search = RandomizedSearchCV(clf, param_distributions=param_dist,
                                   n_iter=n_iter_search,pre_dispatch=3, n_jobs=-1)


random_search.fit(X_train, y_train.values.ravel())

report(random_search.cv_results_)
predicted = random_search.predict(X_test)
acc = metrics.accuracy_score(y_test, predicted)
print("Accuracy:", acc)
print('Full report: \n', metrics.classification_report(y_test, predicted))
save_model("rfcv", random_search)
del random_search
gc.collect()

## Perceptron