# Running all Classification Models to get the best predictions
## Then, moving on to Neural Networks to get "Better Predictions"

In [12]:
import pandas as pd
import numpy as np
import random

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier

from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import RidgeClassifier
from sklearn.linear_model import RidgeClassifierCV
from sklearn.linear_model import SGDClassifier

from sklearn.metrics import *

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

from sklearn.neighbors import KDTree
from sklearn.neighbors import KNeighborsClassifier

from sklearn.neural_network import MLPClassifier

from sklearn.svm import LinearSVC

from dummies_bins_test_train_cv import *
from clean_chess_game_log import *

from warnings import filterwarnings
# Install a global "ignore" filter: no category or module is given, so every
# warning raised after this point is suppressed for the rest of the session.
filterwarnings('ignore')

In [14]:
# Load the prepared prediction dataset (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='../data/use_for_predictions.csv')

In [41]:
# Build the full X / y, the train/test partitions and the cleaned frame via the project helper.
# NOTE(review): the meaning of the 0.98 and 0.99 arguments is defined inside
# get_Xy_train_test (not visible here) — confirm against the helper.
(
    X_train,
    X_test,
    y_train,
    y_test,
    X,
    y,
    df_clean,
) = get_Xy_train_test(df, 0.98, 0.99)

y Shape: (2091,)
X Shape: (2091, 20)
X_train Shape: (2056, 20)
X_test Shape: (35, 20)
y_train Shape: (2056,)
y_test Shape: (35,)


### Linear Discriminant Analysis

In [42]:
# Linear discriminant analysis with the least-squares solver and automatic shrinkage,
# fitted on the training split.
LDA_clf = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
LDA_clf.fit(X_train, y_train)

# Score the fitted model through the project helper on the held-out split.
# NOTE(review): the recorded split output shows X_test has only 35 rows; if `cv`
# is the fold count, cv=11 leaves ~3 rows per fold — confirm this is intended.
LDA_scores = cross_validation_process(LDA_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=88.64%
Standard_Deviation=0.208
True_Score(Mean/SD)=4.261

Prediction_Confusion_Matrix=[9|8]:[3|15]
Prediction_Accuracy=68.57%


### Quadratic Discriminant Analysis

In [43]:
# Quadratic discriminant analysis with a fixed covariance regularisation strength,
# fitted on the training split.
QDA_clf = QuadraticDiscriminantAnalysis(reg_param=0.26055)
QDA_clf.fit(X_train, y_train)

# Score the fitted model on the held-out split via the project helper.
QDA_scores = cross_validation_process(QDA_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=90.91%
Standard_Deviation=0.196
True_Score(Mean/SD)=4.636

Prediction_Confusion_Matrix=[9|8]:[1|17]
Prediction_Accuracy=74.29%


### Gaussian Process Classifier

In [44]:
# Gaussian process classifier; no kernel is passed, so the estimator's default
# kernel is used. The optimiser is restarted 10 times with a fixed seed.
gpc_rbf_clf = GaussianProcessClassifier(
    n_jobs=-2,
    n_restarts_optimizer=10,
    random_state=9,
)
gpc_rbf_clf.fit(X_train, y_train)

# Score the fitted model on the held-out split via the project helper.
gpc_rbf_score = cross_validation_process(gpc_rbf_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=78.79%
Standard_Deviation=0.234
True_Score(Mean/SD)=3.367

Prediction_Confusion_Matrix=[11|6]:[3|15]
Prediction_Accuracy=74.29%


### Logistic Regression

In [45]:
# L2-penalised logistic regression with balanced class weights and a small C
# (1e-3), solved with L-BFGS and a fixed seed.
lgst_reg_clf = LogisticRegression(
    penalty='l2',
    class_weight='balanced',
    random_state=9,
    max_iter=5000,
    C=1e-3,
    solver='lbfgs',
    n_jobs=8,
    multi_class='auto',
)
lgst_reg_clf.fit(X_train, y_train)

# Score the fitted model on the held-out split via the project helper.
lgst_reg_score = cross_validation_process(lgst_reg_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=87.88%
Standard_Deviation=0.179
True_Score(Mean/SD)=4.911

Prediction_Confusion_Matrix=[9|8]:[5|13]
Prediction_Accuracy=62.86%


### Logistic Regression CV

In [46]:
# Logistic regression that selects C internally: 10 candidate values,
# 6-fold cross-validation, L2 penalty, Newton-CG solver.
lgst_reg_cv_clf = LogisticRegressionCV(
    Cs=10,
    penalty='l2',
    cv=6,
    class_weight='balanced',
    random_state=9,
    solver='newton-cg',
    n_jobs=-2,
)
lgst_reg_cv_clf.fit(X_train, y_train)

# Score the fitted model on the held-out split via the project helper.
lgst_reg_cv_score = cross_validation_process(lgst_reg_cv_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=86.36%
Standard_Deviation=0.225
True_Score(Mean/SD)=3.83

Prediction_Confusion_Matrix=[9|8]:[3|15]
Prediction_Accuracy=68.57%


### AdaBoost Classifier

In [47]:
# AdaBoost ensemble: 274 boosting rounds with a small learning rate and a fixed seed.
ada_clf = AdaBoostClassifier(
    n_estimators=274,
    learning_rate=0.013,
    random_state=9,
)
ada_clf.fit(X_train, y_train)

# Score the fitted model on the held-out split via the project helper.
ada_scores = cross_validation_process(ada_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=81.82%
Standard_Deviation=0.241
True_Score(Mean/SD)=3.402

Prediction_Confusion_Matrix=[10|7]:[6|12]
Prediction_Accuracy=62.86%


### SGD Classifier

In [48]:
# Linear classifier trained with SGD on the hinge loss, L2 penalty and
# balanced class weights; shuffling is off and the seed is fixed.
# max_iter is given as the integer 1000 (it was the float 1e3): scikit-learn
# documents this parameter as an int, and releases with strict parameter
# validation reject a float here.
SGD_clf = SGDClassifier(
    loss='hinge',
    penalty='l2',
    max_iter=1000,
    shuffle=False,
    n_jobs=8,
    random_state=9,
    class_weight='balanced',
)
SGD_clf.fit(X_train, y_train)

# Score the fitted model on the held-out split via the project helper.
SGD_score = cross_validation_process(SGD_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=80.3%
Standard_Deviation=0.236
True_Score(Mean/SD)=3.396

Prediction_Confusion_Matrix=[9|8]:[6|12]
Prediction_Accuracy=60.0%


### Random Forest Classifier

In [49]:
# Random forest: 1000 trees split on entropy, at least 2 samples per leaf,
# balanced class weights and a fixed seed.
rand_frst_clf = RandomForestClassifier(
    n_estimators=1000,
    criterion='entropy',
    n_jobs=-2,
    min_samples_leaf=2,
    random_state=9,
    class_weight='balanced',
)
rand_frst_clf.fit(X_train, y_train)

# Score the fitted model on the held-out split via the project helper.
rand_frst_score = cross_validation_process(rand_frst_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=93.18%
Standard_Deviation=0.17
True_Score(Mean/SD)=5.49

Prediction_Confusion_Matrix=[11|6]:[3|15]
Prediction_Accuracy=74.29%


### Ridge Classifier

In [50]:
# Ridge classifier with balanced class weights and a fixed seed; all other
# settings are left at the estimator's defaults.
ridge_clf = RidgeClassifier(class_weight='balanced', random_state=9)
ridge_clf.fit(X_train, y_train)

# Score the fitted model on the held-out split via the project helper.
ridge_score = cross_validation_process(ridge_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=84.85%
Standard_Deviation=0.248
True_Score(Mean/SD)=3.418

Prediction_Confusion_Matrix=[9|8]:[3|15]
Prediction_Accuracy=68.57%


### Ridge Classifier CV

In [51]:
# Ridge classifier with built-in cross-validation: 20 folds, scored by
# average precision, with balanced class weights.
ridge_cv_clf = RidgeClassifierCV(
    scoring='average_precision',
    cv=20,
    class_weight='balanced',
)
ridge_cv_clf.fit(X_train, y_train)

# Score the fitted model on the held-out split via the project helper.
ridge_cv_score = cross_validation_process(ridge_cv_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=84.85%
Standard_Deviation=0.248
True_Score(Mean/SD)=3.418

Prediction_Confusion_Matrix=[9|8]:[3|15]
Prediction_Accuracy=68.57%


### K Neighbors Classifier

In [52]:
# k-nearest-neighbours classifier voting over the 19 closest training points.
KNN_clf = KNeighborsClassifier(
    n_neighbors=19,
    leaf_size=88,
    n_jobs=8,
)
KNN_clf.fit(X_train, y_train)

# Score the fitted model on the held-out split via the project helper.
KNN_score = cross_validation_process(KNN_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=71.21%
Standard_Deviation=0.193
True_Score(Mean/SD)=3.698

Prediction_Confusion_Matrix=[10|7]:[5|13]
Prediction_Accuracy=65.71%


### Multi-layer Perceptron Classifier

In [53]:
# Multi-layer perceptron with one hidden layer of 64 logistic units, trained
# with L-BFGS and a fixed seed; verbose=True prints training progress.
# NOTE(review): per the scikit-learn docs, batch_size is not used by the
# 'lbfgs' solver and validation_fraction only applies with early stopping —
# both look inert here; confirm before relying on them.
MLP_clf = MLPClassifier(
    hidden_layer_sizes=(64,),
    activation='logistic',
    solver='lbfgs',
    alpha=0.0001,
    batch_size=8,
    max_iter=5000,
    random_state=9,
    validation_fraction=0.1,
    verbose=True,
)
MLP_clf.fit(X_train, y_train)

# Score the fitted model on the held-out split via the project helper.
MLP_score = cross_validation_process(MLP_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=85.61%
Standard_Deviation=0.239
True_Score(Mean/SD)=3.588

Prediction_Confusion_Matrix=[11|6]:[3|15]
Prediction_Accuracy=74.29%


In [46]:
# Candidate scoring-metric names, kept for reference only (currently unused):
# classification_scoring = ['accuracy', 'balanced_accuracy', 'average_precision',
#                           'brier_score_loss', 'f1', 'f1_micro', 'f1_macro',
#                           'f1_weighted', 'neg_log_loss', 'precision',
#                           'recall', 'roc_auc']