# Running all Classification Models to get the best predictions
## Then, moving on to Neural Networks to get "Better Predictions"

In [59]:
import pandas as pd
import numpy as np

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier

from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import RidgeClassifier
from sklearn.linear_model import RidgeClassifierCV
from sklearn.linear_model import SGDClassifier

from sklearn.metrics import *

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

from sklearn.neighbors import KDTree
from sklearn.neighbors import KNeighborsClassifier

from sklearn.neural_network import MLPClassifier

from sklearn.svm import LinearSVC

from dummies_bins_test_train_cv import *

from warnings import filterwarnings
filterwarnings('ignore')

In [60]:
# Load the CSV that every model below is trained and evaluated on
# (the path is relative to the current working directory).
df = pd.read_csv(filepath_or_buffer='../data/use_for_analysis.csv')

In [106]:
# Split df into train/test features and targets with the project helper
# (presumably pulled in by the `dummies_bins_test_train_cv` star import).
# NOTE(review): the meaning of the two numeric arguments is not visible here;
# the shapes printed by the helper show roughly 80% of the rows in the
# training split -- confirm both arguments against the helper's definition.
(X_train, X_test,
 y_train, y_test) = get_Xy_train_test(df, 0.8, 0.9)

y Shape: (2048,)
X Shape: (2048, 31)
X_train Shape: (1642, 31)
X_test Shape: (406, 31)
y_train Shape: (1642,)
y_test Shape: (406,)


### Linear Discriminant Analysis

In [107]:
# Linear discriminant analysis with the least-squares solver and automatic
# shrinkage, fitted on the training split.
LDA_clf = LinearDiscriminantAnalysis(shrinkage='auto', solver='lsqr')
LDA_clf.fit(X_train, y_train)

# Hand the fitted model and the held-out split to the project's reporting
# helper, which is asked for cv=11.
LDA_scores = cross_validation_process(LDA_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=72.18%
Standard_Deviation=0.065
Scores(average_precision)=[0.741 0.728 0.803 0.778 0.83  0.696 0.676 0.599 0.753 0.671 0.665]
No Feature Importances
Prediction_Confusion_Matrix=
[[141  63]
 [ 51 151]]
Prediction_Accuracy=71.92%


### Quadratic Discriminant Analysis

Best results with:
- reg_param = 0.26055
- reg_param = 0.36968
- reg_param = 0.96127
- reg_param = 0.83353
- reg_param = 0.77121

In [108]:
# Shortlisted regularisation strengths for quadratic discriminant analysis.
reg_param = [0.96127, 0.83353, 0.77121, 0.26055, 0.36968]

for strength in reg_param:
    # Echo the value first so the report printed by the helper can be
    # matched to the setting that produced it.
    print(strength)
    QDA_clf = QuadraticDiscriminantAnalysis(reg_param=strength)
    QDA_clf.fit(X_train, y_train)
    QDA_scores = cross_validation_process(QDA_clf, X_test, y_test, cv=11)

0.96127
Average_Accuracy(average_precision)=81.0%
Standard_Deviation=0.059
Scores(average_precision)=[0.817 0.805 0.902 0.711 0.845 0.74  0.86  0.858 0.825 0.716 0.832]
No Feature Importances
Prediction_Confusion_Matrix=
[[142  62]
 [ 53 149]]
Prediction_Accuracy=71.67%
0.83353
Average_Accuracy(average_precision)=81.17%
Standard_Deviation=0.056
Scores(average_precision)=[0.817 0.802 0.899 0.711 0.841 0.77  0.86  0.858 0.825 0.716 0.829]
No Feature Importances
Prediction_Confusion_Matrix=
[[143  61]
 [ 54 148]]
Prediction_Accuracy=71.67%
0.77121
Average_Accuracy(average_precision)=80.82%
Standard_Deviation=0.061
Scores(average_precision)=[0.811 0.802 0.899 0.684 0.841 0.77  0.86  0.858 0.825 0.711 0.829]
No Feature Importances
Prediction_Confusion_Matrix=
[[143  61]
 [ 54 148]]
Prediction_Accuracy=71.67%
0.26055
Average_Accuracy(average_precision)=75.23%
Standard_Deviation=0.062
Scores(average_precision)=[0.743 0.757 0.841 0.706 0.861 0.724 0.673 0.768 0.821 0.675 0.707]
No Feature Impo

### Gaussian Process Classifier

In [109]:
# Gaussian process classifier. No kernel is passed explicitly, so the
# estimator uses whatever kernel is its default.
gpc_rbf_clf = GaussianProcessClassifier(
    n_restarts_optimizer=10,
    max_iter_predict=100,
    random_state=9,
    n_jobs=-2,
)
gpc_rbf_clf.fit(X_train, y_train)

# Report on the held-out split through the project helper (cv=11).
gpc_rbf_score = cross_validation_process(gpc_rbf_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=77.47%
Standard_Deviation=0.087
Scores(average_precision)=[0.566 0.937 0.781 0.764 0.864 0.784 0.79  0.749 0.749 0.731 0.808]
No Feature Importances
Prediction_Confusion_Matrix=
[[153  51]
 [ 52 150]]
Prediction_Accuracy=74.63%


### Logistic Regression

In [110]:
# L2-penalised logistic regression with balanced class weights, solved with
# newton-cg and fitted on the training split.
lgst_reg_clf = LogisticRegression(
    penalty='l2',
    solver='newton-cg',
    multi_class='auto',
    class_weight='balanced',
    random_state=9,
    n_jobs=-2,
)
lgst_reg_clf.fit(X_train, y_train)

# Report on the held-out split through the project helper (cv=11).
lgst_reg_score = cross_validation_process(lgst_reg_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=77.61%
Standard_Deviation=0.05
Scores(average_precision)=[0.761 0.742 0.841 0.719 0.895 0.759 0.79  0.745 0.805 0.732 0.748]
No Feature Importances
Prediction_Confusion_Matrix=
[[143  61]
 [ 52 150]]
Prediction_Accuracy=72.17%


### Logistic Regression CV

Best results with:

- Cs = 10, cv = 6
- Cs = 16, cv = 4
- Cs = 19, cv = 6
- Cs = 19, cv = 12
- Cs = 25, cv = 6
- Cs = 25, cv = 12

In [104]:
# Shortlisted LogisticRegressionCV settings: each key is the `Cs` argument and
# each value is either a single `cv` argument or a list of `cv` arguments to
# try with that `Cs`.
Cs_cv = {10: 6, 16: 4, 19: [6, 12], 25: [6, 12]}

# NOTE(review): the output saved under this cell appears to come from an
# earlier run on a much smaller split (its confusion matrices sum to 18 rows,
# its execution count precedes the current train/test split cell, and it ends
# in a ValueError about n_splits=12). Re-run this cell after the split cell so
# the reports reflect the current data.
for n_Cs, fold_spec in Cs_cv.items():
    # Treat a bare int as a one-element list so both value shapes share a
    # single code path instead of two copy-pasted branches.
    fold_options = fold_spec if isinstance(fold_spec, list) else [fold_spec]

    for n_folds in fold_options:
        # Echo the setting first (as the QDA cell does) so each report printed
        # by the helper can be attributed to the setting that produced it.
        print('Cs={}, cv={}'.format(n_Cs, n_folds))

        lgst_reg_cv_clf = LogisticRegressionCV(
            Cs=n_Cs, penalty='l2', cv=n_folds,
            class_weight='balanced', random_state=9,
            solver='newton-cg', n_jobs=-2).fit(X_train, y_train)

        # Report on the held-out split through the project helper (cv=11).
        lgst_reg_cv_score = cross_validation_process(
            lgst_reg_cv_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=96.21%
Standard_Deviation=0.12
Scores(average_precision)=[1.    0.583 1.    1.    1.    1.    1.    1.    1.    1.    1.   ]
No Feature Importances
Prediction_Confusion_Matrix=
[[3 2]
 [4 9]]
Prediction_Accuracy=66.67%
Average_Accuracy(average_precision)=96.21%
Standard_Deviation=0.12
Scores(average_precision)=[1.    0.583 1.    1.    1.    1.    1.    1.    1.    1.    1.   ]
No Feature Importances
Prediction_Confusion_Matrix=
[[ 3  2]
 [ 3 10]]
Prediction_Accuracy=72.22%
Average_Accuracy(average_precision)=96.21%
Standard_Deviation=0.12
Scores(average_precision)=[1.    0.583 1.    1.    1.    1.    1.    1.    1.    1.    1.   ]
No Feature Importances
Prediction_Confusion_Matrix=
[[3 2]
 [4 9]]
Prediction_Accuracy=66.67%


ValueError: n_splits=12 cannot be greater than the number of members in each class.

### AdaBoost Classifier

Best results with:

- n_estimators=235, learning_rate=1.442
- n_estimators=52, learning_rate=0.155
- n_estimators=274, learning_rate=0.013
- n_estimators=162, learning_rate=0.767

In [111]:
# Shortlisted AdaBoost settings: n_estimators -> learning_rate.
n_estimators_learning_rate = {235: 1.442, 52: 0.155, 274: 0.013, 162: 0.767}

for n_rounds, rate in n_estimators_learning_rate.items():
    ada_clf = AdaBoostClassifier(
        n_estimators=n_rounds,
        learning_rate=rate,
        random_state=9,
    )
    ada_clf.fit(X_train, y_train)
    # This cell asks the helper for cv=5, whereas the other cells use cv=11.
    ada_scores = cross_validation_process(ada_clf, X_test, y_test, cv=5)

Average_Accuracy(average_precision)=74.54%
Standard_Deviation=0.036
Scores(average_precision)=[0.746 0.731 0.689 0.797 0.765]
Feature importance = [0.01702128 0.79574468 0.00851064 0.00425532 0.00425532 0.00425532
 0.00425532 0.00851064 0.00425532 0.00851064 0.00425532 0.00851064
 0.00851064 0.00851064 0.00851064 0.00425532 0.00425532 0.00851064
 0.00425532 0.00851064 0.00425532 0.00425532 0.01276596 0.00851064
 0.00851064 0.00425532 0.00425532 0.         0.00851064 0.01276596
 0.00425532]
Prediction_Confusion_Matrix=
[[146  58]
 [ 54 148]]
Prediction_Accuracy=72.41%
Average_Accuracy(average_precision)=78.75%
Standard_Deviation=0.019
Scores(average_precision)=[0.814 0.754 0.785 0.796 0.789]
Feature importance = [0.09615385 0.69230769 0.         0.         0.01923077 0.
 0.         0.         0.         0.         0.         0.05769231
 0.         0.         0.         0.         0.03846154 0.
 0.03846154 0.         0.05769231 0.         0.         0.
 0.         0.         0.         0

### SGD Classifier

Best results with:

- max_iter = 28782, n_iter_no_change = 50
- max_iter = 32421, n_iter_no_change = 54
- max_iter = 43631, n_iter_no_change = 74
- max_iter = 37278, n_iter_no_change = 70
- max_iter = 38125, n_iter_no_change = 122

In [112]:
# Shortlisted SGD settings: max_iter -> n_iter_no_change.
max_iter_no_change = {28782: 50, 32421: 54, 43631: 74, 37278: 70, 38125: 122}

for iter_cap, patience in max_iter_no_change.items():
    # Squared-hinge loss with balanced class weights; shuffling is switched off.
    SGD_clf = SGDClassifier(
        loss='squared_hinge',
        max_iter=iter_cap,
        n_iter_no_change=patience,
        shuffle=False,
        class_weight='balanced',
        random_state=9,
        n_jobs=-2,
    )
    SGD_clf.fit(X_train, y_train)

    # Report on the held-out split through the project helper (cv=11).
    SGD_score = cross_validation_process(SGD_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=79.03%
Standard_Deviation=0.061
Scores(average_precision)=[0.789 0.745 0.847 0.663 0.89  0.711 0.803 0.838 0.789 0.796 0.823]
No Feature Importances
Prediction_Confusion_Matrix=
[[150  54]
 [ 54 148]]
Prediction_Accuracy=73.4%
Average_Accuracy(average_precision)=79.0%
Standard_Deviation=0.061
Scores(average_precision)=[0.794 0.745 0.847 0.659 0.89  0.711 0.808 0.834 0.792 0.796 0.814]
No Feature Importances
Prediction_Confusion_Matrix=
[[150  54]
 [ 54 148]]
Prediction_Accuracy=73.4%
Average_Accuracy(average_precision)=78.87%
Standard_Deviation=0.062
Scores(average_precision)=[0.793 0.748 0.843 0.654 0.893 0.711 0.8   0.831 0.792 0.806 0.804]
No Feature Importances
Prediction_Confusion_Matrix=
[[148  56]
 [ 54 148]]
Prediction_Accuracy=72.91%
Average_Accuracy(average_precision)=78.91%
Standard_Deviation=0.062
Scores(average_precision)=[0.795 0.748 0.847 0.654 0.89  0.711 0.806 0.834 0.792 0.796 0.806]
No Feature Importances
Prediction_Confusion_Matri

### Extra Trees Classifier

In [113]:
# Extra-trees ensemble: 1000 trees, entropy split criterion, balanced class
# weights, fitted on the training split.
XTsC_clf = ExtraTreesClassifier(
    n_estimators=1000,
    criterion='entropy',
    class_weight='balanced',
    random_state=9,
    n_jobs=-2,
)
XTsC_clf.fit(X_train, y_train)

# Report on the held-out split through the project helper (cv=11).
XTsC_score = cross_validation_process(XTsC_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=68.45%
Standard_Deviation=0.068
Scores(average_precision)=[0.622 0.777 0.635 0.658 0.829 0.694 0.648 0.598 0.692 0.636 0.741]
Feature importance = [0.03278258 0.61256464 0.01697444 0.0178955  0.00387358 0.00102736
 0.00172665 0.00261806 0.00616034 0.00975212 0.00849698 0.01267351
 0.01171283 0.01117057 0.01224095 0.01285297 0.00870145 0.01441823
 0.01107123 0.01157128 0.00928118 0.01089327 0.00974335 0.01021005
 0.0084434  0.02550935 0.02370963 0.022785   0.01953123 0.01854422
 0.02106406]
Prediction_Confusion_Matrix=
[[116  88]
 [ 69 133]]
Prediction_Accuracy=61.33%


### Random Forest Classifier

In [114]:
# Random forest: 4000 trees with the entropy split criterion. Unlike the
# extra-trees cell, no class_weight is passed here.
rand_frst_clf = RandomForestClassifier(
    n_estimators=4000,
    criterion='entropy',
    random_state=9,
    n_jobs=-2,
)
rand_frst_clf.fit(X_train, y_train)

# Report on the held-out split through the project helper (cv=11).
rand_frst_score = cross_validation_process(rand_frst_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=75.58%
Standard_Deviation=0.077
Scores(average_precision)=[0.651 0.896 0.756 0.784 0.86  0.759 0.668 0.716 0.748 0.659 0.817]
Feature importance = [0.03896967 0.53205951 0.01767907 0.02193352 0.00509787 0.00090894
 0.00157552 0.0029848  0.00591752 0.01255108 0.01172073 0.01630022
 0.01398996 0.01282607 0.01377878 0.01532679 0.01366621 0.01783172
 0.01477573 0.0155923  0.01334679 0.01366169 0.0113848  0.01257791
 0.01104185 0.02918496 0.02806254 0.02356712 0.02338254 0.02317912
 0.02512467]
Prediction_Confusion_Matrix=
[[135  69]
 [ 59 143]]
Prediction_Accuracy=68.47%


### Ridge Classifier

Best results with:

- alpha = 10.13
- alpha = 7.37
- alpha = 13.69

In [115]:
# Shortlisted regularisation strengths for the ridge classifier.
alphas = [10.13, 7.37, 13.69]

for alpha_value in alphas:
    ridge_clf = RidgeClassifier(
        alpha=alpha_value,
        solver='auto',
        class_weight='balanced',
        random_state=9,
    )
    ridge_clf.fit(X_train, y_train)
    # Report on the held-out split through the project helper (cv=11).
    ridge_score = cross_validation_process(ridge_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=75.02%
Standard_Deviation=0.059
Scores(average_precision)=[0.751 0.736 0.833 0.728 0.877 0.717 0.732 0.673 0.802 0.708 0.695]
No Feature Importances
Prediction_Confusion_Matrix=
[[136  68]
 [ 48 154]]
Prediction_Accuracy=71.43%
Average_Accuracy(average_precision)=74.09%
Standard_Deviation=0.062
Scores(average_precision)=[0.751 0.741 0.83  0.722 0.882 0.72  0.708 0.663 0.763 0.7   0.671]
No Feature Importances
Prediction_Confusion_Matrix=
[[136  68]
 [ 49 153]]
Prediction_Accuracy=71.18%
Average_Accuracy(average_precision)=76.11%
Standard_Deviation=0.053
Scores(average_precision)=[0.755 0.752 0.837 0.731 0.875 0.725 0.762 0.708 0.801 0.717 0.707]
No Feature Importances
Prediction_Confusion_Matrix=
[[137  67]
 [ 48 154]]
Prediction_Accuracy=71.67%


### Ridge Classifier CV

In [116]:
# Ridge classifier with built-in cross-validation: cv=20, scored by average
# precision, with balanced class weights.
ridge_cv_clf = RidgeClassifierCV(
    cv=20,
    scoring='average_precision',
    class_weight='balanced',
)
ridge_cv_clf.fit(X_train, y_train)

# Report on the held-out split through the project helper (cv=11).
ridge_cv_score = cross_validation_process(ridge_cv_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=75.02%
Standard_Deviation=0.059
Scores(average_precision)=[0.751 0.736 0.833 0.728 0.877 0.717 0.732 0.673 0.802 0.708 0.695]
No Feature Importances
Prediction_Confusion_Matrix=
[[136  68]
 [ 48 154]]
Prediction_Accuracy=71.43%


### K Neighbors Classifier

Best results with:

- n_neighbors = 19, leaf_size = 28
- n_neighbors = 17, leaf_size = 135
- n_neighbors = 19, leaf_size = 88
- n_neighbors = 14, leaf_size = 88
- n_neighbors = 18, leaf_size = 88

In [117]:
# Shortlisted KNN settings: each key is n_neighbors and each value is either a
# single leaf_size or a list of leaf_size values to try with that n_neighbors.
# The entry for 17 neighbours now uses leaf_size=135 so that the code matches
# the shortlist recorded in the markdown above this cell (it previously used 88).
# NOTE(review): per the scikit-learn documentation, leaf_size influences the
# speed and memory of the neighbour-search tree rather than which neighbours
# are found, so differences between leaf_size values are not expected to
# change the scores -- confirm before treating leaf_size as a tuned parameter.
n_neighbors_leaf_size = {19: [28, 88], 17: 135, 14: 88, 18: 88}

for n_nbrs, leaf_spec in n_neighbors_leaf_size.items():
    # Treat a bare int as a one-element list so both value shapes share a
    # single code path instead of two copy-pasted branches.
    leaf_options = leaf_spec if isinstance(leaf_spec, list) else [leaf_spec]

    for leaf in leaf_options:
        # Echo the setting first (as the QDA cell does) so each report printed
        # by the helper can be attributed to the setting that produced it.
        print('n_neighbors={}, leaf_size={}'.format(n_nbrs, leaf))

        KNN_clf = KNeighborsClassifier(
            n_neighbors=n_nbrs, leaf_size=leaf, n_jobs=-2).fit(X_train, y_train)

        # Report on the held-out split through the project helper (cv=11).
        KNN_score = cross_validation_process(KNN_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=80.35%
Standard_Deviation=0.05
Scores(average_precision)=[0.766 0.857 0.848 0.747 0.859 0.762 0.85  0.833 0.772 0.71  0.835]
No Feature Importances
Prediction_Confusion_Matrix=
[[150  54]
 [ 50 152]]
Prediction_Accuracy=74.38%
Average_Accuracy(average_precision)=80.66%
Standard_Deviation=0.05
Scores(average_precision)=[0.762 0.866 0.852 0.75  0.853 0.763 0.847 0.845 0.79  0.715 0.83 ]
No Feature Importances
Prediction_Confusion_Matrix=
[[148  56]
 [ 53 149]]
Prediction_Accuracy=73.15%
Average_Accuracy(average_precision)=80.04%
Standard_Deviation=0.051
Scores(average_precision)=[0.741 0.837 0.869 0.763 0.853 0.76  0.84  0.831 0.768 0.709 0.834]
No Feature Importances
Prediction_Confusion_Matrix=
[[147  57]
 [ 50 152]]
Prediction_Accuracy=73.65%
Average_Accuracy(average_precision)=78.96%
Standard_Deviation=0.057
Scores(average_precision)=[0.739 0.843 0.849 0.688 0.85  0.747 0.843 0.825 0.765 0.717 0.82 ]
No Feature Importances
Prediction_Confusion_Matr

### Multi-layer Perceptron classifier

In [118]:
# Multi-layer perceptron: one hidden layer of 200 logistic units, trained with
# the L-BFGS solver for at most 500 iterations.
#
# The original call also passed batch_size, learning_rate, shuffle,
# validation_fraction and n_iter_no_change. The scikit-learn documentation
# describes all of these as applying only to the stochastic solvers
# ('sgd'/'adam') or to early stopping, so with solver='lbfgs' they have no
# effect. They are dropped here so the cell lists only the settings that
# actually shape the model.
MLP_clf = MLPClassifier(
    hidden_layer_sizes=(200,),
    activation='logistic',
    solver='lbfgs',
    alpha=0.0001,
    max_iter=500,
    random_state=9,
).fit(X_train, y_train)

# Report on the held-out split through the project helper (cv=11).
MLP_score = cross_validation_process(MLP_clf, X_test, y_test, cv=11)

Average_Accuracy(average_precision)=73.67%
Standard_Deviation=0.072
Scores(average_precision)=[0.727 0.726 0.842 0.738 0.844 0.652 0.643 0.752 0.798 0.617 0.766]
No Feature Importances
Prediction_Confusion_Matrix=
[[143  61]
 [ 52 150]]
Prediction_Accuracy=72.17%


### Best for small dataset:

15 of the 18 test samples classified correctly (83% accuracy; confusion matrix in brackets):
1. KNN = 83% [[4 1][2 11]]
2. Random Forest = 83% [[5 0][3 10]]
3. Ada Boost Classifier = 83% [[4 1][2 11]]
4. Gaussian Process Classifier = 83% [[4 1][2 11]]

In [76]:
# classification_scoring = ['accuracy', 'balanced_accuracy', 'average_precision',
#                           'brier_score_loss', 'f1', 'f1_micro', 'f1_macro',
#                           'f1_weighted', 'neg_log_loss', 'precision',
#                           'recall', 'roc_auc']

In [77]:
# classifiers = [
#     KNeighborsClassifier(3),
#     SVC(kernel="linear", C=0.025),
#     SVC(gamma=2, C=1),
#     GaussianProcessClassifier(1.0 * RBF(1.0)),
#     DecisionTreeClassifier(max_depth=5),
#     RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
#     MLPClassifier(alpha=1),
#     AdaBoostClassifier(),
#     GaussianNB(),
#     QuadraticDiscriminantAnalysis()]

In [78]:
# clf1 = LogisticRegression(solver='lbfgs', multi_class='multinomial',
#                           random_state=1)
# clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
# clf3 = GaussianNB()

# eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')

# for clf, label in zip([clf1, clf2, clf3, eclf], ['Logistic Regression', 'Random Forest', 'naive Bayes', 'Ensemble']):
#     scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
#     print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))

In [79]:
# clf = DecisionTreeClassifier(max_depth=None, min_samples_split=2,
#     random_state=0)
# scores = cross_val_score(clf, X, y, cv=5)
# scores.mean()


# clf = RandomForestClassifier(n_estimators=10, max_depth=None,
#     min_samples_split=2, random_state=0)
# scores = cross_val_score(clf, X, y, cv=5)
# scores.mean()


# clf = ExtraTreesClassifier(n_estimators=10, max_depth=None,
#     min_samples_split=2, random_state=0)
# scores = cross_val_score(clf, X, y, cv=5)
# scores.mean() > 0.999

In [None]:
# est = GradientBoostingRegressor
# mean_squared_error(y_test, est.predict(X_test))  
# clf.score(X_test, y_test)