In [1]:
#standard ds imports
import pandas as pd
import numpy as np
#viz and stats
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
# .py imports
#import wranglerer as wr
#import modeling as md
import os
#sklearn imports
from sklearn.model_selection import train_test_split,cross_val_score, GridSearchCV
import sklearn.preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score,confusion_matrix, plot_confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
#CATboost imports
from catboost import CatBoostClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
import explore_r as ex
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

### This notebook builds upon the previous notebook (model_r) and incorporates GridSearch CrossValidation to explore each algorithm with a small subset of possible hyperparameter combinations.

In [2]:
df = pd.read_csv('prepped_data.csv')

In [3]:
df = df.drop(columns=['date', 'day_of_week', 'start_time','home_score',
       'home_wins', 'away_score', 'away_wins','total_scores'])
df['spread'] = abs(df['spread'])

In [4]:
df.head()

Unnamed: 0,week_num,stadium,temp,humidity,wind,spread,ou,is_under,abnormal_start,is_playoff,playoff_implications,is_turf,is_outdoor
0,19,State Farm Stadium,72,0,0,1.0,51.0,0,0,1,1,0,0
1,19,Lincoln Financial Field,52,48,14,2.5,45.5,1,0,1,1,0,1
2,19,GEHA Field at Arrowhead Stadium,22,55,13,1.5,48.0,1,0,1,1,0,1
3,19,Highmark Stadium,32,10,0,5.5,49.0,1,0,1,1,1,1
4,19,Levi's Stadium,55,47,19,4.0,47.0,1,0,1,1,0,1


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10811 entries, 0 to 10810
Data columns (total 13 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   week_num              10811 non-null  int64  
 1   stadium               10811 non-null  object 
 2   temp                  10811 non-null  int64  
 3   humidity              10811 non-null  int64  
 4   wind                  10811 non-null  int64  
 5   spread                10811 non-null  float64
 6   ou                    10811 non-null  float64
 7   is_under              10811 non-null  int64  
 8   abnormal_start        10811 non-null  int64  
 9   is_playoff            10811 non-null  int64  
 10  playoff_implications  10811 non-null  int64  
 11  is_turf               10811 non-null  int64  
 12  is_outdoor            10811 non-null  int64  
dtypes: float64(2), int64(10), object(1)
memory usage: 1.1+ MB


## ESTABLISH BASELINE 50.7%

In [6]:
df.is_under.value_counts(normalize=True)

1    0.507354
0    0.492646
Name: is_under, dtype: float64

In [7]:
df['baseline'] = 1

In [8]:
baseline_accuracy = (df.baseline == df.is_under).mean()
print(f'baseline accuracy: {baseline_accuracy:.2%}')

baseline accuracy: 50.74%


In [9]:
subset = df[df.is_under == 1]
baseline_recall = (subset.baseline == subset.is_under).mean()
print(f'baseline recall: {baseline_recall:.2%}')

baseline recall: 100.00%


In [10]:
subset = df[df.baseline == 1]
baseline_precision = (subset.baseline == subset.is_under).mean()
print(f'baseline precision: {baseline_precision:.2%}')

baseline precision: 50.74%


In [11]:
df.drop(columns='baseline',inplace=True)

In [12]:
df_no_stadium = df.drop(columns='stadium')

In [13]:
df_no_stadium.head()

Unnamed: 0,week_num,temp,humidity,wind,spread,ou,is_under,abnormal_start,is_playoff,playoff_implications,is_turf,is_outdoor
0,19,72,0,0,1.0,51.0,0,0,1,1,0,0
1,19,52,48,14,2.5,45.5,1,0,1,1,0,1
2,19,22,55,13,1.5,48.0,1,0,1,1,0,1
3,19,32,10,0,5.5,49.0,1,0,1,1,1,1
4,19,55,47,19,4.0,47.0,1,0,1,1,0,1


In [14]:
X_train, y_train, X_validate, y_validate, X_test, y_test = ex.train_validate_test(df_no_stadium,'is_under')

In [15]:
X_train.shape, y_train.shape, X_validate.shape, y_validate.shape, X_test.shape, y_test.shape 

((6471, 11), (6471,), (2394, 11), (2394,), (1946, 11), (1946,))

In [16]:
X_train.head()

Unnamed: 0,week_num,temp,humidity,wind,spread,ou,abnormal_start,is_playoff,playoff_implications,is_turf,is_outdoor
1713,12,72,0,0,4.0,49.0,0,0,1,1,0
2164,17,72,0,0,7.5,38.0,0,0,1,0,0
2554,9,72,0,0,1.0,42.5,1,0,0,0,0
3117,6,58,52,10,12.5,40.0,0,0,0,0,1
9819,13,51,49,14,5.0,39.5,0,0,1,0,1


# MODELING

## DTC VANILLA

In [17]:
# create the Decision Tree Classifier model 
dtc = DecisionTreeClassifier()

In [18]:
# fit the model to the TRAIN dataset:
dtc.fit(X_train, y_train)

In [19]:
# use the model by calling for the predictions made via the TRAIN dataset
dtc_preds = dtc.predict(X_train)
pd.crosstab(dtc_preds,y_train) # a confusion matrix with ACTUALS as columns and PREDICTIONS as rows

is_under,0,1
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0,3180,20
1,0,3271


In [20]:
print(f'Accuracy-Train {round(dtc.score(X_train,y_train),4)}')
print(f'Accuracy-Validate {round(dtc.score(X_validate,y_validate),4)}')
print(classification_report(y_train,dtc_preds))
print(classification_report(y_validate,dtc.predict(X_validate)))

Accuracy-Train 0.9969
Accuracy-Validate 0.5205
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      3180
           1       1.00      0.99      1.00      3291

    accuracy                           1.00      6471
   macro avg       1.00      1.00      1.00      6471
weighted avg       1.00      1.00      1.00      6471

              precision    recall  f1-score   support

           0       0.51      0.50      0.51      1178
           1       0.53      0.54      0.53      1216

    accuracy                           0.52      2394
   macro avg       0.52      0.52      0.52      2394
weighted avg       0.52      0.52      0.52      2394



## DTC maxDepth = X

In [21]:
# create the Decision Tree Classifier model 
dtc = DecisionTreeClassifier(max_depth=5,min_samples_leaf=5)

In [22]:
# fit the model to the TRAIN dataset:
dtc.fit(X_train, y_train)

In [23]:
# use the model by calling for the predictions made via the TRAIN dataset
dtc_preds = dtc.predict(X_train)
pd.crosstab(dtc_preds,y_train) # a confusion matrix with ACTUALS as columns and PREDICTIONS as rows

is_under,0,1
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1604,1365
1,1576,1926


In [24]:
print(f'Accuracy-Train {round(dtc.score(X_train,y_train),4)}')
print(f'Accuracy-Validate {round(dtc.score(X_validate,y_validate),4)}')
print(classification_report(y_train,dtc_preds))
print(classification_report(y_validate,dtc.predict(X_validate)))

Accuracy-Train 0.5455
Accuracy-Validate 0.5029
              precision    recall  f1-score   support

           0       0.54      0.50      0.52      3180
           1       0.55      0.59      0.57      3291

    accuracy                           0.55      6471
   macro avg       0.55      0.54      0.54      6471
weighted avg       0.55      0.55      0.54      6471

              precision    recall  f1-score   support

           0       0.49      0.46      0.48      1178
           1       0.51      0.55      0.53      1216

    accuracy                           0.50      2394
   macro avg       0.50      0.50      0.50      2394
weighted avg       0.50      0.50      0.50      2394



## DTC grid_search CROSS_VALIDATION k=5

In [25]:
cross_val_score(dtc, X_train, y_train, cv=10)

array([0.50308642, 0.52550232, 0.46367852, 0.47449768, 0.50695518,
       0.51931994, 0.51931994, 0.51468315, 0.51468315, 0.5007728 ])

In [26]:
param_grid = {
    'max_depth': [None,18, 12, 10, 5],
    'min_samples_leaf': [1, 5, 10, 20],
    'criterion': ['gini', 'entropy'],
}

In [27]:
gr_search = GridSearchCV(DecisionTreeClassifier(),
                      param_grid)

In [28]:
gr_search

In [29]:
gr_search.fit(X_train, y_train)

In [30]:
results = gr_search.cv_results_

In [31]:
results_df_init = pd.DataFrame(results)

In [32]:
results_df_init.shape

(40, 16)

In [33]:
params = pd.DataFrame(results['params'])

In [34]:
params

Unnamed: 0,criterion,max_depth,min_samples_leaf
0,gini,,1
1,gini,,5
2,gini,,10
3,gini,,20
4,gini,18.0,1
5,gini,18.0,5
6,gini,18.0,10
7,gini,18.0,20
8,gini,12.0,1
9,gini,12.0,5


In [35]:
results_df_init.sort_values(by='rank_test_score').head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,param_min_samples_leaf,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
2,0.01075,0.000431,0.000548,1.1e-05,gini,,10,"{'criterion': 'gini', 'max_depth': None, 'min_...",0.501931,0.502318,0.499227,0.517002,0.535549,0.511205,0.013672,1
37,0.00485,4.6e-05,0.00046,4e-06,entropy,5.0,5,"{'criterion': 'entropy', 'max_depth': 5, 'min_...",0.491892,0.510046,0.522411,0.521638,0.508501,0.510898,0.011096,2
6,0.010485,0.000335,0.000531,7e-06,gini,18.0,10,"{'criterion': 'gini', 'max_depth': 18, 'min_sa...",0.505019,0.506955,0.490726,0.508501,0.540185,0.510277,0.016239,3
38,0.004803,4.7e-05,0.000461,6e-06,entropy,5.0,10,"{'criterion': 'entropy', 'max_depth': 5, 'min_...",0.492664,0.504637,0.520866,0.51932,0.507728,0.509043,0.010342,4
17,0.004745,4.8e-05,0.000457,5e-06,gini,5.0,5,"{'criterion': 'gini', 'max_depth': 5, 'min_sam...",0.494981,0.506955,0.510046,0.520093,0.511592,0.508733,0.008139,5


## RFC grid_search CROSS_VALIDATION k=5

In [36]:
param_grid = {
    'max_depth': [None,18, 12, 10, 5],
    'n_estimators': [55, 101, 201],
    'criterion': ['gini', 'entropy'],
}
gr_search = GridSearchCV(RandomForestClassifier(),
                      param_grid)

In [37]:
gr_search

In [39]:
gr_search.fit(X_train, y_train)

In [40]:
results = gr_search.cv_results_
results_df_init = pd.DataFrame(results)
results_df_init.shape

(30, 16)

In [41]:
params = pd.DataFrame(results['params'])
results_df_init.sort_values(by='rank_test_score').head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
14,0.306615,0.000875,0.013049,0.00021,gini,5,201,"{'criterion': 'gini', 'max_depth': 5, 'n_estim...",0.538224,0.517002,0.523957,0.518547,0.534003,0.526346,0.008409,1
13,0.154892,0.000545,0.006861,0.000109,gini,5,101,"{'criterion': 'gini', 'max_depth': 5, 'n_estim...",0.535135,0.517774,0.530912,0.513138,0.534003,0.526192,0.008995,2
29,0.31182,0.000635,0.012963,0.000213,entropy,5,201,"{'criterion': 'entropy', 'max_depth': 5, 'n_es...",0.527413,0.52473,0.52473,0.516229,0.537094,0.526039,0.00669,3
26,0.484041,0.001502,0.02012,0.000398,entropy,10,201,"{'criterion': 'entropy', 'max_depth': 10, 'n_e...",0.518919,0.520866,0.534776,0.527048,0.517002,0.523722,0.006475,4
28,0.160986,0.004445,0.006945,0.000308,entropy,5,101,"{'criterion': 'entropy', 'max_depth': 5, 'n_es...",0.532046,0.517002,0.532457,0.51391,0.523184,0.52372,0.007581,5


## NBC grid_search CROSS_VALIDATION k=5

In [42]:
param_grid = {
    'var_smoothing': [.000000001,.000000002,.000000003]
}
gr_search = GridSearchCV(GaussianNB(),
                      param_grid)

In [43]:
gr_search

In [44]:
gr_search.fit(X_train, y_train)

In [45]:
results = gr_search.cv_results_
results_df_init = pd.DataFrame(results)
results_df_init.shape

(3, 14)

In [46]:
params = pd.DataFrame(results['params'])
results_df_init.sort_values(by='rank_test_score').head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_var_smoothing,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.00344,0.00061,0.00148,0.000386,0.0,{'var_smoothing': 1e-09},0.51583,0.503091,0.522411,0.512365,0.532457,0.517231,0.009841,1
1,0.00226,0.000253,0.00101,0.000107,0.0,{'var_smoothing': 2e-09},0.51583,0.503091,0.522411,0.512365,0.532457,0.517231,0.009841,1
2,0.001736,0.000104,0.00078,3.9e-05,0.0,{'var_smoothing': 3e-09},0.51583,0.503091,0.522411,0.512365,0.532457,0.517231,0.009841,1


## GBC grid_search CROSS_VALIDATION k=5

In [47]:
param_grid = {
    'learning_rate': [0.1,0.2,0.5,1.0, 5.0],
    'n_estimators': [55, 101, 201],
    'min_samples_leaf': [1,5,10,20],
}
gr_search = GridSearchCV(GradientBoostingClassifier(),
                      param_grid)

In [48]:
gr_search

In [49]:
gr_search.fit(X_train, y_train)

In [50]:
results = gr_search.cv_results_
results_df_init = pd.DataFrame(results)
results_df_init.shape

(60, 16)

In [51]:
params = pd.DataFrame(results['params'])
results_df_init.sort_values(by='rank_test_score').head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_learning_rate,param_min_samples_leaf,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
3,0.157319,0.000294,0.001116,3.9e-05,0.1,5,55,"{'learning_rate': 0.1, 'min_samples_leaf': 5, ...",0.528958,0.527821,0.512365,0.509274,0.532457,0.522175,0.009448,1
15,0.161512,0.000212,0.001131,1.3e-05,0.2,5,55,"{'learning_rate': 0.2, 'min_samples_leaf': 5, ...",0.500386,0.531685,0.520866,0.518547,0.52473,0.519243,0.010427,2
0,0.164985,0.015162,0.001423,0.000689,0.1,1,55,"{'learning_rate': 0.1, 'min_samples_leaf': 1, ...",0.520463,0.522411,0.510819,0.513138,0.529366,0.519239,0.006667,3
4,0.28821,0.000634,0.001573,1.6e-05,0.1,5,101,"{'learning_rate': 0.1, 'min_samples_leaf': 5, ...",0.52278,0.522411,0.516229,0.517774,0.51391,0.518621,0.003473,4
7,0.299076,0.005508,0.001771,0.000115,0.1,10,101,"{'learning_rate': 0.1, 'min_samples_leaf': 10,...",0.52973,0.51932,0.513138,0.502318,0.521638,0.517229,0.009156,5


## CATboost grid_search CROSS_VALIDATION k=5

In [73]:
# Create and fit the thing
CATb = CatBoostClassifier(verbose=False,depth=10,learning_rate=1.0)
CATb.fit(X_train,y_train)
CATb_preds = CATb.predict(X_train)
pd.crosstab(CATb_preds,y_train) # a confusion matrix with ACTUALS as columns and PREDICTIONS as rows

is_under,0,1
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0,3167,7
1,13,3284


In [74]:
print(f'Accuracy-Train {round(CATb.score(X_train,y_train),4)}')
print(f'Accuracy-Validate {round(CATb.score(X_validate,y_validate),4)}')
print(classification_report(y_train,CATb_preds))
print(classification_report(y_validate,CATb.predict(X_validate)))

Accuracy-Train 0.9969
Accuracy-Validate 0.4992
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3180
           1       1.00      1.00      1.00      3291

    accuracy                           1.00      6471
   macro avg       1.00      1.00      1.00      6471
weighted avg       1.00      1.00      1.00      6471

              precision    recall  f1-score   support

           0       0.49      0.51      0.50      1178
           1       0.51      0.49      0.50      1216

    accuracy                           0.50      2394
   macro avg       0.50      0.50      0.50      2394
weighted avg       0.50      0.50      0.50      2394



In [52]:
param_grid = {
    'verbose': [False],
    'depth': [5, 10, 12],
    'learning_rate': [None,0.1,0.2, 0.5],
}
gr_search = GridSearchCV(CatBoostClassifier(),
                      param_grid)

In [53]:
gr_search

In [54]:
gr_search.fit(X_train, y_train)

KeyError: 'learning_rate'

In [None]:
results = gr_search.cv_results_
results_df_init = pd.DataFrame(results)
results_df_init.shape

In [None]:
params = pd.DataFrame(results['params'])
results_df_init.sort_values(by='rank_test_score').head()

## SCALE for Logistic regression, MLP, etc.

In [75]:
sc_X = StandardScaler()
X_train_scaled = sc_X.fit_transform(X_train)
X_validate_scaled = sc_X.transform(X_validate)
X_test_scaled = sc_X.transform(X_test)

In [76]:
mlp = MLPClassifier(hidden_layer_sizes=(1024,512,128),activation="relu",random_state=2013,
                    batch_size=200,solver='adam')
mlp.fit(X_train_scaled, y_train)
mlp_preds = mlp.predict(X_train_scaled)
print(mlp.score(X_train_scaled, y_train))
pd.crosstab(mlp_preds,y_train) # a confusion matrix with ACTUALS as columns and PREDICTIONS as rows

0.9313861845155308


is_under,0,1
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0,3025,289
1,155,3002


In [77]:
mlp.n_layers_

5

In [78]:
print(f'Accuracy-Train {round(mlp.score(X_train_scaled,y_train),4)}')
print(f'Accuracy-Validate {round(mlp.score(X_validate_scaled,y_validate),4)}')
print(classification_report(y_train,mlp_preds))
print(classification_report(y_validate,mlp.predict(X_validate_scaled)))

Accuracy-Train 0.9314
Accuracy-Validate 0.5004
              precision    recall  f1-score   support

           0       0.91      0.95      0.93      3180
           1       0.95      0.91      0.93      3291

    accuracy                           0.93      6471
   macro avg       0.93      0.93      0.93      6471
weighted avg       0.93      0.93      0.93      6471

              precision    recall  f1-score   support

           0       0.49      0.53      0.51      1178
           1       0.51      0.47      0.49      1216

    accuracy                           0.50      2394
   macro avg       0.50      0.50      0.50      2394
weighted avg       0.50      0.50      0.50      2394



## KNN grid_search CROSS_VALIDATION k=5

In [79]:
param_grid = {
    'n_neighbors': [70],
    'weights': ['uniform','distance'],
    'algorithm':['ball_tree','brute'],
    'leaf_size': [3,4,5,6,7,8,9],
}
gr_search = GridSearchCV(KNeighborsClassifier(),
                      param_grid)

In [80]:
gr_search

In [81]:
gr_search.fit(X_train, y_train)

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


In [82]:
results = gr_search.cv_results_
results_df_init = pd.DataFrame(results)
results_df_init.shape

(28, 17)

In [83]:
params = pd.DataFrame(results['params'])
results_df_init.sort_values(by='rank_test_score').head(11)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_algorithm,param_leaf_size,param_n_neighbors,param_weights,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
27,0.000783,1e-05,0.023592,0.00085,brute,9,70,distance,"{'algorithm': 'brute', 'leaf_size': 9, 'n_neig...",0.535135,0.510819,0.509274,0.545595,0.53323,0.526811,0.014329,1
25,0.000789,2e-05,0.024107,0.001406,brute,8,70,distance,"{'algorithm': 'brute', 'leaf_size': 8, 'n_neig...",0.535135,0.510819,0.509274,0.545595,0.53323,0.526811,0.014329,1
23,0.000792,1.4e-05,0.02458,0.001206,brute,7,70,distance,"{'algorithm': 'brute', 'leaf_size': 7, 'n_neig...",0.535135,0.510819,0.509274,0.545595,0.53323,0.526811,0.014329,1
21,0.000804,2.2e-05,0.024106,0.000739,brute,6,70,distance,"{'algorithm': 'brute', 'leaf_size': 6, 'n_neig...",0.535135,0.510819,0.509274,0.545595,0.53323,0.526811,0.014329,1
19,0.000814,1.8e-05,0.024321,0.000554,brute,5,70,distance,"{'algorithm': 'brute', 'leaf_size': 5, 'n_neig...",0.535135,0.510819,0.509274,0.545595,0.53323,0.526811,0.014329,1
17,0.000801,1.6e-05,0.024401,0.000952,brute,4,70,distance,"{'algorithm': 'brute', 'leaf_size': 4, 'n_neig...",0.535135,0.510819,0.509274,0.545595,0.53323,0.526811,0.014329,1
15,0.000808,2.5e-05,0.024721,0.001111,brute,3,70,distance,"{'algorithm': 'brute', 'leaf_size': 3, 'n_neig...",0.535135,0.510819,0.509274,0.545595,0.53323,0.526811,0.014329,1
1,0.002282,9.7e-05,0.046436,0.000578,ball_tree,3,70,distance,"{'algorithm': 'ball_tree', 'leaf_size': 3, 'n_...",0.534363,0.510819,0.510046,0.544822,0.531685,0.526347,0.013718,8
3,0.00224,1.8e-05,0.04637,0.000714,ball_tree,4,70,distance,"{'algorithm': 'ball_tree', 'leaf_size': 4, 'n_...",0.534363,0.510819,0.510046,0.544822,0.531685,0.526347,0.013718,8
5,0.002281,0.000132,0.045752,0.000601,ball_tree,5,70,distance,"{'algorithm': 'ball_tree', 'leaf_size': 5, 'n_...",0.534363,0.510819,0.510046,0.544822,0.531685,0.526347,0.013718,8


## LOG grid_search CROSS_VALIDATION k=5

In [None]:
param_grid = {
    'penalty': ['none','l1','l2','elasticnet'],
    'class_weight': ['balanced',None],
    'max_iter':[100,50,200]
}
gr_search = GridSearchCV(LogisticRegression(),
                      param_grid)

In [None]:
gr_search

In [None]:
gr_search.fit(X_train, y_train)

In [None]:
results = gr_search.cv_results_
results_df_init = pd.DataFrame(results)
results_df_init.shape

In [None]:
params = pd.DataFrame(results['params'])
results_df_init.sort_values(by='rank_test_score').head(11)

## SVM grid_search CROSS_VALIDATION k=5

In [None]:
param_grid = {
    'shrinking': [True,False],
    'C':[1,5,10]
}
gr_search = GridSearchCV(SVC(),
                      param_grid)

In [None]:
gr_search

In [None]:
gr_search.fit(X_train, y_train)

In [None]:
results = gr_search.cv_results_
results_df_init = pd.DataFrame(results)
results_df_init.shape

In [None]:
params = pd.DataFrame(results['params'])
results_df_init.sort_values(by='rank_test_score').head(11)