# Importing functions

In [1]:
from common import load_image, show_image, load_data, split_data, validation_scores

In [14]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline

# Read the dataset through the project helper
df = load_data()

# Partition the data into train/test features and labels
X_train, X_test, y_train, y_test = split_data(df)
# Sanity check: print the shape of each part (train X, train y, test X, test y)
print(*(part.shape for part in (X_train, y_train, X_test, y_test)))

(5335, 256) (5335,) (1334, 256) (1334,)


## Standard Scaling, Bagging Ensemble

In [12]:
## Simple example of running an experiment
# Hyperparameter grid: the key addresses max_depth of the decision tree
# wrapped by the bagging step of the pipeline
param_grid = {
    'baggingclassifier__estimator__max_depth': list(range(1, 6))
}
# Pipeline: standard-scale the features, then fit a bagging ensemble of decision trees
pipe_standard_bagging = make_pipeline(
    StandardScaler(),
    BaggingClassifier(
        estimator=DecisionTreeClassifier(random_state=0),
        random_state=0,
    ),
)
# Run the grid search through the project helper
# (one search for each of the 3 required scoring metrics)
results = validation_scores(pipe_standard_bagging, param_grid, X_train, y_train)
# Print the validation results under a heading
print('Validation results:', results, sep='\n')



Validation results:
{'accuracy':    mean_fit_time  std_fit_time  mean_score_time  std_score_time  \
0       0.766201      0.015764         0.018084        0.000398   
1       1.455849      0.022725         0.018587        0.001676   
2       2.159789      0.029079         0.018826        0.000298   
3       3.004223      0.073618         0.019327        0.001132   
4       3.768255      0.098088         0.022448        0.004612   

   param_baggingclassifier__estimator__max_depth  \
0                                              1   
1                                              2   
2                                              3   
3                                              4   
4                                              5   

                                           params  split0_test_score  \
0  {'baggingclassifier__estimator__max_depth': 1}           0.483508   
1  {'baggingclassifier__estimator__max_depth': 2}           0.884558   
2  {'baggingclassifier__estimator__

## MinMax Scaling, Bagging Ensemble

In [None]:
# Hyperparameter grid: the key addresses max_depth of the decision tree
# wrapped by the bagging step of the pipeline
param_grid = {
    'baggingclassifier__estimator__max_depth': list(range(1, 6))
}

# Pipeline: min-max scale the features, then fit a bagging ensemble of decision trees
pipe_minmax_bagging = make_pipeline(
    MinMaxScaler(),
    BaggingClassifier(
        estimator=DecisionTreeClassifier(random_state=0),
        random_state=0,
    ),
)

# Run the grid search through the project helper
results = validation_scores(pipe_minmax_bagging, param_grid, X_train, y_train)

# Print the validation results under a heading
print('Validation results:', results, sep='\n')



Validation results:
{'accuracy':    mean_fit_time  std_fit_time  mean_score_time  std_score_time  \
0       0.799149      0.008396         0.018544        0.002638   
1       1.528467      0.057602         0.017985        0.004623   
2       2.234790      0.061774         0.020761        0.005306   
3       2.926166      0.026455         0.017296        0.003347   
4       3.581940      0.030318         0.017532        0.004345   

   param_baggingclassifier__estimator__max_depth  \
0                                              1   
1                                              2   
2                                              3   
3                                              4   
4                                              5   

                                           params  split0_test_score  \
0  {'baggingclassifier__estimator__max_depth': 1}           0.483508   
1  {'baggingclassifier__estimator__max_depth': 2}           0.884558   
2  {'baggingclassifier__estimator__

## MinMax Scaling, AdaBoost Ensemble

In [18]:
# Hyperparameter grid: the key addresses max_depth of the decision tree
# wrapped by the AdaBoost step of the pipeline
param_grid = {
    'adaboostclassifier__estimator__max_depth': list(range(1, 6))
}

# Pipeline: min-max scale the features, then fit an AdaBoost ensemble of decision trees
pipe_minmax_adaboost = make_pipeline(
    MinMaxScaler(),
    AdaBoostClassifier(
        estimator=DecisionTreeClassifier(random_state=0),
        random_state=0,
    ),
)

# Run the grid search through the project helper
results = validation_scores(pipe_minmax_adaboost, param_grid, X_train, y_train)

# Print the validation results under a heading
print('Validation results:', results, sep='\n')



Validation results:
{'accuracy':    mean_fit_time  std_fit_time  mean_score_time  std_score_time  \
0       6.327216      0.156044         0.039535        0.001590   
1      12.431345      0.160781         0.042543        0.002702   
2      18.461126      0.214114         0.040453        0.002504   
3      24.878528      0.640188         0.052767        0.016835   
4      37.424052      1.833996         0.057637        0.008563   

   param_adaboostclassifier__estimator__max_depth  \
0                                               1   
1                                               2   
2                                               3   
3                                               4   
4                                               5   

                                            params  split0_test_score  \
0  {'adaboostclassifier__estimator__max_depth': 1}           0.624438   
1  {'adaboostclassifier__estimator__max_depth': 2}           0.740630   
2  {'adaboostclassifier__e

## Standard Scaling, AdaBoost Ensemble

In [19]:
# Hyperparameter grid: the key addresses max_depth of the decision tree
# wrapped by the AdaBoost step of the pipeline
param_grid = {
    'adaboostclassifier__estimator__max_depth': list(range(1, 6))
}

# Pipeline: standard-scale the features, then fit an AdaBoost ensemble of decision trees
pipe_standard_adaboost = make_pipeline(
    StandardScaler(),
    AdaBoostClassifier(
        estimator=DecisionTreeClassifier(random_state=0),
        random_state=0,
    ),
)

# Run the grid search through the project helper
results = validation_scores(pipe_standard_adaboost, param_grid, X_train, y_train)

# Print the validation results under a heading
print('Validation results:', results, sep='\n')



Validation results:
{'accuracy':    mean_fit_time  std_fit_time  mean_score_time  std_score_time  \
0       6.469312      0.052822         0.045915        0.004332   
1      12.895718      0.191243         0.058071        0.020100   
2      19.300928      0.417355         0.046103        0.004290   
3      25.297743      0.574914         0.044304        0.005515   
4      30.848286      0.184355         0.047210        0.004262   

   param_adaboostclassifier__estimator__max_depth  \
0                                               1   
1                                               2   
2                                               3   
3                                               4   
4                                               5   

                                            params  split0_test_score  \
0  {'adaboostclassifier__estimator__max_depth': 1}           0.624438   
1  {'adaboostclassifier__estimator__max_depth': 2}           0.740630   
2  {'adaboostclassifier__e