In [35]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import imblearn

from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImPipeline

from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import normalize,StandardScaler, OneHotEncoder
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge
from sklearn.metrics import log_loss
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier 
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.compose import ColumnTransformer
from sklearn.metrics import roc_curve, roc_auc_score


In [4]:
# Read the survey features and the vaccine labels (both indexed by
# respondent_id), then keep only the respondents present in both tables.
features_df = pd.read_csv(
    '../Data/training_set_features.csv',
    index_col="respondent_id",
)
labels_df = pd.read_csv(
    '../Data/training_set_labels.csv',
    index_col="respondent_id",
)
joined_df = features_df.join(labels_df, how='inner')

In [5]:
# Preview the first rows of the joined features + labels frame.
joined_df.head()

Unnamed: 0_level_0,h1n1_concern,h1n1_knowledge,behavioral_antiviral_meds,behavioral_avoidance,behavioral_face_mask,behavioral_wash_hands,behavioral_large_gatherings,behavioral_outside_home,behavioral_touch_face,doctor_recc_h1n1,...,rent_or_own,employment_status,hhs_geo_region,census_msa,household_adults,household_children,employment_industry,employment_occupation,h1n1_vaccine,seasonal_vaccine
respondent_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,Own,Not in Labor Force,oxchjgsf,Non-MSA,0.0,0.0,,,0,0
1,3.0,2.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,...,Rent,Employed,bhuqouqj,"MSA, Not Principle City",0.0,0.0,pxcmvdjn,xgwztkwe,0,1
2,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,,...,Own,Employed,qufhixun,"MSA, Not Principle City",2.0,0.0,rucpziij,xtkaffoo,0,0
3,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,...,Rent,Not in Labor Force,lrircsnp,"MSA, Principle City",0.0,0.0,,,0,1
4,2.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,...,Own,Employed,qufhixun,"MSA, Not Principle City",1.0,0.0,wxleyezf,emcorrxb,0,0


In [6]:
# an all-in-one data cleaning function. Do this BEFORE OHE
# Maybe this should be a class and worked into the pipeline?
def datacleaner(maindataframe):
    """Clean the joined survey dataframe in place, ahead of one-hot encoding.

    Steps, in order:
      1. Drop every row that has a missing value in any `general_dropna` column.
      2. Drop the `employment_industry` and `employment_occupation` columns.
      3. Fill the remaining gaps in selected columns with fixed values
         (see the column groups below).

    Parameters
    ----------
    maindataframe : pandas.DataFrame
        Frame containing every column named below. It is MODIFIED IN PLACE.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If an expected column is missing. This also happens when the function
        is run a second time on the same frame, because the two dropped
        columns are already gone.
    """
    def columndrop(dataframe, column_list):
        """Remove whole columns from `dataframe` in place."""
        dataframe.drop(column_list, axis=1, inplace=True)

    def basicdropna(dataframe, column_list):
        """Remove, in place, rows with a missing value in any listed column."""
        dataframe.dropna(subset=column_list, inplace=True)

    def impute_missing_data(dataframe, column_list, fillvalue):
        """Replace missing values in each listed column with `fillvalue`."""
        for column in column_list:
            # Assign the filled column back to the frame. The previous
            # `dataframe[column].fillna(..., inplace=True)` operated on an
            # intermediate Series: pandas 2.2 warns about it and, with
            # Copy-on-Write enabled, the frame is left unchanged.
            dataframe[column] = dataframe[column].fillna(fillvalue)

    # Column groups, each processed differently. The original author's notes
    # on how these choices were made live elsewhere in the notebook.
    drop_columns = ['employment_industry', 'employment_occupation']

    general_dropna = ['health_worker', 'education', 'income_poverty', 'marital_status',
                      'rent_or_own', 'employment_status', 'household_adults',
                      'household_children']

    survey_col = ['opinion_h1n1_vacc_effective', 'opinion_h1n1_risk', 'opinion_h1n1_sick_from_vacc',
                  'opinion_seas_vacc_effective', 'opinion_seas_risk', 'opinion_seas_sick_from_vacc']

    behavior_col = ['behavioral_antiviral_meds', 'behavioral_face_mask',
                    'behavioral_large_gatherings', 'behavioral_outside_home']

    behavior_col_2 = ['behavioral_avoidance',
                      'behavioral_wash_hands', 'behavioral_touch_face']

    doc_rec = ['doctor_recc_h1n1', 'doctor_recc_seasonal']

    basicdropna(maindataframe, general_dropna)
    columndrop(maindataframe, drop_columns)
    # Fixed fill values per group.
    # NOTE(review): presumably the scale midpoint / most common answer for
    # each question - confirm against the data dictionary.
    impute_missing_data(maindataframe, survey_col, 3)
    impute_missing_data(maindataframe, ['h1n1_concern'], 2)
    impute_missing_data(maindataframe, ['h1n1_knowledge'], 0)
    impute_missing_data(maindataframe, behavior_col, 0)
    impute_missing_data(maindataframe, behavior_col_2, 1)
    impute_missing_data(maindataframe, doc_rec, 0)
    impute_missing_data(maindataframe, ['chronic_med_condition'], 0)
    impute_missing_data(maindataframe, ['child_under_6_months'], 0)

    

In [7]:
# Clean joined_df in place (datacleaner returns nothing).
# Re-running this cell without first reloading joined_df raises KeyError,
# because the columns datacleaner drops are already gone.
datacleaner(joined_df)

In [None]:
# Finding class imbalances
# Per Google (the company, not the search engine) 
# https://developers.google.com/machine-learning/data-prep/construct/sampling-splitting/imbalanced-data
# Degree of imbalance 	Proportion of Minority Class
# Mild 	20-40% of the data set
# Moderate 	1-20% of the data set
# Extreme 	<1% of the data set

imbal_none = []
imbal_mild = []
imbal_moderate = []
imbal_extreme = []

# (upper bound on the rarest value's share, destination list), checked from
# the most severe bucket to the least severe one.
imbalance_buckets = (
    (.01, imbal_extreme),
    (.2, imbal_moderate),
    (.4, imbal_mild),
)

for column in joined_df.columns:
    # Share of rows held by the rarest value of this column.
    # NOTE: value_counts() skips NaN while len() counts every row.
    minority_share = joined_df[column].value_counts().min() / len(joined_df[column])
    for upper_bound, bucket in imbalance_buckets:
        if minority_share <= upper_bound:
            bucket.append(column)
            break
    else:
        # No threshold matched: treat the column as balanced.
        imbal_none.append(column)

for heading, bucket in (
    ('Imbalance - None', imbal_none),
    ('Imbalance - Mild', imbal_mild),
    ('Imbalance - Moderate', imbal_moderate),
    ('Imbalance - Extreme', imbal_extreme),
):
    print(heading)
    print(bucket)

In [None]:
# Distribution of answers for the H1N1 "sick from vaccine" opinion column.
joined_df.opinion_h1n1_sick_from_vacc.value_counts()

In [None]:
# Distribution of answers for the seasonal "sick from vaccine" opinion column.
joined_df.opinion_seas_sick_from_vacc.value_counts()

In [8]:
# Separate the two vaccine targets from the predictors.
target_columns = ['h1n1_vaccine', 'seasonal_vaccine']
y = joined_df[target_columns]
X = joined_df.drop(columns=target_columns)

# Hold out 30% of the rows for testing. This happens before one-hot encoding
# so the encoder is fit on the training rows only.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# SMOTE oversampling was abandoned at this point; the attempt is kept below for reference.
# It was meant to address the extreme class imbalances.
# sm = imblearn.over_sampling.SMOTEN(sampling_strategy='auto', random_state=42)

#X_train_smitten = sm.fit_resample(X_train, y_train['h1n1_vaccine'])
#X_train_smitten


In [9]:
# Create the one-hot encoder for object (string) columns. Do this before the imputer.

# Every object-dtype column in the training features.
cat_col_list = [i for i in X_train.select_dtypes(include='object').columns]

# Numeric survey-scale columns that are one-hot encoded as well.
# The encoder is fit on a subset of columns and the result is reintegrated
# into the original dataframe. Do this after initial cleaning, before
# health insurance imputation.
nb_list_for_ohe = ['h1n1_concern', 'h1n1_knowledge', 'opinion_h1n1_vacc_effective',
'opinion_h1n1_risk', 'opinion_h1n1_sick_from_vacc', 'opinion_seas_vacc_effective',
'opinion_seas_risk', 'opinion_seas_sick_from_vacc']

# Shared encoder instance: fit on the training set, then reused to transform
# the test set. drop='first' removes the first category of each column.
# NOTE(review): `sparse=False` is the legacy spelling (later scikit-learn
# releases call it `sparse_output`) - confirm the pinned version before upgrading.
ohe = OneHotEncoder(drop='first', sparse=False)

def fit_trans_ohe(X_dataframe, columns):
    """Fit the module-level `ohe` on `columns` and append the dummy columns.

    Parameters
    ----------
    X_dataframe : pandas.DataFrame
        Training features.
    columns : list of str
        Columns passed to the encoder.

    Returns
    -------
    pandas.DataFrame
        `X_dataframe` without the columns in the global `cat_col_list`,
        followed by the encoder output (same index as the input).
    """
    encoded_values = ohe.fit_transform(X_dataframe[columns])
    encoded_frame = pd.DataFrame(
        encoded_values,
        index=X_dataframe.index,
        columns=ohe.get_feature_names(),
    )
    # Only the columns in the global cat_col_list are removed. Any other
    # column listed in `columns` stays in the result next to its dummies.
    # NOTE(review): confirm that keeping both is intended.
    without_categoricals = X_dataframe.drop(columns=cat_col_list)
    return pd.concat([without_categoricals, encoded_frame], axis=1)

#We should end up with a fitted ohe instance called 'ohe'

In [10]:
# Fit the encoder on the training set, covering both the object columns and
# the numeric survey-scale columns.
X_train_ohe = fit_trans_ohe(X_train, cat_col_list+nb_list_for_ohe)

# Sanity check: resulting shape and a preview of the encoded frame.
print(X_train_ohe.shape)
X_train_ohe.head()

(15304, 89)


Unnamed: 0_level_0,h1n1_concern,h1n1_knowledge,behavioral_antiviral_meds,behavioral_avoidance,behavioral_face_mask,behavioral_wash_hands,behavioral_large_gatherings,behavioral_outside_home,behavioral_touch_face,doctor_recc_h1n1,...,x17_5.0,x18_bhuqouqj,x18_dqpwygqj,x18_fpwskwrf,x18_kbazzjca,x18_lrircsnp,x18_lzgpxyit,x18_mlyzmhmf,x18_oxchjgsf,x18_qufhixun
respondent_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
26356,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1925,2.0,2.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2668,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
5325,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
841,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [12]:
# List the encoded column names; the x<i>_<category> dummy names are
# referenced by hand in socio_economic_column_list.
X_train_ohe.columns

Index(['h1n1_concern', 'h1n1_knowledge', 'behavioral_antiviral_meds',
       'behavioral_avoidance', 'behavioral_face_mask', 'behavioral_wash_hands',
       'behavioral_large_gatherings', 'behavioral_outside_home',
       'behavioral_touch_face', 'doctor_recc_h1n1', 'doctor_recc_seasonal',
       'chronic_med_condition', 'child_under_6_months', 'health_worker',
       'health_insurance', 'opinion_h1n1_vacc_effective', 'opinion_h1n1_risk',
       'opinion_h1n1_sick_from_vacc', 'opinion_seas_vacc_effective',
       'opinion_seas_risk', 'opinion_seas_sick_from_vacc', 'household_adults',
       'household_children', 'x0_35 - 44 Years', 'x0_45 - 54 Years',
       'x0_55 - 64 Years', 'x0_65+ Years', 'x1_< 12 Years',
       'x1_College Graduate', 'x1_Some College', 'x2_Hispanic',
       'x2_Other or Multiple', 'x2_White', 'x3_Male', 'x4_> $75,000',
       'x4_Below Poverty', 'x5_Not Married', 'x6_Rent',
       'x7_Not in Labor Force', 'x7_Unemployed', 'x8_bhuqouqj', 'x8_dqpwygqj',
       'x8_

In [13]:
# Fitting an imputer for Health Insurance using all features, 
# pulling from a dataframe that has already been OneHotEncoded

#ks_h_i_imputer_knn = KNNImputer()

#def kitchen_sink_KNN_imputer(encoder, dataframe):
#    output_df = pd.DataFrame(encoder.fit_transform(dataframe), 
#                                         columns = dataframe.columns,
#                                        index=dataframe.index)
#    output_df.health_insurance = output_df.health_insurance.round() 
#    return output_df
    
    
# Columns handed to the KNN imputer: a hand-picked set of one-hot dummy
# columns plus health_insurance.
# NOTE(review): the x<i>_ prefixes presumably follow the position of each
# source column in the list given to the encoder, so these names would break
# if that column order changes - confirm.
socio_economic_column_list = ["x0_35 - 44 Years","x0_45 - 54 Years","x0_55 - 64 Years","x0_65+ Years",
                              "x1_< 12 Years","x1_College Graduate","x1_Some College","x2_Hispanic",
                              "x2_Other or Multiple","x2_White","x3_Male", "x4_> $75,000", "x4_Below Poverty",
                              "x5_Not Married", "x6_Rent", "x7_Not in Labor Force","x7_Unemployed",
                              "x9_MSA, Principle City",'x9_Non-MSA', 'health_insurance']

# Fitting an imputer for health insurance using socio-economic features,
# pulling from a dataframe that has already been one-hot encoded.


# Shared imputer instance (default KNNImputer settings): fit on the training
# set, then reused to transform the test set.
soc_eco_h_i_imputer_knn = KNNImputer()

def soc_eco_KNN_imputer(imputer, dataframe, column_list):
    """Fit `imputer` on a column subset and return the frame with gaps filled.

    Parameters
    ----------
    imputer : object exposing `fit_transform`
        Fitted here as a side effect.
    dataframe : pandas.DataFrame
        Source frame; it is not modified.
    column_list : list of str
        Columns given to the imputer.

    Returns
    -------
    pandas.DataFrame
        The columns outside `column_list` first, then the imputed columns,
        with `health_insurance` rounded to the nearest whole number.
    """
    subset = dataframe[column_list]
    filled_values = imputer.fit_transform(subset)
    filled = pd.DataFrame(filled_values, index=subset.index, columns=subset.columns)

    untouched = dataframe.drop(columns=column_list)
    output_df = untouched.join(filled)

    # Round the imputed health_insurance values back to whole numbers.
    rounded_insurance = output_df.health_insurance.round()
    output_df['health_insurance'] = rounded_insurance
    return output_df


# Fit the KNN imputer on the encoded training set and fill the listed columns.
X_train_imputed = soc_eco_KNN_imputer(soc_eco_h_i_imputer_knn, X_train_ohe, socio_economic_column_list)




In [17]:
# Compare the health_insurance class shares after imputation (printed) with
# the shares in the raw training data (cell output; value_counts skips NaN).
print(X_train_imputed.health_insurance.value_counts(normalize=True))
X_train.health_insurance.value_counts(normalize= True)

1.0    0.912507
0.0    0.087493
Name: health_insurance, dtype: float64


1.0    0.882751
0.0    0.117249
Name: health_insurance, dtype: float64

In [None]:
# Status check: 
#    At this point we have: 
#        X train data that is OHE encoded and has no missing values
#        An imputer trained on X train
#    We need to:
#        To OHE X test data with the encoder fit on X train - check
#        To transform X test data with imputer from X train - check
#        To scale/normalize. Presumably fit with train data
#    Then we can:
#        Throw X train into models, score via cvs
#    Finally, validate good models vs. X test.

In [18]:
# One-hot encoding for the test set only: takes the X test dataframe and the
# list of columns to encode.
def trans_ohe(X_dataframe, columns):
    """Encode `columns` with the already-fitted module-level `ohe` (no refit).

    Parameters
    ----------
    X_dataframe : pandas.DataFrame
        Features to transform.
    columns : list of str
        Columns passed to the encoder.

    Returns
    -------
    pandas.DataFrame
        `X_dataframe` without the columns in the global `cat_col_list`,
        followed by the encoder output (same index as the input).
    """
    encoded_values = ohe.transform(X_dataframe[columns])
    encoded_frame = pd.DataFrame(
        encoded_values,
        index=X_dataframe.index,
        columns=ohe.get_feature_names(),
    )
    # Only the columns in the global cat_col_list are removed. Any other
    # column listed in `columns` stays in the result next to its dummies.
    without_categoricals = X_dataframe.drop(columns=cat_col_list)
    return pd.concat([without_categoricals, encoded_frame], axis=1)


In [19]:
# Encode the test set with the encoder fit on the training set.
X_test_ohe = trans_ohe(X_test, cat_col_list+nb_list_for_ohe)


In [20]:
# For use on test data
# Takes a previously fit imputer, a dataframe, and a previously established list of columns

def imputer_transform_only(imputer, dataframe, column_list):
    """Fill gaps in a column subset with an already-fitted imputer (no refit).

    Parameters
    ----------
    imputer : object exposing `transform`
        A previously fitted imputer.
    dataframe : pandas.DataFrame
        Source frame; it is not modified.
    column_list : list of str
        Columns given to the imputer.

    Returns
    -------
    pandas.DataFrame
        The columns outside `column_list` first, then the imputed columns,
        with `health_insurance` rounded to the nearest whole number.
    """
    subset = dataframe[column_list]
    filled_values = imputer.transform(subset)
    filled = pd.DataFrame(filled_values, index=subset.index, columns=subset.columns)

    untouched = dataframe.drop(columns=column_list)
    output_df = untouched.join(filled)

    # Round the imputed health_insurance values back to whole numbers.
    rounded_insurance = output_df.health_insurance.round()
    output_df['health_insurance'] = rounded_insurance
    return output_df

# Fill the test set using the imputer that was fit on the training set.
X_test_imputed = imputer_transform_only(soc_eco_h_i_imputer_knn, X_test_ohe, socio_economic_column_list)

In [21]:
# Class counts of health_insurance in the test set after imputation.
X_test_imputed.health_insurance.value_counts()

1.0    5966
0.0     593
Name: health_insurance, dtype: int64

In [22]:
# Shape check: the column count should match X_train_imputed.
X_test_imputed.shape

(6559, 89)

In [23]:
# Shape check: the column count should match X_test_imputed.
X_train_imputed.shape

(15304, 89)

In [24]:
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier




In [25]:

def adadboost(train_feature, train_target, n_est, learn_rate):
    """Cross-validate an AdaBoost classifier and return its per-fold ROC AUC.

    Parameters
    ----------
    train_feature : array-like
        Training features.
    train_target : array-like
        Binary training target.
    n_est : int
        Passed to AdaBoostClassifier as `n_estimators`.
    learn_rate : float
        Passed to AdaBoostClassifier as `learning_rate`.

    Returns
    -------
    numpy.ndarray
        One ROC AUC score per fold, using cross_val_score's default splitting.
    """
    model = AdaBoostClassifier(random_state=14, n_estimators=n_est, learning_rate=learn_rate)
    # cross_val_score clones and refits the estimator for every fold, so the
    # earlier model.fit(...) on the full training set only cost time: that
    # fitted model was never used or returned.
    return cross_val_score(model, train_feature, train_target, scoring='roc_auc')

In [28]:
#baby's first GridSearch
# Three hand-picked (n_estimators, learning_rate) settings, tried on each target.
ada_settings = [(100, .1), (200, .5), (300, 1)]

h1n1_runs = []
for n_est, learn_rate in ada_settings:
    fold_scores = adadboost(X_train_imputed, y_train['h1n1_vaccine'], n_est, learn_rate)
    print(fold_scores)
    h1n1_runs.append(fold_scores)
testH1N11, testH1N12, testH1N13 = h1n1_runs

seasonal_runs = []
for n_est, learn_rate in ada_settings:
    fold_scores = adadboost(X_train_imputed, y_train['seasonal_vaccine'], n_est, learn_rate)
    print(fold_scores)
    seasonal_runs.append(fold_scores)
testseasonal1, testseasonal2, testseasonal3 = seasonal_runs

[0.83573531 0.83087276 0.84293032 0.81942751 0.83888796]
[0.83919659 0.83738963 0.85304496 0.82410556 0.84515167]
[0.8386236  0.83733239 0.85239082 0.82389925 0.84465214]
[0.85055471 0.85145744 0.83818033 0.85682753 0.84606888]
[0.85525718 0.85648439 0.84975936 0.86217682 0.85686331]
[0.85505914 0.85663356 0.84951974 0.86223812 0.85658964]


In [29]:
# Mean cross-validated ROC AUC of each AdaBoost run: H1N1 first, then seasonal.
for run_scores in (
    testH1N11, testH1N12, testH1N13,
    testseasonal1, testseasonal2, testseasonal3,
):
    print(run_scores.mean())

0.8335707722520553
0.8397776828859087
0.8393796396938275
0.8486177775306581
0.8561082121375309
0.8560080420907805


In [30]:
# Unfitted gradient-boosting classifier with default settings; passed to
# GridSearchCV as the base estimator further down.
gbc = GradientBoostingClassifier()

In [31]:
def gbcmodel(train_feature, train_target):
    """Cross-validate a default gradient-boosting classifier on ROC AUC.

    Prints the mean of the fold scores and returns the individual scores.

    Parameters
    ----------
    train_feature : array-like
        Training features.
    train_target : array-like
        Binary training target.

    Returns
    -------
    numpy.ndarray
        One ROC AUC score per fold, using cross_val_score's default splitting.
    """
    model = GradientBoostingClassifier(random_state=14)
    # cross_val_score clones and refits the estimator for every fold, so the
    # earlier model.fit(...) on the full training set only cost time: that
    # fitted model was never used or returned.
    output = cross_val_score(model, train_feature, train_target, scoring='roc_auc')
    print(f'Mean of cross val scores ={output.mean()}')
    return output

In [32]:
# Baseline: default gradient boosting, cross-validated on each target.
# gbcmodel prints the mean score; the per-fold scores are printed here.
gbcdefaultH1N1 = gbcmodel(X_train_imputed, y_train['h1n1_vaccine'])
print(gbcdefaultH1N1)
gbcdefaultseasonal = gbcmodel(X_train_imputed, y_train['seasonal_vaccine'])
print(gbcdefaultseasonal)

Mean of cross val scores =0.841838564245665
[0.84342985 0.83802176 0.85469069 0.82420431 0.84884622]
Mean of cross val scores =0.8580574434539189
[0.85969732 0.85773669 0.84905723 0.86598513 0.85781085]


In [39]:
# Hyper-parameter grid for the gradient-boosting search:
# 2 losses x 3 learning rates x 3 estimator counts x 3 depths x 1 seed
# x 3 max_features options = 162 candidates.
# NOTE(review): 'deviance' (loss) and 'auto' (max_features) are legacy option
# names; later scikit-learn releases may reject them - confirm the pinned version.
gbcgriddict ={
    'loss': ['deviance','exponential'],
    'learning_rate': [0.1,.3,.5],
    'n_estimators': [100, 200, 500],
    'max_depth': [3,5,7],
    'random_state': [14],
    'max_features': ['auto', 'sqrt', 'log2']}

In [43]:
# Exhaustive search over gbcgriddict, scored by ROC AUC; verbose=3 logs every
# individual fit. n_jobs is left at its default, so fits run one at a time.
gbcgrid = GridSearchCV(gbc, param_grid= gbcgriddict, scoring= 'roc_auc', verbose = 3)

In [44]:
# Run the search on the H1N1 target: 162 candidates x 5 folds = 810 sequential
# fits (see the log below), so this cell is slow to re-run.
gbcgrid.fit(X_train_imputed, y_train['h1n1_vaccine'])

Fitting 5 folds for each of 162 candidates, totalling 810 fits
[CV] learning_rate=0.1, loss=deviance, max_depth=3, max_features=auto, n_estimators=100, random_state=14 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  learning_rate=0.1, loss=deviance, max_depth=3, max_features=auto, n_estimators=100, random_state=14, score=0.843, total=   2.1s
[CV] learning_rate=0.1, loss=deviance, max_depth=3, max_features=auto, n_estimators=100, random_state=14 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.0s remaining:    0.0s


[CV]  learning_rate=0.1, loss=deviance, max_depth=3, max_features=auto, n_estimators=100, random_state=14, score=0.839, total=   2.0s
[CV] learning_rate=0.1, loss=deviance, max_depth=3, max_features=auto, n_estimators=100, random_state=14 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    4.0s remaining:    0.0s


[CV]  learning_rate=0.1, loss=deviance, max_depth=3, max_features=auto, n_estimators=100, random_state=14, score=0.844, total=   2.1s
[CV] learning_rate=0.1, loss=deviance, max_depth=3, max_features=auto, n_estimators=100, random_state=14 
[CV]  learning_rate=0.1, loss=deviance, max_depth=3, max_features=auto, n_estimators=100, random_state=14, score=0.832, total=   2.1s
[CV] learning_rate=0.1, loss=deviance, max_depth=3, max_features=auto, n_estimators=100, random_state=14 
[CV]  learning_rate=0.1, loss=deviance, max_depth=3, max_features=auto, n_estimators=100, random_state=14, score=0.838, total=   2.1s
[CV] learning_rate=0.1, loss=deviance, max_depth=3, max_features=auto, n_estimators=200, random_state=14 
[CV]  learning_rate=0.1, loss=deviance, max_depth=3, max_features=auto, n_estimators=200, random_state=14, score=0.845, total=   4.1s
[CV] learning_rate=0.1, loss=deviance, max_depth=3, max_features=auto, n_estimators=200, random_state=14 
[CV]  learning_rate=0.1, loss=deviance, 

[CV]  learning_rate=0.1, loss=deviance, max_depth=3, max_features=log2, n_estimators=200, random_state=14, score=0.840, total=   0.6s
[CV] learning_rate=0.1, loss=deviance, max_depth=3, max_features=log2, n_estimators=200, random_state=14 
[CV]  learning_rate=0.1, loss=deviance, max_depth=3, max_features=log2, n_estimators=200, random_state=14, score=0.831, total=   0.6s
[CV] learning_rate=0.1, loss=deviance, max_depth=3, max_features=log2, n_estimators=200, random_state=14 
[CV]  learning_rate=0.1, loss=deviance, max_depth=3, max_features=log2, n_estimators=200, random_state=14, score=0.836, total=   0.6s
[CV] learning_rate=0.1, loss=deviance, max_depth=3, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_rate=0.1, loss=deviance, max_depth=3, max_features=log2, n_estimators=500, random_state=14, score=0.847, total=   1.5s
[CV] learning_rate=0.1, loss=deviance, max_depth=3, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_rate=0.1, loss=deviance, 

[CV]  learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, n_estimators=500, random_state=14, score=0.839, total=   2.9s
[CV] learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, n_estimators=500, random_state=14 
[CV]  learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, n_estimators=500, random_state=14, score=0.826, total=   2.9s
[CV] learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, n_estimators=500, random_state=14 
[CV]  learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, n_estimators=500, random_state=14, score=0.835, total=   2.9s
[CV] learning_rate=0.1, loss=deviance, max_depth=5, max_features=log2, n_estimators=100, random_state=14 
[CV]  learning_rate=0.1, loss=deviance, max_depth=5, max_features=log2, n_estimators=100, random_state=14, score=0.846, total=   0.5s
[CV] learning_rate=0.1, loss=deviance, max_depth=5, max_features=log2, n_estimators=100, random_state=14 
[CV]  learning_rate=0.1, loss=deviance, 

[CV]  learning_rate=0.1, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=100, random_state=14, score=0.839, total=   1.1s
[CV] learning_rate=0.1, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=100, random_state=14 
[CV]  learning_rate=0.1, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=100, random_state=14, score=0.827, total=   1.1s
[CV] learning_rate=0.1, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=100, random_state=14 
[CV]  learning_rate=0.1, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=100, random_state=14, score=0.836, total=   1.1s
[CV] learning_rate=0.1, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=200, random_state=14 
[CV]  learning_rate=0.1, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=200, random_state=14, score=0.837, total=   2.2s
[CV] learning_rate=0.1, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=200, random_state=14 
[CV]  learning_rate=0.1, loss=deviance, 

[CV]  learning_rate=0.1, loss=exponential, max_depth=3, max_features=auto, n_estimators=200, random_state=14, score=0.836, total=   4.1s
[CV] learning_rate=0.1, loss=exponential, max_depth=3, max_features=auto, n_estimators=200, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=3, max_features=auto, n_estimators=200, random_state=14, score=0.843, total=   4.1s
[CV] learning_rate=0.1, loss=exponential, max_depth=3, max_features=auto, n_estimators=200, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=3, max_features=auto, n_estimators=200, random_state=14, score=0.833, total=   4.1s
[CV] learning_rate=0.1, loss=exponential, max_depth=3, max_features=auto, n_estimators=200, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=3, max_features=auto, n_estimators=200, random_state=14, score=0.837, total=   4.1s
[CV] learning_rate=0.1, loss=exponential, max_depth=3, max_features=auto, n_estimators=500, random_state=14 
[CV]  learning_r

[CV]  learning_rate=0.1, loss=exponential, max_depth=3, max_features=log2, n_estimators=500, random_state=14, score=0.845, total=   1.4s
[CV] learning_rate=0.1, loss=exponential, max_depth=3, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=3, max_features=log2, n_estimators=500, random_state=14, score=0.839, total=   1.4s
[CV] learning_rate=0.1, loss=exponential, max_depth=3, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=3, max_features=log2, n_estimators=500, random_state=14, score=0.844, total=   1.4s
[CV] learning_rate=0.1, loss=exponential, max_depth=3, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=3, max_features=log2, n_estimators=500, random_state=14, score=0.836, total=   1.4s
[CV] learning_rate=0.1, loss=exponential, max_depth=3, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_r

[CV]  learning_rate=0.1, loss=exponential, max_depth=5, max_features=sqrt, n_estimators=500, random_state=14, score=0.839, total=   3.0s
[CV] learning_rate=0.1, loss=exponential, max_depth=5, max_features=log2, n_estimators=100, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=5, max_features=log2, n_estimators=100, random_state=14, score=0.846, total=   0.5s
[CV] learning_rate=0.1, loss=exponential, max_depth=5, max_features=log2, n_estimators=100, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=5, max_features=log2, n_estimators=100, random_state=14, score=0.832, total=   0.5s
[CV] learning_rate=0.1, loss=exponential, max_depth=5, max_features=log2, n_estimators=100, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=5, max_features=log2, n_estimators=100, random_state=14, score=0.840, total=   0.5s
[CV] learning_rate=0.1, loss=exponential, max_depth=5, max_features=log2, n_estimators=100, random_state=14 
[CV]  learning_r

[CV]  learning_rate=0.1, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=100, random_state=14, score=0.828, total=   1.1s
[CV] learning_rate=0.1, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=100, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=100, random_state=14, score=0.837, total=   1.1s
[CV] learning_rate=0.1, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=200, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=200, random_state=14, score=0.840, total=   2.1s
[CV] learning_rate=0.1, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=200, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=200, random_state=14, score=0.831, total=   2.1s
[CV] learning_rate=0.1, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=200, random_state=14 
[CV]  learning_r

[CV]  learning_rate=0.3, loss=deviance, max_depth=3, max_features=auto, n_estimators=200, random_state=14, score=0.842, total=   4.0s
[CV] learning_rate=0.3, loss=deviance, max_depth=3, max_features=auto, n_estimators=200, random_state=14 
[CV]  learning_rate=0.3, loss=deviance, max_depth=3, max_features=auto, n_estimators=200, random_state=14, score=0.830, total=   4.0s
[CV] learning_rate=0.3, loss=deviance, max_depth=3, max_features=auto, n_estimators=200, random_state=14 
[CV]  learning_rate=0.3, loss=deviance, max_depth=3, max_features=auto, n_estimators=200, random_state=14, score=0.842, total=   4.0s
[CV] learning_rate=0.3, loss=deviance, max_depth=3, max_features=auto, n_estimators=500, random_state=14 
[CV]  learning_rate=0.3, loss=deviance, max_depth=3, max_features=auto, n_estimators=500, random_state=14, score=0.832, total=  10.0s
[CV] learning_rate=0.3, loss=deviance, max_depth=3, max_features=auto, n_estimators=500, random_state=14 
[CV]  learning_rate=0.3, loss=deviance, 

[CV]  learning_rate=0.3, loss=deviance, max_depth=3, max_features=log2, n_estimators=500, random_state=14, score=0.835, total=   1.4s
[CV] learning_rate=0.3, loss=deviance, max_depth=3, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_rate=0.3, loss=deviance, max_depth=3, max_features=log2, n_estimators=500, random_state=14, score=0.828, total=   1.4s
[CV] learning_rate=0.3, loss=deviance, max_depth=3, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_rate=0.3, loss=deviance, max_depth=3, max_features=log2, n_estimators=500, random_state=14, score=0.833, total=   1.4s
[CV] learning_rate=0.3, loss=deviance, max_depth=5, max_features=auto, n_estimators=100, random_state=14 
[CV]  learning_rate=0.3, loss=deviance, max_depth=5, max_features=auto, n_estimators=100, random_state=14, score=0.830, total=   3.4s
[CV] learning_rate=0.3, loss=deviance, max_depth=5, max_features=auto, n_estimators=100, random_state=14 
[CV]  learning_rate=0.3, loss=deviance, 

[CV]  learning_rate=0.3, loss=deviance, max_depth=5, max_features=log2, n_estimators=100, random_state=14, score=0.830, total=   0.5s
[CV] learning_rate=0.3, loss=deviance, max_depth=5, max_features=log2, n_estimators=100, random_state=14 
[CV]  learning_rate=0.3, loss=deviance, max_depth=5, max_features=log2, n_estimators=100, random_state=14, score=0.824, total=   0.5s
[CV] learning_rate=0.3, loss=deviance, max_depth=5, max_features=log2, n_estimators=100, random_state=14 
[CV]  learning_rate=0.3, loss=deviance, max_depth=5, max_features=log2, n_estimators=100, random_state=14, score=0.830, total=   0.5s
[CV] learning_rate=0.3, loss=deviance, max_depth=5, max_features=log2, n_estimators=200, random_state=14 
[CV]  learning_rate=0.3, loss=deviance, max_depth=5, max_features=log2, n_estimators=200, random_state=14, score=0.830, total=   1.0s
[CV] learning_rate=0.3, loss=deviance, max_depth=5, max_features=log2, n_estimators=200, random_state=14 
[CV]  learning_rate=0.3, loss=deviance, 

[CV]  learning_rate=0.3, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=200, random_state=14, score=0.827, total=   2.1s
[CV] learning_rate=0.3, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=200, random_state=14 
[CV]  learning_rate=0.3, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=200, random_state=14, score=0.822, total=   2.2s
[CV] learning_rate=0.3, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=200, random_state=14 
[CV]  learning_rate=0.3, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=200, random_state=14, score=0.819, total=   2.2s
[CV] learning_rate=0.3, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=500, random_state=14 
[CV]  learning_rate=0.3, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=500, random_state=14, score=0.823, total=   5.4s
[CV] learning_rate=0.3, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=500, random_state=14 
[CV]  learning_rate=0.3, loss=deviance, 

[CV]  learning_rate=0.3, loss=exponential, max_depth=3, max_features=auto, n_estimators=500, random_state=14, score=0.823, total=  11.0s
[CV] learning_rate=0.3, loss=exponential, max_depth=3, max_features=auto, n_estimators=500, random_state=14 
[CV]  learning_rate=0.3, loss=exponential, max_depth=3, max_features=auto, n_estimators=500, random_state=14, score=0.835, total=  10.6s
[CV] learning_rate=0.3, loss=exponential, max_depth=3, max_features=auto, n_estimators=500, random_state=14 
[CV]  learning_rate=0.3, loss=exponential, max_depth=3, max_features=auto, n_estimators=500, random_state=14, score=0.824, total=  10.5s
[CV] learning_rate=0.3, loss=exponential, max_depth=3, max_features=auto, n_estimators=500, random_state=14 
[CV]  learning_rate=0.3, loss=exponential, max_depth=3, max_features=auto, n_estimators=500, random_state=14, score=0.834, total=  10.6s
[CV] learning_rate=0.3, loss=exponential, max_depth=3, max_features=sqrt, n_estimators=100, random_state=14 
[CV]  learning_r

[CV]  learning_rate=0.3, loss=exponential, max_depth=5, max_features=auto, n_estimators=100, random_state=14, score=0.836, total=   3.5s
[CV] learning_rate=0.3, loss=exponential, max_depth=5, max_features=auto, n_estimators=100, random_state=14 
[CV]  learning_rate=0.3, loss=exponential, max_depth=5, max_features=auto, n_estimators=100, random_state=14, score=0.832, total=   3.5s
[CV] learning_rate=0.3, loss=exponential, max_depth=5, max_features=auto, n_estimators=100, random_state=14 
[CV]  learning_rate=0.3, loss=exponential, max_depth=5, max_features=auto, n_estimators=100, random_state=14, score=0.837, total=   3.5s
[CV] learning_rate=0.3, loss=exponential, max_depth=5, max_features=auto, n_estimators=100, random_state=14 
[CV]  learning_rate=0.3, loss=exponential, max_depth=5, max_features=auto, n_estimators=100, random_state=14, score=0.821, total=   3.5s
[CV] learning_rate=0.3, loss=exponential, max_depth=5, max_features=auto, n_estimators=100, random_state=14 
[CV]  learning_r

[CV]  learning_rate=0.3, loss=exponential, max_depth=5, max_features=log2, n_estimators=100, random_state=14, score=0.834, total=   0.5s
[CV] learning_rate=0.3, loss=exponential, max_depth=5, max_features=log2, n_estimators=200, random_state=14 
[CV]  learning_rate=0.3, loss=exponential, max_depth=5, max_features=log2, n_estimators=200, random_state=14, score=0.837, total=   1.0s
[CV] learning_rate=0.3, loss=exponential, max_depth=5, max_features=log2, n_estimators=200, random_state=14 
[CV]  learning_rate=0.3, loss=exponential, max_depth=5, max_features=log2, n_estimators=200, random_state=14, score=0.824, total=   1.0s
[CV] learning_rate=0.3, loss=exponential, max_depth=5, max_features=log2, n_estimators=200, random_state=14 
[CV]  learning_rate=0.3, loss=exponential, max_depth=5, max_features=log2, n_estimators=200, random_state=14, score=0.836, total=   1.0s
[CV] learning_rate=0.3, loss=exponential, max_depth=5, max_features=log2, n_estimators=200, random_state=14 
[CV]  learning_r

[CV]  learning_rate=0.3, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=200, random_state=14, score=0.818, total=   2.3s
[CV] learning_rate=0.3, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=200, random_state=14 
[CV]  learning_rate=0.3, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=200, random_state=14, score=0.825, total=   2.3s
[CV] learning_rate=0.3, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=500, random_state=14 
[CV]  learning_rate=0.3, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=500, random_state=14, score=0.819, total=   5.7s
[CV] learning_rate=0.3, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=500, random_state=14 
[CV]  learning_rate=0.3, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=500, random_state=14, score=0.821, total=   5.6s
[CV] learning_rate=0.3, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=500, random_state=14 
[CV]  learning_r

[CV]  learning_rate=0.5, loss=deviance, max_depth=3, max_features=auto, n_estimators=500, random_state=14, score=0.826, total=  10.4s
[CV] learning_rate=0.5, loss=deviance, max_depth=3, max_features=auto, n_estimators=500, random_state=14 
[CV]  learning_rate=0.5, loss=deviance, max_depth=3, max_features=auto, n_estimators=500, random_state=14, score=0.816, total=  10.2s
[CV] learning_rate=0.5, loss=deviance, max_depth=3, max_features=auto, n_estimators=500, random_state=14 
[CV]  learning_rate=0.5, loss=deviance, max_depth=3, max_features=auto, n_estimators=500, random_state=14, score=0.827, total=  10.3s
[CV] learning_rate=0.5, loss=deviance, max_depth=3, max_features=sqrt, n_estimators=100, random_state=14 
[CV]  learning_rate=0.5, loss=deviance, max_depth=3, max_features=sqrt, n_estimators=100, random_state=14, score=0.842, total=   0.4s
[CV] learning_rate=0.5, loss=deviance, max_depth=3, max_features=sqrt, n_estimators=100, random_state=14 
[CV]  learning_rate=0.5, loss=deviance, 

[CV]  learning_rate=0.5, loss=deviance, max_depth=5, max_features=auto, n_estimators=100, random_state=14, score=0.822, total=   3.4s
[CV] learning_rate=0.5, loss=deviance, max_depth=5, max_features=auto, n_estimators=100, random_state=14 
[CV]  learning_rate=0.5, loss=deviance, max_depth=5, max_features=auto, n_estimators=100, random_state=14, score=0.811, total=   3.4s
[CV] learning_rate=0.5, loss=deviance, max_depth=5, max_features=auto, n_estimators=100, random_state=14 
[CV]  learning_rate=0.5, loss=deviance, max_depth=5, max_features=auto, n_estimators=100, random_state=14, score=0.825, total=   3.4s
[CV] learning_rate=0.5, loss=deviance, max_depth=5, max_features=auto, n_estimators=200, random_state=14 
[CV]  learning_rate=0.5, loss=deviance, max_depth=5, max_features=auto, n_estimators=200, random_state=14, score=0.817, total=   6.7s
[CV] learning_rate=0.5, loss=deviance, max_depth=5, max_features=auto, n_estimators=200, random_state=14 
[CV]  learning_rate=0.5, loss=deviance, 

[CV]  learning_rate=0.5, loss=deviance, max_depth=5, max_features=log2, n_estimators=200, random_state=14, score=0.829, total=   1.0s
[CV] learning_rate=0.5, loss=deviance, max_depth=5, max_features=log2, n_estimators=200, random_state=14 
[CV]  learning_rate=0.5, loss=deviance, max_depth=5, max_features=log2, n_estimators=200, random_state=14, score=0.805, total=   1.0s
[CV] learning_rate=0.5, loss=deviance, max_depth=5, max_features=log2, n_estimators=200, random_state=14 
[CV]  learning_rate=0.5, loss=deviance, max_depth=5, max_features=log2, n_estimators=200, random_state=14, score=0.820, total=   1.0s
[CV] learning_rate=0.5, loss=deviance, max_depth=5, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_rate=0.5, loss=deviance, max_depth=5, max_features=log2, n_estimators=500, random_state=14, score=0.811, total=   2.5s
[CV] learning_rate=0.5, loss=deviance, max_depth=5, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_rate=0.5, loss=deviance, 

[CV]  learning_rate=0.5, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=500, random_state=14, score=0.826, total=   5.3s
[CV] learning_rate=0.5, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=500, random_state=14 
[CV]  learning_rate=0.5, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=500, random_state=14, score=0.811, total=   5.3s
[CV] learning_rate=0.5, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=500, random_state=14 
[CV]  learning_rate=0.5, loss=deviance, max_depth=7, max_features=sqrt, n_estimators=500, random_state=14, score=0.823, total=   5.3s
[CV] learning_rate=0.5, loss=deviance, max_depth=7, max_features=log2, n_estimators=100, random_state=14 
[CV]  learning_rate=0.5, loss=deviance, max_depth=7, max_features=log2, n_estimators=100, random_state=14, score=0.812, total=   0.9s
[CV] learning_rate=0.5, loss=deviance, max_depth=7, max_features=log2, n_estimators=100, random_state=14 
[CV]  learning_rate=0.5, loss=deviance, 

[CV]  learning_rate=0.5, loss=exponential, max_depth=3, max_features=sqrt, n_estimators=100, random_state=14, score=0.834, total=   0.4s
[CV] learning_rate=0.5, loss=exponential, max_depth=3, max_features=sqrt, n_estimators=100, random_state=14 
[CV]  learning_rate=0.5, loss=exponential, max_depth=3, max_features=sqrt, n_estimators=100, random_state=14, score=0.843, total=   0.4s
[CV] learning_rate=0.5, loss=exponential, max_depth=3, max_features=sqrt, n_estimators=100, random_state=14 
[CV]  learning_rate=0.5, loss=exponential, max_depth=3, max_features=sqrt, n_estimators=100, random_state=14, score=0.834, total=   0.4s
[CV] learning_rate=0.5, loss=exponential, max_depth=3, max_features=sqrt, n_estimators=100, random_state=14 
[CV]  learning_rate=0.5, loss=exponential, max_depth=3, max_features=sqrt, n_estimators=100, random_state=14, score=0.836, total=   0.4s
[CV] learning_rate=0.5, loss=exponential, max_depth=3, max_features=sqrt, n_estimators=200, random_state=14 
[CV]  learning_r

[CV]  learning_rate=0.5, loss=exponential, max_depth=5, max_features=auto, n_estimators=200, random_state=14, score=0.816, total=   6.6s
[CV] learning_rate=0.5, loss=exponential, max_depth=5, max_features=auto, n_estimators=200, random_state=14 
[CV]  learning_rate=0.5, loss=exponential, max_depth=5, max_features=auto, n_estimators=200, random_state=14, score=0.817, total=   6.7s
[CV] learning_rate=0.5, loss=exponential, max_depth=5, max_features=auto, n_estimators=200, random_state=14 
[CV]  learning_rate=0.5, loss=exponential, max_depth=5, max_features=auto, n_estimators=200, random_state=14, score=0.826, total=   6.6s
[CV] learning_rate=0.5, loss=exponential, max_depth=5, max_features=auto, n_estimators=200, random_state=14 
[CV]  learning_rate=0.5, loss=exponential, max_depth=5, max_features=auto, n_estimators=200, random_state=14, score=0.813, total=   6.6s
[CV] learning_rate=0.5, loss=exponential, max_depth=5, max_features=auto, n_estimators=200, random_state=14 
[CV]  learning_r

[CV]  learning_rate=0.5, loss=exponential, max_depth=5, max_features=log2, n_estimators=200, random_state=14, score=0.826, total=   1.0s
[CV] learning_rate=0.5, loss=exponential, max_depth=5, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_rate=0.5, loss=exponential, max_depth=5, max_features=log2, n_estimators=500, random_state=14, score=0.817, total=   2.4s
[CV] learning_rate=0.5, loss=exponential, max_depth=5, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_rate=0.5, loss=exponential, max_depth=5, max_features=log2, n_estimators=500, random_state=14, score=0.814, total=   2.4s
[CV] learning_rate=0.5, loss=exponential, max_depth=5, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_rate=0.5, loss=exponential, max_depth=5, max_features=log2, n_estimators=500, random_state=14, score=0.812, total=   2.5s
[CV] learning_rate=0.5, loss=exponential, max_depth=5, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_r

[CV]  learning_rate=0.5, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=500, random_state=14, score=0.811, total=   5.4s
[CV] learning_rate=0.5, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=500, random_state=14 
[CV]  learning_rate=0.5, loss=exponential, max_depth=7, max_features=sqrt, n_estimators=500, random_state=14, score=0.822, total=   5.4s
[CV] learning_rate=0.5, loss=exponential, max_depth=7, max_features=log2, n_estimators=100, random_state=14 
[CV]  learning_rate=0.5, loss=exponential, max_depth=7, max_features=log2, n_estimators=100, random_state=14, score=0.818, total=   0.9s
[CV] learning_rate=0.5, loss=exponential, max_depth=7, max_features=log2, n_estimators=100, random_state=14 
[CV]  learning_rate=0.5, loss=exponential, max_depth=7, max_features=log2, n_estimators=100, random_state=14, score=0.819, total=   0.9s
[CV] learning_rate=0.5, loss=exponential, max_depth=7, max_features=log2, n_estimators=100, random_state=14 
[CV]  learning_r

[Parallel(n_jobs=1)]: Done 810 out of 810 | elapsed: 56.7min finished


GridSearchCV(estimator=GradientBoostingClassifier(),
             param_grid={'learning_rate': [0.1, 0.3, 0.5],
                         'loss': ['deviance', 'exponential'],
                         'max_depth': [3, 5, 7],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'n_estimators': [100, 200, 500],
                         'random_state': [14]},
             verbose=3)

In [45]:
# Display the hyperparameter combination that scored best in the broad grid search.
# NOTE(review): the GridSearchCV repr printed just before this cell shows no
# `scoring=` argument; if that repr is `gbcgrid`, candidates were ranked by the
# estimator's default score rather than the ROC AUC used by the later searches — confirm.
gbcgrid.best_params_

{'learning_rate': 0.1,
 'loss': 'exponential',
 'max_depth': 3,
 'max_features': 'log2',
 'n_estimators': 500,
 'random_state': 14}

In [50]:
# Narrowed search space for the follow-up grid searches. It keeps the loss,
# max_features and random_state reported by the broad search fixed and varies
# learning rate, tree depth and number of estimators around the reported winner.
new_param_grid = dict(
    learning_rate=[0.1, 0.05],
    loss=['exponential'],
    max_depth=[1, 3],
    max_features=['log2'],
    n_estimators=[500, 700, 900],
    random_state=[14],
)

In [51]:
# Follow-up search over the narrowed grid; candidates are ranked by ROC AUC.
best_from_the_grid_grid = GridSearchCV(
    estimator=gbc,
    param_grid=new_param_grid,
    scoring='roc_auc',
    verbose=3,
)

In [52]:
# Run the narrowed search against the H1N1 target column.
best_from_the_grid_grid.fit(
    X=X_train_imputed,
    y=y_train['h1n1_vaccine'],
)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV] learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14, score=0.838, total=   0.7s
[CV] learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.6s remaining:    0.0s


[CV]  learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14, score=0.836, total=   0.7s
[CV] learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.3s remaining:    0.0s


[CV]  learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14, score=0.851, total=   0.7s
[CV] learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14, score=0.823, total=   0.7s
[CV] learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14, score=0.842, total=   0.7s
[CV] learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14, score=0.839, total=   0.9s
[CV] learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14 
[CV]  learning_r

[CV]  learning_rate=0.05, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14, score=0.834, total=   0.9s
[CV] learning_rate=0.05, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14 
[CV]  learning_rate=0.05, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14, score=0.850, total=   0.9s
[CV] learning_rate=0.05, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14 
[CV]  learning_rate=0.05, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14, score=0.821, total=   0.9s
[CV] learning_rate=0.05, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14 
[CV]  learning_rate=0.05, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14, score=0.840, total=   0.9s
[CV] learning_rate=0.05, loss=exponential, max_depth=1, max_features=log2, n_estimators=900, random_state=14 
[CV]  le

[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:  1.5min finished


GridSearchCV(estimator=GradientBoostingClassifier(),
             param_grid={'learning_rate': [0.1, 0.05], 'loss': ['exponential'],
                         'max_depth': [1, 3], 'max_features': ['log2'],
                         'n_estimators': [500, 700, 900],
                         'random_state': [14]},
             scoring='roc_auc', verbose=3)

In [53]:
# Display the winning hyperparameter combination from the narrowed H1N1 search
# (that search was configured with scoring='roc_auc').
best_from_the_grid_grid.best_params_

{'learning_rate': 0.05,
 'loss': 'exponential',
 'max_depth': 3,
 'max_features': 'log2',
 'n_estimators': 700,
 'random_state': 14}

In [57]:
# Build the final H1N1 model straight from the tuned search result instead of
# re-typing the winning values by hand, so the model cannot drift out of sync
# with the grid search if the search space or the training data changes.
# `best_params_` already carries random_state because it is part of the grid.
# This creates a fresh, unfitted estimator; it is trained in the next cell.
final_h1n1_GD_model = GradientBoostingClassifier(**best_from_the_grid_grid.best_params_)

In [58]:
# Train the final H1N1 model on the imputed training features.
final_h1n1_GD_model.fit(
    X=X_train_imputed,
    y=y_train['h1n1_vaccine'],
)

GradientBoostingClassifier(learning_rate=0.05, loss='exponential',
                           max_features='log2', n_estimators=700,
                           random_state=14)

In [60]:
# Cross-validated ROC AUC for the final H1N1 model. No `cv` is passed, so
# scikit-learn's default splitting strategy is used.
gbccvs = cross_val_score(
    estimator=final_h1n1_GD_model,
    X=X_train_imputed,
    y=y_train['h1n1_vaccine'],
    scoring='roc_auc',
)

In [61]:
# Average ROC AUC across the cross-validation folds for the H1N1 model.
np.mean(gbccvs)

0.8432044849512603

In [63]:
# Same narrowed grid and ROC AUC ranking, set up for the seasonal-vaccine target.
seasonalgbcgrid = GridSearchCV(
    estimator=gbc,
    param_grid=new_param_grid,
    scoring='roc_auc',
    verbose=3,
)

In [66]:
# Run the narrowed search against the seasonal-vaccine target column.
seasonalgbcgrid.fit(
    X=X_train_imputed,
    y=y_train['seasonal_vaccine'],
)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV] learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14, score=0.854, total=   0.7s
[CV] learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.6s remaining:    0.0s


[CV]  learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14, score=0.855, total=   0.7s
[CV] learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.3s remaining:    0.0s


[CV]  learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14, score=0.849, total=   0.7s
[CV] learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14, score=0.863, total=   0.7s
[CV] learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=500, random_state=14, score=0.856, total=   0.7s
[CV] learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14 
[CV]  learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14, score=0.855, total=   0.9s
[CV] learning_rate=0.1, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14 
[CV]  learning_r

[CV]  learning_rate=0.05, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14, score=0.854, total=   0.9s
[CV] learning_rate=0.05, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14 
[CV]  learning_rate=0.05, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14, score=0.847, total=   0.9s
[CV] learning_rate=0.05, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14 
[CV]  learning_rate=0.05, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14, score=0.862, total=   0.9s
[CV] learning_rate=0.05, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14 
[CV]  learning_rate=0.05, loss=exponential, max_depth=1, max_features=log2, n_estimators=700, random_state=14, score=0.855, total=   0.9s
[CV] learning_rate=0.05, loss=exponential, max_depth=1, max_features=log2, n_estimators=900, random_state=14 
[CV]  le

[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:  1.4min finished


GridSearchCV(estimator=GradientBoostingClassifier(),
             param_grid={'learning_rate': [0.1, 0.05], 'loss': ['exponential'],
                         'max_depth': [1, 3], 'max_features': ['log2'],
                         'n_estimators': [500, 700, 900],
                         'random_state': [14]},
             scoring='roc_auc', verbose=3)

In [68]:
# Display the winning hyperparameter combination from the seasonal-vaccine search
# (that search was configured with scoring='roc_auc').
seasonalgbcgrid.best_params_

{'learning_rate': 0.05,
 'loss': 'exponential',
 'max_depth': 3,
 'max_features': 'log2',
 'n_estimators': 700,
 'random_state': 14}

In [69]:
# Build the final seasonal-vaccine model straight from the tuned search result
# instead of re-typing the winning values by hand, so the model cannot drift out
# of sync with the grid search if the search space or the training data changes.
# `best_params_` already carries random_state because it is part of the grid.
# This creates a fresh, unfitted estimator; it is trained in the next cell.
seasonalgbcbest = GradientBoostingClassifier(**seasonalgbcgrid.best_params_)

In [70]:
# Train the final seasonal-vaccine model on the imputed training features.
seasonalgbcbest.fit(
    X=X_train_imputed,
    y=y_train['seasonal_vaccine'],
)

GradientBoostingClassifier(learning_rate=0.05, loss='exponential',
                           max_features='log2', n_estimators=700,
                           random_state=14)

In [71]:
# Cross-validated ROC AUC for the final seasonal-vaccine model. No `cv` is
# passed, so scikit-learn's default splitting strategy is used.
gbccvssv = cross_val_score(
    estimator=seasonalgbcbest,
    X=X_train_imputed,
    y=y_train['seasonal_vaccine'],
    scoring='roc_auc',
)

In [73]:
# Average ROC AUC across the cross-validation folds for the seasonal-vaccine model.
np.mean(gbccvssv)

0.8587753631525341