In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, VotingClassifier


# Load Wisconsin Breast Cancer Dataset

In [2]:
data = load_breast_cancer()

# Train/Test Split

In [3]:
X_train, X_test, y_train, y_test = train_test_split(data.data, 1-data.target)

# Tuning AdaBoostClassifier

In [10]:
ada = AdaBoostClassifier()
ada_params = {
    'n_estimators': [45,55,55],
    'learning_rate':[.9, 1.0, 1.1]
}
gs = GridSearchCV(ada, param_grid = ada_params)
gs.fit(X_train, y_train)
print(gs.best_score_)
print(gs.best_params_)



0.953051643192
{'learning_rate': 1.0, 'n_estimators': 55}


In [7]:

gs.score(X_test, y_test)

0.93706293706293708

# GradientBoostingClassifier

In [29]:
#gradient boost is only for decision trees
gb = GradientBoostingClassifier()
gb_params = {
    'loss': ['deviance', 'exponential'],
    'max_depth': [2,3,4],
    'min_samples_leaf': [1,2,3],
    'min_samples_split':[1.0, 0.5]
    
}
gs_gb = GridSearchCV(gb, param_grid = gb_params)
gs_gb.fit(X_train,y_train)
print(gs_gb.best_score_)
print(gs_gb.best_params_)

0.957746478873
{'loss': 'deviance', 'max_depth': 3, 'min_samples_leaf': 3, 'min_samples_split': 0.5}


In [12]:
gs_gb.score(X_test, y_test)

0.97902097902097907

# VotingClassifier - takes a list of models (works similar to a pipeline)

In [34]:
vote = VotingClassifier([
    ('ada', AdaBoostClassifier()),
    ('gb', GradientBoostingClassifier())
], weights = [.75, .25])
#this can be dropped into gridsearch
vote_params = {
    'ada__n_estimators': [50, 75], 
    'gb__n_estimators': [100, 110]
}
gs = GridSearchCV(vote, param_grid = vote_params)
gs.fit(X_train, y_train)
print(gs.best_score_)
print(gs.best_params_)

0.962441314554
{'ada__n_estimators': 50, 'gb__n_estimators': 100}
