In [2]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split


In [3]:
# Seed passed as random_state to the estimators below so runs are repeatable.
SEED = 1

In [None]:
# Hold out 30% of the samples as a test set. Stratifying on y keeps the class
# proportions the same in both splits; random_state makes the split repeatable.
# NOTE(review): X and y are not defined anywhere in this notebook — they must
# be loaded in an earlier cell for Restart & Run All to work.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=SEED
)

In [4]:
# Weak learner for boosting: a depth-1 decision tree (a "decision stump").
dt = DecisionTreeClassifier(
    random_state=SEED,
    max_depth=1,
)

In [6]:
# AdaBoost ensemble made of 100 decision stumps (dt has max_depth = 1).
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2, and the first positional
# argument works with both the old and the new name.
adb_clf = AdaBoostClassifier(dt, n_estimators=100)

In [None]:
# Train the AdaBoost ensemble on the training split.
adb_clf.fit(X_train, y_train)

In [None]:
# predict_proba returns one column per class; keep column index 1, which the
# notebook treats as the positive class. ROC AUC is computed from these scores
# rather than from hard class labels.
y_pred_proba = adb_clf.predict_proba(X_test)[:,1]

In [None]:
# Test-set ROC AUC, computed from the predicted positive-class probabilities.
adb_clf_roc_auc_score = roc_auc_score(y_true=y_test, y_score=y_pred_proba)

In [None]:
# Report the test-set ROC AUC, formatted to two decimal places.
print('ROC AUC score : {:.2f}'.format(adb_clf_roc_auc_score))

# **Gradient Boosting**

In [8]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as MSE

In [None]:
# Same seed value as set at the top of the notebook; used as random_state for
# the gradient boosting regressor below.
SEED = 1

In [None]:
# Hold out 30% of the samples as a test set; random_state makes the split
# repeatable.
# NOTE(review): X and y are not defined anywhere in this notebook — they must
# be loaded in an earlier cell (a regression target is expected here).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=SEED
)

In [9]:
# Gradient-boosted ensemble of 300 depth-1 regression trees.
gdb = GradientBoostingRegressor(
    max_depth=1,
    n_estimators=300,
    random_state=SEED,
)

In [None]:
# Train the gradient boosting regressor on the training split.
gdb.fit(X_train, y_train)

In [None]:
# Predict the target for the held-out test samples.
y_pred = gdb.predict(X_test)

In [None]:
# Root mean squared error on the test set: square root of the MSE.
rmse_test = MSE(y_true=y_test, y_pred=y_pred) ** 0.5

In [None]:
# Report the test-set RMSE, formatted to two decimal places.
print('Test set RMSE: {:.2f}'.format(rmse_test))

# **Stochastic Gradient Boosting**

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as MSE

In [None]:
# Same seed value as set at the top of the notebook; used as random_state for
# the stochastic gradient boosting regressor below.
SEED = 1

In [None]:
# Hold out 30% of the samples as a test set; random_state makes the split
# repeatable.
# NOTE(review): X and y are not defined anywhere in this notebook — they must
# be loaded in an earlier cell (a regression target is expected here).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=SEED
)

In [None]:
# Stochastic gradient boosting regressor 'sgbr': 300 depth-1 trees, each one
# fitted on a random 80% of the training rows (subsample) and considering a
# random 20% of the features when searching for a split (max_features).
sgbr = GradientBoostingRegressor(
    n_estimators=300,
    max_depth=1,
    subsample=0.8,
    max_features=0.2,
    random_state=SEED,
)

subsample is set to 0.8 so that each tree is trained on a random sample of 80% of the training data.
max_features is set to 0.2 so that each tree considers only 20% of the available features when searching for the best split.

In [None]:
# Train the stochastic gradient boosting regressor on the training split.
sgbr.fit(X_train, y_train)

In [None]:
# Predict the target for the held-out test samples.
y_pred = sgbr.predict(X_test)

In [None]:
# Root mean squared error on the test set: square root of the MSE.
rmse_test = MSE(y_true=y_test, y_pred=y_pred) ** 0.5

In [None]:
# Report the test-set RMSE, formatted to two decimal places.
print('Test set RMSE: {:.2f}'.format(rmse_test))