## Gradient Boosting
Accuracy comparison of 5 ensemble methods:
 - Bagging
 - Random subspace
 - Random forest
 - AdaBoost
 - Gradient boosting  
   
 Each method is assessed by its mean accuracy under repeated k-fold cross-validation.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from sklearn.model_selection import cross_val_score, RepeatedKFold

In [None]:
# Load HotelRevHelpfulness.csv (path is relative to the notebook's working
# directory) and preview the first rows to check that the columns parsed.
hotel_pd = pd.read_csv(
    'HotelRevHelpfulness.csv',
)
hotel_pd.head(n=5)

In [None]:
# Split the frame into the target vector and the feature matrix WITHOUT
# mutating hotel_pd, so this cell can be re-run safely (DataFrame.pop removes
# the column in place and raises KeyError on a second execution).
y = hotel_pd['reviewHelpfulness'].values

# 'hotelId' is excluded from the features (presumably an identifier - verify),
# and the target column must not leak into X. drop() keeps the remaining
# columns in their original order.
X = hotel_pd.drop(columns=['hotelId', 'reviewHelpfulness']).values
X.shape

In [None]:
# Build the five ensembles under comparison. Every ensemble uses the same
# number of base estimators and the same fixed seed, so a fresh
# Restart & Run All reproduces identical models.
n_est = 100
rand_seed = 42
models = {}

# Bagging: each base estimator is trained on a bootstrap sample of the rows.
models['Bagging'] = BaggingClassifier(n_estimators=n_est, max_samples=1.0,
                            bootstrap=True, random_state=rand_seed)

# Random subspace: rows are drawn without replacement (max_samples=1.0, so
# all of them), while each base estimator sees a random half of the features.
models['RandomSS'] = BaggingClassifier(n_estimators=n_est,
                            max_samples=1.0, bootstrap=False,
                            max_features=0.5, random_state=rand_seed)

models['RandomForest'] = RandomForestClassifier(n_estimators=n_est,
                            random_state=rand_seed)

# The boosting algorithm is pinned to 'SAMME' explicitly rather than relying
# on the library default.
models['AdaBoost'] = AdaBoostClassifier(n_estimators=n_est, algorithm='SAMME',
                            random_state=rand_seed)

# max_leaf_nodes=4 keeps the individual boosted trees small.
models['GradBoost'] = GradientBoostingClassifier(n_estimators=n_est,
                            max_leaf_nodes=4, random_state=rand_seed)

### Testing the 5 Ensemble Models
The evaluation uses repeated k-fold cross-validation (10 repetitions of 10-fold, i.e. 100 fits per model), so it takes a few minutes to run.

In [None]:
# Score every ensemble with repeated k-fold cross-validation and keep the
# mean score per model in res_d (model name -> mean CV score).
folds = 10
n_reps = 10

# Build ONE seeded splitter and share it across models. An unseeded
# RepeatedKFold created per model would score each model on different random
# partitions, making the comparison noisy and unreproducible. With an integer
# random_state the splitter yields the same folds every time it is used.
rkf = RepeatedKFold(n_splits=folds, n_repeats=n_reps, random_state=42)

res_d = {}
for model, clf in models.items():
    # No explicit `scoring`: cross_val_score falls back to the estimator's own
    # score method, which is accuracy for these classifiers.
    scores = cross_val_score(clf, X, y, n_jobs=-1, cv=rkf)
    mean_score = scores.mean()
    print(model, mean_score)
    res_d[model] = mean_score

In [None]:
# Bar chart of the mean cross-validated accuracy of each ensemble.
# Materialise the dict views as lists so matplotlib receives plain sequences
# for the bar heights and tick labels.
names = list(res_d.keys())
accuracies = list(res_d.values())

# One colour per bar, in the insertion order of res_d.
colours = ['g','g','b','r','r']
y_pos = np.arange(len(names))

f = plt.figure(figsize=(5,4))
plt.bar(y_pos, accuracies, align='center', alpha=0.5, color=colours)
plt.xticks(y_pos, names, rotation=45)
plt.ylabel('Accuracy')
# Zoom the y-axis to the 0.5-0.75 range so differences between bars are visible.
plt.ylim((0.5,0.75))
plt.grid(axis='y')

# Save before showing so the file is written regardless of how the active
# backend treats the figure once it has been displayed.
f.savefig('AllEns.pdf', bbox_inches='tight')
plt.show()