In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import datasets
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
import time
# Base Random Forest classifier; it is passed as the `estimator` to the
# Random Forest hyper-parameter searches later in the notebook.
# random_state is pinned so that a fresh "Restart & Run All" builds the same
# forests (and therefore yields the same scores) on every run.
clf = RandomForestClassifier(random_state=42)

# Loading the Digits dataset (features in `digits.data`, labels in `digits.target`)
digits = datasets.load_digits()

### GridSearch Random Forest

In [6]:
# Exhaustive grid search over Random Forest hyper-parameters:
# 2 bootstrap options x 5 forest sizes x 8 max_features values = 80 candidates.
param_grid = {
    'bootstrap': [True, False],
    'n_estimators': [10, 50, 100, 200, 500],
    'max_features': [1, 2, 3, 4, 5, 6, 7, 8],
}

# 'f1_macro' is scikit-learn's built-in macro-averaged F1 scorer. Using the
# scorer name avoids reaching into `sklearn.metrics` through a bare
# `import sklearn`, which only resolves when some other import happens to have
# loaded that submodule already.
clf_grid = GridSearchCV(estimator=clf, param_grid=param_grid, scoring='f1_macro')

# perf_counter() is a monotonic high-resolution clock, so the measured duration
# cannot be distorted by system clock adjustments the way time.time() can.
start_time_grid = time.perf_counter()
clf_grid.fit(digits.data, digits.target)
end_time_grid = time.perf_counter()

print("Best parameter settings for RF: ",clf_grid.best_params_)

Best parameter settings for RF:  {'bootstrap': False, 'max_features': 3, 'n_estimators': 500}


### GridSearch Gradient Boosting

In [10]:
# Base gradient-boosting classifier (scikit-learn's GradientBoostingClassifier).
# random_state is pinned so repeated runs of this cell produce the same models.
clf_x = GradientBoostingClassifier(random_state=42)

# Exhaustive grid: 3 learning rates x 5 ensemble sizes x 3 tree depths = 45 candidates.
param_grid = {
    'learning_rate': [0.1, 0.2, 1],
    'n_estimators': [10, 50, 100, 200, 500],
    'max_depth': [1, 2, 3],
}

# 'f1_macro' is scikit-learn's built-in macro-averaged F1 scorer. Using the
# scorer name avoids reaching into `sklearn.metrics` through a bare
# `import sklearn`, which only resolves when some other import happens to have
# loaded that submodule already.
clf_grid_xg = GridSearchCV(estimator=clf_x, param_grid=param_grid, scoring='f1_macro')

# perf_counter() is a monotonic high-resolution clock, so the measured duration
# cannot be distorted by system clock adjustments the way time.time() can.
start_time_grid_xg = time.perf_counter()
clf_grid_xg.fit(digits.data, digits.target)
end_time_grid_xg = time.perf_counter()

print("Best parameter settings for Gradient Boosting: ",clf_grid_xg.best_params_)

Best parameter settings for Gradient Boosting:  {'learning_rate': 0.2, 'max_depth': 2, 'n_estimators': 500}


### RandomizedSearch Random Forest

In [11]:
# Candidate values for the randomized Random Forest search. Every entry is a
# list, so candidates are drawn from these discrete values (80 possible
# combinations); `n_iter` is not passed, so the library default number of
# candidates is sampled.
param_grid = {
    'bootstrap': [True, False],
    'n_estimators': [10, 50, 100, 200, 500],
    'max_features': [1, 2, 3, 4, 5, 6, 7, 8],
}

# random_state pins which candidates get sampled; without it each run explores
# a different subset and can report different "best" parameters.
# 'f1_macro' is scikit-learn's built-in macro-averaged F1 scorer, used instead
# of building one via `sklearn.metrics`, which a bare `import sklearn` does not
# guarantee to have loaded.
clf_random = RandomizedSearchCV(
    estimator=clf,
    param_distributions=param_grid,
    scoring='f1_macro',
    random_state=42,
)

# perf_counter() is a monotonic high-resolution clock, so the measured duration
# cannot be distorted by system clock adjustments the way time.time() can.
start_time_random = time.perf_counter()
clf_random.fit(digits.data, digits.target)
end_time_random = time.perf_counter()

print("Best parameter settings for RF: ",clf_random.best_params_)

Best parameter settings for RF:  {'n_estimators': 100, 'max_features': 2, 'bootstrap': False}


### RandomizedSearch Gradient Boosting

In [12]:
# Candidate values for the randomized gradient-boosting search. Every entry is
# a list, so candidates are drawn from these discrete values (45 possible
# combinations); `n_iter` is not passed, so the library default number of
# candidates is sampled.
param_grid = {
    'learning_rate': [0.1, 0.2, 1],
    'n_estimators': [10, 50, 100, 200, 500],
    'max_depth': [1, 2, 3],
}

# random_state pins which candidates get sampled; without it each run explores
# a different subset and can report different "best" parameters.
# 'f1_macro' is scikit-learn's built-in macro-averaged F1 scorer, used instead
# of building one via `sklearn.metrics`, which a bare `import sklearn` does not
# guarantee to have loaded.
clf_rand_xg = RandomizedSearchCV(
    estimator=clf_x,
    param_distributions=param_grid,
    scoring='f1_macro',
    random_state=42,
)

# perf_counter() is a monotonic high-resolution clock, so the measured duration
# cannot be distorted by system clock adjustments the way time.time() can.
start_time_grid_xg_rand = time.perf_counter()
clf_rand_xg.fit(digits.data, digits.target)
end_time_grid_xg_rand = time.perf_counter()

print("Best parameter settings for Gradient Boosting: ",clf_rand_xg.best_params_)

Best parameter settings for Gradient Boosting:  {'n_estimators': 500, 'max_depth': 2, 'learning_rate': 0.2}


### Time comparisons

In [15]:
# Report the elapsed time (end timestamp minus start timestamp, in seconds)
# recorded around each of the four hyper-parameter searches, one line per search.
elapsed_by_search = (
    ("GridSearch RF time: ", end_time_grid - start_time_grid),
    ("GridSearch XG time: ", end_time_grid_xg - start_time_grid_xg),
    ("RandomizedSearch RF time: ", end_time_random - start_time_random),
    ("RandomizedSearch XG time: ", end_time_grid_xg_rand - start_time_grid_xg_rand),
)
for label, seconds in elapsed_by_search:
    print(label, seconds)

GridSearch RF time:  153.96263718605042
GridSearch XG time:  1343.6701998710632
RandomizedSearch RF time:  5.196333885192871
RandomizedSearch XG time:  404.846200466156
