In [2]:
# -----------------------------------------------------------------
# Perform RandomizedSearchCV for hyperparameter tuning
# -----------------------------------------------------------------

# Import libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

# Load the raw data set from the CSV file in the working directory
data = pd.read_csv(filepath_or_buffer='hpt_small.csv')


In [3]:
# One-hot encode the categorical columns. drop_first=True keeps k-1 dummy
# columns for a column with k levels (the first level is removed).
data_prep = pd.get_dummies(data, drop_first=True)

# Split by position: every column except the last is a feature (X),
# the last column is the target (Y).
# NOTE(review): get_dummies places the encoded columns after the columns it
# leaves untouched, so this assumes the target is still the final column
# once encoding is done -- confirm against the columns of data_prep.
X = data_prep.iloc[:, :-1]
Y = data_prep.iloc[:, -1]

# Base estimator handed to the hyperparameter search; the seed is fixed so
# repeated runs build the same forest. RandomForestClassifier is imported
# with the other libraries at the top of the notebook.
rfc = RandomForestClassifier(random_state=1234)

# Candidate values for each Random Forest hyperparameter being tuned
rfc_param = {
    'n_estimators': [10, 15, 20],
    'min_samples_split': [8, 16],
    'min_samples_leaf': [1, 2, 3, 4, 5],
}


In [5]:

# rfc_param spans 3 x 2 x 5 = 30 possible parameter combinations.
# n_iter=10 samples 10 of those 30, and each sampled combination is scored
# with 10-fold cross-validation: 10 x 10 = 100 cross-validation fits.
rfc_rs = RandomizedSearchCV(
    rfc,
    rfc_param,
    n_iter=10,
    scoring='accuracy',
    cv=10,
    random_state=1234,
    return_train_score=True,
)

# Run the randomized search on the features and target
rfc_rs_fit = rfc_rs.fit(X, Y)

# Collect the per-combination search results into a DataFrame
cv_results_rfc_rs = pd.DataFrame(rfc_rs_fit.cv_results_)

# Report the best parameter combination found by the search
print('\n The best Parameters are : ', rfc_rs_fit.best_params_, sep='\n')



 The best Parameters are : 
{'n_estimators': 15, 'min_samples_split': 16, 'min_samples_leaf': 5}


In [6]:
# Display the search results table built from cv_results_
# (one row per sampled parameter combination).
cv_results_rfc_rs

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_min_samples_split,param_min_samples_leaf,params,split0_test_score,split1_test_score,...,split2_train_score,split3_train_score,split4_train_score,split5_train_score,split6_train_score,split7_train_score,split8_train_score,split9_train_score,mean_train_score,std_train_score
0,0.066283,0.006893,0.010246,0.007215,15,8,2,"{'n_estimators': 15, 'min_samples_split': 8, '...",0.74,0.88,...,0.877778,0.884444,0.868889,0.875556,0.866667,0.893333,0.884444,0.882222,0.879333,0.007374
1,0.073029,0.01111,0.008748,0.00953,15,16,2,"{'n_estimators': 15, 'min_samples_split': 16, ...",0.76,0.84,...,0.853333,0.848889,0.844444,0.848889,0.853333,0.868889,0.851111,0.86,0.853556,0.010997
2,0.077118,0.010149,0.00616,0.004778,15,16,1,"{'n_estimators': 15, 'min_samples_split': 16, ...",0.78,0.84,...,0.873333,0.871111,0.864444,0.864444,0.864444,0.871111,0.871111,0.877778,0.869778,0.006061
3,0.076951,0.011995,0.011478,0.005517,15,8,1,"{'n_estimators': 15, 'min_samples_split': 8, '...",0.74,0.84,...,0.911111,0.906667,0.917778,0.908889,0.904444,0.913333,0.904444,0.917778,0.909556,0.006049
4,0.077141,0.009908,0.008507,0.008133,15,16,5,"{'n_estimators': 15, 'min_samples_split': 16, ...",0.8,0.84,...,0.842222,0.846667,0.831111,0.831111,0.835556,0.842222,0.837778,0.831111,0.837111,0.005626
5,0.102267,0.018827,0.013572,0.004858,20,8,2,"{'n_estimators': 20, 'min_samples_split': 8, '...",0.76,0.8,...,0.88,0.882222,0.882222,0.882222,0.875556,0.893333,0.884444,0.882222,0.883111,0.004683
6,0.053123,0.007655,0.0,0.0,10,16,1,"{'n_estimators': 10, 'min_samples_split': 16, ...",0.76,0.84,...,0.871111,0.882222,0.857778,0.857778,0.864444,0.871111,0.875556,0.86,0.867111,0.008766
7,0.092185,0.004687,0.00625,0.007654,20,16,4,"{'n_estimators': 20, 'min_samples_split': 16, ...",0.78,0.84,...,0.857778,0.84,0.844444,0.835556,0.84,0.853333,0.848889,0.842222,0.844222,0.007266
8,0.09887,0.011332,0.011482,0.006333,20,8,3,"{'n_estimators': 20, 'min_samples_split': 8, '...",0.76,0.84,...,0.871111,0.866667,0.857778,0.862222,0.866667,0.877778,0.864444,0.877778,0.866667,0.00674
9,0.074645,0.008998,0.006386,0.006851,15,8,3,"{'n_estimators': 15, 'min_samples_split': 8, '...",0.74,0.84,...,0.862222,0.862222,0.862222,0.868889,0.862222,0.871111,0.866667,0.871111,0.865778,0.00424
