<a href="https://colab.research.google.com/github/PrettyCharity/Machine_Learning_Practice/blob/main/Random_Forest_Hyperparameter_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Installing packages
!pip install scikit-optimize
!pip install optuna

In [2]:
#@title Preparing the data
# Importing libraries
import pandas as pd
import numpy as np
pd.set_option("display.precision", 4)
# Metrics
from sklearn.metrics import f1_score
# Model
from sklearn.ensemble import RandomForestClassifier

# Loading the data
df = pd.read_csv('Data.csv')
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Encoding the Dependent Variable
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

# Splitting the data into train and test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size = 0.2,
                                                    random_state = 42)

# Feature scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)



In [3]:
#@title Parameter List
params = {
          'criterion' : ('gini', 'entropy'),
          'n_estimators' : list(range(1, 500, 10)),
          'max_depth' : list(range(5, 25, 5)),
          'min_samples_split': list(range(2, 10)),
          'max_features' : ('sqrt', 'log2')
          }


In [4]:
#@title RandomizedSearchCV
from sklearn.model_selection import RandomizedSearchCV
model = RandomForestClassifier(random_state=42)
clf = RandomizedSearchCV(model, params).fit(X_train, y_train)
y_pred = clf.predict(X_test)
f1_random = f1_score(y_test, y_pred)

In [None]:
#@title BayesSearchCV
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
model = RandomForestClassifier(random_state=42)
clf = BayesSearchCV(
    model,
    {
          'criterion' : Categorical(['gini', 'entropy']),
          'n_estimators' : Integer(10, 500),
          'max_depth' : Integer(5, 25),
          'min_samples_split': Integer(2, 10),
          'max_features' : Categorical(['sqrt', 'log2'])
    },
    n_iter = 100,
    random_state = 42,
    verbose = -1
)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
f1_bayes = f1_score(y_test, y_pred)

In [9]:
#@title Optuna
import optuna
def objective(trial):

    params = {
        'criterion' : trial.suggest_categorical('criterion', ['gini', 'entropy']),
        'n_estimators' : trial.suggest_int('n_estimators', 1, 500),
        'max_depth' : trial.suggest_int('max_depth', 5, 25),
        'min_samples_split' : trial.suggest_int('min_samples_split', 2, 10),
        'max_features' : trial.suggest_categorical('max_features', ['sqrt', 'log2'])
      }

    model = RandomForestClassifier(**params, random_state=42)  
    
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    
    score = f1_score(y_test, y_pred)
    
    return score

study = optuna.create_study(direction='maximize', study_name = 'RFC Optuna')
study.optimize(objective, n_trials=100)

# Building the Random Forest model
best_params = study.best_trial.params
model = RandomForestClassifier(**best_params, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
f1_optuna = f1_score(y_test, y_pred)

[32m[I 2022-03-18 15:57:55,436][0m A new study created in memory with name: RFC Optuna[0m
[32m[I 2022-03-18 15:57:55,699][0m Trial 0 finished with value: 0.9272727272727272 and parameters: {'criterion': 'gini', 'n_estimators': 147, 'max_depth': 23, 'min_samples_split': 6, 'max_features': 'log2'}. Best is trial 0 with value: 0.9272727272727272.[0m
[32m[I 2022-03-18 15:57:56,478][0m Trial 1 finished with value: 0.9369369369369369 and parameters: {'criterion': 'gini', 'n_estimators': 461, 'max_depth': 9, 'min_samples_split': 9, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9369369369369369.[0m
[32m[I 2022-03-18 15:57:56,538][0m Trial 2 finished with value: 0.9272727272727272 and parameters: {'criterion': 'entropy', 'n_estimators': 30, 'max_depth': 11, 'min_samples_split': 8, 'max_features': 'log2'}. Best is trial 1 with value: 0.9369369369369369.[0m
[32m[I 2022-03-18 15:57:56,979][0m Trial 3 finished with value: 0.9272727272727272 and parameters: {'criterion': 'entr

In [10]:
#@title Results
scores = np.array([[f1_random, f1_bayes, f1_optuna]])
result = pd.DataFrame(data = scores,
                      columns = ['Random', 'Bayes', 'Optuna'],
                      index = ['Random Forest'])
result.style

Unnamed: 0,Random,Bayes,Optuna
Random Forest,0.9273,0.9369,0.9558
