# Random Forest Classification

In [2]:
from sklearn.datasets import  load_iris

In [3]:
iris = load_iris()
data =  iris.data
target = iris.target

In [4]:
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size = 0.3)

In [6]:
from sklearn.ensemble import RandomForestClassifier

In [7]:
model = RandomForestClassifier()

In [8]:
model.fit(X_train, y_train)

In [12]:
predictions = model.predict(X_test)

In [14]:
from sklearn.metrics import accuracy_score

In [17]:
accuracy = accuracy_score(y_test, predictions)

In [22]:
print(f"Accuracy: {accuracy}")

Accuracy: 0.9555555555555556


# Hyperparameter Tuning using GridSearchCV

In [25]:
from sklearn.model_selection import GridSearchCV

In [27]:
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

In [29]:
grid_search = GridSearchCV(model, param_grid, cv=3)

In [31]:
grid_search.fit(X_train, y_train)

In [33]:
grid_search.best_params_

{'bootstrap': True,
 'max_depth': 10,
 'min_samples_leaf': 4,
 'min_samples_split': 5,
 'n_estimators': 50}

In [35]:
best_model = grid_search.best_estimator_

In [37]:
best_model.fit(X_train, y_train)

In [39]:
predictions = best_model.predict(X_test)

In [41]:
accuracy = accuracy_score(y_test, predictions)

In [43]:
print(f"Accuracy: {accuracy}")

Accuracy: 1.0


# Hyperparameter Tuning using Optuna

In [46]:
import optuna

In [48]:
# Define objective function for Optuna
def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 300, step=50)
    max_depth = trial.suggest_int('max_depth', 5, 30, step=5)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 20, step=2)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 10, step=1)
    bootstrap = trial.suggest_categorical('bootstrap', [True, False])

    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        bootstrap=bootstrap,
        random_state=42
    )
    
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Accuracy = accuracy_score(y_test, y_pred)
    return Accuracy

In [50]:
# Run Optuna optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

[I 2025-03-01 14:09:33,016] A new study created in memory with name: no-name-59902aed-7ba1-4815-a36b-ff83a83963c2
[I 2025-03-01 14:09:33,267] Trial 0 finished with value: 0.9555555555555556 and parameters: {'n_estimators': 300, 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 10, 'bootstrap': False}. Best is trial 0 with value: 0.9555555555555556.
[I 2025-03-01 14:09:33,552] Trial 1 finished with value: 0.9555555555555556 and parameters: {'n_estimators': 250, 'max_depth': 15, 'min_samples_split': 16, 'min_samples_leaf': 4, 'bootstrap': True}. Best is trial 0 with value: 0.9555555555555556.
[I 2025-03-01 14:09:33,670] Trial 2 finished with value: 0.9555555555555556 and parameters: {'n_estimators': 100, 'max_depth': 30, 'min_samples_split': 6, 'min_samples_leaf': 6, 'bootstrap': True}. Best is trial 0 with value: 0.9555555555555556.
[I 2025-03-01 14:09:33,786] Trial 3 finished with value: 0.9555555555555556 and parameters: {'n_estimators': 100, 'max_depth': 5, 'min_samples_sp

In [52]:
study.best_params

{'n_estimators': 50,
 'max_depth': 25,
 'min_samples_split': 16,
 'min_samples_leaf': 1,
 'bootstrap': True}

In [54]:
study.best_value

0.9777777777777777

In [56]:
best_params = study.best_params

In [68]:
best_model1 = RandomForestClassifier(**best_params)

In [70]:
best_model1.fit(X_train, y_train)

In [72]:
predictions = best_model1.predict(X_test)

In [74]:
accuracy = accuracy_score(y_test, predictions)

In [76]:
print(f"Accuracy: {accuracy}")

Accuracy: 0.9777777777777777
