Exercise 3: Ensemble Methods & Hyperparameter Tuning

In [12]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import f1_score, mean_squared_error

from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV


Load Wine Dataset & Train-Test Split

In [13]:
# Wine data: X is the feature matrix, y the target vector used by every model below.
wine = load_wine()
X, y = wine.data, wine.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


1. Classification Models

Train Decision Tree Classifier

In [14]:
# Baseline: a single decision tree fitted on the training split
# (fit() returns the estimator itself, so construction and fitting can be chained).
dt_clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Score on the held-out split with weighted F1.
dt_preds = dt_clf.predict(X_test)
dt_f1 = f1_score(y_true=y_test, y_pred=dt_preds, average="weighted")
print(f"Decision Tree Classifier F1 Score: {dt_f1:.4f}")


Decision Tree Classifier F1 Score: 0.9440


Train Random Forest Classifier

In [15]:
# Ensemble counterpart of the single tree: a random forest with default settings
# and a fixed seed, fitted on the same training split.
rf_clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Same metric as the decision tree so the two scores are directly comparable.
rf_preds = rf_clf.predict(X_test)
rf_f1 = f1_score(y_true=y_test, y_pred=rf_preds, average="weighted")
print(f"Random Forest Classifier F1 Score: {rf_f1:.4f}")


Random Forest Classifier F1 Score: 1.0000


F1 Score Comparison

In [16]:
# Side-by-side test-set F1 of the two classifiers; one print call, one line per item.
print(
    "Model Comparison (F1 Score):",
    f"Decision Tree Classifier: {dt_f1:.4f}",
    f"Random Forest Classifier: {rf_f1:.4f}",
    sep="\n",
)


Model Comparison (F1 Score):
Decision Tree Classifier: 0.9440
Random Forest Classifier: 1.0000


2. Hyperparameter Tuning (Classification – GridSearchCV)

GridSearchCV for Random Forest Classifier

In [17]:
# Candidate settings: 3 x 3 x 3 = 27 combinations, each evaluated with 5-fold CV.
param_grid = {
    "n_estimators": [50, 100, 200],
    "max_depth": [None, 5, 10],
    "min_samples_split": [2, 5, 10],
}

# Exhaustive search over the grid, ranking candidates by weighted F1.
grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    cv=5,
    scoring="f1_weighted",
)

# Kept as the cell's last expression so the notebook still displays the fitted search object.
grid_search.fit(X_train, y_train)


Best Parameters & Best Score

In [18]:
# Report the winning parameter combination and its mean cross-validated score.
print("Best Hyperparameters (Classification):", grid_search.best_params_, sep="\n")
print("\nBest F1 Score from GridSearch:", grid_search.best_score_, sep="\n")


Best Hyperparameters (Classification):
{'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}

Best F1 Score from GridSearch:
0.9782952128219708


3. Regression Models

Train Decision Tree Regressor

In [19]:
# NOTE(review): y is the same class-label vector the classifiers are trained on, so this
# regressor treats the labels as numeric targets and the MSE is measured on label values.
dt_reg = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Mean squared error on the held-out split.
dt_reg_preds = dt_reg.predict(X_test)
dt_mse = mean_squared_error(y_true=y_test, y_pred=dt_reg_preds)
print(f"Decision Tree Regressor MSE: {dt_mse:.4f}")


Decision Tree Regressor MSE: 0.1667


Train Random Forest Regressor

In [20]:
# Random forest regressor with default settings and a fixed seed, fitted on the
# same training split as the single-tree regressor.
rf_reg = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Same metric as the decision tree regressor so the two errors are directly comparable.
rf_reg_preds = rf_reg.predict(X_test)
rf_mse = mean_squared_error(y_true=y_test, y_pred=rf_reg_preds)
print(f"Random Forest Regressor MSE: {rf_mse:.4f}")


Random Forest Regressor MSE: 0.0648


RandomizedSearchCV for Random Forest Regressor

In [21]:
# Search space: 3 x 4 x 3 = 36 possible combinations; only n_iter=10 of them are tried.
param_dist = {
    "n_estimators": [50, 100, 200],
    "max_depth": [None, 5, 10, 20],
    "min_samples_split": [2, 5, 10],
}

# Randomized search with 5-fold CV. The scorer is negated MSE, so larger (closer to 0)
# is better. random_state fixes which candidates get sampled.
random_search = RandomizedSearchCV(
    RandomForestRegressor(random_state=42),
    param_dist,
    n_iter=10,
    cv=5,
    scoring="neg_mean_squared_error",
    random_state=42,
)

# Kept as the cell's last expression so the notebook still displays the fitted search object.
random_search.fit(X_train, y_train)


Best Parameters & Best Score (Regression)

In [22]:
# Report the best sampled combination and its mean cross-validated score
# (negated MSE, as requested via the scoring argument of the search).
print("Best Hyperparameters (Regression):", random_search.best_params_, sep="\n")
print("\nBest Score (Negative MSE):", random_search.best_score_, sep="\n")


Best Hyperparameters (Regression):
{'n_estimators': 200, 'min_samples_split': 10, 'max_depth': 20}

Best Score (Negative MSE):
-0.04678888653548126
