In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score


In [2]:
# Load the wine dataset and pull out the feature matrix and the target vector
data = load_wine()
X = data.data
y = data.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a single decision tree and a random forest on the same training split
# (fit() returns the estimator itself, so construction and fitting can be chained)
dt_clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
rf_clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict on the held-out split, then score each model with the weighted F1
dt_test_pred = dt_clf.predict(X_test)
rf_test_pred = rf_clf.predict(X_test)
dt_f1 = f1_score(y_test, dt_test_pred, average="weighted")
rf_f1 = f1_score(y_test, rf_test_pred, average="weighted")

print(f"Decision Tree F1 Score: {dt_f1:.4f}")
print(f"Random Forest F1 Score: {rf_f1:.4f}")


Decision Tree F1 Score: 0.9440
Random Forest F1 Score: 1.0000


### 2. Hyperparameter Tuning:

• Identify three hyperparameters of the Random Forest Classifier.

• Perform hyperparameter tuning using GridSearchCV to optimize these parameters.

• Take hints from the scikit-learn documentation to guide the implementation.

In [3]:
from sklearn.model_selection import GridSearchCV

# Candidate values for the three Random Forest hyperparameters being tuned
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5, 10],
)

# Exhaustive search over every combination in param_grid, scored by weighted F1
# with 3-fold cross-validation; n_jobs=-1 asks for all available cores
grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    scoring="f1_weighted",
    cv=3,
    n_jobs=-1,
)

# Run the search on the training split only
grid_search.fit(X_train, y_train)

# Report the winning combination and its cross-validated score
print(f"Best Hyperparameters: {grid_search.best_params_}")
print(f"Best F1 Score: {grid_search.best_score_:.4f}")


Best Hyperparameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}
Best F1 Score: 0.9859


### 3. Implement Regression Model:

• Train a Decision Tree Regressor and a Random Forest Regressor using scikit-learn.

• Identify three parameters of the Random Forest Regressor and perform hyperparameter tuning using
RandomizedSearchCV to optimize these parameters.

In [4]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# NOTE(review): X_train / y_train are reused from the wine train/test split, so the
# regression target appears to be the wine class label treated as a number.
# Confirm this is the intended target for the regression exercise.

# Fit a single regression tree and a random forest regressor on the training split
# (fit() returns the estimator itself, so construction and fitting can be chained)
dt_reg = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
rf_reg = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Predict on the held-out split, then compute each model's mean squared error
dt_reg_pred = dt_reg.predict(X_test)
rf_reg_pred = rf_reg.predict(X_test)
dt_mse = mean_squared_error(y_test, dt_reg_pred)
rf_mse = mean_squared_error(y_test, rf_reg_pred)

print(f"Decision Tree MSE: {dt_mse:.4f}")
print(f"Random Forest MSE: {rf_mse:.4f}")


Decision Tree MSE: 0.1667
Random Forest MSE: 0.0648


In [5]:
from sklearn.model_selection import RandomizedSearchCV

# Candidate values to sample from: 3 * 4 * 3 * 3 = 108 possible combinations
param_dist = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Randomized search: evaluates n_iter sampled combinations instead of the full grid,
# using 3-fold cross-validation. The scorer is the *negated* MSE, so a higher score
# means a lower error.
random_search = RandomizedSearchCV(
    RandomForestRegressor(random_state=42),
    param_dist,
    n_iter=50,  # Number of random parameter combinations to try
    scoring="neg_mean_squared_error",
    cv=3,
    n_jobs=-1,
    random_state=42,
)

# Run the search on the training split only
random_search.fit(X_train, y_train)

# Report the winning combination; flip the sign of best_score_ to show a positive MSE
print(f"Best Hyperparameters: {random_search.best_params_}")
print(f"Best MSE: {-random_search.best_score_:.4f}")


Best Hyperparameters: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_depth': 30}
Best MSE: 0.0524
