In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, f1_score, mean_squared_error


***Using the Wine Dataset from scikit-learn***

In [2]:
# Load the Wine dataset bundled with scikit-learn
data = load_wine()
X, y = data.data, data.target

# Hold out 20% of the samples as a test set; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

***1. Implement Classification Models:***

• Train a Decision Tree Classifier and a Random Forest Classifier using scikit-learn.

In [5]:
# Fit both classifiers on the same training split so their test scores are comparable.
# DecisionTreeClassifier, RandomForestClassifier and accuracy_score all come from the
# notebook's import cell, so nothing is re-imported here.

# Decision Tree classifier: train, then predict on the test set
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(X_train, y_train)
y_pred_dt = dt_classifier.predict(X_test)

# Random Forest classifier: train, then predict on the test set
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)
y_pred_rf = rf_classifier.predict(X_test)

# Accuracy of each model on the held-out test set
accuracy_dt = accuracy_score(y_test, y_pred_dt)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"Decision Tree Accuracy: {accuracy_dt:.4f}")
print(f"Random Forest Accuracy: {accuracy_rf:.4f}")


Decision Tree Accuracy: 0.9444
Random Forest Accuracy: 1.0000


• Compare the models based on their F1 scores.

In [6]:
# Weighted F1 is used because the target is multi-class.
# f1_score is already imported in the notebook's first cell, so it is not re-imported here.
f1_dt = f1_score(y_test, y_pred_dt, average='weighted')
f1_rf = f1_score(y_test, y_pred_rf, average='weighted')

print(f"Decision Tree F1 Score: {f1_dt}")
print(f"Random Forest F1 Score: {f1_rf}")

# Report which model wins on weighted F1, or that they tie
if f1_dt > f1_rf:
    print("Decision Tree performs better based on F1 score.")
elif f1_rf > f1_dt:
    print("Random Forest performs better based on F1 score.")
else:
    print("Both models have the same F1 score.")

Decision Tree F1 Score: 0.9439974457215836
Random Forest F1 Score: 1.0
Random Forest performs better based on F1 score.


***2. Hyperparameter Tuning:***

• Identify three hyperparameters of the Random Forest Classifier.

In [7]:
# Search space for the Random Forest classifier: each key is a constructor
# argument and each list holds the candidate values to try.
rf_params = dict(
    n_estimators=[50, 100, 150],     # number of trees in the forest
    max_depth=[None, 10, 20, 30],    # maximum depth of each tree; None sets no limit
    min_samples_split=[2, 5, 10],    # minimum samples required to split an internal node
)

• Perform hyperparameter tuning using GridSearchCV to optimize these parameters.

In [8]:
# Exhaustive 5-fold cross-validated search over rf_params, scored by weighted F1.
# GridSearchCV is already imported in the notebook's first cell, so it is not re-imported here.
grid_search_rf = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=rf_params,
    cv=5,
    scoring='f1_weighted',
)
grid_search_rf.fit(X_train, y_train)
print("Best Hyperparameters for Random Forest Classifier:", grid_search_rf.best_params_)

Best Hyperparameters for Random Forest Classifier: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}


***3. Implement Regression Models:***



• Train a Decision Tree Regressor and a Random Forest Regressor using scikit-learn.

In [9]:
# Separate split for the regression task (same test size and seed as the classification split).
# NOTE(review): y is still data.target, the label the classifiers were trained on, so the
# regressors below are fitted on class labels — confirm this is the intended regression target.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Decision Tree regressor: fit() returns the fitted estimator, so it can be chained
dt_reg = DecisionTreeRegressor(random_state=42).fit(X_train_reg, y_train_reg)
y_pred_dt_reg = dt_reg.predict(X_test_reg)

# Random Forest regressor
rf_reg = RandomForestRegressor(random_state=42).fit(X_train_reg, y_train_reg)
y_pred_rf_reg = rf_reg.predict(X_test_reg)

# Mean squared error of each model on the held-out test set
mse_dt = mean_squared_error(y_test_reg, y_pred_dt_reg)
mse_rf = mean_squared_error(y_test_reg, y_pred_rf_reg)
print("MSE - Decision Tree Regressor:", mse_dt)
print("MSE - Random Forest Regressor:", mse_rf)

MSE - Decision Tree Regressor: 0.16666666666666666
MSE - Random Forest Regressor: 0.06483333333333333


• Identify three hyperparameters for the Random Forest Regressor and perform hyperparameter tuning using
RandomizedSearchCV to optimize these parameters.

In [10]:
# Candidate values for three RandomForestRegressor hyperparameters
rf_reg_params = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_leaf': [1, 2, 4],
}

# Randomized search: 20 sampled settings, 5-fold CV, scored by negative MSE.
# The search's own random_state fixes which settings are sampled.
random_search_rf_reg = RandomizedSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_distributions=rf_reg_params,
    n_iter=20,
    cv=5,
    scoring='neg_mean_squared_error',
    random_state=42,
)
random_search_rf_reg.fit(X_train_reg, y_train_reg)
print("Best Hyperparameters for Random Forest Regressor:", random_search_rf_reg.best_params_)



Best Hyperparameters for Random Forest Regressor: {'n_estimators': 200, 'min_samples_leaf': 1, 'max_depth': 30}
