<a href="https://colab.research.google.com/github/SagonaAchhami/5CS037/blob/main/Sagona_workshop8_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import f1_score, r2_score

In [2]:
wine = load_wine()
X = wine.data
y = wine.target

In [3]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [4]:
dt_clf = DecisionTreeClassifier(max_depth=4, random_state=42)
dt_clf.fit(X_train, y_train)

dt_pred = dt_clf.predict(X_test)
dt_f1 = f1_score(y_test, dt_pred, average="weighted")

print("Decision Tree Classifier F1 Score:", dt_f1)

Decision Tree Classifier F1 Score: 0.9439974457215836


In [5]:
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_clf.fit(X_train, y_train)

rf_pred = rf_clf.predict(X_test)
rf_f1 = f1_score(y_test, rf_pred, average="weighted")

print("Random Forest Classifier F1 Score:", rf_f1)

Random Forest Classifier F1 Score: 1.0


In [6]:
print("Model Comparison (F1 Scores)")
print("Decision Tree:", dt_f1)
print("Random Forest:", rf_f1)

Model Comparison (F1 Scores)
Decision Tree: 0.9439974457215836
Random Forest: 1.0


In [7]:
param_grid = {
    "n_estimators": [50, 100, 150],
    "max_depth": [None, 5, 10],
    "min_samples_split": [2, 5, 10]
}

In [8]:
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    scoring="f1_weighted",
    cv=5,
    n_jobs=-1
)

grid_search.fit(X_train, y_train)

In [9]:
print("Best Parameters:", grid_search.best_params_)
print("Best F1 Score:", grid_search.best_score_)

Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}
Best F1 Score: 0.9782952128219708


In [10]:
y_reg = X[:, 0]     # Alcohol feature (continuous)
X_reg = X[:, 1:]    # Remaining features

In [11]:
Xr_train, Xr_test, yr_train, yr_test = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

In [12]:
dt_reg = DecisionTreeRegressor(max_depth=5, random_state=42)
dt_reg.fit(Xr_train, yr_train)

dt_reg_pred = dt_reg.predict(Xr_test)
dt_r2 = r2_score(yr_test, dt_reg_pred)

print("Decision Tree Regressor R² Score:", dt_r2)

Decision Tree Regressor R² Score: 0.6242216679953971


In [13]:
rf_reg = RandomForestRegressor(n_estimators=100, random_state=42)
rf_reg.fit(Xr_train, yr_train)

rf_reg_pred = rf_reg.predict(Xr_test)
rf_r2 = r2_score(yr_test, rf_reg_pred)

print("Random Forest Regressor R² Score:", rf_r2)

Random Forest Regressor R² Score: 0.7416122628458712


In [14]:
param_dist = {
    "n_estimators": [50, 100, 200],
    "max_depth": [None, 5, 10, 20],
    "min_samples_leaf": [1, 2, 4]
}

In [15]:
random_search = RandomizedSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_distributions=param_dist,
    n_iter=10,
    scoring="r2",
    cv=5,
    random_state=42,
    n_jobs=-1
)

random_search.fit(Xr_train, yr_train)

In [16]:
print("Best Regression Parameters:", random_search.best_params_)
print("Best R² Score:", random_search.best_score_)

Best Regression Parameters: {'n_estimators': 100, 'min_samples_leaf': 4, 'max_depth': 5}
Best R² Score: 0.5021280604513839
