<a href="https://colab.research.google.com/github/Ansubasnet-creator/2510322_ansu/blob/main/Workshop8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import f1_score, mean_squared_error, r2_score

from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor


In [2]:
# Load the wine dataset as a (features, labels) pair; as_frame=False spells out
# the library default so X and y come back as arrays, not pandas objects.
X, y = load_wine(return_X_y=True, as_frame=False)


In [3]:
# Hold out 20% of the samples for evaluation. The split is stratified on y and
# seeded, so re-running the cell yields the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


In [4]:
# Baseline classifier: a single decision tree with default hyperparameters
# (only the seed is fixed). fit() returns the estimator, so it can be chained.
dt_clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Score the held-out split with the weighted-average F1.
dt_preds = dt_clf.predict(X_test)
dt_f1 = f1_score(y_true=y_test, y_pred=dt_preds, average='weighted')
print("Decision Tree F1 Score:", dt_f1)


Decision Tree F1 Score: 0.9449614374099499


In [5]:
# Untuned random forest classifier (default hyperparameters, fixed seed),
# evaluated on the same held-out split as the decision tree.
rf_clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

rf_preds = rf_clf.predict(X_test)
rf_f1 = f1_score(y_true=y_test, y_pred=rf_preds, average='weighted')
print("Random Forest F1 Score:", rf_f1)


Random Forest F1 Score: 1.0


In [8]:
# Hyperparameter candidates for the forest: 3 x 3 x 3 = 27 combinations,
# all of which the grid search evaluates.
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
}

# Exhaustive search scored by weighted F1 under 5-fold cross-validation on the
# training split only; n_jobs=-1 asks for all available cores.
grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    cv=5,
    scoring='f1_weighted',
    n_jobs=-1,
)

# Kept as the cell's last expression so the fitted search object is displayed.
grid_search.fit(X_train, y_train)


In [9]:
# Take the winning forest from the grid search and score it on the held-out
# split, using the same weighted F1 as the untuned models for comparison.
best_rf_clf = grid_search.best_estimator_
best_preds = best_rf_clf.predict(X_test)
best_f1 = f1_score(y_true=y_test, y_pred=best_preds, average='weighted')

print("Best Parameters:", grid_search.best_params_)
print("Tuned Random Forest F1 Score:", best_f1)


Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
Tuned Random Forest F1 Score: 1.0


In [10]:
# Baseline regressor: a single decision tree with default hyperparameters.
# NOTE(review): y_train is the same label vector used for classification above,
# so this regresses on class codes -- confirm that is the intended target.
dt_reg = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
dt_reg_preds = dt_reg.predict(X_test)

# RMSE is the square root of the mean squared error on the held-out split.
dt_rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=dt_reg_preds))
dt_r2 = r2_score(y_true=y_test, y_pred=dt_reg_preds)

print("Decision Tree Regressor RMSE:", dt_rmse)
print("Decision Tree Regressor R2:", dt_r2)


Decision Tree Regressor RMSE: 0.16666666666666666
Decision Tree Regressor R2: 0.9543147208121827


In [11]:
# Untuned random forest regressor (default hyperparameters, fixed seed).
# NOTE(review): the target is the class-label vector y_train -- confirm that
# treating it as a numeric regression target is intended.
rf_reg = RandomForestRegressor(random_state=42).fit(X_train, y_train)
rf_reg_preds = rf_reg.predict(X_test)

# Same two held-out metrics as the decision tree regressor: RMSE and R^2.
rf_rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=rf_reg_preds))
rf_r2 = r2_score(y_true=y_test, y_pred=rf_reg_preds)

print("Random Forest Regressor RMSE:", rf_rmse)
print("Random Forest Regressor R2:", rf_r2)


Random Forest Regressor RMSE: 0.08634555897992414
Random Forest Regressor R2: 0.9877380710659899


In [12]:
# Search space for the regressor: 4 x 4 x 3 = 48 possible combinations, of
# which the randomized search below samples only n_iter=10.
param_dist = {
    'n_estimators': [50, 100, 200, 300],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
}

# Randomized search scored by R^2 under 5-fold cross-validation on the training
# split. Both the estimator and the sampler are seeded for repeatable runs.
random_search = RandomizedSearchCV(
    RandomForestRegressor(random_state=42),
    param_dist,
    n_iter=10,
    cv=5,
    scoring='r2',
    n_jobs=-1,
    random_state=42,
)

# Kept as the cell's last expression so the fitted search object is displayed.
random_search.fit(X_train, y_train)


In [13]:
# Take the winning regressor from the randomized search and score it on the
# held-out split with the same RMSE / R^2 metrics as the untuned regressors.
best_rf_reg = random_search.best_estimator_
best_reg_preds = best_rf_reg.predict(X_test)

best_rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=best_reg_preds))
best_r2 = r2_score(y_true=y_test, y_pred=best_reg_preds)

print("Best Regression Parameters:", random_search.best_params_)
print("Tuned Random Forest RMSE:", best_rmse)
print("Tuned Random Forest R2:", best_r2)


Best Regression Parameters: {'n_estimators': 200, 'min_samples_split': 2, 'max_depth': 10}
Tuned Random Forest RMSE: 0.08844332774281068
Tuned Random Forest R2: 0.9871350253807106
