<a href="https://colab.research.google.com/github/RoshitRana/Worksheet0/blob/main/Worsheet8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris, load_wine
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, f1_score, mean_squared_error

# Iris data for the decision-tree comparison: hold out 20% of the samples
# for testing, with a fixed seed so the split is repeatable.
data = load_iris()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Implement Custom Decision Tree (from scratch) - Simplified
class CustomDecisionTree:
    """Simplified decision-tree classifier built from random median splits.

    At every internal node one feature is picked at random and the samples are
    split at that feature's median (``<= median`` goes left, the rest right).
    No impurity criterion is evaluated, so this is a teaching baseline rather
    than a competitive model.

    The fitted tree is stored in ``self.tree`` as nested dicts: a leaf is
    ``{"class": label}`` and an internal node is
    ``{"feature_idx", "threshold", "left", "right"}``.

    Args:
        max_depth: Maximum depth of the tree. ``None`` means grow until every
            leaf is pure or cannot be split; ``0`` yields a single leaf.
        random_state: Optional seed for the feature choice. When ``None`` the
            global ``np.random`` state is used, so ``np.random.seed`` still
            controls the result.
    """

    def __init__(self, max_depth=None, random_state=None):
        self.max_depth = max_depth
        self.random_state = random_state
        self.tree = None
        self._rng = np.random

    def fit(self, X, y):
        """Build the tree from training data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of class labels, one per row of ``X``.

        Returns:
            The fitted instance, to allow ``CustomDecisionTree().fit(X, y)``.

        Raises:
            ValueError: If ``X`` is not 2-D, if ``X`` and ``y`` disagree on the
                number of samples, or if there are no samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples")
        if y.size == 0:
            raise ValueError("Cannot fit a decision tree on an empty dataset")

        # Re-seed on every fit so refitting the same instance is repeatable.
        if self.random_state is None:
            self._rng = np.random
        else:
            self._rng = np.random.RandomState(self.random_state)

        self.tree = self._build_tree(X, y)
        return self

    def _build_tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples ``(X, y)``."""
        classes, counts = np.unique(y, return_counts=True)
        if len(classes) == 1:
            return {"class": classes[0]}

        # np.unique returns sorted labels, so ties resolve to the smallest
        # label, the same choice np.bincount(y).argmax() would make.
        majority = classes[counts.argmax()]
        # Compare against None explicitly: max_depth=0 is a valid limit.
        if self.max_depth is not None and depth >= self.max_depth:
            return {"class": majority}

        # Try the features in random order and take the first whose median
        # split puts samples on both sides. An empty side would otherwise
        # recurse forever or end in an argmax over an empty array.
        for feature in self._rng.permutation(X.shape[1]):
            column = X[:, feature]
            threshold = np.median(column)
            left_mask = column <= threshold
            if left_mask.all() or not left_mask.any():
                continue
            right_mask = ~left_mask
            return {
                "feature_idx": int(feature),
                "threshold": threshold,
                "left": self._build_tree(X[left_mask], y[left_mask], depth + 1),
                "right": self._build_tree(X[right_mask], y[right_mask], depth + 1),
            }

        # No feature separates the remaining samples: fall back to a leaf.
        return {"class": majority}

    def predict(self, X):
        """Return a list with the predicted class label for each row of ``X``.

        Raises:
            ValueError: If called before ``fit``.
        """
        if self.tree is None:
            raise ValueError("CustomDecisionTree must be fitted before calling predict")
        return [self._predict_single(x, self.tree) for x in np.asarray(X)]

    def _predict_single(self, x, tree):
        """Walk ``tree`` from the given node down to a leaf for sample ``x``."""
        node = tree
        while "class" not in node:
            goes_left = x[node["feature_idx"]] <= node["threshold"]
            node = node["left"] if goes_left else node["right"]
        return node["class"]

# Train and Evaluate Custom Decision Tree
# CustomDecisionTree picks its split features with NumPy's global random
# generator, so seed that generator first; otherwise the accuracy printed
# below changes from run to run and the comparison cannot be reproduced.
np.random.seed(42)
custom_tree = CustomDecisionTree(max_depth=3)
custom_tree.fit(X_train, y_train)
y_pred_custom = custom_tree.predict(X_test)
accuracy_custom = accuracy_score(y_test, y_pred_custom)
print(f"Custom Decision Tree Accuracy: {accuracy_custom:.4f}")

# Reference model: scikit-learn's decision tree with the same depth limit as
# the custom tree, scored on the same held-out Iris samples.
sklearn_tree = DecisionTreeClassifier(max_depth=3, random_state=42)
y_pred_sklearn = sklearn_tree.fit(X_train, y_train).predict(X_test)
accuracy_sklearn = accuracy_score(y_true=y_test, y_pred=y_pred_sklearn)
print(f"Scikit-learn Decision Tree Accuracy: {accuracy_sklearn:.4f}")

# Wine data for the ensemble experiments. This rebinds data, X, y and the
# train/test arrays, replacing the Iris split used above.
data = load_wine()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Single decision tree vs. random forest on the Wine classification task.
# Each model is fitted and then immediately used to predict the test split.
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_dt = dt_classifier.predict(X_test)

rf_classifier = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_rf = rf_classifier.predict(X_test)

# Report the weighted F1 score of each model on the held-out samples.
print(
    f"Decision Tree F1 Score: {f1_score(y_test, y_pred_dt, average='weighted'):.4f}"
)
print(
    f"Random Forest F1 Score: {f1_score(y_test, y_pred_rf, average='weighted'):.4f}"
)

# Exhaustive search over 3 x 3 x 3 = 27 random-forest settings, each scored
# by accuracy with 5-fold cross-validation on the training split.
param_grid = {
    'n_estimators': [10, 50, 100],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
}
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train, y_train)
print(f"Best Random Forest Classifier Params: {grid_search.best_params_}")

# Decision tree vs. random forest regressors on the Wine data.
# NOTE: y is still the integer class label taken from load_wine().target, so
# the MSE below measures squared error on class indices, not on a continuous
# quantity.
dt_regressor = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
y_pred_dt_reg = dt_regressor.predict(X_test)

rf_regressor = RandomForestRegressor(random_state=42).fit(X_train, y_train)
y_pred_rf_reg = rf_regressor.predict(X_test)

# Report the mean squared error of each regressor on the held-out samples.
print(
    f"Decision Tree MSE: {mean_squared_error(y_test, y_pred_dt_reg):.4f}"
)
print(
    f"Random Forest MSE: {mean_squared_error(y_test, y_pred_rf_reg):.4f}"
)

# Randomized search: sample 5 of the 27 possible settings and score each with
# 5-fold cross-validation using negative MSE (higher is better).
param_dist = {
    'n_estimators': [10, 50, 100],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
}
random_search = RandomizedSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_distributions=param_dist,
    n_iter=5,
    cv=5,
    scoring='neg_mean_squared_error',
    random_state=42,
)
random_search.fit(X_train, y_train)
print(f"Best Random Forest Regressor Params: {random_search.best_params_}")


Custom Decision Tree Accuracy: 0.8333
Scikit-learn Decision Tree Accuracy: 1.0000
Decision Tree F1 Score: 0.9440
Random Forest F1 Score: 1.0000
Best Random Forest Classifier Params: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}
Decision Tree MSE: 0.1667
Random Forest MSE: 0.0648
Best Random Forest Regressor Params: {'n_estimators': 100, 'min_samples_split': 10, 'max_depth': None}
