In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor as SklearnDecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import fetch_california_housing, load_diabetes
from sklearn.preprocessing import StandardScaler
from supervised_learning.regression.DecisionTreeRegressor import DecisionTreeRegressor

def evaluate_model(X, y, dataset_name, *, test_size=0.2, random_state=42):
    """Compare our decision tree regressor with scikit-learn's on one dataset.

    The data is split into training and test sets, the features are
    standardized, both regressors are trained with identical
    hyper-parameters (``min_samples_split=5``, ``max_depth=5``), and the mean
    squared error and R2 score of each model on the test set are printed.

    Args:
        X: Feature matrix, passed to ``train_test_split`` and
            ``StandardScaler``.
        y: Regression targets aligned with the rows of ``X``.
        dataset_name: Label used in the printed report header.
        test_size: Fraction of the data held out for testing.
        random_state: Seed for the train/test split and for scikit-learn's
            tree, so repeated runs report the same numbers.

    Returns:
        None. The metrics are written to standard output.
    """
    # Split before any preprocessing so the test set stays unseen.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit the scaler on the training split only; fitting it on the full data
    # would leak test-set statistics into the preprocessing step.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Train our decision tree regressor
    our_model = DecisionTreeRegressor(min_samples_split=5, max_depth=5)
    our_model.fit(X_train, y_train)
    y_pred_our = our_model.predict(X_test)

    # Train sklearn's decision tree regressor. It is seeded because, without
    # a seed, ties between equally good splits may be resolved differently
    # from run to run, which makes the comparison non-reproducible.
    sklearn_model = SklearnDecisionTreeRegressor(
        min_samples_split=5, max_depth=5, random_state=random_state
    )
    sklearn_model.fit(X_train, y_train)
    y_pred_sklearn = sklearn_model.predict(X_test)

    # Calculate evaluation metrics
    mse_our = mean_squared_error(y_test, y_pred_our)
    mse_sklearn = mean_squared_error(y_test, y_pred_sklearn)

    r2_our = r2_score(y_test, y_pred_our)
    r2_sklearn = r2_score(y_test, y_pred_sklearn)

    # Print evaluation metrics
    print(f"\n{dataset_name} Dataset Evaluation:")
    print(f"Our Model Mean Squared Error: {mse_our:.4f}")
    print(f"Scikit-Learn Model Mean Squared Error: {mse_sklearn:.4f}")

    print(f"Our Model R2 Score: {r2_our:.4f}")
    print(f"Scikit-Learn Model R2 Score: {r2_sklearn:.4f}")

# California Housing: fetch the dataset, then print the side-by-side metrics
california_housing = fetch_california_housing()
evaluate_model(
    california_housing["data"],
    california_housing["target"],
    dataset_name="California Housing",
)

# Diabetes: same comparison on the diabetes dataset
diabetes = load_diabetes()
evaluate_model(
    diabetes["data"],
    diabetes["target"],
    dataset_name="Diabetes",
)


California Housing Dataset Evaluation:
Our Model Mean Squared Error: 0.4954
Scikit-Learn Model Mean Squared Error: 0.5245
Our Model R2 Score: 0.6219
Scikit-Learn Model R2 Score: 0.5997

Diabetes Dataset Evaluation:
Our Model Mean Squared Error: 3978.4958
Scikit-Learn Model Mean Squared Error: 3416.2260
Our Model R2 Score: 0.2491
Scikit-Learn Model R2 Score: 0.3552
