# In [6]:
import sys
sys.path.append("../")
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor as SklearnRandomForestRegressor
from supervised_learning.regression.RandomForestRegressor import RandomForestRegressor

def test_random_forest_on_diabetes(n_estimators=10, test_size=0.2, random_state=42):
    """Compare the project's RandomForestRegressor with scikit-learn's on the diabetes data.

    Loads the scikit-learn diabetes dataset, holds out a test split, fits both
    forests with the same tree settings, and prints the mean squared error and
    R2 score of each model on the held-out split.

    Args:
        n_estimators: Number of trees requested from both forests.
        test_size: Value forwarded to ``train_test_split`` as ``test_size``.
        random_state: Seed forwarded to ``train_test_split`` and to the
            scikit-learn forest.

    Returns:
        None. The metrics are only printed.

    Note:
        The project model is built with ``oob_score=True`` and without a seed,
        exactly as before; presumably its metrics can therefore differ between
        runs -- confirm against the model's implementation.
    """
    dataset = load_diabetes()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data,
        dataset.target,
        test_size=test_size,
        random_state=random_state,
    )

    # One shared definition of the tree settings keeps the two forests from
    # silently drifting apart when a hyper-parameter is changed.
    tree_params = {
        "n_estimators": n_estimators,
        "min_samples_split": 2,
        "max_depth": None,
    }

    # Fit and predict with the project model first, then with scikit-learn's.
    our_model = RandomForestRegressor(oob_score=True, **tree_params)
    our_model.fit(X_train, y_train)
    predictions = {"Our": our_model.predict(X_test)}

    sklearn_model = SklearnRandomForestRegressor(random_state=random_state, **tree_params)
    sklearn_model.fit(X_train, y_train)
    predictions["Scikit-Learn"] = sklearn_model.predict(X_test)

    # Compute every metric before printing so a failure leaves no partial report.
    metrics = (
        ("Mean Squared Error", mean_squared_error),
        ("R2 Score", r2_score),
    )
    report = [
        (label, metric_name, metric_fn(y_test, y_pred))
        for metric_name, metric_fn in metrics
        for label, y_pred in predictions.items()
    ]

    print("Diabetes Dataset Evaluation:")
    for label, metric_name, value in report:
        print(f"{label} Model {metric_name}: {value:.4f}")

# Run the comparison only when this file is executed directly (script or
# notebook cell), not as a side effect of importing it.
if __name__ == "__main__":
    test_random_forest_on_diabetes()


# Output of a sample run:
# Diabetes Dataset Evaluation:
# Our Model Mean Squared Error: 3096.4428
# Scikit-Learn Model Mean Squared Error: 3135.2893
# Our Model R2 Score: 0.4156
# Scikit-Learn Model R2 Score: 0.4082
