In [8]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PowerTransformer
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Assemble one DataFrame holding the scikit-learn diabetes features
# followed by the regression target in a final "target" column
diabetes = load_diabetes()
columns = np.array([*diabetes.feature_names, "target"])
data = np.column_stack((diabetes.data, diabetes.target))
df = pd.DataFrame(data=data, columns=columns)


# Separate the response column (y) from the predictor columns (X)
y = df["target"]
X = df.drop(columns="target")

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Yeo-Johnson power transform: the transformer is fitted on the training
# features only, then the already-fitted transformer is applied to the
# test features so no test-set information reaches the fit
power_transformer = PowerTransformer(method="yeo-johnson")
X_train_yeojohnson = power_transformer.fit_transform(X_train)
X_test_yeojohnson = power_transformer.transform(X_test)

# Each model below is fitted on the Yeo-Johnson-transformed training
# features and then used to predict on the transformed test features.

# Linear Regression (ordinary least squares; no seed required)
linear_reg = LinearRegression()
linear_reg.fit(X_train_yeojohnson, y_train)
linear_reg_pred = linear_reg.predict(X_test_yeojohnson)

# Decision Tree Regression
# random_state is fixed so the fitted tree (and therefore its metrics) is
# the same on every run; it matches the seed used for the train/test split.
tree_reg = DecisionTreeRegressor(random_state=42)
tree_reg.fit(X_train_yeojohnson, y_train)
tree_reg_pred = tree_reg.predict(X_test_yeojohnson)

# Random Forest Regression
# random_state is fixed so the bootstrap samples and per-split feature
# sampling are repeatable, making the reported scores reproducible.
forest_reg = RandomForestRegressor(random_state=42)
forest_reg.fit(X_train_yeojohnson, y_train)
forest_reg_pred = forest_reg.predict(X_test_yeojohnson)

# Report regression quality for one model
def evaluate_model(y_true, y_pred, model_name):
    """Print the mean squared error and R^2 score of a set of predictions.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values to compare against ``y_true``.
        model_name: Label prefixed to each printed metric line.

    Returns:
        None. The two metrics are written to standard output, each
        formatted to four decimal places.
    """
    error = mean_squared_error(y_true, y_pred)
    score = r2_score(y_true, y_pred)
    report = (
        f"{model_name} Mean Squared Error: {error:.4f}",
        f"{model_name} R^2 Score: {score:.4f}",
    )
    # One line per metric, in the same order as they were computed
    print(*report, sep="\n")

# Print a headed metrics report for each fitted model, in fitting order
for name, predictions in (
    ("Linear Regression", linear_reg_pred),
    ("Decision Tree Regression", tree_reg_pred),
    ("Random Forest Regression", forest_reg_pred),
):
    print(f"\n{name} Results:")
    evaluate_model(y_test, predictions, name)



Linear Regression Results:
Linear Regression Mean Squared Error: 2982.3834
Linear Regression R^2 Score: 0.4371

Decision Tree Regression Results:
Decision Tree Regression Mean Squared Error: 5044.9551
Decision Tree Regression R^2 Score: 0.0478

Random Forest Regression Results:
Random Forest Regression Mean Squared Error: 2971.3582
Random Forest Regression R^2 Score: 0.4392
