In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression as SklearnLinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.datasets import fetch_california_housing
from supervised_learning.regression.LinearRegression import LinearRegression

# Fetch the California housing data and assemble features and target into one frame.
# The target is appended as the last column, named 'target'.
california_housing = fetch_california_housing()
df = pd.DataFrame(
    np.column_stack((california_housing.data, california_housing.target)),
    columns=[*california_housing.feature_names, 'target'],
)

# Every column except the last is a feature; the last column is the target
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()

# Hold out 20% of the samples as a test set; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Instantiate both estimators: the project's LinearRegression and scikit-learn's
our_model = LinearRegression()
sklearn_model = SklearnLinearRegression()

# Fit each estimator on the same training split
our_model.fit(X_train, y_train)
sklearn_model.fit(X_train, y_train)

# Predict the held-out test split with each fitted estimator
y_pred_our = our_model.predict(X_test)
y_pred_sklearn = sklearn_model.predict(X_test)

def _mape_percent(y_true, y_pred):
    """Return the mean absolute percentage error of ``y_pred`` against ``y_true``, in percent."""
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


def _score_pair(metric):
    """Evaluate ``metric`` on the test targets for both models.

    Returns a ``(ours, sklearn)`` tuple: the metric for ``y_pred_our`` first,
    then for ``y_pred_sklearn``.
    """
    return metric(y_test, y_pred_our), metric(y_test, y_pred_sklearn)


# Score both sets of predictions with MSE, R-squared, MAE, and MAPE
mse_our, mse_sklearn = _score_pair(mean_squared_error)
r2_our, r2_sklearn = _score_pair(r2_score)
mae_our, mae_sklearn = _score_pair(mean_absolute_error)
mape_our, mape_sklearn = _score_pair(_mape_percent)

# Report each metric as a pair of lines: our model first, scikit-learn second
print(
    f"Our Model MSE: {mse_our}",
    f"Scikit-Learn Model MSE: {mse_sklearn}",
    f"Our Model R-squared: {r2_our}",
    f"Scikit-Learn Model R-squared: {r2_sklearn}",
    f"Our Model MAE: {mae_our}",
    f"Scikit-Learn Model MAE: {mae_sklearn}",
    f"Our Model MAPE: {mape_our}%",
    f"Scikit-Learn Model MAPE: {mape_sklearn}%",
    sep="\n",
)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Our Model MSE: 0.5558915986954801
Scikit-Learn Model MSE: 0.5558915986952425
Our Model R-squared: 0.5757877060322709
Scikit-Learn Model R-squared: 0.5757877060324521
Our Model MAE: 0.5332001304959607
Scikit-Learn Model MAE: 0.5332001304956989
Our Model MAPE: 31.95218741366428%
Scikit-Learn Model MAPE: 31.952187413621747%
