In [1]:
from sklearn.datasets import fetch_california_housing
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import root_mean_squared_error, mean_absolute_percentage_error
from sklearn.preprocessing import PolynomialFeatures

In [2]:
# Load the California housing dataset and assemble a single DataFrame:
# one column per feature, plus the regression target as 'MedHouseVal'.
california_housing = fetch_california_housing()
df = pd.DataFrame(
    data=california_housing.data,
    columns=california_housing.feature_names,
).assign(MedHouseVal=california_housing.target)

In [3]:
# Separate the target column from the predictor columns.
y = df['MedHouseVal']
X = df.drop(columns='MedHouseVal')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training split only, then apply that same fitted
# transform to the test split. set_output keeps the results as DataFrames.
scaler = StandardScaler()
scaler.set_output(transform='pandas')
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
# Expand the scaled features to degree 2 (squares and pairwise products),
# keeping DataFrame output so the generated columns carry readable names.
# With the default settings the expansion also emits a constant column named '1'.
# NOTE(review): the LinearRegression fitted later also fits an intercept by
# default, so that constant column is likely redundant -- consider
# include_bias=False.
poly = PolynomialFeatures(degree=2)
poly.set_output(transform='pandas')
X_train_poly = poly.fit_transform(X_train_scaled)
X_test_poly = poly.transform(X_test_scaled)

In [5]:
# Inspect the expanded training matrix: the constant '1' column, the scaled
# original features, and their squares and pairwise products.
X_train_poly

Unnamed: 0,1,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedInc^2,...,Population^2,Population AveOccup,Population Latitude,Population Longitude,AveOccup^2,AveOccup Latitude,AveOccup Longitude,Latitude^2,Latitude Longitude,Longitude^2
14196,1.0,-0.326196,0.348490,-0.174916,-0.208365,0.768276,0.051376,-1.372811,1.272587,0.106404,...,0.590248,0.039471,-1.054698,0.977698,0.002640,-0.070530,0.065381,1.884611,-1.747021,1.619477
8267,1.0,-0.035843,1.618118,-0.402835,-0.128530,-0.098901,-0.117362,-0.876696,0.709162,0.001285,...,0.009781,0.011607,0.086706,-0.070137,0.013774,0.102891,-0.083229,0.768596,-0.621720,0.502911
17445,1.0,0.144701,-1.952710,0.088216,-0.257538,-0.449818,-0.032280,-0.460146,-0.447603,0.020939,...,0.202336,0.014520,0.206982,0.201340,0.001042,0.014853,0.014448,0.211735,0.205963,0.200349
14265,1.0,-1.017864,0.586545,-0.600015,-0.145156,-0.007434,0.077507,-1.382172,1.232698,1.036048,...,0.000055,-0.000576,0.010276,-0.009164,0.006007,-0.107128,0.095543,1.910399,-1.703801,1.519545
2271,1.0,-0.171488,1.142008,0.349007,0.086624,-0.485877,-0.068832,0.532084,-0.108551,0.029408,...,0.236077,0.033444,-0.258527,0.052743,0.004738,-0.036624,0.007472,0.283113,-0.057758,0.011783
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11284,1.0,1.307215,0.507194,0.290620,-0.393391,-0.675847,-0.005588,-0.872016,0.808883,1.708810,...,0.456769,0.003777,0.589349,-0.546681,0.000031,0.004873,-0.004520,0.760411,-0.705359,0.654292
11964,1.0,-0.436266,0.348490,0.600411,0.398898,0.287195,0.069722,-0.759688,1.073144,0.190328,...,0.082481,0.020024,-0.218178,0.308202,0.004861,-0.052967,0.074822,0.577125,-0.815255,1.151639
5390,1.0,-0.496973,0.586545,-0.606759,-0.039216,0.289833,0.020306,-0.755007,0.599469,0.246982,...,0.084003,0.005885,-0.218826,0.173746,0.000412,-0.015331,0.012173,0.570036,-0.452603,0.359363
860,1.0,0.965450,-1.079841,0.402175,-0.066265,0.308303,0.007076,0.906510,-1.185540,0.932095,...,0.095051,0.002182,0.279480,-0.365505,0.000050,0.006415,-0.008389,0.821761,-1.074704,1.405504


In [6]:
# Fit a linear regression on the degree-2 training features, then predict
# on the held-out test features.
model = LinearRegression().fit(X_train_poly, y_train)
y_pred = model.predict(X_test_poly)

# Error metrics on the test split. MAPE is reported as a fraction, not a percent.
rmse = root_mean_squared_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)

# NOTE: "accuracy" here is simply 1 - MAPE. It is not a classification
# accuracy, and it becomes negative whenever MAPE exceeds 1.
accuracy = 1 - mape

print(
    f'RMSE: {rmse}',
    f'MAPE: {mape}',
    f'Accuracy: {accuracy}',
    sep='\n',
)

RMSE: 0.6813967448044721
MAPE: 0.26899311664723097
Accuracy: 0.731006883352769
