### Data 

In [39]:
from sklearn.datasets import fetch_california_housing
california = fetch_california_housing()
X, y = california.data, california.target

In [40]:
X.shape, y.shape

((20640, 8), (20640,))

In [41]:
import pandas as pd
import numpy as np

In [42]:
df = pd.DataFrame(X, columns=california.feature_names)
df['target'] = y

In [43]:
df

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422
...,...,...,...,...,...,...,...,...,...
20635,1.5603,25.0,5.045455,1.133333,845.0,2.560606,39.48,-121.09,0.781
20636,2.5568,18.0,6.114035,1.315789,356.0,3.122807,39.49,-121.21,0.771
20637,1.7000,17.0,5.205543,1.120092,1007.0,2.325635,39.43,-121.22,0.923
20638,1.8672,18.0,5.329513,1.171920,741.0,2.123209,39.43,-121.32,0.847


In [44]:
rand_state= 1000

In [45]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=rand_state)

### Training the model 

In [46]:
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV,Lasso, LassoCV, ElasticNet, ElasticNetCV

In [47]:
model_linear = LinearRegression()
model_ridge = Ridge()
model_lasso = Lasso()
model_net = ElasticNet()

In [48]:
# fitting the model 
y_hat_linear= model_linear.fit(X_train, y_train).predict(X_test)
y_hat_ridge = model_ridge.fit(X_train, y_train).predict(X_test)
y_hat_lasso = model_lasso.fit(X_train, y_train).predict(X_test)
y_hat_net   = model_net.fit(X_train, y_train).predict(X_test)

In [49]:
print(y_hat_linear)

[2.36556581 7.06322059 1.78179945 ... 0.69700626 2.20578497 2.06624148]
[2.36556581 7.06322059 1.78179945 ... 0.69700626 2.20578497 2.06624148]


In [50]:
df_predictions = pd.DataFrame({'y_test':y_test,
                               'y_hat_linear':y_hat_linear,
                               'y_hat_ridge':y_hat_ridge,
                               'y_hat_lasso':y_hat_lasso,
                               'y_hat_net':y_hat_net})
df_predictions.head(10)

Unnamed: 0,y_test,y_hat_linear,y_hat_ridge,y_hat_lasso,y_hat_net
0,2.373,2.365566,2.365359,2.140084,2.169419
1,5.00001,7.063221,7.062493,3.787249,5.02617
2,1.124,1.781799,1.782023,1.920522,1.816473
3,1.125,1.898165,1.897489,1.87944,1.734971
4,2.486,2.293145,2.293471,2.299494,2.447742
5,2.413,2.946311,2.946371,2.364473,2.568697
6,1.577,2.385586,2.385845,2.23608,2.318454
7,3.31,2.457739,2.457806,2.183218,2.254851
8,3.973,4.00811,4.008178,2.720379,3.178342
9,1.188,1.054305,1.054807,1.947606,1.844452


### performance 

In [52]:
model_linear.score(X_test, y_test)

0.5850013795994415

In [54]:
from sklearn.metrics import mean_squared_error
rmse = mean_squared_error(y_test, y_hat_linear)
print(rmse)

0.5365907371448468
0.5365907371448468


In [55]:
from pycaret.regression import *

In [60]:
exp1 = setup(df, target=df['target'], session_id=1000, train_size=0.8, 
fold_strategy='kfold', fold=5, normalize=True, normalize_method='zscore')

print(exp1)

<pycaret.regression.oop.RegressionExperiment object at 0x31086c640>
<pycaret.regression.oop.RegressionExperiment object at 0x31086c640>
