In [13]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler

In [14]:
data=fetch_california_housing()
df =pd.DataFrame(data.data,columns = data.feature_names)
df['target'] =data.target
print(df.head())

   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  target  
0    -122.23   4.526  
1    -122.22   3.585  
2    -122.24   3.521  
3    -122.25   3.413  
4    -122.25   3.422  


In [15]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   MedInc      20640 non-null  float64
 1   HouseAge    20640 non-null  float64
 2   AveRooms    20640 non-null  float64
 3   AveBedrms   20640 non-null  float64
 4   Population  20640 non-null  float64
 5   AveOccup    20640 non-null  float64
 6   Latitude    20640 non-null  float64
 7   Longitude   20640 non-null  float64
 8   target      20640 non-null  float64
dtypes: float64(9)
memory usage: 1.4 MB


In [16]:

Y = np.column_stack((data.target,data.data[:,6]))

In [17]:
X=df.drop(columns=['AveOccup','target'],inplace=False)



In [18]:
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [19]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   MedInc      20640 non-null  float64
 1   HouseAge    20640 non-null  float64
 2   AveRooms    20640 non-null  float64
 3   AveBedrms   20640 non-null  float64
 4   Population  20640 non-null  float64
 5   Latitude    20640 non-null  float64
 6   Longitude   20640 non-null  float64
dtypes: float64(7)
memory usage: 1.1 MB


In [20]:

x_train,x_test,y_train,y_test =train_test_split(X,Y,test_size=0.2,random_state=1)


In [21]:
X.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,37.85,-122.25


In [22]:
scaler_x = StandardScaler()
scaler_y = StandardScaler()

x_train = scaler_x.fit_transform(x_train)


x_test = scaler_x.transform(x_test)

y_train = scaler_y.fit_transform(y_train)
y_test = scaler_y.transform(y_test)

In [23]:
print(x_train)

[[-0.36232605  1.85890297 -0.16877334 ... -0.4861138   0.97229046
  -1.42250942]
 [-0.14102329  1.06434823 -0.20665523 ... -0.40424308  1.08459626
  -1.38265919]
 [-0.66144956 -1.0014941   1.10658361 ... -1.25053723  1.06119922
  -0.8297373 ]
 ...
 [-1.45044201  1.06434823 -0.21055978 ... -0.57866332 -0.79652586
   0.65468363]
 [-0.65764311 -1.47822694 -0.17198179 ... -0.09099855 -0.89011402
   1.20262424]
 [-0.83136525  0.50815991 -0.31396782 ... -0.37042691  1.00972573
  -1.30794002]]


In [24]:
sgd = SGDRegressor(max_iter=1000,tol=1e-3)
multi_output_sgd =MultiOutputRegressor(sgd)

multi_output_sgd.fit(x_train,y_train)

y_prd= multi_output_sgd.predict(x_test)

In [25]:
y_prd

array([[ 0.03513832, -0.73564247],
       [-0.99550374,  0.5357434 ],
       [ 0.51624375,  0.9822631 ],
       ...,
       [ 0.74416007,  0.92253786],
       [ 0.47971773, -0.72341687],
       [-0.53187625, -0.76899464]])

In [26]:
y_prd = scaler_y.inverse_transform(y_prd)

y_test = scaler_y.inverse_transform(y_test)
mse = mean_squared_error(y_test,y_prd)
print("Mean squared error :",mse)

Mean squared error : 0.26509418973629206
