In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import SGDRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

#load the california housing dataset
dataset = fetch_california_housing()
df=pd.DataFrame(dataset.data,columns=dataset.feature_names)
df['HousingPrice']=dataset.target
print(df.head())

   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  HousingPrice  
0    -122.23         4.526  
1    -122.22         3.585  
2    -122.24         3.521  
3    -122.25         3.413  
4    -122.25         3.422  


In [2]:
# Use the first 3 features as inputs
X = df.drop(columns=['AveOccup', 'HousingPrice'])  # Features: 'MedInc', 'HouseAge', 'AveRooms'

# Use 'AveOccup' and 'HousingPrice' as output variables
Y = df[['AveOccup', 'HousingPrice']]  # Targets: 'AveOccup', 'HousingPrice'

# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Scale the features and target variables
scaler_X = StandardScaler()
scaler_Y = StandardScaler()

X_train = scaler_X.fit_transform(X_train)
X_test = scaler_X.transform(X_test)
Y_train = scaler_Y.fit_transform(Y_train)
Y_test = scaler_Y.transform(Y_test)


In [3]:
#initialize the SGDRegressor
sgd=SGDRegressor(max_iter=1000, tol=1e-3)

#Use MultiOutputRegressor to handle multiple output variables
multi_output_sgd= MultiOutputRegressor(sgd)

#train the model
multi_output_sgd.fit(X_train,Y_train)

#predict on the test data
Y_pred= multi_output_sgd.predict(X_test)

#inverse transform the predictions to get them back to the original scale
Y_pred=scaler_Y.inverse_transform(Y_pred)
Y_test=scaler_Y.inverse_transform(Y_test)

#evaluate the model using mean squared error
mse=mean_squared_error(Y_test,Y_pred)
print("Mean Squared Error:",mse)

#optionally, print some predictions
print("\nPredictions:\n",Y_pred[:5]) #print first 5 predictions

Mean Squared Error: 2.016573976367864

Predictions:
 [[3.14335647 0.74352306]
 [2.74764114 1.77603477]
 [3.874269   2.66715625]
 [2.46400218 2.82900289]
 [1.89905015 2.57792259]]
