In [36]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import math

In [4]:
df = pd.read_csv("housing.csv")

In [6]:
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [7]:
df[["housing_median_age", "total_rooms"]].describe()

Unnamed: 0,housing_median_age,total_rooms
count,20640.0,20640.0
mean,28.639486,2635.763081
std,12.585558,2181.615252
min,1.0,2.0
25%,18.0,1447.75
50%,29.0,2127.0
75%,37.0,3148.0
max,52.0,39320.0


In [11]:
scaler =  StandardScaler()
scaler.set_output(transform='pandas')

In [13]:
df_scaled =  scaler.fit_transform(df.drop(columns="ocean_proximity"))
df_scaled

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-1.327835,1.052548,0.982143,-0.804819,-0.970325,-0.974429,-0.977033,2.344766,2.129631
1,-1.322844,1.043185,-0.607019,2.045890,1.348276,0.861439,1.669961,2.332238,1.314156
2,-1.332827,1.038503,1.856182,-0.535746,-0.825561,-0.820777,-0.843637,1.782699,1.258693
3,-1.337818,1.038503,1.856182,-0.624215,-0.718768,-0.766028,-0.733781,0.932968,1.165100
4,-1.337818,1.038503,1.856182,-0.462404,-0.611974,-0.759847,-0.629157,-0.012881,1.172900
...,...,...,...,...,...,...,...,...,...
20635,-0.758826,1.801647,-0.289187,-0.444985,-0.388895,-0.512592,-0.443449,-1.216128,-1.115804
20636,-0.818722,1.806329,-0.845393,-0.888704,-0.920488,-0.944405,-1.008420,-0.691593,-1.124470
20637,-0.823713,1.778237,-0.924851,-0.174995,-0.125472,-0.369537,-0.174042,-1.142593,-0.992746
20638,-0.873626,1.778237,-0.845393,-0.355600,-0.305834,-0.604429,-0.393753,-1.054583,-1.058608


In [15]:
df_scaled[["housing_median_age", "total_rooms"]].describe()

Unnamed: 0,housing_median_age,total_rooms
count,20640.0,20640.0
mean,5.508083e-18,3.2015730000000005e-17
std,1.000024,1.000024
min,-2.19618,-1.207283
25%,-0.8453931,-0.5445698
50%,0.02864572,-0.2332104
75%,0.6643103,0.2348028
max,1.856182,16.81558


In [23]:
dfx =  df_scaled.fillna(0)

In [24]:
data_x = dfx.drop(columns='median_house_value')
data_y = dfx['median_house_value']

In [25]:
train_x, test_x, train_y, test_y = train_test_split(data_x, data_y, test_size=0.2)

In [26]:
reg = LinearRegression()

In [27]:
reg.fit(train_x, train_y)

In [33]:
def regSummary(y_true, y_pred):
    return{
        "r2_score": r2_score(y_true, y_pred),
        "mae": mean_absolute_error(y_true, y_pred),
        "mse": mean_squared_error(y_true, y_pred),
        "rmse": math.sqrt(mean_squared_error(y_true, y_pred))
    }

In [34]:
y_pred = reg.predict(test_x)

In [35]:
regSummary(test_y, y_pred)

{'r2_score': 0.633628574985901,
 'mae': 0.43538625837785677,
 'mse': 0.36009884985664664,
 'rmse': 0.6000823692266309}

In [37]:
sgd_reg = SGDRegressor()

In [38]:
sgd_reg.fit(train_x, train_y)

In [39]:
y_pred =  sgd_reg.predict(test_x) 

In [40]:
regSummary(test_y, y_pred)

{'r2_score': 0.6335099830144286,
 'mae': 0.43579441388427204,
 'mse': 0.36021541143763725,
 'rmse': 0.6001794826863355}

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income
5166,0.643731,-0.792107,1.141059,-0.657677,-0.583496,-0.517007,-0.647466,-0.567947
6009,0.903279,-0.735924,0.346478,-0.368893,-0.123098,0.176190,-0.072033,-0.987996
13296,0.958183,-0.726561,1.856182,-0.170411,-0.234638,-0.348344,-0.163579,-0.260910
10259,0.858357,-0.829562,-0.845393,-1.057390,-1.105597,-1.074214,-1.120891,0.426329
17493,-0.099974,-0.562695,-0.050812,0.312729,-0.229892,0.157646,-0.249894,3.111646
...,...,...,...,...,...,...,...,...
6235,0.803453,-0.731243,0.505394,0.083993,-0.021051,0.072872,-0.038030,0.390114
16625,-0.634044,-0.122600,0.425936,0.294851,0.417988,-0.025146,0.383083,-0.692435
17609,-1.173105,0.776318,0.425936,-0.775941,-0.956086,-0.924978,-0.943030,0.687360
3298,-1.527488,1.558190,-0.924851,-0.425275,-0.187174,-0.699799,-0.490530,-1.116274


In [None]:
|