In [81]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
# Seed NumPy's legacy global RNG so the random weight initialisation
# (np.random.rand further down) is repeatable across runs.
np.random.seed(25)


In [82]:
# Load the California housing dataset as pandas objects (as_frame=True).
house = fetch_california_housing(as_frame=True)
x, y = house.data, house.target  # feature frame and target series
x


Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25
...,...,...,...,...,...,...,...,...
20635,1.5603,25.0,5.045455,1.133333,845.0,2.560606,39.48,-121.09
20636,2.5568,18.0,6.114035,1.315789,356.0,3.122807,39.49,-121.21
20637,1.7000,17.0,5.205543,1.120092,1007.0,2.325635,39.43,-121.22
20638,1.8672,18.0,5.329513,1.171920,741.0,2.123209,39.43,-121.32


In [83]:
def minmax(x):
    """Rescale ``x`` to the [0, 1] range with min-max normalisation.

    The bounds come from ``x.min()`` and ``x.max()``, so a pandas DataFrame
    is scaled column by column and a pandas Series is scaled as a whole.

    Parameters
    ----------
    x : pandas DataFrame or Series (or any object with ``min()``/``max()``
        and element-wise arithmetic).

    Returns
    -------
    The result of ``(x - min) / (max - min)``. Where the range is zero
    (a constant column/series) the values come back as 0 rather than the
    NaN a 0/0 division would give.
    """
    lo = x.min()
    span = x.max() - lo
    # A zero range means every value equals the minimum; divide by 1 there so
    # the scaled value is 0 instead of NaN.
    if np.isscalar(span):
        span = span if span != 0 else 1
    else:
        span = span.where(span != 0, 1)
    return (x - lo) / span


In [84]:
# Min-max scale the features (per column) and the target with minmax().
# NOTE(review): min/max are taken over the full dataset, i.e. before the
# train/test split, so test rows influence the scaling — confirm this is intended.
x_scaled, y_scaled = minmax(x), minmax(y)
x_scaled

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,0.539668,0.784314,0.043512,0.020469,0.008941,0.001499,0.567481,0.211155
1,0.538027,0.392157,0.038224,0.018929,0.067210,0.001141,0.565356,0.212151
2,0.466028,1.000000,0.052756,0.021940,0.013818,0.001698,0.564293,0.210159
3,0.354699,1.000000,0.035241,0.021929,0.015555,0.001493,0.564293,0.209163
4,0.230776,1.000000,0.038534,0.022166,0.015752,0.001198,0.564293,0.209163
...,...,...,...,...,...,...,...,...
20635,0.073130,0.470588,0.029769,0.023715,0.023599,0.001503,0.737513,0.324701
20636,0.141853,0.333333,0.037344,0.029124,0.009894,0.001956,0.738576,0.312749
20637,0.082764,0.313725,0.030904,0.023323,0.028140,0.001314,0.732200,0.311753
20638,0.094295,0.333333,0.031783,0.024859,0.020684,0.001152,0.732200,0.301793


In [85]:
# Hold out 20% of the rows for testing; random_state pins the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    x_scaled,
    y_scaled,
    test_size=0.2,
    random_state=42,
)

print(f"X_train: {x_train.shape} Y_train: {y_train.shape} X_test: {x_test.shape} Y_test: {y_test.shape}")
x_scaled


X_train: (16512, 8) Y_train: (16512,) X_test: (4128, 8) Y_test: (4128,)


Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,0.539668,0.784314,0.043512,0.020469,0.008941,0.001499,0.567481,0.211155
1,0.538027,0.392157,0.038224,0.018929,0.067210,0.001141,0.565356,0.212151
2,0.466028,1.000000,0.052756,0.021940,0.013818,0.001698,0.564293,0.210159
3,0.354699,1.000000,0.035241,0.021929,0.015555,0.001493,0.564293,0.209163
4,0.230776,1.000000,0.038534,0.022166,0.015752,0.001198,0.564293,0.209163
...,...,...,...,...,...,...,...,...
20635,0.073130,0.470588,0.029769,0.023715,0.023599,0.001503,0.737513,0.324701
20636,0.141853,0.333333,0.037344,0.029124,0.009894,0.001956,0.738576,0.312749
20637,0.082764,0.313725,0.030904,0.023323,0.028140,0.001314,0.732200,0.311753
20638,0.094295,0.333333,0.031783,0.024859,0.020684,0.001152,0.732200,0.301793


In [86]:
# Gradient-descent settings and initial model parameters.
epochs = 20_000  # number of full-batch update steps
alpha = 0.1      # learning rate
b = 0.0          # bias starts at zero
w = np.random.rand(x_train.shape[1])  # one weight per feature column, drawn from [0, 1)


In [87]:
def pred(x, w, b):
    """Evaluate the linear model: the dot product of ``x`` and ``w``, plus ``b``.

    Parameters
    ----------
    x : array-like of inputs (rows are samples when 2-D).
    w : array-like of weights, compatible with ``np.dot(x, w)``.
    b : bias added to every output.

    Returns
    -------
    ``np.dot(x, w) + b``.
    """
    weighted_sum = np.dot(x, w)
    return weighted_sum + b

In [88]:
# Full-batch gradient descent on mean squared error for the linear model.
# NOTE: w and b are overwritten as training proceeds, so re-running this cell
# continues from the current parameters instead of starting over.
grad_scale = 2 / len(x_train)  # loop-invariant factor of the MSE gradient

for i in range(epochs):
    y_pred = pred(x_train, w, b)

    error = y_pred - y_train  # residuals, shape (n,)

    # Gradients of mean(error**2) with respect to w and b.
    dw = grad_scale * np.dot(x_train.T, error)
    db = grad_scale * np.sum(error)

    w -= alpha * dw
    b -= alpha * db

    # Report the training MSE every 100 steps; it is computed from the
    # residuals measured before this step's parameter update.
    # (The former `elif i == 1000` branch was unreachable: 1000 % 100 == 0.)
    if i % 100 == 0:
        mse = np.mean(error**2)
        print(f"Epoch {i}: Error {mse:.4f}")
        
    

Epoch 0: Error 0.3974
Epoch 100: Error 0.0349
Epoch 200: Error 0.0323
Epoch 300: Error 0.0309
Epoch 400: Error 0.0299
Epoch 500: Error 0.0292
Epoch 600: Error 0.0286
Epoch 700: Error 0.0281
Epoch 800: Error 0.0277
Epoch 900: Error 0.0273
Epoch 1000: Error 0.0270
Epoch 1100: Error 0.0266
Epoch 1200: Error 0.0264
Epoch 1300: Error 0.0261
Epoch 1400: Error 0.0258
Epoch 1500: Error 0.0256
Epoch 1600: Error 0.0254
Epoch 1700: Error 0.0252
Epoch 1800: Error 0.0250
Epoch 1900: Error 0.0249
Epoch 2000: Error 0.0247
Epoch 2100: Error 0.0246
Epoch 2200: Error 0.0245
Epoch 2300: Error 0.0243
Epoch 2400: Error 0.0242
Epoch 2500: Error 0.0241
Epoch 2600: Error 0.0240
Epoch 2700: Error 0.0239
Epoch 2800: Error 0.0239
Epoch 2900: Error 0.0238
Epoch 3000: Error 0.0237
Epoch 3100: Error 0.0236
Epoch 3200: Error 0.0236
Epoch 3300: Error 0.0235
Epoch 3400: Error 0.0235
Epoch 3500: Error 0.0234
Epoch 3600: Error 0.0234
Epoch 3700: Error 0.0233
Epoch 3800: Error 0.0233
Epoch 3900: Error 0.0233
Epoch 4000: 

In [89]:
# Display the held-out feature rows (already min-max scaled).
x_test

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
20046,0.081468,0.470588,0.023720,0.020423,0.038930,0.002563,0.374070,0.531873
3024,0.140095,0.568627,0.029726,0.025499,0.043779,0.001599,0.276302,0.487052
15663,0.205528,1.000000,0.022196,0.025273,0.036632,0.000538,0.558980,0.190239
20484,0.361216,0.313725,0.037696,0.020362,0.047703,0.002215,0.184910,0.560757
9814,0.222418,0.647059,0.032942,0.020594,0.029709,0.001442,0.433581,0.241036
...,...,...,...,...,...,...,...,...
15362,0.283106,0.294118,0.043641,0.021730,0.037781,0.001848,0.087141,0.710159
16623,0.153563,0.529412,0.037471,0.027374,0.046162,0.001326,0.299681,0.350598
18086,0.602054,0.470588,0.045310,0.018197,0.044340,0.001688,0.506908,0.229084
2144,0.157591,0.686275,0.031496,0.019263,0.034306,0.001526,0.449522,0.457171


In [90]:
# Predict the (scaled) target for the held-out rows with the trained w and b.
final_pred = pred(x_test,w,b)

In [91]:
# Evaluate on the held-out rows.
# Undo the min-max scaling of the target: y_scaled = (y - y_min) / (y_max - y_min).
y_min = y.min()
y_max = y.max()
y_range = y_max - y_min

y_test_real = y_test * y_range + y_min
final_pred_real = final_pred * y_range + y_min

r2 = r2_score(y_test, final_pred)
print(f"R2 score:{r2:.2f}")

# The dataset's target is expressed in units of $100,000, so the raw MAE is in
# those units; multiply by the unit size to report dollars.
DOLLARS_PER_TARGET_UNIT = 100000
actual_error = np.mean(np.abs(y_test_real - final_pred_real))
mae = actual_error * DOLLARS_PER_TARGET_UNIT
print("MAE in units of $100,000:", actual_error)

print("MAE in dollars:", mae)


R2 score:0.58
Actual in dollars: 0.5413651581863382
MAE in dollars: 54136.51581863382


In [92]:
# Wrap the predictions in a one-column frame for display.
# NOTE(review): this rebinds final_pred to a DataFrame, so cells that expect the
# raw prediction array (e.g. the metrics cell) will behave differently if re-run
# after this one. The frame also gets a fresh 0..n-1 index, not x_test's row labels.
final_pred = pd.DataFrame(final_pred,columns=["MedHouseVal"])
final_pred

Unnamed: 0,MedHouseVal
0,0.120139
1,0.330742
2,0.482784
3,0.562954
4,0.526618
...,...
4123,0.417724
4124,0.454883
4125,0.885229
4126,0.234340


In [93]:

# Show the (scaled) held-out targets as a one-column frame; the original row
# labels are kept as the index.
# NOTE(review): this rebinds y_test from the split's output to a DataFrame, so
# earlier cells that use y_test will see a different type if re-run afterwards.
y_test = pd.DataFrame(y_test, columns=["MedHouseVal"])
y_test

Unnamed: 0,MedHouseVal
20046,0.067424
3024,0.063507
15663,1.000000
20484,0.419794
9814,0.542268
...,...
15362,0.511959
16623,0.519175
18086,1.000000
2144,0.118146
