In [None]:
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler

In [None]:
# Load the housing dataset from a local CSV file.
home = pd.read_csv("housing.csv")
# DataFrame.fillna returns a new frame instead of modifying in place, so the
# result has to be assigned back for the missing values to actually be replaced.
home = home.fillna(0)
# Show a short preview rather than the whole frame.
home.head()

In [None]:
# Featurize
# `price` is the regression target, so it must NOT appear among the input
# columns: feeding it in lets the model read its own label (target leakage)
# and makes every evaluation metric meaningless.
num_cols = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']
categorical_cols = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus']

# One-hot encode categorical features
encoder = OneHotEncoder()
encoded_features = encoder.fit_transform(home[categorical_cols])

# Standardize the numerical input features (zero mean, unit variance)
# NOTE(review): encoder and scaler are fitted on the full dataset before the
# train/test split, so test-set statistics influence the scaling; consider
# fitting them on the training rows only.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(home[num_cols])

# Design matrix: one-hot columns first, then the scaled numeric columns
x = np.concatenate([encoded_features.toarray(), scaled_features], axis=1)
# Target: the raw (unscaled) house price
y = home['price']
X = x
Y = y

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Zero-initialised starting parameters for the linear model
def initialize_weights(row):
    """Create all-zero starting parameters for a linear model.

    Parameters
    ----------
    row : array-like with a ``shape`` attribute
        Two-dimensional input; only ``row.shape[1]`` (the column count)
        is read.

    Returns
    -------
    tuple
        ``(w, b)`` where ``w`` is a float64 zero vector with one entry per
        input column and ``b`` is a float64 array of shape ``(1,)`` holding
        the bias, also zero.
    """
    n_columns = row.shape[1]
    weights = np.zeros(n_columns)
    # The bias is kept as a length-1 array so callers can update it in place.
    bias = np.zeros(1, dtype=np.float64)
    return weights, bias

In [None]:
# Gradients of the regularised squared-error loss w.r.t. the weights and the bias
def gradient(x, y, w, b, n, reg_lambda):
    """Compute the batch gradients for a linear model ``x @ w + b``.

    Parameters
    ----------
    x : 2-D array, one row per sample in the batch.
    y : targets for the rows of ``x``.
    w : 1-D weight vector, one entry per column of ``x``.
    b : bias term, broadcast onto every prediction.
    n : divisor applied to the regularisation term.
    reg_lambda : regularisation strength.

    Returns
    -------
    tuple
        ``(gradient_w, gradient_b)``: the residuals (prediction minus
        target) projected onto the features plus ``reg_lambda * w / n``,
        and the plain sum of the residuals. Both are summed over the batch,
        not averaged.
    """
    # Residual = prediction - target for every sample in the batch
    residual = np.dot(x, w) + b - y
    # Regularisation contribution to the weight gradient
    penalty = reg_lambda * w / n
    weight_grad = np.dot(x.T, residual) + penalty
    bias_grad = np.sum(residual)
    return weight_grad, bias_grad

In [None]:
def model(X, y, epochs, lr, reg_lambda, batch_size = 1):
    """Fit a regularised linear regression with mini-batch gradient descent.

    The data are visited in their given order (no shuffling), one batch of
    ``batch_size`` consecutive rows at a time, for ``epochs`` full passes.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input features.
    y : array-like of length n_samples
        Regression targets.
    epochs : int
        Number of full passes over the data.
    lr : float
        Learning rate applied to every gradient step.
    reg_lambda : float
        Regularisation strength forwarded to ``gradient``.
    batch_size : int, default 1
        Number of consecutive rows per update step.

    Returns
    -------
    tuple
        ``(w, b)``: the learned weight vector (one entry per feature) and
        the bias as a length-1 array.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1 or ``y`` does not contain exactly
        one target per row of ``X``.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Work on plain float arrays so that positional slicing and arithmetic
    # behave the same for lists, ndarrays and pandas objects.
    X = np.asarray(X, dtype=np.float64)
    y = np.asarray(y, dtype=np.float64).ravel()

    # n: total number of datapoints
    n = len(X)
    if y.shape[0] != n:
        raise ValueError("X and y must contain the same number of samples")

    # initialize weights
    w, b = initialize_weights(X)
    for _ in range(epochs):
        # one epoch = one sweep over all batches
        for start in range(0, n, batch_size):
            stop = start + batch_size
            X_batch = X[start:stop, :]
            y_batch = y[start:stop]
            # Gradients of the loss for this batch. `dw` already holds one
            # entry per feature (summed over the batch), so it must be used
            # as-is: reducing it again would collapse it to a single scalar
            # and give every weight the same update.
            dw, db = gradient(X_batch, y_batch, w, b, n, reg_lambda)
            # update the parameters in place
            w -= lr * dw
            b -= lr * db
    return w, b

In [None]:
# Hyperparameters
epochs = 20          # number of full passes over the training data
lr = 0.01            # learning rate for each update step
reg_lambda = 0.0001  # regularisation strength

# Train with one sample per update step (batch_size = 1)
w, b = model(
    X_train,
    Y_train,
    epochs=epochs,
    lr=lr,
    reg_lambda=reg_lambda,
    batch_size=1,
)

In [None]:
# Predict on the held-out split with the learned weights and bias.
y_pred = np.dot(X_test, w) + b

# Aggregate regression metrics.
mse = mean_squared_error(Y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(Y_test, y_pred)
r2 = r2_score(Y_test, y_pred)

for label, value in (
    ("Mean Squared Error:", mse),
    ("Root Mean Squared Error:", rmse),
    ("Mean Absolute Error:", mae),
    ("R-squared:", r2),
):
    print(label, value)

print()

# List each test example next to its prediction, separated by a blank line.
for actual, predicted in zip(Y_test, y_pred):
    print("Actual Price:", actual)
    print("Predicted Price:", predicted, end="\n\n")