In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression

In [46]:
def h(X, w_vec):
    """Evaluate the linear hypothesis for every row of ``X``.

    Parameters
    ----------
    X : array_like
        Design matrix (presumably one row per sample -- confirm with caller).
    w_vec : array_like
        Weights; must be dot-compatible with ``X``.

    Returns
    -------
    The result of ``np.dot(X, w_vec)``.
    """
    predictions = np.dot(X, w_vec)
    return predictions

In [51]:
def loss_function(X, w_vec, y):
    """Return the halved mean squared error of the linear model.

    The value is ``(1 / (2 * m)) * sum((h(X, w_vec) - y) ** 2)`` where ``m``
    is ``y.size``.

    Parameters
    ----------
    X : array_like
        Design matrix passed through to ``h``.
    w_vec : array_like
        Weights passed through to ``h``.
    y : numpy.ndarray
        Targets; its ``size`` is used as the number of samples.

    Returns
    -------
    The scalar cost.
    """
    residuals = h(X, w_vec) - y
    scale = 1 / (2 * y.size)
    return scale * np.sum(np.square(residuals))

In [52]:
def gradient(X, w_vec, y):
    """Return the gradient of ``loss_function`` with respect to the weights.

    Computed as ``X.T @ (h(X, w_vec) - y) / m`` where ``m`` is ``y.size``.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix; its transpose is multiplied with the residuals.
    w_vec : array_like
        Current weights.
    y : numpy.ndarray
        Targets; its ``size`` is used as the number of samples.

    Returns
    -------
    Array holding one partial derivative per column of ``X``.
    """
    residuals = h(X, w_vec) - y
    summed = X.T @ residuals
    return summed / y.size

In [65]:
def grad_descent(X, w_vec, y, num_iterations, learning_rate):
    """Run batch gradient descent and return the best weights encountered.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix forwarded to ``gradient`` and ``loss_function``.
    w_vec : array_like
        Initial weights. The array is copied, so the caller's object is left
        untouched.
    y : numpy.ndarray
        Targets forwarded to ``gradient`` and ``loss_function``.
    num_iterations : int
        Number of update steps to perform.
    learning_rate : float
        Step size multiplied with the gradient on every update.

    Returns
    -------
    best_weights : numpy.ndarray
        Weights that produced the lowest recorded loss. If no step lowers the
        loss (for example ``num_iterations == 0``), the initial weights are
        returned.
    loss_history : list
        Loss after each update step, in order.
    """
    # Work on a float copy: keeps the caller's array intact and lets the
    # in-place update below succeed even for integer-typed initial weights.
    weights = np.array(w_vec, dtype=float)

    best_loss = np.inf
    # Pre-seed so the name is always bound, even when the loop never improves.
    best_weights = weights.copy()
    loss_history = []

    for _ in range(num_iterations):
        weights -= learning_rate * gradient(X, weights, y)

        loss = loss_function(X, weights, y)
        loss_history.append(loss)

        if loss < best_loss:
            best_loss = loss
            # Snapshot the local weights rather than any notebook-level global.
            best_weights = weights.copy()

    return best_weights, loss_history

In [36]:
def norm(data):
    """Standardize ``data`` along axis 0 to zero mean and unit spread.

    Parameters
    ----------
    data : numpy.ndarray or pandas object
        Values to standardize; must provide ``mean(axis=0)`` and
        ``std(axis=0)``.

    Returns
    -------
    ``(data - mean) / std`` computed along axis 0. Where the spread is exactly
    zero (a constant column) a divisor of 1 is used, so that column becomes
    all zeros instead of NaN/inf.
    """
    data_mean = data.mean(axis=0)
    data_std = data.std(axis=0)
    # Guard against division by zero for constant columns.
    safe_std = np.where(data_std == 0, 1.0, data_std)
    return (data - data_mean) / safe_std

In [18]:
# Load the housing data; the relative path means the CSV must sit in the
# notebook's working directory.
df = pd.read_csv("Housing.csv")

In [19]:
# Preview the first rows of the loaded frame to check columns and values.
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [67]:
# Predictors and target; the target is reshaped into a single column.
X = df[["area", "bedrooms", "bathrooms"]].to_numpy()
y = df["price"].to_numpy().reshape(-1, 1)

# Standardize both the features and the target.
X_norm, y_norm = norm(X), norm(y)

# Prepend a column of ones to the standardized features.
X_norm = np.hstack([np.ones((len(y), 1)), X_norm])

# One weight per feature plus one for the prepended ones column, all zero.
n = X.shape[1]
w = np.zeros((X_norm.shape[1], 1))

learning_rate, num_iterations = 0.01, 2000

w, loss_history = grad_descent(
    X_norm,
    w,
    y_norm,
    num_iterations=num_iterations,
    learning_rate=learning_rate,
)

In [68]:
# Rebuild the design matrix: X_norm[:, 1:] drops the current first column and
# a fresh column of ones is prepended in its place.
X_norm_analytical = np.hstack((np.ones((y.size, 1)), X_norm[:, 1:]))

# Closed-form least-squares weights. Solving the least-squares problem directly
# avoids forming the explicit inverse of X^T X, which loses accuracy and raises
# on a singular matrix (e.g. perfectly collinear features).
w_analytical = np.linalg.lstsq(X_norm_analytical, y_norm, rcond=None)[0]

In [71]:
# Show both weight vectors for comparison. The printed values are model
# weights (not errors), and each label names the method that produced them.
print(f"Weights (analytical solution): {w_analytical}")
print(f"Weights (gradient descent): {w}")

Mean squared error (gradient descent): [[5.53376789e-16]
 [4.39452085e-01]
 [1.60528660e-01]
 [3.72344423e-01]]
Mean squared error (analytical solution): [[3.03679609e-16]
 [4.39452124e-01]
 [1.60528959e-01]
 [3.72344106e-01]]
