In [21]:
import pandas as pd
import numpy as np

# Load the dataset.
# NOTE(review): the path is relative, so 'housing.csv' has to be in the
# notebook's working directory. Later cells read the columns `price`, `area`,
# `bathrooms` and `bedrooms` from this frame.
df = pd.read_csv('housing.csv')

In [14]:

def hypothesis(X, w):
    """Return the linear-model predictions for inputs ``X`` and weights ``w``.

    Parameters
    ----------
    X : array-like
        Inputs, handed unchanged to ``np.dot`` as its first argument.
    w : array-like
        Weights, handed unchanged to ``np.dot`` as its second argument.

    Returns
    -------
    The result of ``np.dot(X, w)``.
    """
    predictions = np.dot(X, w)
    return predictions

In [15]:
def loss(X, y, w, lambda_=0.0):
    """Half mean squared error of the linear model, optionally L2-penalised.

    Without ``lambda_`` this is ``1 / (2 * m) * sum((X @ w - y) ** 2)`` with
    ``m = len(y)``. When ``lambda_`` is non-zero the penalty
    ``(lambda_ / m) * sum(w ** 2)`` is added; its gradient is
    ``(2 * lambda_ / m) * w``, i.e. exactly the regularisation term used by
    ``gradient_descent_step``, so the returned value is the objective that
    gradient descent actually minimises.

    Parameters
    ----------
    X : array-like
        Feature matrix passed to ``hypothesis``.
    y : array-like
        Target values; ``len(y)`` is used as the sample count ``m``.
    w : array-like
        Weight vector.
    lambda_ : float, optional
        L2 regularisation strength. Defaults to ``0.0`` (no penalty), which
        reproduces the unregularised loss.

    Returns
    -------
    The scalar loss value.

    Raises
    ------
    ZeroDivisionError
        If ``y`` is empty (``m == 0``).
    """
    m = len(y)
    errors = hypothesis(X, w) - y
    data_term = 1 / (2 * m) * np.sum(errors ** 2)
    if not lambda_:
        # Keep the unregularised path identical to the original computation.
        return data_term
    return data_term + (lambda_ / m) * np.sum(np.square(w))

In [35]:

def gradient_descent_step(X, y, w, learning_rate, lambda_):
    """Perform one L2-regularised gradient-descent update and return the new weights.

    The gradient is ``(1 / m) * X.T @ (X @ w - y) + (2 * lambda_ / m) * w``
    with ``m = len(y)``.

    The update is computed out of place: the array passed as ``w`` is never
    modified, and a fresh array is returned. (An in-place ``w -= ...`` would
    silently change the caller's array and fails for integer-typed ``w``
    because the float update cannot be cast back.)

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix; ``X.T`` is used, so it must expose ``.T``.
    y : array-like
        Target values; ``len(y)`` is used as the sample count ``m``.
    w : numpy.ndarray
        Current weight vector.
    learning_rate : float
        Step size multiplying the gradient.
    lambda_ : float
        L2 regularisation strength.

    Returns
    -------
    numpy.ndarray
        The updated weight vector (a new array).
    """
    m = len(y)
    errors = hypothesis(X, w) - y
    # Data term plus the L2 regularisation term of the gradient.
    gradient = 1 / m * np.dot(X.T, errors) + (2 * lambda_ / m) * w
    return w - learning_rate * gradient

In [34]:
def gradient_descent(X, y, learning_rate, num_iterations, lambda_):
    """Run batch gradient descent from an all-zero start and return the weights.

    Parameters
    ----------
    X : numpy.ndarray
        Two-dimensional feature matrix; its second dimension sets the number
        of weights.
    y : array-like
        Target values, forwarded to ``gradient_descent_step``.
    learning_rate : float
        Step size, forwarded to ``gradient_descent_step``.
    num_iterations : int
        Number of update steps to perform.
    lambda_ : float
        L2 regularisation strength, forwarded to ``gradient_descent_step``.

    Returns
    -------
    numpy.ndarray
        Weight vector after ``num_iterations`` updates.
    """
    _, n_features = X.shape
    weights = np.zeros(n_features)
    for _step in range(num_iterations):
        weights = gradient_descent_step(X, y, weights, learning_rate, lambda_)
    return weights


In [11]:
def normalization(data):
    """Mean-normalise a sequence of numbers.

    Every value ``x`` is mapped to ``(x - mean) / (max - min)``, where the
    statistics are taken over ``data`` with ``np.mean`` / ``np.max`` /
    ``np.min``.

    Edge cases:
    * empty input returns an empty list instead of failing inside ``np.max``;
    * a constant input (``max == min``) returns all zeros instead of the
      NaNs produced by dividing by a zero range.

    Parameters
    ----------
    data : array-like of numbers
        Values to normalise (e.g. a list or a pandas Series).

    Returns
    -------
    list
        The normalised values, in the same order as ``data``.
    """
    values = np.asarray(data, dtype=float)
    if values.size == 0:
        return []

    # Statistics are computed on the original object, exactly as before, so
    # results for existing inputs are unchanged.
    mean = np.mean(data)
    value_range = np.max(data) - np.min(data)

    if value_range == 0:
        # All values are equal: every deviation from the mean is zero.
        return np.zeros_like(values).tolist()

    # Vectorised replacement for the per-element Python loop.
    return ((values - mean) / value_range).tolist()

In [66]:
# Mean-normalise the target and the three features, one column each.
norm = pd.DataFrame({
    column: normalization(df[column])
    for column in ['price', 'area', 'bathrooms', 'bedrooms']
})
print(norm.head(5))

# Feature matrix and target vector as plain NumPy arrays.
X = norm[['area', 'bathrooms', 'bedrooms']].values
y = norm['price'].values

# Hyperparameters.
learning_rate = 0.8
num_iterations = 1000
lambda_ = 0.01

# NOTE(review): w_0 and w_1 are not used anywhere in this cell;
# gradient_descent creates its own zero-initialised weight vector.
w_0 = 0
w_1 = 0

# Closed-form least-squares solution via the pseudo-inverse. It contains no
# lambda_ term, so it is the unregularised optimum; gradient descent below
# minimises the L2-penalised objective, so small differences are expected.
w_analytical = np.linalg.pinv(X.T @ X) @ X.T @ y

# Iterative solution.
w_gradient_descent = gradient_descent(X, y, learning_rate, num_iterations, lambda_)

print('Analytical Solution - Optimal w:', w_analytical)

print("Best parameters using gradient descent:", w_gradient_descent)


      price      area  bathrooms  bedrooms
0  0.738811  0.155977   0.237920  0.206972
1  0.647902  0.261818   0.904587  0.206972
2  0.647902  0.330547   0.237920  0.006972
3  0.644872  0.161475   0.237920  0.206972
4  0.575175  0.155977  -0.095413  0.206972
Analytical Solution - Optimal w: [0.47714269 0.36001286 0.17611257]
Best parameters using gradient descent: [0.47643498 0.3596789  0.17606596]
