In [17]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler


data = pd.read_csv('Housing.csv')
data.dropna(inplace=True)

X = data[['area', 'bathrooms', 'bedrooms']].values
y = data['price'].values


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [7]:
def hypothesis(X, w):
  return np.dot(X, w)

def compute_cost(X, y, w):
  m = len(y)
  h = hypothesis(X, w)
  cost = (1 / (2*m)) * np.sum((h - y) ** 2)
  return cost

In [9]:
def gradient_descent_step(X, y, w, learning_rate):
    m = len(y)
    h = hypothesis(X, w)
    gradient = np.dot(X.T, (h - y)) / m
    w -= learning_rate * gradient
    return w

In [16]:
model = LinearRegression()
model.fit(X_train_scaled, y_train)

inercept = model.intercept_
coefficients = model.coef_

(intercept, coefficients)

(4706527.385321101, array([760642.88074097, 678224.07261084, 269048.2662201 ]))

In [14]:

X_b = np.hstack([np.ones((X_train.shape[0], 1)), X_train_scaled])
X_test_b = np.hstack([np.ones((X_test.shape[0], 1)), X_test_scaled])


def normal_equation(X, y):
    return np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)

w_ne = normal_equation(X_b, y_train)




In [18]:
w_gd = np.hstack([intercept, coefficients])


comparison_df = pd.DataFrame({
    "Gradient Descent": w_gd,
    "Normal Equation": w_ne
})

comparison_df

Unnamed: 0,Gradient Descent,Normal Equation
0,4706527.0,4706527.0
1,760642.9,760642.9
2,678224.1,678224.1
3,269048.3,269048.3
