In [124]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [125]:
df = pd.read_csv('Housing.csv')
df

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished
...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,1820000,3000,2,1,1,yes,no,yes,no,no,2,no,unfurnished
541,1767150,2400,3,1,1,no,no,no,no,no,0,no,semi-furnished
542,1750000,3620,2,1,1,yes,no,no,no,no,0,no,unfurnished
543,1750000,2910,3,1,1,no,no,no,no,no,0,no,furnished


In [126]:
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [127]:
X = df[['area', 'bathrooms', 'bedrooms']].values
y = df['price'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [128]:
def hypo(X, theta):
    return np.dot(X, theta)

In [129]:
def compute_cost(X, y, theta):
    m = len(y)
    pred = hypo(X, theta)
    cost = (1/(2*m)) * np.sum(np.square(pred-y))
    return cost

In [130]:
def grad_step(X, y, theta, lear_rate):
    m = len(y)
    pred = hypo(X, theta)
    grad = (1/m) * np.dot(X.T, (pred - y))
    theta = theta - lear_rate * grad
    return theta

In [131]:
def grad_des(X, y, theta, lear_rate = 0.01, iterations = 1000):
    cost_history = []
    for i in range(iterations):
        theta = grad_step(X, y, theta, lear_rate)
        cost = compute_cost(X, y, theta)
        cost_history.append(cost)
    return theta, cost_history

In [132]:
def skl_reg(X, y):
    model = LinearRegression()
    model.fit(X, y)
    return model.coef_, model.intercept_

In [133]:
theta = np.zeros(X_train_scaled.shape[1])
theta_final, cost_history = grad_des(X_train_scaled, y_train, theta, lear_rate=0.01, iterations=1000)
theta_final

array([760642.94315328, 677847.72569795, 269437.21847893])

In [146]:
y_pred = hypo(X_test_scaled, theta_final)
mse_test = compute_cost(X_test_scaled, y_test, theta_final)
mse_test

np.float64(13166967617611.596)

In [138]:
coef_, intercept_ = skl_reg(X_train_scaled, y_train)
coef_, intercept_


(array([760642.88074097, 678224.07261084, 269048.2662201 ]),
 np.float64(4706527.385321101))

In [139]:
X_b = np.c_[np.ones((X_train_scaled.shape[0],1)),X_train_scaled]
theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)
theta_best

array([4706527.3853211 ,  760642.88074097,  678224.07261084,
        269048.2662201 ])

In [150]:
X_test_b = np.c_[np.ones((X_test_scaled.shape[0], 1)), X_test_scaled]

y_pred_matrix = X_test_b.dot(theta_best)

mse_test_matrix = np.mean((y_pred_matrix - y_test)**2)
mse_test_matrix

np.float64(2750040479309.0527)