In [200]:
import numpy as np
import pandas as pd
import random

# Load the raw house-price table. The path is relative, so the CSV must sit
# in the notebook's working directory.
DATA_FILE = "House Price.csv"
hpdata = pd.read_csv(DATA_FILE)

In [201]:
# Separate the feature matrix from the regression target.
TARGET_COLUMN = "Median Value of Houses in Lakhs"
x = hpdata.drop([TARGET_COLUMN], axis=1).to_numpy()
y = hpdata[TARGET_COLUMN].to_numpy()

# 70/30 train/test split. The sizes are derived from the data instead of being
# hard-coded (for 506 rows this still yields 354 training rows), and a
# dedicated seeded generator makes the split reproducible on Restart & Run All
# without touching the global `random` state.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.7
n_rows = x.shape[0]
n_train = int(TRAIN_FRACTION * n_rows)
train_index = random.Random(SPLIT_SEED).sample(range(n_rows), n_train)
X_train = x[train_index]
y_train = y[train_index]

# Every row not drawn for training goes to the test set (ascending order).
# A set makes the membership check O(1) instead of scanning the list each time.
train_lookup = set(train_index)
test_index = [row for row in range(n_rows) if row not in train_lookup]
X_test = x[test_index]
y_test = y[test_index]

def linear_regression(X_train, y_train, X_test, y_test):
    """Fit ordinary least squares on the training data and predict the test set.

    A column of ones is prepended to both feature matrices so the first
    weight acts as the intercept.

    Parameters:
        X_train: 2-D array of training features (rows are samples).
        y_train: training targets, one per row of ``X_train``.
        X_test: 2-D array of features to predict for.
        y_test: unused; accepted so the signature matches ``gradient_descent``.

    Returns:
        Array of predictions, one per row of ``X_test``.
    """
    X_train = np.c_[np.ones(X_train.shape[0]), X_train]
    X_test = np.c_[np.ones(X_test.shape[0]), X_test]
    # Solve the least-squares problem directly instead of forming
    # inv(X.T @ X): the explicit inverse amplifies rounding error and breaks
    # down when features are collinear, whereas lstsq returns the
    # minimum-norm solution in that case.
    w, *_ = np.linalg.lstsq(X_train, y_train, rcond=None)
    return X_test @ w

def sse(y_pred, y_test):
    """Return the sum of squared differences between predictions and targets."""
    residuals = y_pred - y_test
    return np.sum(np.square(residuals))

def mse(y_pred, y_test):
    """Return the mean of squared differences between predictions and targets."""
    residuals = y_pred - y_test
    return np.mean(np.square(residuals))

def r2(y_pred, y_test):
    """Return the coefficient of determination of predictions against targets.

    Computed as ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of squared
    residuals and ``SS_tot`` is the sum of squared deviations of ``y_test``
    from its mean.

    When ``y_test`` is constant, ``SS_tot`` is zero and the ratio is
    undefined. Instead of dividing by zero (nan/-inf plus a runtime warning),
    this returns 1.0 for a perfect fit and 0.0 otherwise.
    """
    ss_res = np.sum((y_pred - y_test) ** 2)
    ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
    if ss_tot == 0:
        return 1.0 if ss_res == 0 else 0.0
    return 1 - ss_res / ss_tot

# Closed-form baseline: fit on the training split, score on the held-out split.
y_pred = linear_regression(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)
linreg_test_sse = sse(y_pred, y_test)

print("SSE for Linear Regression: ", linreg_test_sse)

def gradient_descent(X_train, y_train, X_test, y_test, learning_rate, epochs):
    """Fit linear-regression weights by batch gradient descent, then predict.

    A column of ones is prepended to both feature matrices so the first
    weight acts as the intercept. Weights start at zero and each epoch takes
    one full-batch step along ``X.T @ (X @ w - y)`` (the summed, not
    averaged, residual gradient) scaled by ``learning_rate``.

    Parameters:
        X_train: 2-D array of training features.
        y_train: training targets, one per row of ``X_train``.
        X_test: 2-D array of features to predict for.
        y_test: unused by this function.
        learning_rate: step size applied to the summed gradient.
        epochs: number of full-batch update steps.

    Returns:
        Tuple ``(y_pred, w)``: predictions for ``X_test`` and the fitted
        weight vector (intercept first).
    """
    design_train = np.c_[np.ones(X_train.shape[0]), X_train]
    design_test = np.c_[np.ones(X_test.shape[0]), X_test]

    w = np.zeros(design_train.shape[1])
    for _ in range(epochs):
        fitted = design_train @ w
        step = learning_rate * (design_train.T @ (fitted - y_train))
        w = w - step

    return design_test @ w, w

# Step size shared by both gradient-descent runs below.
LEARNING_RATE = 0.00000001


def _fit_and_report(epochs, learning_rate=LEARNING_RATE):
    """Train with gradient descent for ``epochs`` steps and print the metrics.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``
    splits. Returns ``(y_pred_test, y_pred_train, w)``.
    """
    y_pred_test, w = gradient_descent(X_train, y_train, X_test, y_test, learning_rate, epochs)
    # gradient_descent starts from zero weights and has no randomness, so a
    # second training run on the same data would reproduce exactly these
    # weights. Reuse them for the training-set predictions instead of paying
    # for the whole optimisation a second time.
    y_pred_train = np.c_[np.ones(X_train.shape[0]), X_train] @ w

    print(f"\nFor {epochs} iterations")

    print("SSE for Gradient Descent: ", sse(y_pred_test, y_test))

    print("Coefficients of the Optimized model: ", w)
    print("SSE for Training Data: ", sse(y_pred_train, y_train))
    print("SSE for Testing Data: ", sse(y_pred_test, y_test))
    print("MSE for train: ", mse(y_pred_train, y_train))
    print("MSE for test: ", mse(y_pred_test, y_test))
    print("R2 for train: ", r2(y_pred_train, y_train))
    print("R2 for test: ", r2(y_pred_test, y_test))
    return y_pred_test, y_pred_train, w


# Short run: the model is far from converged after only 100 steps.
y_pred_test, y_pred_train, w = _fit_and_report(100)
w1 = w  # kept for backward compatibility; same weights as `w`

# Long run: many more steps at the same tiny learning rate.
y_pred_test, y_pred_train, w = _fit_and_report(1000000)
w1 = w  # kept for backward compatibility; same weights as `w`

SSE for Linear Regression:  3569.992625392454

For 100 iterations
SSE for Gradient Descent:  29414.60316119295
Coefficients of the Optimized model:  [ 0.00162501 -0.01604402  0.05447591 -0.00408422  0.00023888  0.00057082
  0.01229809  0.04429721  0.01043032 -0.01230131  0.03659058  0.02440911]
SSE for Training Data:  66051.77646652536
SSE for Testing Data:  29414.60316119295
MSE for train:  186.5869391709756
MSE for test:  193.51712606047994
R2 for train:  -1.2068483283716245
R2 for test:  -1.3238962719879805

For 1000000 iterations
SSE for Gradient Descent:  3850.3931771942734
Coefficients of the Optimized model:  [ 0.52096603 -0.18286438  0.04126938 -0.10129027  0.88833694  0.04902498
  6.94855258 -0.04724709 -1.04589486  0.24085107 -0.01530432 -0.48946061]
SSE for Training Data:  11598.410994918115
SSE for Testing Data:  3850.3931771942734
MSE for train:  32.76387286700032
MSE for test:  25.33153406048864
R2 for train:  0.6124868204131652
R2 for test:  0.695800269643793
