<a href="https://colab.research.google.com/github/AmaanAmythAlegend/ML-Stuff/blob/main/Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
import copy, math

In [None]:
# Load the housing dataset; the path is relative to the notebook's working directory.
data = pd.read_csv("housing_data.csv")

In [None]:
# Preview the first rows to check the column layout.
data.head()

Unnamed: 0,Zip Code,Price,Beds,Baths,Living Space,Address,City,State,Zip Code Population,Zip Code Density,County,Median Household Income,Latitude,Longitude
0,10013,3999000.0,2,3,1967,74 GRAND ST APT 3,New York,New York,29563,20967.9,New York,370046.0,40.72001,-74.00472
1,10013,3999000.0,2,3,1967,74 GRAND ST APT 3,New York,New York,29563,20967.9,New York,370046.0,40.72001,-74.00472
2,10014,1650000.0,1,1,718,140 CHARLES ST APT 4D,New York,New York,29815,23740.9,New York,249880.0,40.73407,-74.00601
3,10014,760000.0,3,2,1538,38 JONES ST,New York,New York,29815,23740.9,New York,249880.0,40.73407,-74.00601
4,10014,1100000.0,1,1,600,81 BEDFORD ST APT 3F,New York,New York,29815,23740.9,New York,249880.0,40.73407,-74.00601


In [None]:
# Target vector: the "Price" column.
Y = data["Price"].values
# Feature matrix: four columns selected by position.
# NOTE(review): positional indices break silently if the CSV column order changes;
# presumably these are Beds, Baths, Living Space and Median Household Income — confirm.
X = data.iloc[:, [2,3,4,11]].values
# NOTE(review): test_size = 0.95 leaves only 5% of the rows for training — confirm this is intentional.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.95, random_state = 40)
Y_train

array([384900., 419900., 175000., ..., 179900., 325000., 250000.])

## Polynomial Regression and Feature Scaling

In [None]:
# Polynomial design matrix: element-wise powers 1..6 of X_train, stacked side by side.
P_train = np.c_[tuple(X_train**degree for degree in range(1, 7))]

def zscore_normalize_features(X,rtn_ms=False):
    """
    Standardize X along axis 0: subtract the mean and divide by the standard deviation.

    Args:
      X      : array-like of numeric data; statistics are computed along axis 0
      rtn_ms : when True, also return the mean and the divisor that were applied

    Returns:
      X_norm : standardized data, same shape as X
      mu     : mean along axis 0 (only when rtn_ms is True)
      sigma  : divisor along axis 0 (only when rtn_ms is True); this is the
               standard deviation with exact zeros replaced by 1
    """
    mu     = np.mean(X,axis=0)
    sigma  = np.std(X,axis=0)
    # A constant column has zero spread; dividing by it would produce NaN/inf.
    # Using 1 as the divisor maps such a column to all zeros instead.
    sigma  = np.where(sigma == 0, 1.0, sigma)
    X_norm = (X - mu)/sigma

    if rtn_ms:
        return(X_norm, mu, sigma)
    else:
        return(X_norm)

# Scale every polynomial column; the mean/std are not requested from the helper here.
# NOTE(review): the same mu/sigma would be needed to transform X_test consistently — confirm whether they should be kept.
P_train = zscore_normalize_features(P_train)
P_train.shape

(1999, 24)

## Initial Parameters

In [None]:
# Starting point for gradient descent: the bias and each of the 24 weights begin at 100000.
b_init = 100000.
w_init = np.full(24, 100000.)

In [None]:
def predict(x, w, b):
  """
  Evaluate the linear model for one input.

  Args:
    x : feature values
    w : weights, combined with x through np.dot(w, x)
    b : bias added to the dot product

  Returns:
    np.dot(w, x) + b
  """
  weighted_sum = np.dot(w, x)
  return weighted_sum + b

In [None]:
def compute_cost(x, y, w, b):
  """
  Halved mean squared error of the linear model np.dot(x, w) + b.

  Args:
    x : data whose first axis indexes the m examples
    y : target values, one per example
    w : model weights
    b : model bias

  Returns:
    Sum of squared residuals divided by 2*m; 0. when x has no examples.
  """
  m = x.shape[0]
  if m == 0:
    # No examples: nothing to accumulate, and this avoids dividing by zero.
    return 0.
  # A single matrix-vector product gives every residual at once,
  # instead of one Python-level dot product per example.
  err = np.dot(x, w) + b - y
  return np.dot(err, err) / (2*m)

In [None]:
def compute_gradient(x, y, w, b):
  """
  Gradient of the halved mean squared error for the linear model np.dot(x, w) + b.

  Args:
    x : 2-D array of shape (m, n): m examples with n features
    y : target values, one per example
    w : model weights, length n
    b : model bias

  Returns:
    df_dw : gradient with respect to w, shape (n,)
    df_db : gradient with respect to b (scalar)

  Raises:
    ValueError: if x contains no examples (the averages would be undefined).
  """
  m, _ = x.shape
  if m == 0:
    raise ValueError("compute_gradient needs at least one training example")

  # Residuals for all examples in one matrix-vector product; this replaces a
  # Python loop over the m rows that ran on every gradient-descent step.
  err = np.dot(x, w) + b - y
  df_dw = np.dot(x.T, err) / m
  df_db = np.sum(err) / m
  return df_dw, df_db

In [None]:
def gradient_descent(x, y, w_init, b_init, alpha, iter, cost, grad):
  """
  Run batch gradient descent for a fixed number of steps.

  Args:
    x, y   : training data, passed unchanged to cost and grad
    w_init : initial weights (not modified; a copy is updated)
    b_init : initial bias
    alpha  : learning rate
    iter   : number of update steps
    cost   : callable cost(x, y, w, b) returning the current cost
    grad   : callable grad(x, y, w, b) returning (dj_dw, dj_db)

  Returns:
    w     : weights after the last step
    b     : bias after the last step
    J_his : list with the cost after each step (length iter)
  """
  w = copy.deepcopy(w_init)  # keep the caller's array untouched
  b = b_init
  J_his = []
  # Progress is printed roughly ten times over the whole run.
  report_every = math.ceil(iter / 10)

  for i in range(iter):
    dj_dw, dj_db = grad(x,y,w,b)
    w = w - alpha*dj_dw
    b = b - alpha*dj_db
    # Record the cost after every update so the returned history is usable
    # (for example, to plot a learning curve).
    J_his.append(cost(x, y, w, b))
    if i % report_every == 0:
      print("Iteration ", i, ": Cost ", J_his[-1])
      print("dj_dw is ", dj_dw)
      print("dj_db is ", dj_db)
      print()
  return w, b, J_his

In [None]:
# Gradient-descent settings: number of update steps and learning rate.
# NOTE(review): the name `iter` shadows Python's built-in iter() for the rest of the notebook.
iter = 10000
alpha = 0.1

## Computing minimum cost using gradient descent

In [None]:
# Fit the weights and bias on the scaled polynomial features.
w_final, b_final, J_his = gradient_descent(P_train, Y_train, w_init, b_init, alpha, iter, compute_cost, compute_gradient)
# Number of training rows.
m,_ = X_train.shape

Iteration  0 : Cost  397187426163.2892
dj_dw is  [686544.89768612 637721.16624515 657078.72665629 260995.17348971
 823158.32215526 731272.21267142 669754.20152284 313393.3829022
 867343.45292457 772595.8136375  614766.88899718 345770.08165213
 815189.50872006 761866.54470658 545151.90694291 353150.96823338
 732590.05257608 720761.9642023  486554.08611495 347445.57627024
 659119.36940446 671332.22455929 441431.59288823 339471.27394776]
dj_db is  -519394.152576288

Iteration  1000 : Cost  235482442071.85785
dj_dw is  [   84.40201464  -352.37407721  -708.46964238  -577.26798411
  -300.61484794   830.14711709  1428.73016554  1036.82887449
    31.84590903   -97.42098607    22.34939665   200.89109503
   434.51127418  -711.0374786  -1283.99810869 -1002.5946593
   242.57391624  -261.04656273  -770.2186045   -774.73794934
  -516.66477071   607.2197553   1307.80789922  1133.47586941]
dj_db is  -3.8424626984197895e-10

Iteration  2000 : Cost  234356335459.19934
dj_dw is  [  328.99313995  -288.745

In [None]:
# Number of training rows (kept as a notebook-level name).
m, _ = P_train.shape

# Show the fitted model's prediction next to the true target for every training row.
for features, actual in zip(P_train, Y_train):
  print("Predicted: ", predict(features, w_final, b_final))
  print("Actual: ", actual)
  print()

## Computing minimum cost using the normal equation

In [None]:
# Closed-form least-squares fit: prepend a column of ones so the first
# coefficient of `final` is the intercept and the rest are the weights.
b = np.ones((P_train.shape[0], 1))
c = np.hstack((b, P_train))
# Solve min ||c @ final - Y_train||^2 directly. Explicitly inverting c.T @ c is
# numerically fragile (and fails outright when it is singular), which is likely
# here because powers of the same feature are strongly correlated.
final = np.linalg.lstsq(c, Y_train, rcond=None)[0]
def compute_cost1(x, y, w):
  """
  Print the halved mean squared error of the linear fit np.matmul(x, w).

  Args:
    x : design matrix, one row per example (including any intercept column)
    y : target values, one per example
    w : coefficient vector matching the columns of x

  Returns:
    None; the cost is written to standard output.
  """
  # Use the row count of the data actually passed in, not a notebook-level variable.
  n_examples = x.shape[0]
  f = np.subtract(np.matmul(x,w), y)
  print(np.matmul(f.T, f)/(2*n_examples))
# Training cost of the normal-equation solution, for comparison with gradient descent.
compute_cost1(c, Y_train, final)

222062873718.62543


The normal-equation cost is not far from the value reached by gradient descent, although the training error is still very high. The model suffers from high bias even though a sixth-order polynomial was fitted to the data.