In [1]:
import numpy as np
import pandas as pd
import random
import math

df = pd.read_csv("HousingPrice.csv")

Price = df['price']
FloorArea = df['lotsize']
NoOfBedrooms = df['bedrooms']
NoOfBathrooms = df['bathrms']

## Feature scaling on FlooeArea

In [2]:
FloorAreaMean = np.mean(FloorArea)
FloorAreaMin = min(FloorArea)
FloorAreaMax = max(FloorArea)
FloorAreaScaled = (FloorArea - FloorAreaMean)/(FloorAreaMax - FloorAreaMin)
FloorArea, FloorAreaScaled

(0      5850
 1      4000
 2      3060
 3      6650
 4      6360
        ... 
 541    4800
 542    6000
 543    6000
 544    6000
 545    6000
 Name: lotsize, Length: 546, dtype: int64,
 0      0.048092
 1     -0.079056
 2     -0.143661
 3      0.103075
 4      0.083143
          ...   
 541   -0.024073
 542    0.058401
 543    0.058401
 544    0.058401
 545    0.058401
 Name: lotsize, Length: 546, dtype: float64)

## Creating useful variables

In [3]:
n, m = df.shape
n, m

(546, 13)

In [4]:
n_train = math.floor(n * 0.70)
n_test = n - n_train
n_train, n_test

(382, 164)

In [6]:
col = [1] * n
data_scaling = list(zip(col,FloorAreaScaled,NoOfBedrooms,NoOfBathrooms))
len(data_scaling)

546

In [116]:
# X_train, Y_train, X_test, Y_test with scaling
X_train = np.array(data_scaling[:n_train])
Y_train = np.array(Price[:n_train])

X_test = np.array(data_scaling[n_train+1:])
Y_test = np.array(Price[n_train+1:])

In [117]:
X_train.shape

(382, 4)

## Function to find slope

In [118]:
def Slope(theta, X_train, Y_train):
    slope = np.zeros(theta.shape)
    for i in range(len(X_train)):
        itr = 0
        for j in range(theta.shape[1]):
            itr = itr + theta[0][j] * X_train[i][j]
        slope += (itr - Y_train[i]) * X_train[i]
    return slope

# Using batch gradient without regularisation

In [119]:
lr = 0.001
theta = np.zeros((1,4))
theta

array([[0., 0., 0., 0.]])

In [120]:
for i in range(5000):
    theta = theta - (lr/n_train)*Slope(theta,X_train,Y_train)
theta

array([[ 7645.16561033,  7527.93232483, 10966.99872425, 18651.19085438]])

In [121]:
theta = theta.reshape((4,1))
theta.shape

(4, 1)

In [123]:
Y_pred = np.dot(X_test, theta)
error = 0
for i in range(n_test-1):
    error += abs((Y_test[i] - Y_pred[i][0])/Y_test[i])
error /= n_test
error

0.20342942821184606

# Using batch gradient with regularisation

In [124]:
lr = 0.001
Lambda = -120
theta = np.zeros((1,4))
theta

array([[0., 0., 0., 0.]])

In [125]:
for epochs in range(5000):
    cof = 1 - ((lr*Lambda)/n_train)
    theta = cof*theta - (lr/n_train)*Slope(theta, X_train,Y_train)
theta

array([[10854.7790114 , 18034.6716255 ,  5068.24913908, 31438.87102068]])

In [126]:
theta = theta.reshape((4,1))
theta.shape

(4, 1)

In [127]:
theta

array([[10854.7790114 ],
       [18034.6716255 ],
       [ 5068.24913908],
       [31438.87102068]])

In [128]:
Y_pred = np.dot(X_test, theta)
error = 0
for i in range(n_test-1):
    error += abs((Y_test[i] - Y_pred[i])/Y_test[i])
error = error/n_test

print("MEAN ABSOLUTE PERCENTAGE ERROR: ")
float(error*100)

array([0.18921248])

# Using Stochastic gradient without regularisation.

In [165]:
# FUNCTION TO FIND SLOPE FOR STOCHASTIC DESCENT
def SlopeStoch(theta, X_train, Y, ind):
	itr = 0
	for j in range(theta.shape[1]):
		itr = itr + theta[0][j]*X_train[j]
	return (itr - Y) * X_train[ind]


In [176]:
lr = 0.00001
theta = np.zeros((1,4))
theta

array([[0., 0., 0., 0.]])

In [177]:
for iter in range(10):
    for i in range(X_train.shape[0]):
        theta = theta - (lr)*SlopeStoch(theta, X_train, Y_train[i], i)
theta

array([[2222.39526393,   16.94489919, 5546.26936474, 3125.24588452]])

In [178]:
theta = theta.reshape((4,1))
theta.shape

(4, 1)

In [180]:
Y_pred = np.dot(X_test, theta)
error = 0
for i in range(n_test-1):
    error += abs((Y_test[i] - Y_pred[i])) / Y_test[i]
error = error/n_test

print("MEAN ABSOLUTE PERCENTAGE ERROR: ")
float(error*100)

array([0.68200151])

# Using Stochastic gradient with Regularization

In [206]:
lr = 0.00001
Lambda = 30
theta = np.zeros((1,4))
theta

array([[0., 0., 0., 0.]])

In [207]:
for itr in range(10):
  for i in range(X_train.shape[0]):
    cof = 1-((Lambda*lr)/n_train)
    theta = cof*theta - lr*(SlopeStoch(theta, X_train, Y_train[i], i))
theta

array([[2219.19847494,   16.93408054, 5538.96715982, 3120.76286454]])

In [208]:
theta = theta.reshape((4,1))
theta.shape

(4, 1)

In [209]:
Y_pred = np.dot(X_test, theta)
error = 0
for i in range(n_test-1):
    error += abs((Y_test[i] - Y_pred[i])) / Y_test[i]
error = error/n_test

print("MEAN ABSOLUTE PERCENTAGE ERROR: ")
float(error*100)

array([0.68242224])

# Using Minibatch gradient without regularisation for batch size = 20

In [295]:
# USEFUL FUNCTIONS FOR MINI BATCH GRADIENT 
def product(w,x):
    ans=0
    for i in range(x.shape[0]):
        ans= ans + w[0][i]*x[i]
    return ans

def gradient(x,w,y,st,bs):
    ans=np.zeros(w.shape)
    for i in range(st,st+bs):
        if(i>=x.shape[0]):
          break
        ans=(ans+(product(w,x[i])-y[i])*x[i])
    return ans


In [296]:
BatchSize = 20;
lr = 0.0002
theta = np.zeros((1,4))
theta

array([[0., 0., 0., 0.]])

In [297]:
for itr in range(50):
  for i in range(BatchSize):
    theta = theta - (lr*gradient(X_train,theta,Y_train,BatchSize*i,BatchSize))/BatchSize
theta

array([[ 4891.71936057,   221.38545923, 14598.66776056,  7156.34800691]])

In [298]:
theta = theta.reshape((4,1))
theta.shape

(4, 1)

In [299]:
Y_pred = np.dot(X_test, theta)
error = 0
for i in range(n_test-1):
    error += abs((Y_test[i] - Y_pred[i])/Y_test[i])
error = error/n_test

print("MEAN ABSOLUTE PERCENTAGE ERROR: ")
float(error*100)

array([0.25794843])

# Using Minibatch gradient with regularisation for batch size = 20

In [300]:
BatchSize = 20;
lr = 0.0002
Lambda = -20
theta = np.zeros((1,4))
theta

array([[0., 0., 0., 0.]])

In [301]:
for itr in range(50):
  for i in range(BatchSize):
    temp = 1-((Lambda*lr)/BatchSize)
    theta = (temp * theta) - (lr * gradient(X_train, theta, Y_train, BatchSize*i, BatchSize))/ BatchSize

theta

array([[ 5231.20289362,   247.60936576, 15585.22770405,  7672.11402056]])

In [302]:
theta = theta.reshape((4,1))
theta.shape

(4, 1)

In [308]:
Y_pred = np.dot(X_test, theta)
error = 0
for i in range(n_test-1):
    error += abs((Y_test[i] - Y_pred[i])/Y_test[i])
error = error/n_test

print("MEAN ABSOLUTE PERCENTAGE ERROR: ")
float(error*100)

MEAN ABSOLUTE PERCENTAGE ERROR: 


23.185650124145248