### House Price Prediction using Multiple Linear Regression with Gradient Descent

#### Importing the required libraries

In [3]:
import numpy as np
import pandas as pd
import math

#### Importing the data

In [4]:
# Column labels applied to the file, which is read without a header row.
column_names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV',
]

# Fields are separated by runs of whitespace, hence the regex separator.
data = pd.read_csv(
    './housing.csv',
    sep=r"\s+",
    header=None,
    names=column_names,
)
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


#### Scaling the dataset [Standardization of the data]

In [5]:
# Separate the predictors from the MEDV target column.
temp = data.drop(columns=['MEDV'])
X = np.array(temp)
Y = np.array(data['MEDV'])

from sklearn import preprocessing

# Standardized copy of the feature matrix, shown as this cell's output.
# NOTE(review): `standard` is only displayed here; the cells visible in this
# notebook go on to split and train on the unscaled `X`. Confirm whether the
# scaled features were meant to be used (the learning rate would then need
# retuning).
standard = preprocessing.scale(X)
standard

array([[-0.41978194,  0.28482986, -1.2879095 , ..., -1.45900038,
         0.44105193, -1.0755623 ],
       [-0.41733926, -0.48772236, -0.59338101, ..., -0.30309415,
         0.44105193, -0.49243937],
       [-0.41734159, -0.48772236, -0.59338101, ..., -0.30309415,
         0.39642699, -1.2087274 ],
       ...,
       [-0.41344658, -0.48772236,  0.11573841, ...,  1.17646583,
         0.44105193, -0.98304761],
       [-0.40776407, -0.48772236,  0.11573841, ...,  1.17646583,
         0.4032249 , -0.86530163],
       [-0.41500016, -0.48772236,  0.11573841, ...,  1.17646583,
         0.44105193, -0.66905833]])

#### Separating the dataset into train and test data as 80% and 20% respectively.

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state is fixed so the split
# is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)

# Report the shape of each split.
for label, split in (
    ("x_train shape : ", X_train),
    ("x_test shape  : ", X_test),
    ("y_train shape : ", Y_train),
    ("y_test shape  : ", Y_test),
):
    print(label, split.shape)


x_train shape :  (404, 13)
x_test shape  :  (102, 13)
y_train shape :  (404,)
y_test shape  :  (102,)


#### Implementing batch gradient descent for multiple linear regression.

In [7]:
def mulgradientDescent(X, Y, lR=0.000001, iterations=50000, seed=None):
    """Fit a multiple linear regression with batch gradient descent.

    A column of ones is prepended to ``X`` so that the first returned
    coefficient acts as the intercept. Each iteration applies the update
    ``theta -= (lR / m) * X^T (X @ theta - Y)`` over all ``m`` rows.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Feature matrix, one row per sample.
    Y : array-like, shape (m,) or (m, 1)
        Target values, one per row of ``X``. Flattened to 1-D internally so
        a column vector does not broadcast the residual into an (m, m) matrix.
    lR : float, optional
        Learning rate (step size). Defaults to the previously hard-coded value.
    iterations : int, optional
        Number of full-batch updates. Defaults to the previously hard-coded
        value.
    seed : int or None, optional
        When given, the initial coefficients are drawn from a dedicated
        generator seeded with this value, making the fit reproducible. When
        ``None`` (default) NumPy's global random state is used, as before.

    Returns
    -------
    numpy.ndarray, shape (n + 1,)
        ``theta[0]`` is the intercept; ``theta[1:]`` are the feature weights
        in the column order of ``X``.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, has no rows, or ``Y`` does not supply exactly
        one target per row of ``X``.
    """
    features = np.asarray(X, dtype=float)
    if features.ndim != 2:
        raise ValueError("X must be 2-dimensional (samples x features)")
    num_rows, num_cols = features.shape
    if num_rows == 0:
        raise ValueError("X must contain at least one sample")

    targets = np.asarray(Y, dtype=float).reshape(-1)
    if targets.shape[0] != num_rows:
        raise ValueError("Y must provide exactly one target per row of X")

    # Prepend the bias column so the intercept is learned like any other weight.
    design = np.column_stack((np.ones(num_rows), features))
    # The transpose never changes, so take it once instead of every iteration.
    design_T = design.T

    # Random start in [0, 1); a seed makes the starting point repeatable.
    if seed is None:
        theta = np.random.random(size=num_cols + 1)
    else:
        theta = np.random.default_rng(seed).random(num_cols + 1)

    step = lR / float(num_rows)
    for _ in range(iterations):
        residual = design @ theta - targets
        theta = theta - step * (design_T @ residual)

    return theta

#### Train the model and split the fitted parameters into intercept and feature weights

In [8]:
# Fit on the training split. The first coefficient belongs to the column of
# ones that mulgradientDescent prepends, i.e. it is the intercept.
fitted_params = mulgradientDescent(X_train, Y_train)

theta0 = fitted_params[0]
theta = fitted_params[1:].copy()

#### Predicting the values for test data.

In [9]:

# Linear prediction: feature weights applied to each test row, plus intercept.
Y_test_est = np.dot(X_test, theta) + theta0

# Show the true targets followed by the model's estimates.
print("Test Y value:", Y_test, sep="\n")
print("\n\nEstimated Y Value:", Y_test_est, sep="\n")

Test Y value:
[22.6 50.  23.   8.3 21.2 19.9 20.6 18.7 16.1 18.6  8.8 17.2 14.9 10.5
 50.  29.  23.  33.3 29.4 21.  23.8 19.1 20.4 29.1 19.3 23.1 19.6 19.4
 38.7 18.7 14.6 20.  20.5 20.1 23.6 16.8  5.6 50.  14.5 13.3 23.9 20.
 19.8 13.8 16.5 21.6 20.3 17.  11.8 27.5 15.6 23.1 24.3 42.8 15.6 21.7
 17.1 17.2 15.  21.7 18.6 21.  33.1 31.5 20.1 29.8 15.2 15.  27.5 22.6
 20.  21.4 23.5 31.2 23.7  7.4 48.3 24.4 22.6 18.3 23.3 17.1 27.9 44.8
 50.  23.  21.4 10.2 23.3 23.2 18.9 13.4 21.9 24.8 11.9 24.3 13.8 24.7
 14.1 18.7 28.1 19.8]


Estimated Y Value:
[14.08994898 25.94799608 27.73194582 18.09166331 20.01605586 23.8849725
 22.22599421 25.04592929 16.49726911 12.80580765  1.59795018  7.81568633
 19.45826283 12.42369517 32.08291087 27.82824819 24.1382973  30.97766065
 21.23843588 21.95639028 24.95502226 22.22112494 20.42425416 28.48382567
 20.84804825 13.79623886 23.02462188 16.89481938 26.47915053 17.97204351
 20.82193885 22.88238299 27.38795065 25.0333929  22.99838338 22.24568838
 14.409507

#### Calculating the error / accuracy of the model using root mean squared error

In [10]:
# Mean of the squared residuals on the test split, then its square root.
residuals = Y_test - Y_test_est
MSE = np.mean(residuals ** 2)
RMSE = math.sqrt(MSE)

print("Root Mean Square Error:\n")
print(RMSE)

Root Mean Square Error:

7.316105509216002
