In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

In [4]:
# Load the home-price dataset from the CSV next to the notebook,
# then preview five randomly chosen rows as a quick sanity check.

data_frame = pd.read_csv(filepath_or_buffer="homeprices_banglore.csv")
data_frame.sample(n=5)

Unnamed: 0,area,bedrooms,price
12,1000,2,38.0
16,1180,3,48.0
0,1056,2,39.07
8,1310,3,50.0
10,1800,3,82.0


In [6]:
# Scaling: rescale the features and the target with MinMaxScaler
# (separate scalers so predictions can be inverse-transformed with `sy`)

from sklearn import preprocessing
sx = preprocessing.MinMaxScaler()
sy = preprocessing.MinMaxScaler()

scaled_X = sx.fit_transform(data_frame.drop('price',axis='columns'))
# The scaler needs a 2-D input, so the price column becomes shape (n_samples, 1);
# -1 lets NumPy infer the row count instead of tying the cell to one dataset size
scaled_y = sy.fit_transform(data_frame['price'].values.reshape(-1,1))
# Show the scaled target as a flat vector (length inferred, not hard-coded to 20)
scaled_y.reshape(-1)

array([0.05237037, 0.65185185, 0.22222222, 0.31851852, 0.14074074,
       0.04444444, 0.76296296, 0.91111111, 0.13333333, 1.        ,
       0.37037037, 0.8       , 0.04444444, 0.05925926, 0.51111111,
       0.07407407, 0.11851852, 0.20740741, 0.51851852, 0.        ])

In [12]:
def get_MBGD(X, y_true, epochs = 100, batch_size = 5, learning_rate = 0.01):
    """Fit a linear model ``y = w . x + b`` with mini-batch gradient descent.

    Every epoch the samples are shuffled with ``np.random.permutation`` and
    consumed in consecutive slices of ``batch_size`` rows (the last slice may
    be shorter). Each slice triggers one gradient step on the mean squared
    error of that slice.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix; converted to a float ndarray.
    y_true : array-like with n_samples values
        Target values; flattened to 1-D.
    epochs : int
        Number of full passes over the data.
    batch_size : int
        Rows per mini-batch; clamped to ``n_samples`` when larger.
    learning_rate : float
        Step size applied to both gradients.

    Returns
    -------
    (w, b, cost)
        ``w`` is the weight vector (one entry per feature), ``b`` the bias and
        ``cost`` the MSE of the last processed batch, measured with the
        parameters as they were *before* that batch's update. ``cost`` is
        ``None`` when ``epochs`` is 0, because no batch is processed.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1, ``X`` has no rows, or ``X`` and
        ``y_true`` disagree on the number of samples.
    """
    # Accept lists / DataFrames as well as ndarrays; integer-array indexing
    # below requires real ndarrays.
    X = np.asarray(X, dtype=float)
    y_true = np.asarray(y_true, dtype=float).ravel()

    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # The feature count is the number of columns, the sample count the number of rows
    total_samples, number_of_features = X.shape
    if total_samples == 0:
        raise ValueError("X must contain at least one sample")
    if y_true.shape[0] != total_samples:
        raise ValueError("X and y_true must contain the same number of samples")

    # Starting point of the search: all weights at 1, bias at 0.
    # The bias must be named `b` because that is the name the update loop uses.
    w = np.ones(shape=(number_of_features))
    b = 0

    # A batch cannot hold more rows than the dataset has
    batch_size = min(batch_size, total_samples)

    # Only set inside the loop; stays None when no batch is processed
    cost = None

    for _ in range(epochs):
        # Reshuffle the rows every epoch so the batches differ between epochs
        random_indices = np.random.permutation(total_samples)
        X_tmp = X[random_indices]
        y_tmp = y_true[random_indices]

        # Walk over the shuffled data one batch at a time
        for j in range(0, total_samples, batch_size):
            Xj = X_tmp[j:j+batch_size]
            yj = y_tmp[j:j+batch_size]
            y_predicted = np.dot(w, Xj.T) + b

            # Partial derivatives of the batch MSE with respect to weights and bias
            residual = yj - y_predicted
            w_grad = -(2/len(Xj))*(Xj.T.dot(residual))
            b_grad = -(2/len(Xj))*np.sum(residual)

            # Step against the gradient, scaled by the learning rate
            w = w - learning_rate * w_grad
            b = b - learning_rate * b_grad

            # MSE of this batch, from the predictions made before the update
            cost = np.mean(np.square(residual))

    # Final parameters plus the cost of the last processed batch
    return w, b, cost


In [13]:
# Testing: train the model on the scaled data

# get_MBGD shuffles the rows with np.random.permutation every epoch; seeding
# NumPy's global RNG makes the fitted weights reproducible on Restart & Run All
np.random.seed(42)

w, b, cost = get_MBGD(
    scaled_X,
    scaled_y.reshape(-1),
    epochs = 120,
    batch_size = 5
)
w, b, cost

(array([0.71012727, 0.67816608]), -0.23331035115479395, 0.0036735030822317555)

In [17]:
# Prediction helper built on the trained weights and the fitted scalers
def predict(area,bedrooms,w,b):
    """Predict the price for one home from its area and bedroom count.

    The raw inputs are scaled with the module-level feature scaler ``sx``,
    combined linearly with the weights ``w`` and bias ``b``, and the scaled
    result is mapped back to the original price scale with ``sy``.
    """
    # The model works on scaled inputs, so scale the request the same way
    features = sx.transform([[area, bedrooms]])[0]
    area_term = w[0] * features[0]
    bedrooms_term = w[1] * features[1]
    scaled_price = area_term + bedrooms_term + b
    # Undo the target scaling to get back a price in the original units
    price = sy.inverse_transform([[scaled_price]])
    return price[0][0]

predict(area=3000, bedrooms=5, w=w, b=b)

164.78235040360536

In [18]:
predict(area=1500, bedrooms=5, w=w, b=b)

115.19587730543422

In [19]:
predict(area=750, bedrooms=4, w=w, b=b)

67.51453560663576