In [16]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import random

In [3]:
df = pd.read_csv("dataset/homeprices_banglore.csv")
df.head()

Unnamed: 0,area,bedrooms,price
0,1056,2,39.07
1,2600,4,120.0
2,1440,3,62.0
3,1521,3,75.0
4,1200,2,51.0


In [4]:
from sklearn import preprocessing
sx = preprocessing.MinMaxScaler()
sy = preprocessing.MinMaxScaler()

scaled_X = sx.fit_transform(df.drop('price',axis='columns'))
scaled_y = sy.fit_transform(df['price'].values.reshape(df.shape[0],1))

scaled_X

array([[0.08827586, 0.25      ],
       [0.62068966, 0.75      ],
       [0.22068966, 0.5       ],
       [0.24862069, 0.5       ],
       [0.13793103, 0.25      ],
       [0.12758621, 0.25      ],
       [0.6662069 , 0.75      ],
       [0.86206897, 0.75      ],
       [0.17586207, 0.5       ],
       [1.        , 1.        ],
       [0.34482759, 0.5       ],
       [0.68448276, 0.75      ],
       [0.06896552, 0.25      ],
       [0.10344828, 0.25      ],
       [0.5       , 0.5       ],
       [0.12931034, 0.25      ],
       [0.13103448, 0.5       ],
       [0.25517241, 0.5       ],
       [0.67931034, 0.5       ],
       [0.        , 0.        ]])

In [5]:
scaled_y

array([[0.05237037],
       [0.65185185],
       [0.22222222],
       [0.31851852],
       [0.14074074],
       [0.04444444],
       [0.76296296],
       [0.91111111],
       [0.13333333],
       [1.        ],
       [0.37037037],
       [0.8       ],
       [0.04444444],
       [0.05925926],
       [0.51111111],
       [0.07407407],
       [0.11851852],
       [0.20740741],
       [0.51851852],
       [0.        ]])

In [6]:
scaled_y.reshape(20,)

array([0.05237037, 0.65185185, 0.22222222, 0.31851852, 0.14074074,
       0.04444444, 0.76296296, 0.91111111, 0.13333333, 1.        ,
       0.37037037, 0.8       , 0.04444444, 0.05925926, 0.51111111,
       0.07407407, 0.11851852, 0.20740741, 0.51851852, 0.        ])

In [18]:
w = np.ones(shape=(scaled_X.shape[1]))
w

array([1., 1.])

In [13]:
scaled_X[3:3+10].shape

(10, 2)

In [24]:
def mbgd(X, y_true, epochs, batch_size, learning_rate = 0.01):
    w = np.ones(shape=(X.shape[1]))
    b = 0
    total_samples = X.shape[0]

    cost_list = []
    epoch_list = []

    for i in range(epochs):
        index = random.randint(0, total_samples-batch_size-1)
        mini_batch_x = X[index:index+batch_size]
        mini_batch_y = y_true[index:index+batch_size]

        y_predicted = np.dot(w, mini_batch_x.T) + b

        w_grad = -(2/total_samples)*(mini_batch_x.T.dot(mini_batch_y-y_predicted))
        b_grad = -(2/total_samples)*(mini_batch_y-y_predicted)
        
        w = w - learning_rate * w_grad
        b = b - learning_rate * b_grad
        print(b_grad)
        
        cost = np.mean(np.square(mini_batch_y-y_predicted))

        if i%10==0: # at every 10th iteration record the cost and epoch value
            cost_list.append(cost)
            epoch_list.append(i)
        
    return w, b, cost, cost_list, epoch_list

w_sgd, b_sgd, cost_sgd, cost_list_sgd, epoch_list_sgd = mbgd(scaled_X,scaled_y.reshape(scaled_y.shape[0],),10000,10)
w_sgd, b_sgd, cost_sgd

[0.03331418 0.06532439 0.07009579 0.05425287 0.1        0.04744572
 0.06344828 0.02745211 0.0294189  0.04888889]
[0.0427271  0.02452462 0.03311818 0.06480543 0.06947096 0.05397796
 0.09928265 0.04713894 0.06294854 0.0272953 ]
[0.04929501 0.04242797 0.02436147 0.03294696 0.06424088 0.06896316
 0.05364127 0.09863986 0.0468034  0.06244712]
[0.09793513 0.04648277 0.06192478 0.0269775  0.02883167 0.04774873
 0.02993056 0.05043934 0.05388702 0.06481005]
[0.05315709 0.09733088 0.04618945 0.06147685 0.02677511 0.02874667
 0.04739741 0.02981613 0.05023757 0.05360936]
[0.02384637 0.03245599 0.06295494 0.06745381 0.05290779 0.09675537
 0.04586527 0.06103604 0.02670686 0.02861276]
[0.05268949 0.0961001  0.04560635 0.06055449 0.02650949 0.02841821
 0.04675364 0.02950956 0.04979213 0.05309742]
[0.04107795 0.02349943 0.03217419 0.06204218 0.06644947 0.05243667
 0.09552115 0.04534367 0.06018079 0.0264265 ]
[0.0477001  0.04076977 0.02340735 0.03197052 0.06154398 0.06590747
 0.05211423 0.09491155 0.0450

(array([0.82833545, 0.37060616]),
 array([-0.12430678, -0.12538733, -0.11291118, -0.10834888, -0.11618703,
        -0.11751813, -0.11269326, -0.12322573, -0.13588866, -0.13931447]),
 0.001978947898103455)