# Simple Gradient Descent Demo

# 1. Importing data

In [3]:
import pandas as pd
import numpy as np


In [4]:
# Read the sample dataset from CSV into a DataFrame and render it below the cell.
# NOTE(review): this is an absolute, machine-specific Windows path — adjust it
# before running the notebook on another machine.
sample_data_df = pd.read_csv(
    filepath_or_buffer=r"F:\Data Science Grand Track\Machine Learning\datasets\SampleDataGE.csv"
)
display(sample_data_df)

Unnamed: 0,Size (sq. m),No. rooms,Age (yrs),Price
0,102,4,2,190257
1,67,4,29,216410
2,74,2,27,132887
3,63,4,1,197341
4,114,3,12,127647
5,109,5,9,225818
6,73,3,21,336404
7,108,4,24,324823
8,69,2,29,313475
9,81,4,11,144088


# 2. Set up hyperparameters

## Set up the learning rate, theta (bias + weights) vector, and number of epochs

In [14]:
# Step size applied to the gradient on every parameter update
lr_rate = 1e-4

# Parameter vector [b, w_1, w_2, w_3]: the bias followed by one weight per
# feature, all starting at zero
theta = np.zeros(shape=(4,))

# Number of epochs, i.e. how many times the training loop updates theta
epochs = 10

# Echo the configuration, one value per line: learning rate, theta, epoch count
print(lr_rate, theta, epochs, sep="\n")



0.0001
[0. 0. 0. 0.]
10


## Set up the feature matrix and target vector

In [6]:
# Feature matrix: one row per sample, one column per input variable
X = sample_data_df.loc[:, ['Size (sq. m)', 'No. rooms', 'Age (yrs)']]
print(X)

# Target vector: the 'Price' column the model is trained to predict
y = sample_data_df.loc[:, 'Price']
print(y)

    Size (sq. m)  No. rooms  Age (yrs)
0            102          4          2
1             67          4         29
2             74          2         27
3             63          4          1
4            114          3         12
5            109          5          9
6             73          3         21
7            108          4         24
8             69          2         29
9             81          4         11
10            60          5          9
11            62          5         32
12            87          4         19
13            61          5         15
14            62          4          9
15            69          5         21
16           119          2         24
17           101          3         21
18            76          4         20
19            91          4          7
20            75          4         19
0     190257
1     216410
2     132887
3     197341
4     127647
5     225818
6     336404
7     324823
8     313475
9     144088
10    182709

In [7]:
# Sanity check: X and y must have the same number of rows (first dimension)
print(X.shape, y.shape, sep="\n")

(21, 3)
(21,)


# 3. Initialize training

In [None]:
"""Pseudocode:

for epoch from 1 to epochs:

    # a. Calculate predicted values
    # Matrix multiplication: X (m, n) dot theta (n, 1) -> y_pred (m, 1)
    y_predicted = X . theta

    # b. Calculate the loss (cost function)
    # Mean Squared Error with L2 Regularization (Ridge)
    loss = (1 / (2 * m)) * sum((y_predicted - y)^2) + (lambda / (2 * m)) * sum(theta[1:]^2)

    # c. Calculate the gradient
    # This is the derivative of the loss function
    # It tells us the direction to update the weights
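    # The bias theta[0] is not regularized, matching sum(theta[1:]^2) in the loss
    regularized_theta = theta with theta[0] replaced by 0
    gradient = (1 / m) * X.transpose() . (y_predicted - y) + (lambda / m) * regularized_theta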

    # d. Update the weights
    # Move the weights in the opposite direction of the gradient
    theta = theta - learning_rate * gradient
    
"""

## Add a bias column (column full of 1s) into X

In [8]:
# Prepend a column of ones to X so that theta[0] acts as the bias term.
# NOTE: this cell reassigns X, so it is not idempotent — running it a second
# time stacks another ones column in front of the existing one.
bias_column = np.ones(shape=(X.shape[0], 1))

# Join along the column axis: [bias | original features]
X = np.concatenate([bias_column, X], axis=1)

In [13]:
# Render the design matrix (bias column included), then print its shape
display(X)
print(f"{X.shape}")

array([[  1., 102.,   4.,   2.],
       [  1.,  67.,   4.,  29.],
       [  1.,  74.,   2.,  27.],
       [  1.,  63.,   4.,   1.],
       [  1., 114.,   3.,  12.],
       [  1., 109.,   5.,   9.],
       [  1.,  73.,   3.,  21.],
       [  1., 108.,   4.,  24.],
       [  1.,  69.,   2.,  29.],
       [  1.,  81.,   4.,  11.],
       [  1.,  60.,   5.,   9.],
       [  1.,  62.,   5.,  32.],
       [  1.,  87.,   4.,  19.],
       [  1.,  61.,   5.,  15.],
       [  1.,  62.,   4.,   9.],
       [  1.,  69.,   5.,  21.],
       [  1., 119.,   2.,  24.],
       [  1., 101.,   3.,  21.],
       [  1.,  76.,   4.,  20.],
       [  1.,  91.,   4.,   7.],
       [  1.,  75.,   4.,  19.]])

(21, 4)


In [None]:
# Batch gradient descent for linear regression on the plain MSE loss
# (no regularization term is applied in this loop).
#
# Inputs taken from earlier cells: X (design matrix with the bias column),
# y (targets), theta (starting parameters), lr_rate and epochs.
#
# NOTE: theta is read and then reassigned here, so re-running this cell
# continues training from the current theta instead of starting over.
# Re-run the hyperparameter cell first to restart from the initial theta.

# Work on a plain float array so the arithmetic below is pure NumPy.
y_true = np.asarray(y, dtype=float)
n_samples = y_true.shape[0]

# Fail early with a readable message instead of a cryptic dot-product error,
# e.g. when the bias-column cell was skipped or executed twice.
if n_samples == 0:
    raise ValueError("Cannot run gradient descent on an empty dataset.")
if X.shape != (n_samples, theta.shape[0]):
    raise ValueError(
        f"Shape mismatch: X is {X.shape}, y is {y_true.shape}, theta is {theta.shape}. "
        "X must be (m, n) with the bias column included and theta must have n entries."
    )

# Loop-invariant: the transposed design matrix used by every gradient step.
X_transposed = X.transpose()

for epoch in range(epochs):
    # 1. Predicted values for the current parameters
    y_pred = X.dot(theta)
    residuals = y_pred - y_true

    # 2. Mean squared error of the current predictions
    #    (`mse` stays available as a short alias of `mean_square_err`)
    mean_square_err = mse = (1 / n_samples) * np.sum(residuals ** 2)
    print(f"Epoch {epoch + 1}/{epochs} - MSE: {mean_square_err}")

    # 3. Gradient of the MSE with respect to theta: (2/m) * X^T (y_pred - y)
    gradient = (2 / n_samples) * X_transposed.dot(residuals)
    print(gradient)

    # 4. Step against the gradient, scaled by the learning rate
    theta = theta - lr_rate * gradient
    print(theta)
    



MSE6689048904.2570095
[ -19765.35079778   99214.52332469 -108463.64088806 -591696.99126483]
[  51.4417634  2382.78411615  229.27397522 1216.31137073]
MSE6652222952.183059
[ -19272.51123782  130212.84853601 -106784.06047395 -573829.24581798]
[  53.36901453 2369.76283129  239.95238127 1273.69429531]
MSE6616763233.270189
[ -19351.14869223  112163.26063205 -107235.50650663 -565944.73072477]
[  55.3041294  2358.54650523  250.67593192 1330.28876839]
MSE6582610286.9587965
[ -19160.3490316   117545.87648588 -106677.10555514 -553616.41542971]
[  57.2201643  2346.79191758  261.34364248 1385.65040993]
MSE6549712973.619425
[ -19100.72506437  111825.17619027 -106605.27296103 -543712.32354666]
[  59.13023681 2335.60939996  272.00416977 1440.02164228]
MSE6518023465.158477
[ -18981.34499497  111454.99802191 -106306.9146335  -532954.21870492]
[  61.02837131 2324.46390016  282.63486123 1493.31706415]
MSE6487496054.024983
[ -18893.12906376  108594.49607264 -106121.69503829 -522898.32633275]
[  62.9176842