In [153]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [154]:
## Importing necessary libraries

| Size (sqft) | Number of Bedrooms | Number of Floors | Age of Home | Price (1000s dollars) |
|-------------|--------------------|------------------|-------------|------------------------|
| 2104        | 5                  | 1                | 45          | 460                    |
| 1416        | 3                  | 2                | 40          | 232                    |
| 852         | 2                  | 1                | 35          | 178                    |


In [155]:
## This is our dataset that we will be working forward with
## Here, the training set has four features, namely size of the house, number of bedrooms, number of floors and age of home
## The training label is the price of the house in 1000s of dollars i.e; Price column

In [156]:
X_train = np.array([[2104, 5, 1, 45],
                    [1416, 3, 2, 40],
                    [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])

In [157]:
## The number of training examples
print(f"The number of training sets : {X_train.shape[0]}")
print(f"The number of features : {X_train.shape[1]}")

The number of training sets : 3
The number of features : 4


In [158]:
## Lets take some temporary weights and bias
w = np.random.rand(4)
b = 0.3
print(f"Random assigned weights : {w}")
print(f"Random bias assigned : {b}")

Random assigned weights : [0.93736908 0.31307521 0.27000226 0.45072724]
Random bias assigned : 0.3


### Calculate the cost function

$$
J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} 
\Big( \theta_0 + \theta_1 x_1^{(i)} + \theta_2 x_2^{(i)} + \theta_3 x_3^{(i)} + \theta_4 x_4^{(i)} - y^{(i)} \Big)^2
$$


In [159]:
## Function to calculate the cost function
def compute_cost_function(X_train, y_train, w, b):

    m = X_train.shape[0] ## Number of training sets
    n = X_train.shape[1] ## Number of features

    y_pred = np.dot(X_train, w.reshape(n, 1)) + b

    total_cost = 0

    for i in range(m):
        total_cost += (y_pred[i] - y_train[i])**2

    total_cost /= (2 * m)

    return total_cost

In [160]:
compute_cost_function(X_train, y_train, w, b)

array([667528.97282805])

### Similarly get the derivatives

$$
\frac{\partial J(\theta)}{\partial w_j} 
= \frac{1}{m} \sum_{i=1}^{m} 
\Big( \big( \theta_0 + \theta_1 x_1^{(i)} + \theta_2 x_2^{(i)} + \theta_3 x_3^{(i)} + \theta_4 x_4^{(i)} \big) - y^{(i)} \Big) \cdot x_j^{(i)}, 
\quad j = 1,2,3,4
$$


$$
\frac{\partial J(\theta)}{\partial b} 
= \frac{1}{m} \sum_{i=1}^{m} 
\Big( \big( \theta_0 + \theta_1 x_1^{(i)} + \theta_2 x_2^{(i)} + \theta_3 x_3^{(i)} + \theta_4 x_4^{(i)} \big) - y^{(i)} \Big)
$$


In [161]:
def calculate_derivative(X_train, y_train, w, b):

    m = X_train.shape[0] ## Number of training sets
    n = X_train.shape[1] ## Number of features

    y_pred = np.dot(X_train, w.reshape(n, 1)) + b

    d_dw = np.zeros((n,))
    d_db = 0

    for i in range(m):
        for j in range(n):
            d_dw[j] += (y_pred[i][0] - y_train[i]) * X_train[i, j]
        d_db += y_pred[i] - y_train[i]

    d_dw /= m
    d_db /= m

    return d_dw, d_db  

In [162]:
calculate_derivative(X_train, y_train, w, b)

(array([1.78371533e+06, 4.09793407e+03, 1.46749953e+03, 4.53267297e+04]),
 array([1095.79188561]))

In [163]:
## initialize variables
## w and b we have inititalized
w = np.zeros((4,))
b = 0
iterations = 500
alpha = 0.0001
X_train = (X_train - np.mean(X_train, axis = 0))/np.std(X_train, axis = 0)
y_train = (y_train - np.mean(y_train))/np.std(y_train)

In [164]:
X_train

array([[ 1.26311506,  1.33630621, -0.70710678,  1.22474487],
       [-0.08073519, -0.26726124,  1.41421356,  0.        ],
       [-1.18237987, -1.06904497, -0.70710678, -1.22474487]])

In [165]:
def gradient_descent(X_train, y_train, w, b, alpha, iteration):

    m = X_train.shape[0] ## Number of training examples
    n = X_train.shape[1] ## Number of features

    total_cost = compute_cost_function(X_train, y_train, w, b)
    d_dw, d_db = calculate_derivative(X_train, y_train, w, b)

    w = w - (alpha * d_dw)
    b = b - (alpha * d_db)

    return iteration, total_cost, w, b

In [166]:
gradient_descent(X_train, y_train, w, b, alpha, 0)

(0,
 array([0.5]),
 array([ 9.59633187e-05,  9.88455824e-05, -3.35579825e-05,  9.42011773e-05]),
 array([-3.70074342e-21]))

In [167]:
iteration_num = []
costs = []

for i in range(iterations):
    iteration, total_cost, w, b = gradient_descent(X_train, y_train, w, b, alpha, i)
    iteration_num.append(iteration)
    costs.append(total_cost)

In [168]:
df = pd.DataFrame(iteration_num, columns = ['iterations'])
df['costs'] = costs

In [169]:
df.head()

Unnamed: 0,iterations,costs
0,0,[0.5]
1,1,[0.49971024879389603]
2,2,[0.49942066902022236]
3,3,[0.4991312605768172]
4,4,[0.4988420233615795]
