In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
import statsmodels.api as sm
from sklearn.metrics import r2_score
import time
from tqdm import tqdm
import math

In [None]:
# Toy 1-D regression data set: ten paired (x, y) observations.
X = np.array([0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4, 2.6])
Y = np.array([0.70, 0.65, 0.90, 0.95, 1.10, 1.15, 1.20, 1.40, 1.55, 1.50])
X.shape, Y.shape

In [None]:
# Build the design matrix: a leading column of ones (intercept term) followed
# by the feature column, so X has one column per entry of the 2-element
# `theta` used below and three columns result from hstack([X, Y]) later on.
# Taking the last column when X is already 2-D keeps this cell safe to re-run.
x_feature = X[:, -1] if X.ndim == 2 else X
X = np.c_[np.ones(x_feature.shape[0]), x_feature]
# Targets as a column vector so they line up with np.dot(X, theta).
Y = Y.reshape(-1, 1)

In [None]:
# Display the current shapes of X and Y (the bare tuple is the cell's output).
X.shape, Y.shape

In [None]:
# Gradient-descent hyperparameters and the initial parameter vector
alpha = 0.01          # learning rate (step size)
epoch = 10000         # number of gradient-descent iterations
m = Y.size            # number of elements in Y
np.random.seed(123)   # fixed seed so the random starting point is reproducible
theta = np.random.rand(2).reshape(-1, 1)  # two random parameters as a column vector

## Gradient Descent

In [None]:
def GD(X, Y, theta, epoch, alpha):
    """Run batch gradient descent for a fixed number of iterations.

    Parameters
    ----------
    X : ndarray
        Design matrix; expected to be 2-D with one column per parameter.
    Y : ndarray
        Targets; expected to be a column vector with one row per row of X.
    theta : ndarray
        Initial parameters as a column vector. It is not modified in place.
    epoch : int
        Number of update steps to perform.
    alpha : float
        Learning rate.

    Returns
    -------
    past_theta : list of ndarray
        The initial theta followed by theta after every update
        (``epoch + 1`` entries).
    past_cost : list of ndarray
        The cost ``1/(2m) * error.T @ error`` evaluated *before* each update
        (``epoch`` entries, each a 1x1 array when Y is a column vector).
    """
    # Derive the sample count from the data rather than relying on a
    # notebook-level global that may be stale or undefined.
    m = Y.size
    past_cost = []
    past_theta = [theta]
    for _ in range(epoch):
        h_theta = np.dot(X, theta)
        error = h_theta - Y
        cost = 1/(2*m)*np.dot(error.T, error)
        past_cost.append(cost)
        # Rebinding (not in-place update) keeps earlier entries of past_theta intact.
        theta = theta - ((1/m) * alpha * np.dot(X.T, error))
        past_theta.append(theta)
    return past_theta, past_cost

In [None]:
# Fixed-iteration run using the hyperparameters defined above.
past_theta, past_cost = GD(X=X, Y=Y, theta=theta, epoch=epoch, alpha=alpha)

In [None]:
# The last entry of past_theta is the parameter vector after the final update.
best_theta = past_theta[-1]
best_theta

In [None]:
# Stack the per-iteration costs into one array and inspect its shape.
cost_arr = np.array(past_cost)
cost_arr.shape

In [None]:
# Flatten the costs to one column: one row per recorded iteration.
cost_arr = cost_arr.reshape(len(cost_arr), 1)
cost_arr.shape

In [None]:
# Convergence curve of the fixed-iteration gradient descent run.
# (matplotlib.pyplot is already imported as `plt` in the imports cell.)
fig, ax = plt.subplots()
ax.plot(cost_arr)
ax.set(title="Gradient Descent: cost per iteration", xlabel="Iteration", ylabel="Cost")
plt.show()

## Gradient Descent with a Stopping Condition

In [None]:
def GD_stop(X, Y, theta, epoch, alpha):
    """Batch gradient descent that stops early once theta stops changing.

    Parameters
    ----------
    X : ndarray
        Design matrix; expected to be 2-D with one column per parameter.
    Y : ndarray
        Targets; expected to be a column vector with one row per row of X.
    theta : ndarray
        Initial parameters as a column vector. It is not modified in place.
    epoch : int
        Maximum number of update steps.
    alpha : float
        Learning rate.

    Returns
    -------
    past_theta : list of ndarray
        The initial theta followed by theta after every update performed.
    past_cost : list of ndarray
        The cost ``1/(2m) * error.T @ error`` evaluated before each update.
    epochs_run : int
        Number of updates actually performed (0 when ``epoch`` is 0).
    """
    # Derive the sample count from the data rather than a notebook-level global.
    m = Y.size
    past_cost = []
    past_theta = [theta]
    # Tracked separately so the return value is defined even if the loop never runs.
    epochs_run = 0
    for i in range(epoch):
        h_theta = np.dot(X, theta)
        error = h_theta - Y
        cost = 1/(2*m)*np.dot(error.T, error)
        past_cost.append(cost)
        theta = theta - ((1/m) * alpha * np.dot(X.T, error))
        past_theta.append(theta)
        epochs_run = i + 1
        # Stop when the update no longer changes any parameter (exact float equality).
        if (past_theta[i] == past_theta[i+1]).all():
            break
    return past_theta, past_cost, epochs_run

In [None]:
# perf_counter() is monotonic and high-resolution, which makes it the right
# clock for measuring how long a short computation takes.
start = time.perf_counter()
past_theta_gd, past_cost_gd, epoch_stop_gd = GD_stop(X, Y, theta, 50000, alpha)
gd_time = time.perf_counter() - start

In [None]:
# Number of iterations GD_stop performed before returning.
epoch_stop_gd

In [None]:
# Collect the GD_stop costs into a single column: one row per iteration run.
cost_arr = np.array(past_cost_gd).reshape(len(past_cost_gd), 1)
cost_arr.shape

In [None]:
# Convergence curve of gradient descent with the early-stopping check.
fig, ax = plt.subplots()
ax.plot(cost_arr)
ax.set(title="Gradient Descent (early stop): cost per iteration", xlabel="Iteration", ylabel="Cost")
plt.show()

## Mini Batch Gradient Descent

In [None]:
def mini_batch_GD(X, Y, theta, epoch, alpha, batch_size):
    """Mini-batch gradient descent that stops once theta stops changing.

    The rows of X and Y are shuffled together once (using NumPy's global
    random state) and cut into consecutive batches of ``batch_size`` rows;
    the final batch is smaller when the row count is not a multiple of
    ``batch_size``. Every epoch applies one update per batch, in that order.

    Parameters
    ----------
    X : ndarray
        Design matrix; must be 2-D (it is stacked column-wise with Y).
    Y : ndarray
        Targets; must be a 2-D column vector with one row per row of X.
    theta : ndarray
        Initial parameters as a column vector. It is not modified in place.
    epoch : int
        Maximum number of passes over all batches.
    alpha : float
        Learning rate. Each batch gradient is scaled by ``1/m`` where ``m``
        is the size of the *whole* data set, not of the batch.
    batch_size : int
        Number of rows per batch; must be at least 1.

    Returns
    -------
    past_theta : list of ndarray
        The initial theta followed by theta at the end of every epoch run.
    past_cost : list of ndarray
        Cost ``1/(2m) * error.T @ error`` over the full data set, evaluated
        with the theta reached at the end of each epoch.
    epochs_run : int
        Number of epochs actually performed (0 when ``epoch`` is 0).

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    m = Y.size
    past_cost = []
    past_theta = [theta]

    # Shuffle features and targets as one matrix so each row keeps its own target.
    xy_combine = np.hstack([X, Y])
    np.random.shuffle(xy_combine)

    # Striding by batch_size yields every complete batch plus, if the row
    # count is not a multiple of batch_size, one shorter trailing batch.
    mini_batches = []
    for first_row in range(0, xy_combine.shape[0], batch_size):
        data = xy_combine[first_row:first_row + batch_size]
        x_mini = data[:, :-1]
        y_mini = data[:, -1].reshape((-1, 1))
        mini_batches.append((x_mini, y_mini))

    # Tracked separately so the return value is defined even if the loop never runs.
    epochs_run = 0
    for i in range(epoch):
        for x_mini, y_mini in mini_batches:
            error = np.dot(x_mini, theta) - y_mini
            theta = theta - ((1/m) * alpha * np.dot(x_mini.T, error))

        past_theta.append(theta)  # theta after the last batch of this epoch
        # Evaluate the cost on the full data set; using only the last batch's
        # error would under-report it and make it incomparable with GD's cost.
        full_error = np.dot(X, theta) - Y
        cost = 1/(2*m)*np.dot(full_error.T, full_error)
        past_cost.append(cost)
        epochs_run = i + 1
        # Stop when a whole epoch leaves every parameter unchanged.
        if (past_theta[i] == theta).all():
            break
    return past_theta, past_cost, epochs_run

In [None]:
batch_size = 3
# perf_counter() is monotonic and high-resolution, which makes it the right
# clock for measuring how long a short computation takes.
start = time.perf_counter()
past_theta_mb, past_cost_mb, epoch_stop_mb = mini_batch_GD(X, Y, theta, 50000, alpha, batch_size)
mb_time = time.perf_counter() - start

In [None]:
# Collect the mini-batch costs into a single column: one row per epoch run.
cost_arr = np.array(past_cost_mb).reshape(len(past_cost_mb), 1)
cost_arr.shape

In [None]:
# Convergence curve of the mini-batch gradient descent run.
fig, ax = plt.subplots()
ax.plot(cost_arr)
ax.set(title="Mini-Batch Gradient Descent: cost per epoch", xlabel="Epoch", ylabel="Cost")
plt.show()

## Comparing Gradient Descent and Mini_Batch

In [None]:
# Report the durations measured around the GD_stop and mini_batch_GD calls.
print(f'Time taken by :- \nGradient Descent : {gd_time} \nMini Batch : {mb_time}')

In [None]:
# Side-by-side summary for both optimisers: final theta, last recorded cost,
# and the number of iterations/epochs performed before stopping.
print(f'Gradient Descent : \nBest Theta : {past_theta_gd[-1]} \nBest Cost : {past_cost_gd[-1]} \nEpoch Stop : {epoch_stop_gd}')
print(f'\nMini_Batch : \nBest Theta : {past_theta_mb[-1]} \nBest Cost : {past_cost_mb[-1]} \nEpoch Stop : {epoch_stop_mb}')

## Linear Regression

In [None]:
def Linear_Regression(data, pred_col, alpha, epoch):
    """Fit a linear model to a DataFrame with batch gradient descent.

    Every column of ``data`` except ``pred_col`` is used as a feature, and a
    column of ones is prepended for the intercept. The starting parameters
    are drawn from NumPy's global random state.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature columns plus the target column.
    pred_col : str
        Name of the target column in ``data``.
    alpha : float
        Learning rate.
    epoch : int
        Number of gradient-descent updates to perform.

    Returns
    -------
    theta : ndarray
        Fitted parameters as a column vector; the first entry multiplies the
        prepended ones column.
    past_cost : list of float
        Mean squared error ``1/m * sum((X @ theta - y) ** 2)`` evaluated
        before each update (no 1/2 factor).
    predictions : ndarray
        ``X @ theta`` computed with the returned (final) theta.
    """
    # Split the frame into target and features, then prepend the intercept column.
    y = np.array(data[pred_col]).reshape((-1, 1))
    X = np.array(data.drop(pred_col, axis=1))
    X = np.c_[np.ones(X.shape[0]), X]
    theta = np.random.rand(X.shape[1]).reshape(-1, 1)

    m = y.size
    past_cost = []
    for _ in range(epoch):
        residual = np.dot(X, theta) - y
        # Cost of the parameters used for this step, recorded before updating them.
        past_cost.append(1/(m)*np.sum(residual**2))
        theta -= alpha * 1/m * np.dot(X.T, residual)

    # Recompute with the final theta so the returned predictions match the
    # returned parameters (and are defined even when epoch == 0).
    predictions = np.dot(X, theta)
    return theta, past_cost, predictions

In [None]:
alpha = 0.01
epoch = 10000
# The three column names assume X holds a ones column followed by the feature.
# NOTE(review): Linear_Regression prepends its own ones column as well, so the
# intercept ends up represented twice in the fitted model — confirm intended.
data = pd.DataFrame(np.hstack([X, Y]), columns=['1s', 'X', 'Y'])

# perf_counter() is monotonic and high-resolution, which makes it the right
# clock for measuring how long a short computation takes.
start = time.perf_counter()
theta_lr, past_cost_lr, predictions_lr = Linear_Regression(data, 'Y', alpha, epoch)
total_time_lr = time.perf_counter() - start

In [None]:
# Sign-aware formatting (`+.2f`) puts an explicit + or - in front of every
# coefficient after the first, so the terms never run together and a negative
# coefficient is not printed as "+-".
equation = f"h(x) = {theta_lr[0,0]:.2f}{theta_lr[1,0]:+.2f}x1{theta_lr[2,0]:+.2f}x2"
r2_percent = round(r2_score(Y, predictions_lr) * 100, 3)
print(f"{equation}\nTime Taken to calculate Cost : {total_time_lr} \nAccuracy : {r2_percent}%")

In [None]:
# Collect the linear-regression costs into a single column: one row per iteration.
cost_arr = np.array(past_cost_lr).reshape(len(past_cost_lr), 1)
cost_arr.shape

In [None]:
# Convergence curve of the Linear_Regression run.
fig, ax = plt.subplots()
ax.plot(cost_arr)
ax.set(title="Linear Regression: cost per iteration", xlabel="Iteration", ylabel="Cost (MSE)")
plt.show()