# Import used libraries

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the pre-split train and test data and clean it

In [7]:
# Read both CSV files, discard every row that has a missing value and renumber
# the remaining rows from 0.
# NOTE: dropping rows like this is only acceptable for a quick experiment; it
# is not recommended to mindlessly drop data.
train_data = pd.read_csv("train.csv").dropna().reset_index(drop=True)
test_data = pd.read_csv("test.csv").dropna().reset_index(drop=True)

# Training arrays: every column except "y" is a feature, "y" is the target.
# The target is given a trailing axis so it is a column of shape (m, 1).
x_train = np.array(train_data.drop(columns="y"))
y_train = np.array(train_data["y"])[:, None]

# Testing arrays, built the same way.
x_test = np.array(test_data.drop(columns="y"))
y_test = np.array(test_data["y"])[:, None]

# Initial parameters: one zero weight per feature (row vector) and a zero bias.
w = np.zeros((1, x_train.shape[1]))
b = 0

(699, 1)


# Define Functions

In [8]:
def linear_function(x: np.array, w:np.array, b:float):
    """Evaluate the linear model ``(x @ w) + b``.

    Args:
        x: Input array; ``x.shape[1]`` is taken as the number of features.
        w: Weights as a NumPy array whose first dimension must equal
            ``x.shape[1]``.
        b: Bias added to the matrix product.

    Returns:
        The result of ``(x @ w) + b``.

    Raises:
        TypeError: If ``w`` is not a ``numpy.ndarray``.
        ValueError: If ``w.shape[0]`` does not match ``x.shape[1]``.
    """
    # Reject anything that is not an ndarray before touching ``w.shape``.
    if not isinstance(w, np.ndarray):
        raise TypeError(f"w should be a NumPy array not a {type(w).__name__}")

    # The inner dimensions must agree for the matrix product to be defined.
    if x.shape[1] != w.shape[0]:
        raise ValueError(f"w should be ({x.shape[1]},{1}) not {w.shape}")

    return (x @ w) + b  # W . X + B

In [9]:
def mse(x, y, w, b): # Mean Squared Error
    """Return the halved mean squared error of the linear model on ``(x, y)``.

    The cost is ``(1 / (2 * m)) * sum((linear_function(x, w, b) - y) ** 2)``
    summed along axis 0, where ``m = x.shape[0]`` is the number of examples.

    If the evaluation with ``w`` raises ``ValueError``, it is retried once
    with ``w.T``; a ``ValueError`` from that second attempt propagates.
    """
    scale = 1 / (2 * x.shape[0])  # 1 / (2m)

    def _cost(weights):
        # Squared residuals of the model for the given weight orientation.
        residual = linear_function(x, weights, b) - y
        return scale * (np.sum(residual**2, axis=0))

    try:
        return _cost(w)
    except ValueError:
        return _cost(w.T)
    

In [10]:
def gradient_descent(x, y, w, b, iters=2000, alpha=0.001, automatic_test=False, thresh=0.001):
    """Fit the linear model ``x @ w + b`` to ``y`` with batch gradient descent.

    Every step evaluates the model with ``linear_function`` (retrying with
    ``w.T`` if the first orientation raises ``ValueError``), computes the
    gradients of the ``mse`` cost, updates ``w`` and ``b``, records the new
    cost and prints a progress line.

    Args:
        x: Feature array; ``x.shape[0]`` is the number of training examples.
        y: Targets. NOTE(review): the visible caller passes an ``(m, 1)``
            column; other shapes are not handled specially here.
        w: Initial weights as a NumPy array, in either row or column
            orientation. The returned weights keep this shape.
        b: Initial bias.
        iters: Number of steps to run when ``automatic_test`` is false.
            Ignored when ``automatic_test`` is true.
        alpha: Learning rate.
        automatic_test: When true, iterate until the cost decreases by no more
            than ``thresh`` in a single step instead of running ``iters`` steps.
        thresh: Convergence threshold used when ``automatic_test`` is true.

    Returns:
        A tuple ``(w, b, loss_hist)``. ``loss_hist[0]`` is the cost before the
        first update, followed by the cost after every step. ``b`` comes back
        as a NumPy array once at least one step has run, because the bias
        gradient is computed with ``np.sum(..., axis=0)``.

    Raises:
        ValueError: If the cost increases from one step to the next (``alpha``
            is too big), in both modes. Also raised in automatic mode when the
            cost becomes NaN, since the convergence test could then never
            succeed.
    """
    m = x.shape[0]  # Number of training examples
    cost = mse(x, y, w, b)
    loss_hist = [cost]

    i = 0
    # One loop serves both modes: a fixed budget of ``iters`` steps, or an
    # open-ended run that ends through the convergence test below.
    while automatic_test or i < iters:
        i += 1
        try:
            f = linear_function(x, w, b)
        except ValueError:
            f = linear_function(x, w.T, b)
        residual = f - y

        dw = (1 / m) * np.sum(residual * x, axis=0)  # Derivative of the cost w.r.t w
        db = (1 / m) * np.sum(residual, axis=0)  # Derivative of the cost w.r.t b

        # ``dw`` is flat, shape (n,). Give it the shape of ``w`` before
        # subtracting, otherwise a column ``w`` of shape (n, 1) would silently
        # broadcast into an (n, n) matrix.
        w = w - (alpha * dw.reshape(w.shape))  # Updated values of W
        b = b - (alpha * db)  # Updated value of b

        cost_old = cost  # Kept to compare against the new cost
        cost = mse(x, y, w, b)
        loss_hist.append(cost)

        print(f"Iteration #{i}\t\tCost = {cost}\t\tw = {w}\t\tb = {b}")

        # If the cost increases even once, alpha is too big. Raise in both
        # modes so that callers unpacking the 3-tuple never receive a string.
        if cost > cost_old:
            raise ValueError(f"Alpha is too big!\nOld Cost = {cost_old}, New Cost = {cost}")

        if automatic_test:
            # NaN compares false against everything, so neither the check
            # above nor the one below could ever fire and the loop would spin
            # forever.
            if np.isnan(cost).any():
                raise ValueError(f"Cost is NaN after iteration #{i}; check the input data for missing values")
            if (cost_old - cost) <= thresh:  # Automatic convergence test
                return w, b, loss_hist

    return w, b, loss_hist

# Test the Algorithm

In [None]:
# Alternative run: let gradient descent stop by itself through the automatic
# convergence test instead of a fixed number of iterations. Play with the
# values of alpha and thresh a little bit to see the effects they have!
# w_new, b_new, loss_hist = gradient_descent(
#     x_train, y_train, w, b,
#     automatic_test=True, thresh=0.0000001, alpha=0.0000001,
# )

# Fixed-budget run. According to the author's experiments, iters can be set as
# low as 15K; more than that improves performance only by a really small
# margin, and fewer than 15K is generally not recommended. Feel free to try
# any number to see the effect it has.
w_new, b_new, loss_hist = gradient_descent(
    x_train, y_train, w, b, iters=17000, alpha=0.0000001
)

# Last expression of the cell: displays the learned parameters in a notebook.
w_new, b_new

In [None]:
# Uncomment to inspect the model output on the test features:
# print(linear_function(x_test, w_new, b_new))

# Final cost of the learned parameters on the training data.
print("Cost: ", mse(x_train, y_train, w_new, b_new))

# Plot the loss function

In [None]:
# Plot the recorded cost history, one point per entry of loss_hist.
plt.plot(loss_hist)

plt.xlabel("#Iterations")
plt.ylabel("Cost")
plt.title("The cost per number of iterations on gradient descent")  # typo "descnet" fixed

plt.show()

# Plot the regression line over the data

In [None]:
# Model output for the test features, drawn in red over a scatter of the
# actual test targets.
predictions = linear_function(x_test, w_new, b_new)
plt.plot(x_test, predictions, color="r")
plt.scatter(x_test, y_test)
plt.show()