# Given students' Mathematics and Computer Science test scores, fit a linear function that predicts the CS score from the math score using gradient descent, and compare it with scikit-learn's LinearRegression

## Importing Libraries and Collecting Data

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import linear_model

# Global plot styling: seaborn's default theme is applied first, then the
# matplotlib 'dark_background' style sheet is applied on top of it, so the
# order of these two calls matters.
# NOTE(review): no plotting cell is visible in this part of the notebook —
# confirm these settings are still needed.
sns.set()
plt.style.use('dark_background')

In [3]:
# Load the test-score table; the path is relative to the notebook's working
# directory. The rendered output shows one row per student with the columns
# `name`, `math` and `cs`.
df = pd.read_csv('test_scores.csv')
# Bare last expression so the notebook renders the frame as a table.
df

Unnamed: 0,name,math,cs
0,david,92,98
1,laura,56,68
2,sanjay,88,81
3,wei,70,80
4,jeff,80,83
5,aamir,49,52
6,venkat,65,66
7,virat,35,30
8,arthur,66,68
9,paul,67,73


In [15]:
# Feature (math score) and target (cs score) as column vectors of shape
# (n_samples, 1), the 2-D layout that LinearRegression.fit is given below.
# Bracket access avoids any confusion between the `math` column and the
# stdlib `math` module imported later in the notebook.
X = df['math'].to_numpy().reshape(-1, 1)
y = df['cs'].to_numpy().reshape(-1, 1)

In [16]:
X

array([[92],
       [56],
       [88],
       [70],
       [80],
       [49],
       [65],
       [35],
       [66],
       [67]], dtype=int64)

In [17]:
y

array([[98],
       [68],
       [81],
       [80],
       [83],
       [52],
       [66],
       [30],
       [68],
       [73]], dtype=int64)

## Creating and Fitting the Linear Regression Model

In [18]:
# Reference solution: scikit-learn's LinearRegression fitted on the same
# (X, y) data; its slope and intercept are later compared against the
# hand-written gradient descent.
reg = linear_model.LinearRegression()
# fit() is left as the last expression, so the cell displays the estimator repr.
reg.fit(X,y)

LinearRegression()

In [20]:
# Slope and intercept found by scikit-learn. Because y was passed as a 2-D
# column vector, coef_ comes back as a (1, 1) array and intercept_ as a
# 1-element array (see the cell output), hence the [0][0] / [0] indexing
# used in the comparison cells.
m_sklearn = reg.coef_
b_sklearn = reg.intercept_

m_sklearn, b_sklearn

(array([[1.01773624]]), array([1.91521931]))

## Implementing Gradient Descent (Minimizing the Mean Squared Error Loss)

In [28]:
import math

def gradient_descent(x, y, iterations=1000000, learning_rate=0.0002, rel_tol=1e-20):
    """
    Fit the line ``y = m*x + b`` by batch gradient descent on the mean
    squared error (MSE).

    Parameters
    ----------
    x, y : array-like
        Feature and target values with identical shapes, either 1-D
        ``(n,)`` or column vectors ``(n, 1)``. Anything accepted by
        ``np.asarray`` works; values are converted to float.
    iterations : int, optional
        Maximum number of parameter updates (default 1_000_000).
    learning_rate : float, optional
        Step size applied to both gradients (default 0.0002).
    rel_tol : float, optional
        Relative tolerance handed to ``math.isclose`` when comparing the
        cost of consecutive iterations. The default (1e-20) is below
        float64 resolution, so by default the loop only stops early once
        the cost no longer changes at all; pass a larger value
        (e.g. 1e-12) to stop sooner.

    Returns
    -------
    (m, b) : tuple
        Slope and intercept. For ``(n, 1)`` inputs each is a 1-element
        array (index with ``[0]``); for 1-D inputs each is a numpy scalar.

    Raises
    ------
    ValueError
        If the inputs are empty or their shapes differ (mismatched shapes
        would otherwise broadcast silently and give a meaningless fit).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.shape != y.shape:
        raise ValueError(
            "x and y must have the same shape, got %s and %s" % (x.shape, y.shape)
        )
    n = len(x)
    if n == 0:
        raise ValueError("x and y must contain at least one sample")

    m_curr = 0
    b_curr = 0
    previous_cost = 0

    for _ in range(iterations):
        residual = y - (m_curr * x + b_curr)
        # Mean squared error as a plain Python float, so math.isclose never
        # has to convert a 1-element array to a scalar implicitly
        # (deprecated since NumPy 1.25).
        cost = float(np.mean(residual ** 2))
        # Reduce over the sample axis only: (n, 1) inputs keep the
        # 1-element array result that callers index with [0].
        md = -(2 / n) * np.sum(x * residual, axis=0)  # d(cost)/dm
        bd = -(2 / n) * np.sum(residual, axis=0)      # d(cost)/db
        m_curr = m_curr - learning_rate * md
        b_curr = b_curr - learning_rate * bd
        # Stop as soon as the cost has (relatively) stopped changing.
        if math.isclose(cost, previous_cost, rel_tol=rel_tol):
            break
        previous_cost = cost

    return m_curr, b_curr

In [29]:
# Run the hand-written gradient descent on the same (n, 1) column vectors
# given to scikit-learn. Each returned value is a 1-element array (see the
# cell output), so the comparison cells read it with [0].
m_Grad_Desc, b_Grad_Desc = gradient_descent(X,y)
m_Grad_Desc, b_Grad_Desc

(array([1.01773817]), array([1.91508262]))

## Comparing the Results from scikit-learn and the `gradient_descent` Function

### Slope = m

In [36]:
# Slope from scikit-learn (reference) next to the gradient-descent slope.
print(m_sklearn[0][0], m_Grad_Desc[0])
# Relative error with respect to the scikit-learn value. The denominator is
# the magnitude of the reference so the error stays non-negative even when
# the fitted slope is negative.
error_m = abs(m_sklearn[0][0] - m_Grad_Desc[0]) / abs(m_sklearn[0][0])
# Displayed as a percentage.
error_m*100

1.017736237856933 1.0177381667350405


0.00018952632673488017

### Intercept = b

In [41]:
# Intercept from scikit-learn (reference) next to the gradient-descent intercept.
print(b_sklearn[0], b_Grad_Desc[0])
# Relative error with respect to the scikit-learn value. The denominator is
# the magnitude of the reference so the error stays non-negative even when
# the fitted intercept is negative.
error_b = abs(b_sklearn[0] - b_Grad_Desc[0]) / abs(b_sklearn[0])
# Displayed as a percentage.
error_b*100

1.9152193111568891 1.9150826165722297


0.007137281034249276