In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/student-performance-multiple-linear-regression/Student_Performance.csv


In [2]:
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import time

In [3]:
data = pd.read_csv("/kaggle/input/student-performance-multiple-linear-regression/Student_Performance.csv",sep=",",)
print("Data dimension: ",data.shape)

Data dimension:  (10000, 6)


In [4]:
data.columns

Index(['Hours Studied', 'Previous Scores', 'Extracurricular Activities',
       'Sleep Hours', 'Sample Question Papers Practiced', 'Performance Index'],
      dtype='object')

In [5]:
data.columns = ['Hours_Studied', 'Previous_Scores', 'Extracurricular_Activities',
       'Sleep_Hours', 'Sample_Question_Papers_ Practiced', 'Performance_Index']

In [6]:
temp = [1 if data.iloc[i,2] =="Yes" else 0 for i in range(len(data))]
data.Extracurricular_Activities = temp

#### Baseline model

In [7]:
pred = data.Performance_Index.mean()
mse = data.Performance_Index.var()
print("Baseline model MSE: ",mse)

Baseline model MSE:  369.1223771977198


#### Linear Regression model   

$y_{pred} = w.x + b$    

*Loss function*  
$L(w,b) = \frac{1}{2m} {\Sigma^m_{i=1}(y^i_{actual}-(w^i.x^i+b) )^2}$  

*Derivative of loss function wrt to w*  
$\frac{df}{dw} L(w,b) = -\frac{1}{m} {\Sigma^m_{i=1}(y^i_{actual}-(w^i.x^i+b)).x^i}$  

*Derivative of loss function wrt to b*  
$\frac{df}{db} L(w,b) = -\frac{1}{m} {\Sigma^m_{i=1}(y^i_{actual}-(w^i.x^i+b))}$  

### Incorporating b in w 

$w_{twiddle} = (b,w)$   
$w ∈ R^{1xd}$   
$w_{twiddle} ∈ R^{1xd+1}$  

$x_{twiddle} = (1,x)$   
$x ∈ R^{rxd}$   
$x_{twiddle} ∈ R^{rxd+1}$   

$y_{pred} = w_{twiddle}.x_{twiddle}$  

#### Data structure for w and x



In [8]:
# Input data and Output data

x = np.array(data.iloc[:,0:5],ndmin=2)
y = np.array(data.iloc[:,5],ndmin=1)

print("X shape:",x.shape)
print("Y shape:",y.shape)

X shape: (10000, 5)
Y shape: (10000,)


#### Predicting Y

$y_{pred} = w.x + b$  
$y_{pred} = w_{twiddle} . x_{twiddle}$

In [9]:
# Predicting y

def predict_y(x_twid,w_twid):
    y_pred = np.dot(x_twid,w_twid)
    return y_pred

#### Derivative of Loss function

$L(w) = \frac{1}{2m} {\Sigma^m_{i=1}(y^i_{actual}-w^i_{twiddle}.x^i_{twiddle} )^2}$  

$\frac{df}{dw} L(w) = -\frac{1}{m} {\Sigma^m_{i=1}(y^i_{actual}-w^i_{twiddle}.x^i_{twiddle}).x^i_{twiddle}}$  

$ = -\frac{1}{m} {\Sigma^m_{i=1}(y^i_{actual}-y^i_{pred}).x^i_{twiddle}}$  

In [10]:
# # Method 1 - Defining the loss function

# def loss_fn_derivative(y_pred,y_actual,x_twiddle):
    
#     summation = -2 * np.mean((y_actual - y_pred)*x_twiddle)
     
#     #derivative should be of shape w_twiddle
#     return summation

In [11]:
# Method 1 - Defining the loss function

# def loss_fn_derivative(y_pred,y_actual,x_twiddle):
#     r,d = x_twiddle.shape
#     summation = np.zeros((r,d))
#     for i in range(len(y_pred)):
#         summation[i,] = ((y_actual[i] - y_pred[i])*x_twiddle[i,])
    
#     summation = -2 * np.mean(summation,axis = 0)

#     #derivative should be of shape w_twiddle
#     return summation

In [12]:
#Method 2 - Defining the loss function

def loss_fn_derivative(y_pred,y_actual,x_twiddle):
    
    y = y_actual - y_pred
    summation = (y.reshape(-1,1)*x_twiddle)
    summation = -2 * np.mean(summation,axis = 0)
 
    #derivative should be of shape w_twiddle
    return summation

In [13]:
# Testing the loss derivative function

xx = np.array([[1,2,3],[4,5,6],[7,8,9]])
xt = np.array([[1,1,2,3],[1,4,5,6],[1,7,8,9]])
print(xx.shape)
print(xt.shape)
ww = np.array([1,2,3])
b = 1
yy_actual = np.array([14,32,50])
wt = np.array([1,1,2,3])
print(ww.shape)
print(wt.shape)

yy_pred = predict_y(xt,wt)

print(yy_actual[1],yy_actual.shape)
print(yy_pred[1],yy_pred.shape)
print(xt[1,:],yy_pred.shape)

print(loss_fn_derivative(yy_pred[1],yy_actual[1],xt[1,:]))
#print(loss_fn_derivative(yy_pred,yy_actual,xt))

(3, 3)
(3, 4)
(3,)
(4,)
32 (3,)
33 (3,)
[1 4 5 6] (3,)
[ 2.  8. 10. 12.]


#### Gradient descent to update w

$w_{twiddle} = w_{twiddle} - \alpha * \frac{df}{dw} L(w)$  
$\alpha : Step Rate$

In [14]:
def gradient_descent(alpha,y_actual,y_pred,x_twid,w_twid):
    derivative = loss_fn_derivative(y_pred,y_actual,x_twid)
    w_twid = w_twid - (alpha * derivative)
    return w_twid

In [15]:
def get_training_data(x,y,n):
    perm = np.random.permutation(len(x))
    train_idx = perm[0:n]
    test_idx = perm[n:]
    train_x = x[train_idx,:]
    train_y = y[train_idx]
    test_x = x[test_idx,:]
    test_y = y[test_idx]
    return train_x, train_y, test_x, test_y

In [16]:
train_x,train_y,test_x,test_y = get_training_data(x,y,9000)


In [17]:
# Algorithm for linear egression using gradient research

# x - input data
# y - output data

def linear_reg_GD(train_x,train_y,n_iters=10):
    
    r,d = train_x.shape
    ones = np.ones((len(train_x),1))
    # Appending 1 in front of each row in X
    x_twiddle = np.append(ones,train_x,axis=1) #X twiddle shape: (9000 , 6)
    w = np.ones((d,))
    b = np.zeros((1,)) 
    # Appending b to W array
    w_twiddle = np.append(b,w,axis=0) #W twiddle shape: (6,)    
    y_pred = predict_y(x_twiddle,w_twiddle)
    
    alpha = 0.0001
    
    for i in range(n_iters):
        w_twiddle = gradient_descent(alpha,train_y,y_pred,x_twiddle,w_twiddle)
        y_pred = predict_y(x_twiddle,w_twiddle)
    
    mse = mean_squared_error(train_y,y_pred)
    print("Mse", mse)
       
    return w_twiddle,y_pred

In [18]:
# Algorithm for linear egression using stochastic gradient research

# x - input data
# y - output data

def linear_reg_stochasticGD(train_x,train_y,n_iters):
    
    r,d = train_x.shape
    ones = np.ones((len(train_x),1))
    # Appending 1 in front of each row in X
    x_twiddle = np.append(ones,train_x,axis=1) #X twiddle shape: (9000 , 6)
    w = np.ones((d,))
    b = np.zeros((1,)) 
    # Appending b to W array
    w_twiddle = np.append(b,w,axis=0) #W twiddle shape: (6,)    
    y_pred = predict_y(x_twiddle,w_twiddle)
    
    loss_previous = 100000
    alpha = 0.0001
    
    for i in range(n_iters):
        loss_all_run = []
        for i in range(len(train_y)):
            w_twiddle = gradient_descent(alpha,train_y[i],y_pred[i],x_twiddle[i,:],w_twiddle)
            y_pred[i] = predict_y(x_twiddle[i,:],w_twiddle)
            loss = np.mean((y_pred[i]-train_y[i])**2)
            loss_all_run.append(loss)
            
        loss_all_run = np.mean(loss_all_run)
        if np.abs(loss_all_run - loss_previous) < 0.01:
            print('Converged at run')
            break
        loss_previous = loss_all_run
        
    mse = mean_squared_error(train_y,y_pred)
    print("Mse", mse)
       
    return w_twiddle,y_pred

In [19]:
t_bfre = time.time()
w_twid, y_pred = linear_reg_GD(train_x,train_y,20000)
t_aftr = time.time()

Mse 28.27137532337727


In [20]:
print(t_aftr - t_bfre)

19.394795656204224


In [21]:
# Getting w and b from w twiddle
print(w_twid)
w = w_twid[1:]
b = w_twid[0]
print(f"\n y_pred = {w}.x + {b}")

[-3.51004568  2.24624673  0.83727317 -0.29534433 -1.32468604 -0.23019287]

 y_pred = [ 2.24624673  0.83727317 -0.29534433 -1.32468604 -0.23019287].x + -3.5100456758737213


Optimal solution: 

$w=(X^TX)^{−1} X^TY$

In [22]:
def test_optimal_solution(x,y,w):
    x_T = np.transpose(x)
    a = np.matmul(x_T,x)
    b = np.matmul(x_T,y)
    w_optimal = np.linalg.inv(x.T@x)@x.T@y
    print("W optimal",w_optimal)
    print("W from GD",w)
    return w_optimal

In [23]:
ones = np.ones((len(train_x),1))
x_twid = np.append(ones,train_x,axis=1) #X twiddle shape: (9000 , 6)

w_optimal = test_optimal_solution(x_twid,train_y,w_twid)

W optimal [-34.08259031   2.84954268   1.01883139   0.63218421   0.48219392
   0.19089528]
W from GD [-3.51004568  2.24624673  0.83727317 -0.29534433 -1.32468604 -0.23019287]


w =  [2.85178776 1.01803091 0.62725788 0.4816724  0.19402352]   
b =  -34.05322184418339

In [24]:
y_optimal = predict_y(x_twid,w_optimal)

In [25]:
print(y_optimal[0:10])
print(train_y[0:10])
print(y_pred[0:10])

[70.11019472 67.60431545 34.63584637 58.69242212 76.07111402 75.55766634
 60.35461784 67.85956571 41.76261135 58.6488499 ]
[71. 66. 32. 59. 74. 74. 61. 65. 41. 54.]
[69.02106916 63.33390693 37.49214194 63.39991047 67.1822989  68.25776198
 57.43518434 65.0919516  46.44446509 63.49526692]
