Linear Regression 

In [150]:
import numpy as np 
#regular gradient descent. 
def gradient_descent_vanilla(x, y, a = 0.01, e = 15000, l = 0.7):

    w = np.zeros((x.shape[1],1))
    b = 0
    
    m = x.shape[0]
    
    for i in range(e):
        h = x @ w + b 
        error = h - y 
        dw = (1/m)* (x.T @ error)
        db = (1/m)* (np.sum(error))
        
        w = w - a*dw - l*w/m 
        b = b - a*db - l*b/m 
        
    return w,b

In [2]:
#gradient_descent with momentum and regularization. 
def gradient_descent_momentum(x, y, a = 0.01, e = 10000, l = 0.7, beta = 0.9):

    w = np.zeros((x.shape[1],1))
    b = 0
    
    m = x.shape[0]
    v_w = 0 
    v_b = 0 
    
    for i in range(e):
        h = x @ w + b 
        error = h - y 
        dw = (1/m)* (x.T @ error)
        db = (1/m)* (np.sum(error))
        
        v_w = beta*v_w + (1-beta)*dw 
        v_b = b*v_b + (1-beta)*db 
        
        w = w - a*v_w - l*w/m
        b = b - a*v_b - l*b/m 
        
    return w,b

In [217]:
#gradient descent with adam optimization. 
def gradient_descent_adam(x, y, a = 0.1, t = 1500, l = 0.7, beta1 = 0.9, beta2 = 0.999, epsilon = 1e-08):

    w = np.zeros((x.shape[1],1))
    b = 0
    
    m = x.shape[0]
    v_w = 0 
    v_b = 0 
    
    s_w = 0 
    s_b = 0 
    
    for i in range(t):
        h = x @ w + b 
        error = h - y 
        dw = (1/m)* (x.T @ error)
        db = (1/m)* (np.sum(error))
        
        v_w = beta1*v_w + (1-beta1)*dw 
        v_b = beta1*v_b + (1-beta1)*db 
        
        s_w = beta2*s_w + (1-beta2)*dw**2 
        s_b = beta2*s_b + (1-beta2)*db**2 
        
        v_w_c = v_w/(1 - beta1**t)
        v_b_c = v_b/(1 - beta1**t)
        
        s_w_c = s_w/(1 - beta2**t)
        s_b_c = s_b/(1 - beta2**t)
        
        
        
        
        w = w - a*v_w_c/(s_w_c**(0.5) + epsilon) - l*w/m
        b = b - a*v_b_c/(s_b_c**(0.5) + epsilon) - l*b/m 
        
    return w,b

Data Normalization

In [171]:
import pandas as pd 

df = pd.read_csv('linear_train.csv')
x_train = np.array(df.iloc[:,1:21])
y_train = np.array(df.iloc[:,21:22])


In [210]:
def normalize(x):
    min_val = np.min(x, axis=0)
    max_val = np.max(x, axis=0)
    ranges = max_val - min_val 

    normalized_data = (x - min_val) / ranges
    return normalized_data 

normalized_x = normalize(x_train)
normalized_y = normalize(y_train)

Testing gradient descent algorithms 

In [133]:
import time
start_time = time.time()
w,b = gradient_descent_momentum(normalized_x, normalized_y)
w,b
print("--- %s seconds ---" % (time.time() - start_time))

--- 12.001650094985962 seconds ---


In [151]:
start_time = time.time()
w_,b_ = gradient_descent_vanilla(normalized_x, normalized_y)
w_,b_
print("--- %s seconds ---" % (time.time() - start_time))

--- 18.181382179260254 seconds ---


In [218]:
start_time = time.time()
w_a, b_a = gradient_descent_adam(normalized_x, normalized_y)
w_a, b_a
print("--- %s seconds ---" % (time.time() - start_time))

--- 1.9830615520477295 seconds ---


Testing 

In [174]:
df_test = pd.read_csv('linear_test_data.csv')
x_test = df_test.iloc[:,14:15]


In [233]:
from sklearn.linear_model import LinearRegression 

lr = LinearRegression()
#reshaping x_train as a 2D array -

lr.fit(normalized_x, normalized_y)

LinearRegression()

In [234]:
m = lr.coef_
m

array([[ 3.08768040e-01,  2.67779658e-01,  3.78048123e-01,
         6.78392429e-03,  7.76528102e-02,  1.02033070e-01,
         2.75139813e-01,  1.32774944e-01,  7.74896459e-02,
         3.93211746e-01,  1.61475401e-01,  1.45876260e-01,
         1.25740634e-02,  2.75314987e+10, -2.75314987e+10,
         2.00757244e-01,  8.35007879e-02,  1.40230969e-01,
         1.36722742e-01,  1.59381626e-01]])

In [235]:
c = lr.intercept_
c

array([-1.21789525])

In [236]:
w_a, b_a

(array([[ 0.2991884 ],
        [ 0.26046354],
        [ 0.37031374],
        [-0.00103859],
        [ 0.06726719],
        [ 0.09400967],
        [ 0.2680924 ],
        [ 0.12265618],
        [ 0.07037365],
        [ 0.38491626],
        [ 0.1547047 ],
        [ 0.13841969],
        [ 0.00302743],
        [ 0.16401391],
        [ 0.16401391],
        [ 0.19195581],
        [ 0.07623713],
        [ 0.13161089],
        [ 0.12806266],
        [ 0.15143659]]),
 -1.1389677131121125)

In [244]:
def r2_score(y_pred, y_test):
    rss = np.sum((y_pred - y_test)**2)
    tss = np.sum((y_test - np.mean(y_test))**2)
    r = 1 - rss/tss
    return r

In [245]:
normalized_x @ w_a + b_a

array([[0.38791634],
       [0.47727039],
       [0.70182748],
       ...,
       [0.50552213],
       [0.65523373],
       [0.31063012]])

In [246]:
normalized_y

array([[0.36779872],
       [0.55049488],
       [0.66866161],
       ...,
       [0.47313444],
       [0.69345611],
       [0.22891573]])

In [247]:
r2_score(normalized_x @ w_a + b_a, normalized_y)

0.8416039111266782

In [249]:
r2_score(normalized_x @ m.T + c, normalized_y)

0.8428783264665048

In [251]:
r2_score(normalized_x @ w_ + b_, normalized_y)

0.6612908021307451

In [227]:
y_pred = normalized_x @ w_a + b_a 

In [228]:
def mse(y_pred, y_actual):
    err = np.sum((y_pred - y_actual)**2)/len(y_pred)
    return err 


In [252]:
mse(y_pred, normalized_y)

0.0023176185150267487

In [262]:
df = pd.read_csv('linear_test_data.csv')
x_test = np.array(df.iloc[:,1:21])
y_test = np.array(df.iloc[:,21:22])
x_test

array([[ 146.79064077,  223.71180565,   -0.88137077, ...,    4.88054597,
         125.78555074,  800.20477227],
       [  33.25365297,  285.93522871,   -3.35261115, ...,   30.74826423,
         130.8324214 ,  736.11914334],
       [ 180.53179872,  208.42034341,    9.84618777, ...,  -44.22657541,
           7.93928048,  833.65673771],
       ...,
       [-208.06214994,  218.78847063,   -9.15764925, ...,   -5.94135997,
         -67.82122337,  811.02237566],
       [ 246.77261619,  229.34815412,   54.29351768, ...,   67.34885785,
         363.34733553,  748.9426883 ],
       [ 151.89825368,  261.23698597,  -85.46693489, ...,    8.9994506 ,
         -22.70322019,  790.46707145]])

In [263]:
normalized_test = normalize(x_test)

In [264]:
normalized_test @ w_a + b_a

array([[0.4967296 ],
       [0.58358216],
       [0.53873098],
       ...,
       [0.53463429],
       [0.62612933],
       [0.42242837]])

In [265]:
#Re-normalize. 