### Multivariate Linear regression - Example

#### importing libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#### getting data set

In [2]:
# Training set: m examples, each with a constant bias column x0 (all ones),
# two feature columns x1 and x2, and the target column y.
m = 5
data = dict(
    x0=np.ones(m, dtype=int),
    x1=[0, 1, 2, 3, 2],
    x2=[1, 2, 2, 1, 1],
    y=[4, 7, 8, 7, 6],
)
df = pd.DataFrame(data)
print(df)

   x0  x1  x2  y
0   1   0   1  4
1   1   1   2  7
2   1   2   2  8
3   1   3   1  7
4   1   2   1  6


#### setting initial values of parameters (B0, B1, B2) and learning rate a = 0.1

In [3]:
# Every coefficient starts at 1; `a` is the gradient-descent learning rate.
B0, B1, B2 = 1, 1, 1
a = 0.1

#### calculating the hypothesis function: $h_B(x) = B_0 x_0 + B_1 x_1 + B_2 x_2$

In [4]:
def getHypothesisF(B0, B1, B2):
    """Evaluate the linear hypothesis for every row of the global ``df``.

    Computes ``B0*x0 + B1*x1 + B2*x2`` column-wise from ``df['x0']``,
    ``df['x1']`` and ``df['x2']`` and rounds each value to 4 decimal places.

    Args:
        B0: coefficient applied to ``df['x0']``.
        B1: coefficient applied to ``df['x1']``.
        B2: coefficient applied to ``df['x2']``.

    Returns:
        The rounded predictions, one per row of ``df``.
    """
    bias_term = B0 * df['x0']
    x1_term = B1 * df['x1']
    x2_term = B2 * df['x2']
    return (bias_term + x1_term + x2_term).round(4)

#### calculating the cost function: $J(B_0, B_1, B_2) = \frac{1}{2m}\sum_{i=1}^{m}\left(h_B(x^{(i)}) - y^{(i)}\right)^2$

In [5]:

def getCostF(B0, B1, B2, hB_x):
    """Return the squared-error cost of the predictions ``hB_x``.

    The cost is ``sum((hB_x - y)**2) / (2*m)`` rounded to 4 decimal places,
    where ``y`` is ``df['y']`` and ``m`` is the module-level example count.

    Args:
        B0, B1, B2: accepted but not read here; the cost is derived from
            ``hB_x`` alone.
        hB_x: predictions to score against ``df['y']``.

    Returns:
        The rounded cost value.
    """
    squared_residuals = (hB_x - df['y']) ** 2
    # Built-in sum adds the elements one at a time, in row order.
    total_squared_error = sum(squared_residuals)
    return round(total_squared_error / (2 * m), 4)

#### getting the initial hypothesis and cost function

In [6]:

# Baseline before any update: predictions and cost for the starting
# parameters. og_cost keeps the starting cost for the final summary.
hB_x = getHypothesisF(B0, B1, B2)
print(list(hB_x))
J_B0_B1_B2 = og_cost = getCostF(B0, B1, B2, hB_x)
print(J_B0_B1_B2)

[2, 4, 5, 5, 4]
3.0


#### recalculating parameters until convergence (declaring convergence when the cost drops by less than 0.01 in one iteration, $J_i(B) - J_{i+1}(B) < 0.01$, or stopping after 100 iterations)

In [7]:
isConverge = False
j = 0
y = df['y']
# Repeat gradient-descent steps until the cost changes by less than 0.01
# between two consecutive iterations, or until 100 iterations have run
# without meeting that threshold.
while not isConverge and j < 100:
    print("\nITERATION {}:\n\n".format(j))

    # All three gradient terms are computed from the current hB_x before any
    # parameter is overwritten, so B0, B1 and B2 are updated simultaneously.
    # x0 is the constant 1 column, so the B0 term needs no feature multiplier.
    err_B0 = (hB_x - y).round(4)
    print("err_B0: {}".format(list(err_B0)))
    err_B0_sum = sum(err_B0)
    temp0 = round((B0 - (a*(1/m)*err_B0_sum)), 4)
    print("old B0: {}".format(B0))
    print("new B0: {}\n".format(temp0))

    err_B1 = ((hB_x - y)*df['x1']).round(4)
    print("err_B1: {}".format(list(err_B1)))
    err_B1_sum = sum(err_B1)
    temp1 = round((B1 - (a*(1/m)*err_B1_sum)), 4)
    print("old B1: {}".format(B1))
    print("new B1: {}\n".format(temp1))

    err_B2 = ((hB_x - y)*df['x2']).round(4)
    print("err_B2: {}".format(list(err_B2)))
    err_B2_sum = sum(err_B2)
    temp2 = round((B2 - (a*(1/m)*err_B2_sum)), 4)
    print("old B2: {}".format(B2))
    print("new B2: {}\n".format(temp2))

    B0 = temp0
    B1 = temp1
    B2 = temp2

    # Cost of the parameters used at the start of this iteration.
    prev_cost = J_B0_B1_B2
    print("current cost: {}".format(prev_cost))

    hB_x = getHypothesisF(B0, B1, B2)
    print("new hypothesis f(x): {}".format(list(hB_x)))

    J_B0_B1_B2 = getCostF(B0, B1, B2, hB_x)
    print("new cost: {}".format(J_B0_B1_B2))

    # Positive when the cost went down, negative when it went up.
    diff = prev_cost - J_B0_B1_B2
    print("difference from previous iteration: {}\n\n".format(diff))

    # Compare the magnitude of the change: with a signed comparison any rise
    # in cost (negative diff) would be reported as convergence.
    if abs(diff) < 0.01:
        isConverge = True
    else:
        j += 1


if isConverge:
    print("\n\nConvergenece acheieved at iteration {} ".format(j))
else:
    # iterations ran out
    print("\n\nBest convergenece acheieved after all iterations")

# The summary line is the same whichever way the loop ended.
print("\nJ_B0_B1_B2 minimised from {} to {} with new parameters, B0:{}, B1:{}, B2:{}\n\n".format(og_cost, J_B0_B1_B2, B0, B1, B2))



ITERATION 0:


err_B0: [-2, -3, -3, -2, -2]
old B0: 1
new B0: 1.24

err_B1: [0, -3, -6, -6, -4]
old B1: 1
new B1: 1.38

err_B2: [-2, -6, -6, -2, -2]
old B2: 1
new B2: 1.36

current cost: 3.0
new hypothesis f(x): [2.6, 5.34, 6.72, 6.74, 5.36]
new cost: 0.6831
difference from previous iteration: 2.3169



ITERATION 1:


err_B0: [-1.4, -1.66, -1.28, -0.26, -0.64]
old B0: 1.24
new B0: 1.3448

err_B1: [-0.0, -1.66, -2.56, -0.78, -1.28]
old B1: 1.38
new B1: 1.5056

err_B2: [-1.4, -3.32, -2.56, -0.26, -0.64]
old B2: 1.36
new B2: 1.5236

current cost: 0.6831
new hypothesis f(x): [2.8684, 5.8976, 7.4032, 7.3852, 5.8796]
new cost: 0.3015
difference from previous iteration: 0.38160000000000005



ITERATION 2:


err_B0: [-1.1316, -1.1024, -0.5968, 0.3852, -0.1204]
old B0: 1.3448
new B0: 1.3961

err_B1: [-0.0, -1.1024, -1.1936, 1.1556, -0.2408]
old B1: 1.5056
new B1: 1.5332

err_B2: [-1.1316, -2.2048, -1.1936, 0.3852, -0.1204]
old B2: 1.5236
new B2: 1.6089

current cost: 0.3015
new hypothesis f(x)

#### plotting the hypothesis function

In [8]:
def plotHypothesisF(y, x0, x1, x2):
    """Scatter the targets against each feature and overlay the hypothesis.

    ``y`` is plotted against ``x0`` (blue), ``x1`` (green) and ``x2``
    (yellow) on one set of axes. The hypothesis values are read from the
    module-level ``hB_x`` at call time and drawn as a red line against ``x1``.

    Args:
        y: target values.
        x0, x1, x2: feature columns; presumably the same length as ``y`` and
            ``hB_x`` (not checked here).
    """
    plt.scatter(x0, y, color='blue')
    plt.scatter(x1, y, color='green')
    plt.scatter(x2, y, color='yellow')

    # A hypothesis over two features has no single natural x axis; x1 is used
    # here (swap in x2 to view the fit along the other feature). The points
    # are ordered by x1 so the line is drawn left to right instead of
    # doubling back on itself.
    x1_values = list(x1)
    predictions = list(hB_x)
    order = sorted(range(len(x1_values)), key=x1_values.__getitem__)
    plt.plot(
        [x1_values[i] for i in order],
        [predictions[i] for i in order],
        color='red',
    )
    plt.show()