## Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Loading the dataset

In [2]:
dataset = pd.read_csv("../dataset/50_Startups.csv")
dataset.head()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94


In [3]:
dataset.isnull().sum() #checking null values

R&D Spend          0
Administration     0
Marketing Spend    0
State              0
Profit             0
dtype: int64

In [4]:
#Spliting the dataset into x and y
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [5]:
print(X)
print(y)

[[165349.2 136897.8 471784.1 'New York']
 [162597.7 151377.59 443898.53 'California']
 [153441.51 101145.55 407934.54 'Florida']
 [144372.41 118671.85 383199.62 'New York']
 [142107.34 91391.77 366168.42 'Florida']
 [131876.9 99814.71 362861.36 'New York']
 [134615.46 147198.87 127716.82 'California']
 [130298.13 145530.06 323876.68 'Florida']
 [120542.52 148718.95 311613.29 'New York']
 [123334.88 108679.17 304981.62 'California']
 [101913.08 110594.11 229160.95 'Florida']
 [100671.96 91790.61 249744.55 'California']
 [93863.75 127320.38 249839.44 'Florida']
 [91992.39 135495.07 252664.93 'California']
 [119943.24 156547.42 256512.92 'Florida']
 [114523.61 122616.84 261776.23 'New York']
 [78013.11 121597.55 264346.06 'California']
 [94657.16 145077.58 282574.31 'New York']
 [91749.16 114175.79 294919.57 'Florida']
 [86419.7 153514.11 0.0 'New York']
 [76253.86 113867.3 298664.47 'California']
 [78389.47 153773.43 299737.29 'New York']
 [73994.56 122782.75 303319.26 'Florida']
 [67532

# Encoding the categorical data

In [6]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers = [('encoder', OneHotEncoder(), [3])], remainder = "passthrough")
X = np.array(ct.fit_transform(X))

In [7]:
print(X)

[[0.0 0.0 1.0 165349.2 136897.8 471784.1]
 [1.0 0.0 0.0 162597.7 151377.59 443898.53]
 [0.0 1.0 0.0 153441.51 101145.55 407934.54]
 [0.0 0.0 1.0 144372.41 118671.85 383199.62]
 [0.0 1.0 0.0 142107.34 91391.77 366168.42]
 [0.0 0.0 1.0 131876.9 99814.71 362861.36]
 [1.0 0.0 0.0 134615.46 147198.87 127716.82]
 [0.0 1.0 0.0 130298.13 145530.06 323876.68]
 [0.0 0.0 1.0 120542.52 148718.95 311613.29]
 [1.0 0.0 0.0 123334.88 108679.17 304981.62]
 [0.0 1.0 0.0 101913.08 110594.11 229160.95]
 [1.0 0.0 0.0 100671.96 91790.61 249744.55]
 [0.0 1.0 0.0 93863.75 127320.38 249839.44]
 [1.0 0.0 0.0 91992.39 135495.07 252664.93]
 [0.0 1.0 0.0 119943.24 156547.42 256512.92]
 [0.0 0.0 1.0 114523.61 122616.84 261776.23]
 [1.0 0.0 0.0 78013.11 121597.55 264346.06]
 [0.0 0.0 1.0 94657.16 145077.58 282574.31]
 [0.0 1.0 0.0 91749.16 114175.79 294919.57]
 [0.0 0.0 1.0 86419.7 153514.11 0.0]
 [1.0 0.0 0.0 76253.86 113867.3 298664.47]
 [0.0 0.0 1.0 78389.47 153773.43 299737.29]
 [0.0 1.0 0.0 73994.56 122782.75 3

In [8]:
y = y.reshape(len(y),1)

## Splitting the dataset into training and test dataset

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size =0.2, random_state = 42)

In [10]:
print(X_train.shape[0])

40


In [11]:
print(X_train)

[[0.0 1.0 0.0 93863.75 127320.38 249839.44]
 [0.0 1.0 0.0 142107.34 91391.77 366168.42]
 [1.0 0.0 0.0 44069.95 51283.14 197029.42]
 [0.0 0.0 1.0 120542.52 148718.95 311613.29]
 [0.0 0.0 1.0 144372.41 118671.85 383199.62]
 [1.0 0.0 0.0 134615.46 147198.87 127716.82]
 [0.0 1.0 0.0 27892.92 84710.77 164470.71]
 [0.0 1.0 0.0 1315.46 115816.21 297114.46]
 [1.0 0.0 0.0 0.0 135426.92 0.0]
 [0.0 0.0 1.0 114523.61 122616.84 261776.23]
 [1.0 0.0 0.0 123334.88 108679.17 304981.62]
 [1.0 0.0 0.0 78013.11 121597.55 264346.06]
 [0.0 0.0 1.0 77044.01 99281.34 140574.81]
 [1.0 0.0 0.0 46426.07 157693.92 210797.67]
 [0.0 0.0 1.0 61136.38 152701.92 88218.23]
 [0.0 0.0 1.0 165349.2 136897.8 471784.1]
 [1.0 0.0 0.0 22177.74 154806.14 28334.72]
 [0.0 0.0 1.0 72107.6 127864.55 353183.81]
 [0.0 1.0 0.0 55493.95 103057.49 214634.81]
 [0.0 0.0 1.0 131876.9 99814.71 362861.36]
 [0.0 0.0 1.0 65605.48 153032.06 107138.38]
 [1.0 0.0 0.0 100671.96 91790.61 249744.55]
 [0.0 1.0 0.0 28663.76 127056.21 201126.82]
 [1.

In [12]:
print(X_test)

[[1.0 0.0 0.0 91992.39 135495.07 252664.93]
 [1.0 0.0 0.0 38558.51 82982.09 174999.3]
 [0.0 1.0 0.0 61994.48 115641.28 91131.24]
 [0.0 0.0 1.0 1000.23 124153.04 1903.93]
 [0.0 0.0 1.0 94657.16 145077.58 282574.31]
 [0.0 0.0 1.0 542.05 51743.15 0.0]
 [0.0 1.0 0.0 75328.87 144135.98 134050.07]
 [1.0 0.0 0.0 64664.71 139553.16 137962.62]
 [1.0 0.0 0.0 63408.86 129219.61 46085.25]
 [0.0 0.0 1.0 86419.7 153514.11 0.0]]


In [13]:
print(y_train)

[[141585.52]
 [166187.94]
 [ 89949.14]
 [152211.77]
 [182901.99]
 [156122.51]
 [ 77798.83]
 [ 49490.75]
 [ 42559.73]
 [129917.04]
 [149759.96]
 [126992.93]
 [108552.04]
 [ 96712.8 ]
 [ 97483.56]
 [192261.83]
 [ 65200.33]
 [105008.31]
 [ 96778.92]
 [156991.12]
 [101004.64]
 [144259.4 ]
 [ 90708.19]
 [191792.06]
 [111313.02]
 [191050.39]
 [ 69758.98]
 [ 96479.51]
 [108733.99]
 [ 78239.91]
 [146121.95]
 [110352.25]
 [124266.9 ]
 [ 14681.4 ]
 [118474.03]
 [155752.6 ]
 [ 71498.49]
 [132602.65]
 [103282.38]
 [ 81229.06]]


In [14]:
print(y_test)

[[134307.35]
 [ 81005.76]
 [ 99937.59]
 [ 64926.08]
 [125370.37]
 [ 35673.41]
 [105733.54]
 [107404.34]
 [ 97427.84]
 [122776.86]]


In [15]:
#Avoiding dummy variable trap
X_train = X_train[:, 1:]
X_test = X_test[:, 1:]

In [16]:
print(X_train.shape[0])

40


In [17]:
print(X_test)

[[0.0 0.0 91992.39 135495.07 252664.93]
 [0.0 0.0 38558.51 82982.09 174999.3]
 [1.0 0.0 61994.48 115641.28 91131.24]
 [0.0 1.0 1000.23 124153.04 1903.93]
 [0.0 1.0 94657.16 145077.58 282574.31]
 [0.0 1.0 542.05 51743.15 0.0]
 [1.0 0.0 75328.87 144135.98 134050.07]
 [0.0 0.0 64664.71 139553.16 137962.62]
 [0.0 0.0 63408.86 129219.61 46085.25]
 [0.0 1.0 86419.7 153514.11 0.0]]


In [18]:
print(X_train)

[[1.0 0.0 93863.75 127320.38 249839.44]
 [1.0 0.0 142107.34 91391.77 366168.42]
 [0.0 0.0 44069.95 51283.14 197029.42]
 [0.0 1.0 120542.52 148718.95 311613.29]
 [0.0 1.0 144372.41 118671.85 383199.62]
 [0.0 0.0 134615.46 147198.87 127716.82]
 [1.0 0.0 27892.92 84710.77 164470.71]
 [1.0 0.0 1315.46 115816.21 297114.46]
 [0.0 0.0 0.0 135426.92 0.0]
 [0.0 1.0 114523.61 122616.84 261776.23]
 [0.0 0.0 123334.88 108679.17 304981.62]
 [0.0 0.0 78013.11 121597.55 264346.06]
 [0.0 1.0 77044.01 99281.34 140574.81]
 [0.0 0.0 46426.07 157693.92 210797.67]
 [0.0 1.0 61136.38 152701.92 88218.23]
 [0.0 1.0 165349.2 136897.8 471784.1]
 [0.0 0.0 22177.74 154806.14 28334.72]
 [0.0 1.0 72107.6 127864.55 353183.81]
 [1.0 0.0 55493.95 103057.49 214634.81]
 [0.0 1.0 131876.9 99814.71 362861.36]
 [0.0 1.0 65605.48 153032.06 107138.38]
 [0.0 0.0 100671.96 91790.61 249744.55]
 [1.0 0.0 28663.76 127056.21 201126.82]
 [0.0 0.0 162597.7 151377.59 443898.53]
 [0.0 1.0 78389.47 153773.43 299737.29]
 [1.0 0.0 153441

## Feature Scaling

In [19]:
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X_train = sc_X.fit_transform(X_train)
y_train = sc_y.fit_transform(y_train)
X_test = sc_X.transform(X_test)
y_test = sc_y.transform(y_test)

In [20]:
print(X_train.shape[0])

40


In [21]:
print(X_train)

[[ 1.36277029 -0.69388867  0.34202149  0.22787678  0.12425038]
 [ 1.36277029 -0.69388867  1.36207849 -1.0974737   1.14990688]
 [-0.73379939 -0.69388867 -0.71081297 -2.5770186  -0.34136825]
 [-0.73379939  1.44115338  0.90611438  1.0172367   0.66890185]
 [-0.73379939  1.44115338  1.40997088 -0.09115403  1.30006861]
 [-0.73379939 -0.69388867  1.20367103  0.96116332 -0.95248784]
 [ 1.36277029 -0.69388867 -1.05285826 -1.34392538 -0.62843389]
 [ 1.36277029 -0.69388867 -1.61480906 -0.19649414  0.54106768]
 [-0.73379939 -0.69388867 -1.642623    0.52691442 -2.07854935]
 [-0.73379939  1.44115338  0.77885123  0.05437051  0.2294954 ]
 [-0.73379939 -0.69388867  0.96515572 -0.45976843  0.61043134]
 [-0.73379939 -0.69388867  0.00687736  0.01677049  0.25215324]
 [-0.73379939  1.44115338 -0.01361318 -0.80643974 -0.83912073]
 [-0.73379939 -0.69388867 -0.66099544  1.34830937 -0.2199755 ]
 [-0.73379939  1.44115338 -0.34996231  1.16416226 -1.30074144]
 [-0.73379939  1.44115338  1.85350175  0.58117289  2.08

In [22]:
print(X_test)

[[-0.73379939 -0.69388867  0.30245367  0.52942836  0.14916233]
 [-0.73379939 -0.69388867 -0.82734624 -1.40769369 -0.53560477]
 [ 1.36277029 -0.69388867 -0.33181874 -0.20294703 -1.27505783]
 [-0.73379939  1.44115338 -1.62147425  0.11103854 -2.06176266]
 [-0.73379939  1.44115338  0.35879726  0.88291223  0.41286919]
 [-0.73379939  1.44115338 -1.63116196 -2.56004955 -2.07854935]
 [ 1.36277029 -0.69388867 -0.04987791  0.84817808 -0.89664846]
 [-0.73379939 -0.69388867 -0.2753597   0.67912498 -0.86215204]
 [-0.73379939 -0.69388867 -0.30191325  0.29793642 -1.67222209]
 [-0.73379939  1.44115338  0.18462534  1.19412269 -2.07854935]]


In [23]:
print(y_train)

[[ 0.62505458]
 [ 1.21802035]
 [-0.61948176]
 [ 0.8811677 ]
 [ 1.62086118]
 [ 0.97542407]
 [-0.91232766]
 [-1.59460698]
 [-1.76165793]
 [ 0.34382171]
 [ 0.82207435]
 [ 0.27334502]
 [-0.17111599]
 [-0.45646451]
 [-0.4378877 ]
 [ 1.84645138]
 [-1.2159758 ]
 [-0.25652672]
 [-0.45487089]
 [ 0.99635925]
 [-0.35302289]
 [ 0.68950024]
 [-0.60118719]
 [ 1.83512902]
 [-0.10457105]
 [ 1.81725334]
 [-1.10610355]
 [-0.46208725]
 [-0.16673065]
 [-0.90169678]
 [ 0.73439129]
 [-0.12772746]
 [ 0.20764244]
 [-2.43357944]
 [ 0.0680231 ]
 [ 0.96650853]
 [-1.064178  ]
 [ 0.40855009]
 [-0.29812496]
 [-0.8296525 ]]


In [24]:
print(y_test)

[[ 0.44963665]
 [-0.83503446]
 [-0.37874085]
 [-1.22258576]
 [ 0.2342382 ]
 [-1.92763152]
 [-0.23904728]
 [-0.19877777]
 [-0.43923066]
 [ 0.1717296 ]]


## Implementing multiple linear regression from scratch using gradient descent

In [153]:
#Initializing b = 0, a1=0, a2 = 0, a3 =0, a4 =0, a5 = 0 where cost function = 1/2n * summation from i = 0 to n (y - y_pred)^2  and y_pred = b + a1*x1 + a2*x2 + a3*x3 + a4*x4 + a5*x5
b = 2
a1 = 0.5
a2 = 0.3
a3 = 0.8
a4 = 0.45
a5 = 0.9

n = X_train.shape[0] 

#Initializing learning rate (L) and epochs(iterations) and update it through hit and trial
L = 0.00001
epochs = 104000

for i in range(epochs):
    y_predt = a1 * X_train[:, 0] + a2 * X_train[:, 1] + a3*X_train[:,2] + a4*X_train[:,3] + a5*X_train[:,4]  + b      #The current predicted value of y 
    db = 1/n * sum(sum(y_predt - y_train))                       # partial derivative of cost function w.r.t b
    da1 = 1/n * sum(sum(X_train[:, 0] * (y_predt - y_train)))     #partial derivative of cost function w.r.t a1
    da2 = 1/n * sum(sum(X_train[:, 1] * (y_predt - y_train)))      #partial derivative of cost function w.r.t a2
    da3 = 1/n * sum(sum(X_train[:, 2] * (y_predt - y_train)))      #partial derivative of cost function w.r.t a3
    da4 = 1/n * sum(sum(X_train[:, 3] * (y_predt - y_train)))      #partial derivative of cost function w.r.t a4
    da5 = 1/n * sum(sum(X_train[:, 4] * (y_predt - y_train)))      #partial derivative of cost function w.r.t a5
    b = b - L * db                                              #update b
    a1 = a1 - L * da1                                           #update a1
    a2 = a2 - L * da2                                           # update a2
    a3 = a3 - L * da3                                           # update a3
    a4 = a4 - L * da4                                           # update a4
    a5 = a5 - L * da5                                           # update a5
    
    
print(b, a1, a2, a3, a4, a5)

-2.831662344411297e-16 -3.3191154192874676e-06 -1.4478776906328434e-06 -1.4887954477304283e-05 4.878154848737899e-06 1.547251864427974e-05


In [154]:
y_predtrain =  sc_y.inverse_transform( b + a1 * X_train[:,0]+ a2 * X_train[:, 1] + a3*X_train[:,2] + a4*X_train[:,3] + a5*X_train[:,4])

In [155]:
print(y_predtrain)

[115651.48913088 115651.24921906 115651.56158573 115651.81055901
 115651.68017298 115650.70279508 115651.54943687 115652.87957027
 115651.65019458 115651.4122072  115651.56587025 115652.0242556
 115651.04148274 115652.40321664 115651.35174908 115652.04367328
 115651.66560102 115652.522747   115651.60985857 115651.58746795
 115651.40293387 115651.42312647 115652.06300147 115652.15814138
 115652.33162746 115651.41040901 115651.46048411 115651.70806801
 115651.98290194 115652.12664598 115651.14207843 115652.01745948
 115651.67376765 115651.76817983 115652.1837631  115651.56828335
 115651.88617601 115651.40449788 115651.52005894 115651.78760183]


In [156]:
from sklearn.metrics import mean_squared_error, r2_score
print(mean_squared_error(sc_y.inverse_transform(y_train), y_predtrain))


1721466687.1597614
