## Multiple Linear Regression

In [1]:
import pandas as pd
import numpy as np


In [4]:
# Load the diabetes regression dataset; return_X_y=True yields the
# (features, target) pair directly instead of a Bunch container.
from sklearn.datasets import load_diabetes
X , y = load_diabetes(return_X_y=True)
X.shape

(442, 10)

In [5]:
# Sanity check: the target should have one entry per row of X.
y.shape

(442,)

In [7]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# shuffle (and therefore every score below) reproducible across runs.
from sklearn.model_selection import train_test_split
X_train , X_test , y_train , y_test = train_test_split(X, y , test_size=0.2 , random_state = 42 )
X_train.shape

(353, 10)

In [8]:
# Peek at the training feature matrix (NumPy abbreviates the repr with ellipses).
X_train

array([[ 0.07076875,  0.05068012,  0.01211685, ...,  0.03430886,
         0.02736405, -0.0010777 ],
       [-0.00914709,  0.05068012, -0.01806189, ...,  0.07120998,
         0.00027248,  0.01963284],
       [ 0.00538306, -0.04464164,  0.04984027, ..., -0.00259226,
         0.01703607, -0.01350402],
       ...,
       [ 0.03081083, -0.04464164, -0.02021751, ..., -0.03949338,
        -0.01090325, -0.0010777 ],
       [-0.01277963, -0.04464164, -0.02345095, ..., -0.00259226,
        -0.03845972, -0.03835666],
       [-0.09269548, -0.04464164,  0.02828403, ..., -0.03949338,
        -0.00514219, -0.0010777 ]])

Apply scikit-learn's built-in `LinearRegression`

In [10]:
# Reference model: scikit-learn's LinearRegression fitted on the training split.
# Its coefficients and score are the baseline the custom class is compared against.
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X_train , y_train)

In [31]:
# Predict on the held-out test rows with the scikit-learn model.
y_pred = lr.predict(X_test)

In [32]:
# Display the scikit-learn predictions for the test split.
y_pred

array([139.5475584 , 179.51720835, 134.03875572, 291.41702925,
       123.78965872,  92.1723465 , 258.23238899, 181.33732057,
        90.22411311, 108.63375858,  94.13865744, 168.43486358,
        53.5047888 , 206.63081659, 100.12925869, 130.66657085,
       219.53071499, 250.7803234 , 196.3688346 , 218.57511815,
       207.35050182,  88.48340941,  70.43285917, 188.95914235,
       154.8868162 , 159.36170122, 188.31263363, 180.39094033,
        47.99046561, 108.97453871, 174.77897633,  86.36406656,
       132.95761215, 184.53819483, 173.83220911, 190.35858492,
       124.4156176 , 119.65110656, 147.95168682,  59.05405241,
        71.62331856, 107.68284704, 165.45365458, 155.00975931,
       171.04799096,  61.45761356,  71.66672581, 114.96732206,
        51.57975523, 167.57599528, 152.52291955,  62.95568515,
       103.49741722, 109.20751489, 175.64118426, 154.60296242,
        94.41704366, 210.74209145, 120.2566205 ,  77.61585399,
       187.93203995, 206.49337474, 140.63167076, 105.59

In [33]:
# Coefficient of determination (R^2) of the scikit-learn model on the test split.
from sklearn.metrics import r2_score
r2_score(y_test , y_pred)


0.4526027629719195

In [17]:
lr.coef_   # fitted coefficient vector (beta_1 ... beta_p), one entry per feature column

array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
       -931.48884588,  518.06227698,  163.41998299,  275.31790158,
        736.1988589 ,   48.67065743])

In [18]:
lr.intercept_  # fitted intercept term (beta_0, "beta naught")

np.float64(151.34560453985995)

## Our own multiple regression

In [27]:
class custom_mlr :
  """Multiple linear regression fitted by ordinary least squares.

  The model is ``y = X . beta_coef + beta_naught_intercept``.

  Attributes set by ``fit`` (both are ``None`` until then):
    beta_coef: coefficients, one per feature column of the training matrix.
    beta_naught_intercept: intercept term (beta naught).
  """

  def __init__(self) :
    self.beta_coef = None
    self.beta_naught_intercept = None

  def fit(self , X_train , y_train) :
    """Estimate the intercept and coefficients from training data.

    Args:
      X_train: 2-D array-like of features, one row per sample.
      y_train: array-like of targets with one entry (or row) per sample.

    Returns:
      None. Results are stored on ``beta_coef`` and ``beta_naught_intercept``.
    """
    X_train = np.asarray(X_train , dtype=float)
    y_train = np.asarray(y_train , dtype=float)

    # Prepend a column of ones so the intercept is estimated together with
    # the coefficients and ends up as the first entry of Beta.
    X_design = np.insert(X_train , 0 , 1 , axis=1)

    # Solve the least-squares problem directly instead of forming and
    # inverting X^T X: same solution for a full-rank design matrix, but
    # better conditioned, and it returns the minimum-norm solution when
    # columns are collinear (where the explicit inverse fails or is garbage).
    Beta = np.linalg.lstsq(X_design , y_train , rcond=None)[0]

    self.beta_coef = Beta[1:]
    self.beta_naught_intercept = Beta[0]

  def predict(self , X_test):
    """Return predictions ``X_test . beta_coef + beta_naught_intercept``.

    Args:
      X_test: 2-D array-like with the same number of columns used in ``fit``.

    Raises:
      RuntimeError: if called before ``fit``.
    """
    if self.beta_coef is None or self.beta_naught_intercept is None :
      raise RuntimeError("custom_mlr.predict() called before fit()")
    y_pred = np.dot(X_test , self.beta_coef) + self.beta_naught_intercept
    return y_pred

In [28]:
# Fit the hand-written model on the same training split used for the scikit-learn model.
cmlr = custom_mlr()
cmlr.fit(X_train , y_train)


In [34]:
# NOTE: this rebinds y_pred, replacing the scikit-learn predictions stored
# under the same name by the earlier predict cell.
y_pred = cmlr.predict(X_test)

In [35]:
# R^2 of the custom model on the test split; r2_score comes from the earlier
# sklearn.metrics import cell, so that cell must have been run first.
r2_score(y_test ,y_pred)

0.45260276297192026

In [36]:
# Coefficients learned by the custom model; compare with lr.coef_ above.
cmlr.beta_coef

array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
       -931.48884588,  518.06227698,  163.41998299,  275.31790158,
        736.1988589 ,   48.67065743])

In [37]:
# Intercept learned by the custom model; compare with lr.intercept_ above.
cmlr.beta_naught_intercept

np.float64(151.34560453985998)