# Implementation of Multiple Linear Regression Algorithm

In [36]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import matplotlib.pyplot as plt

### Loading the Dataset and Splitting It into Train and Test Sets

In [37]:
# Loading sklearn's built-in diabetes dataset.
# return_X_y = True makes load_diabetes hand back a (features, target) pair,
# which is unpacked into X and y here.
X, y = load_diabetes(return_X_y = True)

In [38]:
# Display the feature matrix (NumPy abbreviates large arrays with "...").
X   # X contains the features; y contains the target values

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990842, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06832974, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286377, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04687948,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452837, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00421986,  0.00306441]])

In [39]:
# Sanity check: X is 2-D (samples, features) and y holds one target value per sample.
X.shape, y.shape

((442, 10), (442,))

In [40]:
# Hold out 20% of the samples as a test set; the fixed random_state makes the
# shuffle (and therefore every score below) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [41]:
# Report the shape of each split, in the order: train features, train targets,
# test features, test targets.
shape_report = (
    ("Shape of X_train : ", X_train),
    ("Shape of y_train : ", y_train),
    ("Shape of X_test : ", X_test),
    ("Shape of y_test : ", y_test),
)
for label, split in shape_report:
    print(label, split.shape)

Shape of X_train :  (353, 10)
Shape of y_train :  (353,)
Shape of X_test :  (89, 10)
Shape of y_test :  (89,)


### Using scikit-learn's LinearRegression() Class

In [42]:
# Fit scikit-learn's reference implementation on the training split. It serves
# as the baseline that the hand-written class further down is compared against.
reg = LinearRegression()
reg.fit(X_train, y_train)

LinearRegression()

In [43]:
# Predict targets for the held-out test features with the fitted baseline model.
y_pred = reg.predict(X_test)

In [44]:
# coef_ holds one learned weight per feature; intercept_ is the scalar bias term.
print("Co-efficients or Slopes or Weights : ", reg.coef_)
print("Intercepts : ", reg.intercept_)

Co-efficients or Slopes or Weights :  [  -9.16088483 -205.46225988  516.68462383  340.62734108 -895.54360867
  561.21453306  153.88478595  126.73431596  861.12139955   52.41982836]
Intercepts :  151.88334520854633


In [45]:
# R^2 (coefficient of determination) of the baseline model on the test split.
r2_score(y_test, y_pred)

0.4399387660024645

## Making Our own LinearRegression() Class for Multiple Features

In [46]:
class MyLinearRegression:
    """Ordinary least-squares linear regression for one or more features.

    The fitted model is ``y = X.dot(coef_) + intercept_``.

    Attributes
    ----------
    coef_ : ndarray or None
        One weight per feature column; ``None`` until :meth:`fit` is called.
    intercept_ : float or None
        The bias term; ``None`` until :meth:`fit` is called.
    """

    def __init__(self):
        # Both stay None until fit() has been called.
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_train):
        """Estimate the intercept and coefficients from training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training features.
        y_train : array-like of shape (n_samples,)
            Training targets.

        Notes
        -----
        Prints the learned coefficients and intercept, and stores them in
        ``coef_`` and ``intercept_``. Returns nothing.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)

        # Inserting a column with all 1's at the 0th index, so the intercept is
        # learned as the first element of the solution vector.
        design = np.insert(X_train, 0, 1, axis=1)

        # Solve the least-squares problem directly instead of forming
        # inv(X^T X) X^T y: explicitly inverting X^T X raises LinAlgError (or
        # silently loses precision) when features are collinear or the matrix
        # is ill-conditioned. lstsq gives the same answer for full-rank data
        # and the minimum-norm solution otherwise.
        betas, *_ = np.linalg.lstsq(design, y_train, rcond=None)

        self.intercept_ = betas[0]  # 0th element of 'betas' is the intercept.
        self.coef_ = betas[1:]  # All other elements of 'betas' are the coefficients.

        print("Co-efficients or Weights : ", self.coef_)
        print("Intercepts : ", self.intercept_)

    def predict(self, X_train):
        """Predict targets for the given feature rows.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Rows to predict for. (The name is kept for backward compatibility;
            any feature matrix, e.g. the test split, may be passed.)

        Returns
        -------
        ndarray of shape (n_samples,)
            ``X_train.dot(coef_) + intercept_``.

        Raises
        ------
        RuntimeError
            If called before :meth:`fit`.
        """
        if self.coef_ is None or self.intercept_ is None:
            raise RuntimeError("MyLinearRegression must be fitted before calling predict().")

        return np.asarray(X_train).dot(self.coef_) + self.intercept_

### Working with our own LinearRegression() Class

In [47]:
# Creating the class object and training it on the training split..
# (fit() prints the learned coefficients and intercept.)
lr = MyLinearRegression()
lr.fit(X_train, y_train)

Co-efficients or Weights :  [  -9.16088483 -205.46225988  516.68462383  340.62734108 -895.54360867
  561.21453306  153.88478595  126.73431596  861.12139955   52.41982836]
Intercepts :  151.88334520854633


In [48]:
# Predicting the target values for the X_test dataset with our own model..
y_pred1 = lr.predict(X_test)

In [49]:
# Checking the R^2 score of our class on the test split
# (the same metric that was computed for sklearn's model above)..
r2_score(y_test, y_pred1)

0.4399387660024646

##### Observe that our MyLinearRegression() class learns the same coefficients and intercept as scikit-learn's LinearRegression() class, and therefore achieves the same R² score on the test set (up to floating-point rounding).