# Multiple Linear Regression

Here we have multiple features (columns), so we can model the target as a linear equation of all of them:

$$y = w_0 + w_1 x_1 + w_2 x_2 + w_3 x_3 + \dots + w_n x_n$$

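All the weights associated with the features can be found at once as a vector $W$ using the normal equation $W = (X^T X)^{-1} X^T y$, where $X$ has a leading column of ones for the intercept $w_0$.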

For details, see [Linear regression using matrix multiplication in Python using NumPy](https://cmdlinetips.com/2020/03/linear-regression-using-matrix-multiplication-in-python-using-numpy/).



This approach computes the inverse of a matrix, which takes $O(n^3)$ time (where $n$ is the number of features),
so it is not efficient for datasets with a large number of features.

For such datasets we use gradient descent to find the optimal weights instead; in sklearn, `SGDRegressor` (stochastic gradient descent) uses that concept.


In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,r2_score

In [9]:
class MyReg:
    """Ordinary least-squares linear regression solved in closed form.

    The model is ``y = w0 + w1*x1 + ... + wn*xn``. After ``fit``:

    * ``intercept_`` holds ``w0``.
    * ``coef_`` holds ``w1..wn`` (one weight per feature column).

    Both attributes are ``None`` until ``fit`` has been called, so ``fit``
    must be called before ``predict``.
    """

    def __init__(self):
        self.coef_ = None
        self.intercept_ = None

    def fit(self,X_train,y_train):
        """Estimate the intercept and feature weights from training data.

        Parameters
        ----------
        X_train : 2-D array-like
            Training features, one row per sample and one column per feature.
        y_train : array-like
            Training targets, one entry per row of ``X_train``.

        Returns
        -------
        MyReg
            The fitted estimator itself, so calls can be chained.
        """
        # Prepend a column of 1's so the first fitted weight is the intercept w0.
        X_design = np.insert(np.asarray(X_train),0,1,axis=1)

        # Solve the least-squares problem directly instead of forming
        # inv(X^T X): an explicit inverse fails (or silently returns garbage)
        # when features are collinear or X^T X is ill-conditioned. lstsq gives
        # the same weights for full-rank data and the minimum-norm solution
        # otherwise.
        betas = np.linalg.lstsq(X_design,np.asarray(y_train),rcond=None)[0]
        self.intercept_ = betas[0] #w0
        self.coef_ = betas[1:]  #w1,w2,w3,w4...wn  n->no of features

        print("intercept:"+str(self.intercept_))
        print("weights/coefficients:",self.coef_)

        return self

    def predict(self,X_test):
        """Return predictions ``X_test . coef_ + intercept_`` for each row of ``X_test``.

        ``X_test`` must have the same number of feature columns that were
        used in ``fit``.
        """
        y_pred = np.dot(X_test,self.coef_) + self.intercept_
        return y_pred


In [22]:
# Load the diabetes regression dataset from sklearn as (features, target).
X,Y = load_diabetes(return_X_y=True)

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
x_train,x_test,y_train,y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

# Fit the hand-written regressor on the training split.
mylr = MyReg()
mylr.fit(x_train, y_train)

# Score it on the held-out split.
y_pred = mylr.predict(x_test)
myreg_mse = mean_squared_error(y_test, y_pred)
myreg_r2 = r2_score(y_test, y_pred)
print("mse:", myreg_mse)
print("r2 score:", myreg_r2)

# Optional diagnostic plot (disabled): predicted vs. actual targets with a
# fitted trend line.
# plt.scatter(y_test,y_pred)
# plt.plot(np.unique(y_test),np.poly1d(np.polyfit(y_test,y_pred,1))(np.unique(y_test)))
# plt.xlabel("y_test")
# plt.ylabel("y_pred")
# plt.show()



intercept:151.88331005254165
weights/coefficients: [  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]
mse: 3094.4566715660653
r2 score: 0.43993386615689634




# Now use sklearn's LinearRegression for comparison


In [23]:

from sklearn.linear_model import LinearRegression

# Fit scikit-learn's reference implementation on the same training split
# (fit returns the estimator itself, so construction and fitting can be chained).
lr = LinearRegression().fit(x_train, y_train)
y_pred = lr.predict(x_test)

# Learned parameters, for side-by-side comparison with the custom model.
print(lr.intercept_)
print(lr.coef_)

# Same held-out metrics as for the custom model.
sklearn_mse = mean_squared_error(y_test, y_pred)
sklearn_r2 = r2_score(y_test, y_pred)
print("mse:", sklearn_mse)
print("r2 score:", sklearn_r2)

151.88331005254167
[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]
mse: 3094.4566715660626
r2 score: 0.4399338661568968
