# Multiple Linear Regression: scikit-learn vs. Building Our Own Regression Class

In [25]:
import numpy as np
from sklearn.datasets import load_diabetes

In [26]:
# Load the diabetes dataset as a (features, target) pair of NumPy arrays.
X,y = load_diabetes(return_X_y=True)

In [27]:
X

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]])

In [28]:
y[:10]

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310.])

In [29]:
X.shape, y.shape

((442, 10), (442,))

### Using scikit-learn's LinearRegression

In [30]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [31]:
# Confirm the split sizes: rows per partition and number of feature columns.
print('X_train Shape : ', X_train.shape)
print('X_test Shape : ', X_test.shape)

X_train Shape :  (353, 10)
X_test Shape :  (89, 10)


In [32]:
from sklearn.linear_model import LinearRegression
# Reference model: scikit-learn's linear regression with default settings.
reg = LinearRegression()

In [33]:
# Estimate the coefficients and intercept from the training split.
reg.fit(X_train,y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [34]:
# Predict targets for the held-out test rows with the scikit-learn model.
y_pred = reg.predict(X_test)

In [35]:
from sklearn.metrics import r2_score
# Score the scikit-learn predictions against the held-out targets.
print('R2 Score : ', r2_score(y_test, y_pred))

R2 Score :  0.4399338661568968


In [36]:
reg.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

In [37]:
reg.intercept_

151.88331005254167

# Building Our Own Regression Class

In [49]:
# X_train  # does not yet contain a leading column of ones (needed for the intercept term)

In [50]:
# print(np.insert(X_train, 0, 1, axis=1)) # now the first column is 1 in every row

In [51]:
# print(X_train.shape)
# print(np.insert(X_train, 0, 1, axis=1).shape)

In [55]:
class MeraLR:
    """Ordinary least squares linear regression with an intercept term.

    Mirrors the small part of scikit-learn's ``LinearRegression`` API used in
    this notebook: ``fit`` stores the learned parameters in ``coef_`` and
    ``intercept_``, and ``predict`` applies them to new rows.

    Attributes:
        coef_: Feature weights (B1..Bp); ``None`` until ``fit`` is called.
        intercept_: Bias term (B0); ``None`` until ``fit`` is called.
    """

    def __init__(self):
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_train):
        """Estimate the intercept and coefficients by least squares.

        Args:
            X_train: 2-D array-like of shape (n_samples, n_features).
            y_train: Array-like of targets with n_samples entries.

        Returns:
            None. Results are stored on ``self.intercept_`` and ``self.coef_``.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)

        # Prepend a column of ones so the first fitted parameter is the intercept.
        design = np.insert(X_train, 0, 1, axis=1)  # numpy.insert(arr, idx, values, axis=None)

        # Solve min ||design @ betas - y|| directly instead of forming
        # inv(X^T X) X^T y: an explicit inverse fails (or silently returns
        # garbage) when features are collinear and is less accurate otherwise.
        # For full-rank data this is the same solution as the normal equation;
        # for rank-deficient data lstsq returns the minimum-norm solution.
        betas, *_ = np.linalg.lstsq(design, y_train, rcond=None)

        self.intercept_ = betas[0]   # B0
        self.coef_ = betas[1:]       # B1 .. Bp

    # y = B0 + B1*X1 + B2*X2 + B3*X3.......
    def predict(self, X_test):
        """Return predictions for ``X_test`` using the fitted parameters.

        ``fit`` must be called first; until then ``coef_`` and ``intercept_``
        are ``None`` and this computation cannot succeed.

        Args:
            X_test: 2-D array-like of shape (n_samples, n_features).

        Returns:
            Array of predicted targets, one per row of ``X_test``.
        """
        return np.dot(X_test, self.coef_) + self.intercept_

In [56]:
# Instantiate the hand-written MeraLR regressor.
lr = MeraLR()

In [57]:
# Fit on the same training split that was used for the scikit-learn model.
lr.fit(X_train,y_train)

In [59]:
# Predict with the hand-written model. NOTE: this rebinds y_pred, replacing the
# scikit-learn predictions stored under the same name in an earlier cell.
y_pred = lr.predict(X_test)

In [60]:
# R2 of the hand-written model on the test split (r2_score was imported in an
# earlier cell, so that cell must have been run first).
r2_score(y_test,y_pred)

0.43993386615689634

In [61]:
lr.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

In [62]:
lr.intercept_

151.88331005254165