In [46]:
import numpy as np
from sklearn.datasets import load_diabetes

In [47]:
# Load the diabetes regression dataset as a (features, target) pair of arrays.
x, y = load_diabetes(return_X_y=True)

In [48]:
# Sanity-check the dimensions: 442 samples with 10 features, one target each.
x.shape, y.shape

((442, 10), (442,))

In [49]:
# Peek at the feature matrix (the repr elides the middle rows and columns).
x

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]])

In [50]:
# Inspect the target values the models below are trained to predict.
y

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
        69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
        68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
        87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
       259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,
       128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,
       150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,
       200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,
        42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,
        83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,
       104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,
       173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,
       107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,
        60., 174., 259., 178., 128.,  96., 126., 28

### Using Sklearn's Linear Regression

In [51]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible across runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Baseline: scikit-learn's ordinary least-squares model, scored with R^2 on the
# held-out samples.
lr = LinearRegression().fit(xtrain, ytrain)
ypred = lr.predict(xtest)
r2_score(ytest, ypred)

0.4526027629719195

### Implementing our own Multiple LR class

In [52]:
class LR:
    """Ordinary least-squares linear regression with an intercept term.

    Both attributes are ``None`` until :meth:`fit` has been called.

    Attributes
    ----------
    coef_ : weights of the features, one per column of the training data.
    intercept_ : weight of the constant column that ``fit`` prepends.
    """

    def __init__(self):
        self.coef_ = None
        self.intercept_ = None

    def fit(self, xtrain, ytrain):
        """Estimate ``intercept_`` and ``coef_`` from the training data.

        Parameters
        ----------
        xtrain : 2-D array-like (rows are samples, columns are features).
        ytrain : array-like with one target per row of ``xtrain``.

        Returns
        -------
        self, so that calls can be chained (``LR().fit(X, y).predict(X)``).
        """
        features = np.asarray(xtrain, dtype=float)
        targets = np.asarray(ytrain, dtype=float)

        # Prepend a column of ones so the first fitted weight is the intercept.
        design = np.insert(features, 0, 1, axis=1)

        # Solve min ||design @ betas - targets||. For a full-rank design this is
        # the normal-equation solution betas = (XtX)^-1 XtY, but lstsq never
        # forms or inverts XtX, so collinear / duplicated features yield the
        # minimum-norm solution instead of a LinAlgError or unstable weights.
        betas, *_ = np.linalg.lstsq(design, targets, rcond=None)

        self.intercept_ = betas[0]
        self.coef_ = betas[1:]
        return self

    def predict(self, xtest):
        """Return ``intercept_ + xtest @ coef_`` for each row of ``xtest``.

        ``xtest`` must have the same number of columns as the data passed to
        :meth:`fit`.
        """
        return self.intercept_ + np.dot(np.asarray(xtest), self.coef_)

In [53]:
# Fit the hand-written LR on the same train/test split and score it with R^2,
# for comparison against the scikit-learn result above.
# NOTE: this rebinds `lr`, which previously held the scikit-learn model.
lr = LR()
lr.fit(xtrain, ytrain)
pred = lr.predict(xtest)
r2_score(ytest, pred)

0.4526027629719197

In [54]:
# Show the training features with a leading column of ones (the intercept term).
np.insert(xtrain, 0, 1, axis=1)

array([[ 1.00000000e+00,  7.07687525e-02,  5.06801187e-02, ...,
         3.43088589e-02,  2.73640491e-02, -1.07769750e-03],
       [ 1.00000000e+00, -9.14709343e-03,  5.06801187e-02, ...,
         7.12099798e-02,  2.72478149e-04,  1.96328371e-02],
       [ 1.00000000e+00,  5.38306037e-03, -4.46416365e-02, ...,
        -2.59226200e-03,  1.70360713e-02, -1.35040182e-02],
       ...,
       [ 1.00000000e+00,  3.08108295e-02, -4.46416365e-02, ...,
        -3.94933829e-02, -1.09032507e-02, -1.07769750e-03],
       [ 1.00000000e+00, -1.27796319e-02, -4.46416365e-02, ...,
        -2.59226200e-03, -3.84597173e-02, -3.83566597e-02],
       [ 1.00000000e+00, -9.26954778e-02, -4.46416365e-02, ...,
        -3.94933829e-02, -5.14218980e-03, -1.07769750e-03]])

In [55]:
# Feature weights learned by the custom LR model (one per diabetes feature).
lr.coef_

array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
       -931.48884588,  518.06227698,  163.41998299,  275.31790158,
        736.1988589 ,   48.67065743])

In [56]:
# Intercept (bias term) learned by the custom LR model.
lr.intercept_

151.34560453986

### Checking that our multiple LR class also works with a single feature and target, as in simple LR

In [57]:
import os

import pandas as pd

# Location of the placement dataset. Set the PLACEMENT_CSV environment variable
# to point at your own copy; otherwise the original author's local path is used.
PLACEMENT_CSV = os.environ.get(
    "PLACEMENT_CSV",
    r"E:\Jupyter Notebooks\regression data placement.csv",
)
df = pd.read_csv(PLACEMENT_CSV)

In [58]:
# Preview the first rows: `cgpa` is the feature and `package` the target used below.
df.head()

Unnamed: 0,cgpa,package
0,6.89,3.26
1,5.12,1.98
2,7.82,3.25
3,7.42,3.67
4,6.94,3.57


In [59]:
# Same 80/20 split as before, now with a single feature column (`cgpa`) and
# `package` as the target. Double brackets keep the features two-dimensional.
xtrain, xtest, ytrain, ytest = train_test_split(
    df[["cgpa"]],
    df["package"],
    test_size=0.2,
    random_state=42,
)

# Fit the custom multiple-regression class on the one-feature problem and
# score it with R^2 on the held-out rows.
lr = LR()
lr.fit(xtrain, ytrain)
ypred = lr.predict(xtest)
r2_score(ytest, ypred)

0.7730984312051702

In [60]:
# Inspect the training features: a one-column DataFrame holding `cgpa`.
xtrain

Unnamed: 0,cgpa
79,7.18
197,7.21
38,8.62
24,6.53
122,5.12
...,...
106,6.13
14,7.73
92,7.90
179,7.14


In [61]:
# Convert the DataFrame to an array and prepend a column of ones (the intercept term).
np.insert(np.array(xtrain), 0, 1, axis=1)

array([[1.  , 7.18],
       [1.  , 7.21],
       [1.  , 8.62],
       [1.  , 6.53],
       [1.  , 5.12],
       [1.  , 6.93],
       [1.  , 7.15],
       [1.  , 7.48],
       [1.  , 4.85],
       [1.  , 7.61],
       [1.  , 5.84],
       [1.  , 6.75],
       [1.  , 7.89],
       [1.  , 5.91],
       [1.  , 7.12],
       [1.  , 8.44],
       [1.  , 7.91],
       [1.  , 7.69],
       [1.  , 6.93],
       [1.  , 7.11],
       [1.  , 9.31],
       [1.  , 5.98],
       [1.  , 8.1 ],
       [1.  , 8.94],
       [1.  , 6.87],
       [1.  , 7.39],
       [1.  , 5.95],
       [1.  , 8.11],
       [1.  , 7.11],
       [1.  , 5.64],
       [1.  , 9.26],
       [1.  , 6.78],
       [1.  , 5.53],
       [1.  , 6.89],
       [1.  , 7.4 ],
       [1.  , 6.94],
       [1.  , 8.31],
       [1.  , 7.19],
       [1.  , 7.95],
       [1.  , 5.42],
       [1.  , 7.82],
       [1.  , 8.99],
       [1.  , 6.07],
       [1.  , 6.26],
       [1.  , 7.28],
       [1.  , 4.79],
       [1.  , 7.3 ],
       [1.  ,