# **Linear Regression**

In [48]:
import numpy as np
from sklearn.datasets import load_diabetes

In [49]:
# Load the diabetes dataset as a (features, target) pair of NumPy arrays.
X, y = load_diabetes(return_X_y=True)

In [50]:
# Inspect the feature matrix (one row per sample, one column per feature).
X

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]])

In [51]:
# Inspect the target vector (one value per sample).
y

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
        69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
        68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
        87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
       259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,
       128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,
       150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,
       200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,
        42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,
        83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,
       104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,
       173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,
       107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,
        60., 174., 259., 178., 128.,  96., 126., 28

In [52]:
# Sanity check: X and y must have the same number of rows.
print(X.shape)
print(y.shape)

(442, 10)
(442,)


In [53]:
# Hold out 10% of the samples for evaluation; the fixed random_state keeps the
# split identical across runs, so both models below see the same data.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=2)

In [54]:
# Confirm the size of the training partition produced by the split.
print(X_train.shape)
print(y_train.shape)

(397, 10)
(397,)


In [55]:
# Reference baseline: fit scikit-learn's LinearRegression on the training set.
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X_train, y_train)

In [56]:
# Predict targets for the held-out test samples with the baseline model.
y_pred = lr.predict(X_test)

In [57]:
# R^2 of the baseline on the test set; the custom model is scored the same way later.
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.5161508238085537

In [58]:
# Learned coefficients and intercept of the baseline, kept as the reference
# values the custom implementation should reproduce.
print(lr.coef_)
print(lr.intercept_)

[ -15.18701492 -235.26111545  479.42329303  350.84419985 -844.7635999
  529.18628628   91.92604391  117.00124179  810.36469769   56.03426843]
152.3439331860667


# **My Class: Linear Regression from Scratch with NumPy**

In [59]:
class MyLr:
  """Ordinary least squares linear regression implemented with NumPy.

  Exposes the small part of the scikit-learn estimator interface used in this
  notebook: ``fit``, ``predict``, ``coef_`` and ``intercept_``.

  Attributes:
    coef_: Per-feature weights; ``None`` until ``fit`` has been called.
    intercept_: Bias term; ``None`` until ``fit`` has been called.
  """

  def __init__(self):
    self.coef_ = None
    self.intercept_ = None

  def fit(self, X_train, y_train):
    """Estimate the intercept and coefficients by least squares.

    Args:
      X_train: 2-D array-like of shape (n_samples, n_features).
      y_train: Array-like of targets with n_samples entries.

    Returns:
      The fitted instance itself, so calls can be chained.
    """
    X_train = np.asarray(X_train, dtype=float)
    y_train = np.asarray(y_train, dtype=float)

    # Prepend a column of ones so the first solved parameter is the intercept.
    design = np.insert(X_train, 0, 1, axis=1)

    # Solve min ||design @ betas - y||^2 directly instead of forming and
    # inverting design.T @ design: explicit inversion fails (or silently
    # returns garbage) when features are collinear and squares the condition
    # number. lstsq returns the minimum-norm solution in the rank-deficient
    # case and the same solution as the normal equation otherwise.
    betas, *_ = np.linalg.lstsq(design, y_train, rcond=None)

    self.intercept_ = betas[0]
    self.coef_ = betas[1:]
    return self

  def predict(self, X_test):
    """Return predictions ``X_test @ coef_ + intercept_``.

    Args:
      X_test: Array-like whose last dimension has n_features entries.

    Returns:
      Array of predicted targets, one per row of ``X_test``.

    Raises:
      RuntimeError: If called before ``fit``.
    """
    if self.coef_ is None or self.intercept_ is None:
      raise RuntimeError("MyLr is not fitted yet; call fit() before predict().")
    return np.dot(X_test, self.coef_) + self.intercept_

In [60]:
# Fit the custom model on the same training split.
# NOTE: this rebinds `lr`, so the scikit-learn model fitted earlier is no longer reachable.
lr = MyLr()
lr.fit(X_train, y_train)

In [61]:
# Predict with the custom model; this overwrites the baseline's y_pred.
y_pred = lr.predict(X_test)

In [62]:
# Inspect the custom model's predictions for the test samples.
y_pred

array([153.35022382, 200.06847398, 124.95270772, 107.81692209,
       254.70931036, 253.75070687, 120.45781775, 123.35209917,
       100.45905897, 188.70755302, 139.30025158, 175.49587858,
       175.82500529, 134.99454985, 290.46842064,  90.62100955,
       212.09609804, 161.21751902, 133.33971802, 121.73395313,
       148.68883456, 160.71524911, 153.86174461, 173.4801726 ,
       132.39887177, 222.78807261, 195.35730584,  97.29830629,
        52.67637581, 228.47907793, 240.72093025, 118.38817644,
        72.22699959,  94.70336324, 198.92613534, 169.08961481,
       158.19174046, 191.21955929, 116.26942296, 236.58211438,
       138.14267257, 120.39928703, 188.98229903, 192.25433588,
       176.46052839])

In [63]:
# R^2 of the custom model on the test set; it should agree with the
# scikit-learn score up to floating-point rounding.
r2_score(y_test, y_pred)

0.5161508238085546

In [64]:
# Coefficients and intercept of the custom model, for comparison with the
# scikit-learn values printed earlier.
print(lr.coef_)
print(lr.intercept_)

[ -15.18701492 -235.26111545  479.42329303  350.84419985 -844.7635999
  529.18628628   91.92604391  117.00124179  810.36469769   56.03426843]
152.34393318606672
