In [17]:
import numpy as np
from sklearn.datasets import load_diabetes

In [18]:
# Load the diabetes regression dataset as a feature matrix X and a target vector y.
X, y = load_diabetes(return_X_y=True)
# A cell renders only its final expression, so show just the target shape here.
y.shape

(442,)


### Using Sklearn's Linear Regression

In [19]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
  X,
  y,
  test_size=0.2,
  random_state=2,
)
print(X_train.shape, X_test.shape)

(353, 10) (89, 10)


In [20]:
from sklearn.linear_model import LinearRegression
# Fit scikit-learn's reference estimator (fit returns the estimator itself),
# then predict the held-out rows for scoring in the next cell.
lr = LinearRegression().fit(X_train, y_train)
y_pred = lr.predict(X_test)

In [21]:
from sklearn.metrics import r2_score
# Report the held-out R^2, then the learned coefficients and intercept, one per line.
print(r2_score(y_test, y_pred), lr.coef_, lr.intercept_, sep="\n")

0.4399338661568969
[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]
151.88331005254167


### Making our own Linear Regression Class

In [22]:
class MyLinearRegression:
  """Ordinary least-squares linear regression implemented with NumPy.

  Attributes:
    coef_: Per-feature weights (everything after the first fitted parameter),
      or None until fit() has been called.
    intercept_: Bias term (the first fitted parameter), or None until fit()
      has been called.
  """

  def __init__(self):
    self.coef_ = None
    self.intercept_ = None

  def fit(self, X_train, y_train):
    """Estimate the intercept and coefficients that minimise squared error.

    Args:
      X_train: 2-D array-like of shape (n_samples, n_features).
      y_train: Array-like of targets with n_samples entries.

    Returns:
      self, so that construction and fitting can be chained.
    """
    # Prepend a column of ones so the first fitted parameter is the intercept.
    # np.insert returns a new array, so the caller's X_train is not modified.
    design = np.insert(X_train, 0, 1, axis=1)

    # Solve the least-squares problem directly rather than inverting X^T X:
    # lstsq is better conditioned and still returns a (minimum-norm) solution
    # when features are collinear, a case where np.linalg.inv raises LinAlgError.
    betas, *_ = np.linalg.lstsq(design, y_train, rcond=None)
    self.intercept_ = betas[0]
    self.coef_ = betas[1:]
    return self

  def predict(self, X_test):
    """Return predictions X_test @ coef_ + intercept_ for a fitted model.

    Args:
      X_test: 2-D array-like with the same number of columns used in fit().

    Returns:
      Array of predicted targets, one per row of X_test.
    """
    return np.dot(X_test, self.coef_) + self.intercept_

In [23]:
# Train the hand-written model on the same split used for scikit-learn's estimator.
mlr = MyLinearRegression()
mlr.fit(X_train, y_train)
# fit() adds the column of ones to its own copy, so the original matrix keeps its shape.
X_train.shape

(353, 10)

In [24]:
# Score the hand-written model on its own predictions. r2_score must be *called*:
# rebinding the name to a tuple would shadow the imported function and make any
# later r2_score(...) call fail with "'tuple' object is not callable".
y_pred_mlr = mlr.predict(X_test)
mlr_r2 = r2_score(y_test, y_pred_mlr)
# The parameters should match the scikit-learn coefficients and intercept printed earlier.
print(mlr.coef_, mlr.intercept_)

[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238] 151.88331005254167
