In [1]:
import pandas as pd
import numpy as np

from sklearn.datasets import load_diabetes

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score,mean_absolute_error


In [2]:
X, y=load_diabetes(return_X_y=True)

In [3]:
X.shape, y.shape

((442, 10), (442,))

In [4]:
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=2)

In [5]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape


((353, 10), (89, 10), (353,), (89,))

In [6]:
lr=LinearRegression()

In [7]:
lr.fit(X_train, y_train)

In [8]:
y_predict=lr.predict(X_test)

In [9]:
f"R2 Score: {r2_score(y_test, y_predict)}"

'R2 Score: 0.4399338661568968'

In [11]:
lr.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

In [12]:
lr.intercept_

np.float64(151.88331005254167)

Building Own Class of Multiple Regression

In [20]:
class myLR:
  def __init__(self):
    self.coef_=None
    self.intercept_=None

  def fit(self, X_train, y_train):
    X_train=np.insert(X_train, 0, 1, axis=1)

    betas=np.linalg.inv(np.dot(X_train.T, X_train)).dot(X_train.T).dot(y_train)
    self.intercept_=int(betas[0])
    self.coef_=betas[1:]


  def predict(self, X_test):
    y_predict=np.dot(X_test, self.coef_) + self.intercept_
    return y_predict

In [21]:
my_lr=myLR()
my_lr.fit(X_train, y_train)

In [22]:
X_train.shape

(353, 10)

In [23]:
np.insert(X_train, 0, 1, axis=1).shape

(353, 11)

In [24]:
y_predict=my_lr.predict(X_test)

In [25]:
y_predict

array([153.23807804, 203.93504112, 124.05424348, 105.20619888,
       257.65154755, 255.44769735, 117.86756611, 118.64109691,
       100.6248573 , 189.65717656, 140.82325806, 171.63552956,
       173.45530644, 133.92611701, 293.25663531,  93.23467033,
       211.0872879 , 155.61248373, 133.32669423, 118.74333639,
       147.99511246, 164.12542404, 150.21690032, 175.15732751,
       132.39438642, 220.41224387, 196.28993936,  95.27445875,
        49.37681706, 229.60249311, 241.17742861, 113.22798212,
        66.19201412,  93.64612819, 200.3308437 , 166.16805196,
       158.99795794, 191.90415653, 113.6122032 , 232.59903546,
       139.94232039, 120.18473085, 191.39149766, 190.2440784 ,
       178.28534783, 147.46604596, 162.59083617, 275.93316879,
        99.29595426, 163.22224292, 254.92431183, 136.06331034,
       151.49172694, 107.03906877, 193.33593673,  76.46339787,
       117.62151474,  67.50004758, 153.40927524, 161.60509254,
       167.48457321, 155.99459317,  96.25860792, 237.28

In [26]:
r2_score(y_test, y_predict)

0.4393953929739828

In [27]:
lr.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

In [28]:
lr.intercept_

np.float64(151.88331005254167)