In [1]:
import numpy as np
from sklearn.datasets import load_diabetes

In [2]:
# return_X_y=True makes load_diabetes hand back the feature matrix and the
# target vector as a tuple, which is unpacked into X and y here.
X,y = load_diabetes(return_X_y = True)
# Display the feature matrix as the cell output.
X

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]])

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
# Hold out 20% of the samples as a test set; the fixed random_state keeps the
# split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.2, random_state = 2)


In [5]:
# Sanity check: (number of training samples, number of features).
X_train.shape

(353, 10)

In [6]:
from sklearn.linear_model import LinearRegression

In [7]:
# Baseline model: fit scikit-learn's LinearRegression on the training split
# (fit returns the estimator itself, so construction and fitting are chained),
# then predict the held-out test set.
reg = LinearRegression().fit(X_train, y_train)
y_pred = reg.predict(X_test)

In [8]:
from sklearn.metrics import r2_score

In [9]:
# R^2 of the scikit-learn baseline predictions against the held-out targets.
r2_score(y_test,y_pred)

0.439933866156897

In [10]:
# Coefficients learned by the scikit-learn baseline, one per feature column.
reg.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

# Making our own Linear Regression class


In [93]:
class CustomLR:
    """Ordinary least-squares linear regression with an intercept term.

    After ``fit`` has been called the learned parameters are available as:

    * ``intercept_`` -- the bias term (first entry of the solved beta vector).
    * ``coef_``      -- the remaining betas, one per feature column.
    """

    def __init__(self):
        # Both attributes stay None until fit() has run.
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_train):
        """Estimate the intercept and coefficients by least squares.

        Parameters
        ----------
        X_train : 2-D array-like, one row per sample and one column per feature.
        y_train : array-like of targets, one entry per row of ``X_train``.

        Returns
        -------
        self, so calls can be chained (``CustomLR().fit(X, y).predict(X_new)``).
        """
        # Prepend a column of ones so the first solved beta is the intercept.
        design = np.insert(X_train, 0, 1, axis=1)  # (array, index, value, axis=1 for column)

        # Solve min ||design @ betas - y||^2 directly instead of forming
        # (X^T X)^-1 X^T y: the explicit inverse squares the condition number
        # and breaks down when feature columns are collinear, whereas lstsq
        # returns the minimum-norm least-squares solution in that case.
        betas, *_ = np.linalg.lstsq(design, y_train, rcond=None)

        # Separate intercept and coefficients.
        self.intercept_ = betas[0]  # The first element is the intercept
        self.coef_ = betas[1:]      # The remaining are the coefficients

        print("Coefficient shape:", self.coef_.shape)  # Should match the number of features
        return self

    def predict(self, X_test):
        """Return predictions ``X_test @ coef_ + intercept_``.

        Parameters
        ----------
        X_test : 2-D array-like with the same number of feature columns that
            was used in ``fit``.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.coef_ is None or self.intercept_ is None:
            raise RuntimeError("CustomLR is not fitted yet; call fit() before predict().")

        # Equivalent to prepending a ones column and multiplying by the full
        # beta vector, without rebuilding either on every call.
        return np.dot(X_test, self.coef_) + self.intercept_

In [94]:
# Instantiate the hand-written regressor; coef_ and intercept_ are None until fit.
lr = CustomLR()

In [95]:
# Fit the custom model on the training split (fit prints the coefficient shape).
lr.fit(X_train,y_train)
# Show the test-set dimensions as the cell output.
X_test.shape


Coefficient shape: (10,)


(89, 10)

In [96]:
# Predict the held-out test set with the custom model. Note: this rebinds
# y_pred, which previously held the scikit-learn baseline predictions.
y_pred = lr.predict(X_test)
y_pred

array([154.1213881 , 204.81835118, 124.93755353, 106.08950893,
       258.5348576 , 256.3310074 , 118.75087616, 119.52440696,
       101.50816735, 190.54048661, 141.70656811, 172.51883961,
       174.33861649, 134.80942706, 294.13994537,  94.11798038,
       211.97059795, 156.49579378, 134.21000428, 119.62664644,
       148.87842251, 165.00873409, 151.10021038, 176.04063756,
       133.27769647, 221.29555392, 197.17324941,  96.1577688 ,
        50.26012711, 230.48580317, 242.06073866, 114.11129218,
        67.07532417,  94.52943825, 201.21415375, 167.05136201,
       159.881268  , 192.78746659, 114.49551325, 233.48234551,
       140.82563045, 121.0680409 , 192.27480772, 191.12738845,
       179.16865788, 148.34935601, 163.47414622, 276.81647884,
       100.17926432, 164.10555298, 255.80762189, 136.9466204 ,
       152.37503699, 107.92237882, 194.21924678,  77.34670792,
       118.50482479,  68.38335763, 154.29258529, 162.48840259,
       168.36788326, 156.87790322,  97.14191797, 238.16