In [1]:
from sklearn.datasets import make_regression
from sklearn.datasets import load_diabetes

import numpy as np
import pandas as pd

### Making my own Linear Regression Class

In [10]:
class MeraLR:

    def __init__(self):
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_train):

        # inserting 1 in the first row (see X matrix in notes)
        X_train = np.insert(X_train, 0, 1, axis=1)    

        # calculating the coefficient matrix
        m1 = np.dot(X_train.T, X_train)    
        m1 = np.linalg.inv(m1)              # (XT.X)^-1
        m2 = X_train.T                      # XT
        m3 = y_train                        # Y


        beta_matrix = np.linalg.multi_dot([m1,m2,m3])   # need to dot product one by one [cannot do np.dot(m1,m2,m3) ]   
                                                        # or we can use np.linalg.multi_dot([m1,m2,m3]) 

        self.coef_ = beta_matrix[1:]      
        self.intercept_ = beta_matrix[0]  

    def predict(self, X_test):

        # B0 is added  seperately cuz X_test does not have 1 in first column
        # B1 - Bm is multiplied with X_test
        # simple logic and observation
        y_pred = self.intercept_ + np.dot(X_test, self.coef_)    
        return y_pred

In [11]:
X, y = load_diabetes(return_X_y=True)

X.shape, y.shape

((442, 10), (442,))

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

lr = MeraLR()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

r2_score(y_pred, y_test), lr.coef_, lr.intercept_

(0.09621602305047738,
 array([  47.74968054, -241.99090728,  531.97106288,  381.56286182,
        -918.50290455,  508.25778252,  116.95016447,  269.4923028 ,
         695.80811712,   26.32458203]),
 np.float64(151.66517559199332))

In [13]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

r2_score(y_pred, y_test), lr.coef_, lr.intercept_

(0.09621602305047694,
 array([  47.74968054, -241.99090728,  531.97106288,  381.56286182,
        -918.50290455,  508.25778252,  116.95016447,  269.4923028 ,
         695.80811712,   26.32458203]),
 np.float64(151.6651755919933))

In [None]:
y_pred = lr.predict

In [39]:
X, y = make_regression(n_samples=100, n_features=3, n_targets=1, noise=30, random_state=42)
data = pd.DataFrame({'X1' : X[:,0],
                     'X2' : X[:,1],
                     'target' : y})
data

Unnamed: 0,X1,X2,target
0,-0.792521,0.504987,11.309054
1,0.280992,-0.208122,-28.136884
2,0.791032,1.402794,50.357238
3,0.625667,-1.070892,-90.904419
4,-0.342715,-0.161286,-19.234373
...,...,...,...
95,0.651391,0.758969,15.442772
96,1.586017,2.133033,228.363047
97,0.010233,0.462103,15.894383
98,-0.234587,-0.420645,-57.190019


In [40]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

print(f'''
MAE = {mean_absolute_error(y_pred, y_test)}
MSE = {mean_squared_error(y_pred, y_test)}
RMSE = {mean_squared_error(y_pred, y_test)**0.5}
R2 Score = {r2_score(y_pred, y_test)}
''')


MAE = 27.111984750875404
MSE = 1073.7924404356081
RMSE = 32.76877233641212
R2 Score = 0.8450754047024209

