<a href="https://colab.research.google.com/github/Mounika-Alwar/ML-Practice/blob/main/Linear_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Implement Linear Regression from scratch using NumPy

In [8]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression as SklearnLR

In [6]:
class LinearRegression:
  """Ordinary least-squares linear regression with an intercept term.

  The model is ``y = intercept + X @ slopes``. Coefficients are obtained by
  solving the least-squares problem on a design matrix whose first column is
  all ones.

  Attributes:
    intercept: Fitted bias term (``None`` until ``fit`` is called or a value
      is supplied to the constructor).
    slopes: One fitted coefficient per feature column (``None`` until
      ``fit`` is called or a value is supplied to the constructor).
  """

  def __init__(self,intercept=None,slopes=None):
    """Create a model, optionally pre-loaded with known coefficients."""
    self.intercept = intercept
    self.slopes = slopes

  @staticmethod
  def _design_matrix(X):
    """Return ``X`` as a float 2-D array with a leading column of ones."""
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
      raise ValueError(
        "Expected a 2-D array of shape (n_samples, n_features), "
        f"got an array with {X.ndim} dimension(s)."
      )
    return np.hstack((np.ones((X.shape[0], 1)), X))

  def fit(self,X_train,y_train):
    """Estimate the intercept and slopes from training data.

    Args:
      X_train: 2-D array-like of shape (n_samples, n_features).
      y_train: Array-like of targets with n_samples entries.

    Raises:
      ValueError: If ``X_train`` is not 2-D, has no rows, or its row count
        differs from the length of ``y_train``.
    """
    design = self._design_matrix(X_train)
    target = np.asarray(y_train, dtype=float)
    if design.shape[0] == 0:
      raise ValueError("Cannot fit a model on zero samples.")
    if target.shape[0] != design.shape[0]:
      raise ValueError(
        f"X_train has {design.shape[0]} rows but y_train has "
        f"{target.shape[0]} entries."
      )
    # lstsq solves the same least-squares problem as the normal equations
    # without forming an explicit inverse of X^T X, so collinear or
    # duplicated features yield the minimum-norm solution rather than a
    # LinAlgError or numerically meaningless coefficients.
    coeff, *_ = np.linalg.lstsq(design, target, rcond=None)
    self.intercept = coeff[0]
    self.slopes = coeff[1:]

  def predict(self,X_test):
    """Return predictions for each row of ``X_test``.

    Args:
      X_test: 2-D array-like of shape (n_samples, n_features).

    Returns:
      1-D array with one prediction per row of ``X_test``.

    Raises:
      RuntimeError: If the model has no coefficients yet.
      ValueError: If ``X_test`` is not 2-D.
    """
    if self.intercept is None or self.slopes is None:
      raise RuntimeError("Model has no coefficients; call fit() before predict().")
    design = self._design_matrix(X_test)
    coeff = np.hstack(([self.intercept],self.slopes))
    return design @ coeff

# Compare results with sklearn.linear_model.LinearRegression on a toy dataset.

In [21]:
# Load scikit-learn's bundled diabetes dataset and pull out the feature
# matrix (X) and the regression target (y).
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

In [23]:
# Keep only the first three feature columns; X is rebound to the narrower
# array, so later cells see 3 features.
X = X[:,:3]

In [24]:
# Sanity check: X and y must have the same number of rows.
X.shape,y.shape

((442, 3), (442,))

In [25]:
# Display the feature matrix.
# NOTE(review): consider X[:5] to keep the rendered output short.
X

array([[ 0.03807591,  0.05068012,  0.06169621],
       [-0.00188202, -0.04464164, -0.05147406],
       [ 0.08529891,  0.05068012,  0.04445121],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626],
       [-0.04547248, -0.04464164,  0.03906215],
       [-0.04547248, -0.04464164, -0.0730303 ]])

In [26]:
# Display the target values.
# NOTE(review): consider y[:10] to keep the rendered output short.
y

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
        69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
        68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
        87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
       259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,
       128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,
       150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,
       200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,
        42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,
        83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,
       104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,
       173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,
       107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,
        60., 174., 259., 178., 128.,  96., 126., 28

In [27]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs so both models are compared on the same data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [28]:
# Fit the from-scratch model on the training split and predict the
# held-out rows.
my_lr = LinearRegression()
my_lr.fit(X_train, y_train)
y_pred = my_lr.predict(X_test)

In [29]:
# Report the fitted coefficients and the test-set error metrics of the
# from-scratch model.
print("Custom Implementation")
print("Intercept:", my_lr.intercept)
print("Slopes:", my_lr.slopes)
print("MSE:", mean_squared_error(y_test, y_pred))
print("R2:", r2_score(y_test, y_pred))

Custom Implementation
Intercept: 151.8330790030991
Slopes: [156.20183803 -47.06639118 971.91191127]
MSE: 4056.6716187618736
R2: 0.23432324868805854


In [30]:
# Reference model: fit scikit-learn's LinearRegression (imported as
# SklearnLR) on the same training split and predict the same held-out rows.
sk_lr = SklearnLR()
sk_lr.fit(X_train, y_train)
y_pred_sklearn = sk_lr.predict(X_test)

In [31]:
# Report the reference model's coefficients and test-set error metrics.
print("\nScikit-Learn Implementation")
print("Intercept:", sk_lr.intercept_)
print("Slope:", sk_lr.coef_)
print("MSE:", mean_squared_error(y_test, y_pred_sklearn))
print("R2:", r2_score(y_test, y_pred_sklearn))

# Turn the visual comparison into a checked one: a fresh run of the notebook
# fails loudly here if the from-scratch model ever drifts from scikit-learn.
np.testing.assert_allclose(my_lr.intercept, sk_lr.intercept_)
np.testing.assert_allclose(my_lr.slopes, sk_lr.coef_)
np.testing.assert_allclose(y_pred, y_pred_sklearn)


Scikit-Learn Implementation
Intercept: 151.83307900309907
Slope: [156.20183803 -47.06639118 971.91191127]
MSE: 4056.671618761874
R2: 0.23432324868805843
