# Multiple Linear Regression with gredient descent


In [9]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

np.random.seed(42)

In [6]:
import seaborn as sns

df = sns.load_dataset("mpg").drop(columns=["name", "origin"])

In [53]:
x1 = np.random.random(100)
x2 = np.random.random(100)
X = pd.DataFrame(
    {
        "intercept": np.ones(100),
        "feature1": x1,
        "feature2": x2,
    },
)

y = X["intercept"] * 2 + X["feature1"] * 3 + X["feature2"] * 5

In [54]:
def computeGredient(X: pd.DataFrame, y: pd.Series, w: pd.Series):
    n = len(y)

    predictions = X @ w
    error = predictions - y

    gradient = (2 / n) * X.T @ error

    return gradient


def gradientDescent(X, y, lr, iterCount):
    n = X.shape[1]
    coefficents = np.ones(n)

    for _ in range(iterCount):
        gradients = computeGredient(X, y, coefficents)
        coefficents -= lr * gradients

    return coefficents


finalW = gradientDescent(X, y, 0.1, 10000)

print("Coefficents: ")
print(finalW)


y_pred = X @ finalW
ss_res = ((y - y_pred) ** 2).sum()
ss_tot = ((y - y.mean()) ** 2).sum()
r2 = 1 - (ss_res / ss_tot)

print(f"R2 Score: {r2}")

Coefficents: 
intercept    2.0
feature1     3.0
feature2     5.0
dtype: float64
R2 Score: 1.0


## Test Model with mpg data


In [57]:
df = sns.load_dataset("mpg")
df = df.dropna()
X = df.drop(columns=["mpg", "origin", "name"])
y = df["mpg"]

In [62]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [63]:
finalW = gradientDescent(X_train_scaled, y_train, 0.01, 10000)
finalW

array([-0.19672686,  0.10414905, -0.0869307 , -5.50960439,  0.17363964,
        2.75678422])

In [65]:
predictions = X_test_scaled @ finalW
ss_res = ((y_test - predictions) ** 2).sum()
ss_tot = ((y_test - y_test.mean()) ** 2).sum()
r2 = 1 - (ss_res / ss_tot)

In [66]:
r2

-9.95113347739311