In [19]:
import pandas as pd
import numpy as np


In [30]:
df = pd.read_csv("DS9.csv")

X = df[['TV', 'Radio', 'Newspaper']]
y = df['Sales']
df.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9


In [31]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [32]:
y_noisy = y_train.copy()

# Gaussian noise
y_noisy += np.random.normal(0, 3, size=len(y_noisy))

# Outliers
outliers = np.random.choice(len(y_noisy), size=10, replace=False)
y_noisy.iloc[outliers] *= 2


In [33]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

lr = LinearRegression()
lr.fit(X_train, y_noisy)

y_pred_lr = lr.predict(X_val)
mae_lr = mean_absolute_error(y_val, y_pred_lr)


In [34]:
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_val_s = scaler.transform(X_val)


In [35]:
def mini_batch_sgd(X, y, lr=0.01, epochs=500, batch_size=32):
    m, n = X.shape
    w = np.zeros(n)

    for _ in range(epochs):
        indices = np.random.permutation(m)
        for i in range(0, m, batch_size):
            idx = indices[i:i+batch_size]
            error = X[idx] @ w - y.iloc[idx]
            grad = (1/len(idx)) * X[idx].T @ error
            w -= lr * grad
    return w


In [36]:
w_gd = mini_batch_sgd(X_train_s, y_noisy)
y_pred_gd = X_val_s @ w_gd
mae_gd = mean_absolute_error(y_val, y_pred_gd)


In [37]:
mae_lr, mae_gd


(1.6221361268463723, 15.086228854487889)

In [38]:
coef_df = pd.DataFrame({
    "Sklearn_LR": lr.coef_,
    "GD_Coefficients": w_gd
}, index=X.columns)

coef_df


Unnamed: 0,Sklearn_LR,GD_Coefficients
TV,0.060233,5.067454
Radio,0.08206,1.213523
Newspaper,-0.006735,-0.130751


In [39]:
'''sklearn Linear Regression shows higher sensitivity to outliers, while Gradient Descent shows more stable coefficients under noisy conditions.'''

'sklearn Linear Regression shows higher sensitivity to outliers, while Gradient Descent shows more stable coefficients under noisy conditions.'