# Libraries

In [37]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Data

In [35]:
# Load seaborn's "mpg" dataset and discard every row containing a missing value.
data = sns.load_dataset("mpg").dropna()

# Feature matrix: all columns except the target ("mpg") and the "name" /
# "origin" columns, followed by one indicator column per distinct "origin" value.
numeric_features = data.drop(columns=["mpg", "name", "origin"])
origin_dummies = pd.get_dummies(data["origin"])
X = (
    pd.concat([numeric_features, origin_dummies], axis=1)
    .to_numpy()
    .astype("float64")
)

# Target vector: the "mpg" column as floats.
y = data["mpg"].to_numpy().astype("float64")

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Ridge (L2) Regression

Sources:
- <a href="https://www.youtube.com/watch?v=Q81RR3yKn30">StatQuest - Ridge (L2) Regression</a>
- <a href="https://towardsdatascience.com/regularization-in-machine-learning-76441ddcf99a">Prashant Gupta - Regularization in Machine Learning</a>

$$
RSS = \sum_{i=1}^{n} \left( y_i - \beta_0 - \sum_{j=1}^{p} \beta_j x_{ij} \right)^2
$$

$$
J \left( \beta \right) = RSS + \lambda \sum_{j=1}^{p} \beta_j^2
= \sum_{i=1}^{n} \left( y_i - \beta_0 - \sum_{j=1}^{p} \beta_j x_{ij} \right)^2 + \lambda \sum_{j=1}^{p} \beta_j^2
$$

In [40]:
from sklearn.linear_model import LinearRegression, Ridge

# Fit an ordinary least-squares baseline and a ridge model (default
# hyperparameters) on the same training split.
# NOTE(review): the features are not standardised before fitting; the L2
# penalty depends on feature scale, so scaling may change this comparison.
linReg = LinearRegression().fit(X_train, y_train)
ridge = Ridge().fit(X_train, y_train)

# Mean squared error of each model on the held-out test split.
linRegMSE = mean_squared_error(y_test, linReg.predict(X_test))
ridgeMSE = mean_squared_error(y_test, ridge.predict(X_test))

# Report the square root of each MSE (RMSE) so the error is in the units of y.
print(
    "Linear regression RMSE: {:.5f}\nRidge regression RMSE: {:.5f}".format(
        np.sqrt(linRegMSE), np.sqrt(ridgeMSE)
    )
)

Linear regression RMSE: 3.25611
Ridge regression RMSE: 3.25321


# Lasso (L1) Regression

Sources:
- <a href="https://www.youtube.com/watch?v=NGf0voTMlcs">StatQuest - Lasso (L1) Regression</a>
- <a href="https://towardsdatascience.com/regularization-in-machine-learning-76441ddcf99a">Prashant Gupta - Regularization in Machine Learning</a>

$$
J \left( \beta \right) = RSS + \lambda \sum_{j=1}^{p} \left| \beta_j \right|
= \sum_{i=1}^{n} \left( y_i - \beta_0 - \sum_{j=1}^{p} \beta_j x_{ij} \right)^2 + \lambda \sum_{j=1}^{p} \left| \beta_j \right|
$$

In [42]:
from sklearn.linear_model import LinearRegression, Lasso

# Refit the ordinary least-squares baseline so this cell stands on its own,
# then fit a lasso model (default hyperparameters) on the same training split.
# NOTE(review): the features are not standardised before fitting; the L1
# penalty depends on feature scale, so scaling may change this comparison.
linReg = LinearRegression().fit(X_train, y_train)
lasso = Lasso().fit(X_train, y_train)

# Mean squared error of each model on the held-out test split.
linRegMSE = mean_squared_error(y_test, linReg.predict(X_test))
lassoMSE = mean_squared_error(y_test, lasso.predict(X_test))

# Report the square root of each MSE (RMSE) so the error is in the units of y.
print(
    "Linear regression RMSE: {:.5f}\nLasso regression RMSE: {:.5f}".format(
        np.sqrt(linRegMSE), np.sqrt(lassoMSE)
    )
)

Linear regression RMSE: 3.25611
Lasso regression RMSE: 3.26483
