In [1]:
# Importing libraries
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge # Ridge L2 Coefficient (Square), LASSO L1 Coefficient (Absolute)
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

In [2]:
# Fetch seaborn's bundled "mpg" example dataset as a DataFrame
miles_per_gallon = sns.load_dataset("mpg")

# Preview the first rows
miles_per_gallon.head()

In [None]:
# Removing the 'origin' and 'name' columns before building the feature matrix.
# errors='ignore' keeps this cell safe to re-run: once the two columns are
# gone, a second execution would otherwise raise KeyError.
miles_per_gallon = miles_per_gallon.drop(columns=['origin', 'name'], errors='ignore')

miles_per_gallon.head()

In [None]:
# Discard every row that contains at least one missing value
miles_per_gallon = miles_per_gallon.dropna(axis="index", how="any")

# Remaining size as (rows, columns)
miles_per_gallon.shape

In [None]:
# Information on the dataframe: DataFrame.info() prints a summary of the
# columns (names, non-null counts, dtypes) for a quick sanity check after
# the column drop and dropna steps.
miles_per_gallon.info()

In [None]:
# Target (y): the 'mpg' column
y = miles_per_gallon['mpg']

# Features (X): every remaining column
X = miles_per_gallon.drop(columns='mpg')

In [None]:
# Dimensions of the feature matrix as (rows, columns)
X.shape

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Dimensions of the training feature set produced by the split, as (rows, columns)
X_train.shape

In [None]:
# Standardizing the features: the scaler learns its statistics from the
# training split only, and the same fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)

# NOTE: X_train / X_test are overwritten with the scaler's output
# (NumPy arrays by default, so the DataFrame column labels are not carried over).
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Preview only the first five scaled training rows instead of echoing the whole array
X_train[:5]

In [None]:
# Baseline: plain linear regression, trained without any penalty term
linear_model = LinearRegression().fit(X_train, y_train)

# Predict on the held-out split and measure the error (MSE).
# (THIS IS ONLY AN EXAMPLE TO UNDERSTAND RIDGE REGRESSION. THIS IS NOT THE PROPER WAY FOR A REAL PROJECT.)
y_pred_linear = linear_model.predict(X_test)
mse_linear = mean_squared_error(y_true=y_test, y_pred=y_pred_linear)

print(f'Mean Squared Error (Linear Regression): {mse_linear}')

In [None]:
# Ridge regression: alpha is the regularization coefficient (0.1 here).
# NOTE(review): scikit-learn documents random_state as only affecting the
# 'sag'/'saga' solvers, so with the default solver it is presumably inert - confirm.
ridge_model = Ridge(alpha=0.1, random_state=42).fit(X_train, y_train)

# Predict on the held-out split and measure the error (MSE).
# (THIS IS ONLY AN EXAMPLE TO UNDERSTAND RIDGE REGRESSION. THIS IS NOT THE PROPER WAY FOR A REAL PROJECT.)
y_pred_ridge = ridge_model.predict(X_test)
mse_ridge = mean_squared_error(y_true=y_test, y_pred=y_pred_ridge)

print(f'Mean Squared Error (Ridge Regression): {mse_ridge}')

In [None]:
# Ridge regression with the heavier penalty used in this notebook (alpha=1)
ridge_model_heavy = Ridge(alpha=1, random_state=42).fit(X_train, y_train)

# Predict on the held-out split and measure the error (MSE).
# (THIS IS ONLY AN EXAMPLE TO UNDERSTAND RIDGE REGRESSION. THIS IS NOT THE PROPER WAY FOR A REAL PROJECT.)
y_pred_ridge_heavy = ridge_model_heavy.predict(X_test)
mse_ridge_heavy = mean_squared_error(y_true=y_test, y_pred=y_pred_ridge_heavy)

print(f'Mean Squared Error (Ridge Regression Heavy Penalty): {mse_ridge_heavy}')

In [None]:
# Comparing coefficients: one single-column frame per model, each indexed by
# the feature names taken from X.
coef_linear = pd.Series(linear_model.coef_, index=X.columns).to_frame('Linear Regression Coefficient')
coef_ridge = pd.Series(ridge_model.coef_, index=X.columns).to_frame('Ridge Regression Coefficient')
coef_ridge_heavy = pd.Series(ridge_model_heavy.coef_, index=X.columns).to_frame('Ridge Regression Heavy Penalty Coefficient')

# Place the three columns side by side so the coefficients line up per feature
coef_comparison = pd.concat(
    [coef_linear, coef_ridge, coef_ridge_heavy],
    axis="columns",
)

coef_comparison