### Ridge and Lasso Regression with the Wine Quality Dataset

**Objective**: Predict wine quality based on physicochemical properties using Ridge and Lasso regression techniques.

**Dataset**: The Wine Quality dataset from the UCI Machine Learning Repository offers a good mix of features for demonstrating regularization techniques. As previously mentioned, it is available [here](https://archive.ics.uci.edu/ml/datasets/Wine+Quality). This notebook loads the red-wine file (`winequality-red.csv`).

In [None]:
# Import the necessary libraries for the task
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Read the semicolon-separated red-wine CSV straight from its URL
data_url = "https://raw.githubusercontent.com/HamoyeFoundation/DE/main/datasets/regression/winequality-red.csv"

data = pd.read_csv(data_url, sep=";")

# Preview the first rows as a quick sanity check of the parse
data.head()

In [None]:
# Explore the loaded dataset: show its (rows, columns) dimensions
data.shape

In [None]:
# Print a summary of the DataFrame (columns, dtypes, non-null counts)
data.info()

In [None]:
# Count the missing values in each column
data.isnull().sum()

In [None]:
# "quality" is the value to predict; every other column is a feature
y = data.loc[:, "quality"]
x = data.drop("quality", axis=1)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardize the features. The scaler is fitted on the training split only
# and then reused, unchanged, to transform the test split.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [None]:
# Fit a plain (unregularized) linear regression on the scaled training data
linear_reg = LinearRegression()
linear_reg.fit(X=x_train_scaled, y=y_train)

In [None]:
# Candidate regularization strengths, shared by the Ridge and Lasso searches
params = {
    "alpha": [0.001, 0.01, 0.1, 1, 10, 100],
}

In [None]:
# Search the alpha grid for Ridge with 5-fold cross-validation on the
# scaled training data
ridge = Ridge()
ridge_cv = GridSearchCV(estimator=ridge, param_grid=params, cv=5).fit(
    x_train_scaled, y_train
)

print(f"Best alpha for Ridge: {ridge_cv.best_params_['alpha']}")

In [None]:
# Take the Ridge model fitted with the best alpha. The grid search above uses
# GridSearchCV's default refit=True, so it has already refit that model on the
# full scaled training set; reuse it instead of training an identical copy.
ridge_best = ridge_cv.best_estimator_

In [None]:
# Search the alpha grid for Lasso with 5-fold cross-validation on the
# scaled training data
lasso = Lasso()
lasso_cv = GridSearchCV(estimator=lasso, param_grid=params, cv=5).fit(
    x_train_scaled, y_train
)
print(f"Best alpha for Lasso: {lasso_cv.best_params_['alpha']}")

In [None]:
# Take the Lasso model fitted with the best alpha. The grid search above uses
# GridSearchCV's default refit=True, so it has already refit that model on the
# full scaled training set; reuse it instead of training an identical copy.
lasso_best = lasso_cv.best_estimator_

In [None]:
# Mean squared error of each fitted model on the held-out test split
linear_reg_mse, ridge_mse, lasso_mse = (
    mean_squared_error(y_test, fitted.predict(x_test_scaled))
    for fitted in (linear_reg, ridge_best, lasso_best)
)

print(f"Linear Regression MSE: {linear_reg_mse}")
print(f"Ridge Regression MSE: {ridge_mse}")
print(f"Lasso Regression MSE: {lasso_mse}")

In [None]:
# R2 score of each fitted model on the held-out test split
linear_reg_r2, ridge_r2, lasso_r2 = (
    r2_score(y_test, fitted.predict(x_test_scaled))
    for fitted in (linear_reg, ridge_best, lasso_best)
)

print(f"Linear Regression R2 Score: {linear_reg_r2}")
print(f"Ridge Regression R2 Score: {ridge_r2}")
print(f"Lasso Regression R2 Score: {lasso_r2}")