In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV

In [None]:
# Location of the red-wine quality CSV on the UCI archive; fields in this
# file are separated by semicolons rather than commas.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
data = pd.read_csv(filepath_or_buffer=url, sep=';')

In [None]:
# Per-column count of missing entries, to confirm no imputation is needed.
print("Missing values:\n", data.isna().sum())

In [None]:
# Print a heading, then let pandas write its DataFrame summary
# (column names, non-null counts, dtypes, memory usage) to stdout.
print("Data info:\n")
data.info()

In [None]:
# Engineered feature 1: sum of the three acid measurements.
data['total_acidity'] = (
    data['fixed acidity']
    .add(data['volatile acidity'])
    .add(data['citric acid'])
)
# Engineered feature 2: density divided by the combined free + total
# sulfur dioxide readings.
data['density_to_sulfur_ratio'] = data['density'].div(
    data['free sulfur dioxide'].add(data['total sulfur dioxide'])
)

In [None]:
features = data.columns.drop('quality')
scaler = StandardScaler()
data[features] = scaler.fit_transform(data[features])

In [None]:
X = data.drop('quality', axis=1)
y = data['quality']

In [None]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [None]:
# Baseline: ordinary least squares fitted on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
base_model = LinearRegression().fit(X_train, y_train)
base_model

In [None]:
# Score the baseline on the held-out validation split.
y_val_pred = base_model.predict(X_val)
base_mse = mean_squared_error(y_true=y_val, y_pred=y_val_pred)
print("Base model MSE:", base_mse)

In [None]:
# Search over Ridge's regularisation strength `alpha` using 5-fold
# cross-validation on the training split; candidates are ranked by
# negative mean squared error.
ridge = Ridge()
parameters = dict(alpha=[0.1, 1, 10, 100])
ridge_search = GridSearchCV(
    estimator=ridge,
    param_grid=parameters,
    scoring='neg_mean_squared_error',
    cv=5,
)
ridge_search.fit(X_train, y_train)

In [None]:
# Take the best estimator found by the grid search and score it on the
# validation split.
best_ridge_model = ridge_search.best_estimator_
ridge_mse = mean_squared_error(
    y_true=y_val,
    y_pred=best_ridge_model.predict(X_val),
)
print("Best Ridge model parameters:", ridge_search.best_params_)
print("Ridge model MSE on validation set:", ridge_mse)

In [None]:
# Final check: error of the tuned Ridge model on the test split.
y_test_pred = best_ridge_model.predict(X_test)
test_mse = mean_squared_error(y_true=y_test, y_pred=y_test_pred)
print("Test MSE:", test_mse)