In [1]:
# 6.Hyperparameter Tuning
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_wine # import wine quality Dataset
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE


data_ar = pd.read_csv('/content/WineQT.csv')

 # Feature Scaling
scaler = StandardScaler()
X = data_ar.drop('quality', axis=1)
y = data_ar['quality']
X_scaled = scaler.fit_transform(X)


# Splitting Data
X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)



def evaluate_model(y_true, y_pred, model_name):
    print(f"--- {model_name} Evaluation ---")
    print(classification_report(y_true, y_pred,zero_division=1))
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
    print("Accuracy:", accuracy_score(y_true, y_pred))



# 6. Hyperparameter Tuning using Grid Search

# Logistic Regression Tuning
param_grid_logreg = {'C': [0.001, 0.01, 0.1, 1, 10, 100]}
grid_search_logreg = GridSearchCV(LogisticRegression(max_iter=5000), param_grid_logreg, cv=5)
grid_search_logreg.fit(X_train, y_train)
best_logreg = grid_search_logreg.best_estimator_
y_val_pred_best_logreg = best_logreg.predict(X_val)

# Decision Tree Tuning
param_grid_dt = {'max_depth': [3, 5, 7, 10, None], 'min_samples_split': [2, 5, 10]}
grid_search_dt = GridSearchCV(DecisionTreeClassifier(random_state=42), param_grid_dt, cv=5)
grid_search_dt.fit(X_train, y_train)
best_dt = grid_search_dt.best_estimator_
y_val_pred_best_dt = best_dt.predict(X_val)

# KNN Tuning
param_grid_knn = {'n_neighbors': [3, 5, 7, 10]}
grid_search_knn = GridSearchCV(KNeighborsClassifier(), param_grid_knn, cv=5)
grid_search_knn.fit(X_train, y_train)
best_knn = grid_search_knn.best_estimator_
y_val_pred_best_knn = best_knn.predict(X_val)

evaluate_model(y_val, y_val_pred_best_logreg, "Tuned Logistic Regression")
evaluate_model(y_val, y_val_pred_best_dt, "Tuned Decision Tree")
evaluate_model(y_val, y_val_pred_best_knn, "Tuned K-Nearest Neighbors")

--- Tuned Logistic Regression Evaluation ---
              precision    recall  f1-score   support

           4       1.00      0.00      0.00         3
           5       0.62      0.77      0.69        62
           6       0.65      0.63      0.64        81
           7       0.67      0.42      0.51        24
           8       1.00      0.00      0.00         1

    accuracy                           0.64       171
   macro avg       0.79      0.36      0.37       171
weighted avg       0.65      0.64      0.62       171

Confusion Matrix:
[[ 0  2  1  0  0]
 [ 0 48 13  1  0]
 [ 0 26 51  4  0]
 [ 0  2 12 10  0]
 [ 0  0  1  0  0]]
Accuracy: 0.6374269005847953
--- Tuned Decision Tree Evaluation ---
              precision    recall  f1-score   support

           3       0.00      1.00      0.00         0
           4       0.00      0.00      0.00         3
           5       0.69      0.74      0.71        62
           6       0.61      0.67      0.64        81
           7      