In [None]:
# Wine Quality Prediction

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.impute import SimpleImputer
import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides library warnings (for example deprecation or
# solver-convergence messages) - consider narrowing the filter.
warnings.filterwarnings('ignore')


In [None]:
# Read the wine-quality CSV into the working frame `df` and preview it.
DATA_PATH = 'winequality.csv'
df = pd.read_csv(DATA_PATH)
df.head()


In [None]:
# Print a concise summary of `df` (columns, non-null counts, dtypes).
df.info()


In [None]:
# Descriptive statistics, transposed so that each column of `df` is shown as a row.
summary_stats = df.describe()
summary_stats.transpose()


In [None]:
# Count of missing entries in each column.
df.isna().sum()


In [None]:
# Fill missing values with the column mean.
# Only numeric columns are touched: a mean is undefined for text columns, and
# calling .mean() on one would raise (or give a meaningless result). Gaps in
# non-numeric columns are left for the later conversion / imputation steps.
# NOTE(review): the means are computed on the full dataset, i.e. before the
# train/test split further down - confirm that this is acceptable.
for col in df.select_dtypes(include='number').columns:
    if df[col].isna().any():
        df[col] = df[col].fillna(df[col].mean())

# Total number of missing values still present (0 when every gap was numeric).
df.isnull().sum().sum()


In [None]:
# Histogram grid for the columns of `df`: 20 bins each, on a 10x10 inch figure.
hist_axes = df.hist(figsize=(10, 10), bins=20)
plt.show()


In [None]:
# Alcohol content versus quality score.
# Passing the raw columns to `bar` draws one bar per row; the bars for a given
# quality value are stacked on top of each other, so only the tallest one is
# visible and thousands of rectangles are rendered for nothing. Aggregate first
# and draw a single bar per quality score showing the mean alcohol content.
alcohol_by_quality = df.groupby('quality')['alcohol'].mean()

fig, ax = plt.subplots()
ax.bar(alcohol_by_quality.index, alcohol_by_quality.values)
ax.set_xlabel('Quality')
ax.set_ylabel('Alcohol')
ax.set_title('Mean alcohol by quality')
plt.show()


In [None]:
# Convert object columns to numeric (if any).
# Two cases are distinguished so that no feature is silently wiped out:
#   * the column holds (mostly) numeric text -> parse it, turning unparseable
#     entries into NaN so that the later imputation step can fill them;
#   * the column holds no parseable number at all (a purely textual label
#     column) -> coercion would turn the whole column into NaN, so encode each
#     distinct label as an integer code instead (labels sorted, codes 0, 1, ...).
for col in df.columns:
    if df[col].dtype == 'object':
        as_numeric = pd.to_numeric(df[col], errors='coerce')
        has_values = df[col].notna().any()
        if has_values and as_numeric.isna().all():
            codes, _ = pd.factorize(df[col], sort=True)
            # factorize marks missing entries with -1; keep them as NaN.
            df[col] = pd.Series(codes, index=df.index).where(codes >= 0)
        else:
            df[col] = as_numeric


In [None]:
# Boolean heatmap marking the column pairs whose correlation is above 0.7.
strong_corr = df.corr() > 0.7
fig, ax = plt.subplots(figsize=(12, 12))
sb.heatmap(strong_corr, annot=True, cbar=False, ax=ax)
plt.show()


In [None]:
# Remove the 'total sulfur dioxide' column, then derive the binary label:
# 1 when the quality score is above 5, otherwise 0.
df = df.drop(columns=['total sulfur dioxide'])
df['best quality'] = (df['quality'] > 5).astype('int64')


In [None]:
# The derived binary label is the prediction target; every other column except
# the raw quality score (and the label itself) is a model input.
target = df['best quality']
features = df.drop(columns=['quality', 'best quality'])


In [None]:
# Hold out 20% of the rows for validation (fixed seed for reproducibility).
xtrain, xtest, ytrain, ytest = train_test_split(
    features, target, test_size=0.2, random_state=40
)

# Fill remaining gaps with the per-column mean learned on the training split
# only, so that no statistic of the validation rows leaks into training.
imputer = SimpleImputer(strategy='mean')
xtrain = imputer.fit_transform(xtrain)
xtest = imputer.transform(xtest)

# Rescale every feature to [0, 1] using the training-split range.
# LogisticRegression and the RBF-kernel SVC fitted below are sensitive to
# feature magnitudes; without this step the widest-ranged columns dominate.
scaler = MinMaxScaler()
xtrain = scaler.fit_transform(xtrain)
xtest = scaler.transform(xtest)


In [None]:
def _positive_class_scores(model, X):
    """Return a continuous score for the positive class of a fitted classifier.

    ROC AUC ranks samples by score, so it needs confidence values rather than
    hard 0/1 predictions. The signed margin from ``decision_function`` is used
    when the estimator offers it; otherwise the second column of
    ``predict_proba`` (the probability of ``classes_[1]``) is used.
    """
    if hasattr(model, 'decision_function'):
        return model.decision_function(X)
    return model.predict_proba(X)[:, 1]


models = [LogisticRegression(), XGBClassifier(), SVC(kernel='rbf')]

# Fit each candidate and report ROC AUC on the training and validation splits.
# Scoring the hard labels from `predict` would collapse the ROC curve to a
# single threshold, so the continuous scores are passed instead.
for model in models:
    model.fit(xtrain, ytrain)
    train_auc = metrics.roc_auc_score(ytrain, _positive_class_scores(model, xtrain))
    val_auc = metrics.roc_auc_score(ytest, _positive_class_scores(model, xtest))
    print(f'{model}:\nTrain AUC: {train_auc}')
    print(f'Validation AUC: {val_auc}\n')


In [None]:
# Confusion matrix of the second entry of `models` on the held-out split.
chosen_model = models[1]
held_out_pred = chosen_model.predict(xtest)
cm = confusion_matrix(ytest, held_out_pred)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=chosen_model.classes_,
)
disp.plot()
plt.show()


In [None]:
# Text classification report for the second entry of `models` on the held-out split.
held_out_predictions = models[1].predict(xtest)
report_text = metrics.classification_report(ytest, held_out_predictions)
print(report_text)
