In [None]:
# 🟡 Required Libraries
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 🟡 Load Dataset
# Directory holding the two wine-quality CSV files. It is kept in a single
# constant so the notebook can be pointed at another location by editing one
# line instead of every read call.
DATA_DIR = Path(r"C:\Users\Dragon\Downloads")

# Both files are parsed as semicolon-delimited text.
red = pd.read_csv(DATA_DIR / "winequality-red.csv", sep=';')
white = pd.read_csv(DATA_DIR / "winequality-white.csv", sep=';')

# Tag every row with its source file before stacking: 0 = red, 1 = white.
red['type'] = 0
white['type'] = 1

# Stack the two frames and renumber the index from 0.
df = pd.concat([red, white], ignore_index=True)

# 🟡 Create Classes
def _quality_to_label(q):
    """Map a quality score to a class id: 0 if q <= 5, 1 if q == 6, otherwise 2."""
    if q <= 5:
        return 0
    if q == 6:
        return 1
    return 2


# Derive the 3-class target, then remove the raw score column it was built from.
df['quality_label'] = df['quality'].map(_quality_to_label)
df = df.drop(columns='quality')

# 🟡 Feature Engineering (adds 10 derived columns)
# NOTE(review): the previous header said "to make 21 features" — confirm the
# final feature count against the loaded CSV schema.
#
# `assign` evaluates its keyword arguments in order, each against the frame
# built so far, so `sulfur_acid_ratio` can read the `total_acid` column
# created just above it.
df = df.assign(
    acid_index=lambda d: d['fixed acidity'] * d['volatile acidity'],
    sweetness_level=lambda d: d['residual sugar'] / (d['density'] + 1e-6),
    sulfur_ratio=lambda d: d['free sulfur dioxide'] / (d['total sulfur dioxide'] + 1),
    ph_sulphate=lambda d: d['pH'] * d['sulphates'],
    alcohol_density=lambda d: d['alcohol'] * d['density'],
    sugar_sulphate_ratio=lambda d: d['residual sugar'] / (d['sulphates'] + 1),
    chloride_acidity=lambda d: d['chlorides'] * d['fixed acidity'],
    total_acid=lambda d: d['fixed acidity'] + d['citric acid'] + d['volatile acidity'],
    sulfur_acid_ratio=lambda d: d['total sulfur dioxide'] / (d['total_acid'] + 1e-6),
    density_alcohol_ratio=lambda d: d['density'] / (d['alcohol'] + 1e-6),
)

# 🟡 Split & Scale
# Separate the class label from the predictor columns.
y = df['quality_label']
X = df.drop(columns='quality_label')

# Hold out 30% of the rows, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
from sklearn.linear_model import LogisticRegression

# Train a logistic-regression classifier (iteration cap raised to 1000) on the
# scaled training features, then predict the held-out rows.
model = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Test-set accuracy and weighted-average F1.
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')

print("Logistic Regression")
print("Accuracy:", acc)
print("F1 Score:", f1)

# Confusion matrix over the three class ids, titled on the axes the display drew on.
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1, 2])
disp.plot(cmap="Blues")
disp.ax_.set_title("Logistic Regression - Confusion Matrix")
plt.show()


In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with library defaults, trained on the scaled
# training features and evaluated on the held-out rows.
model = KNeighborsClassifier().fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Test-set accuracy and weighted-average F1.
knn_accuracy = accuracy_score(y_test, y_pred)
knn_f1 = f1_score(y_test, y_pred, average='weighted')
print("KNN Accuracy:", knn_accuracy)
print("KNN F1 Score:", knn_f1)

# Confusion matrix over the three class ids, titled on the axes the display drew on.
disp = ConfusionMatrixDisplay(
    confusion_matrix=confusion_matrix(y_test, y_pred),
    display_labels=[0, 1, 2],
)
disp.plot(cmap='Blues')
disp.ax_.set_title("KNN - Confusion Matrix")
plt.show()


In [None]:
from sklearn.svm import SVC

# Support-vector classifier with library defaults, trained on the scaled
# training features and evaluated on the held-out rows.
model = SVC().fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Test-set accuracy and weighted-average F1.
svm_accuracy = accuracy_score(y_test, y_pred)
svm_f1 = f1_score(y_test, y_pred, average='weighted')
print("SVM Accuracy:", svm_accuracy)
print("SVM F1 Score:", svm_f1)

# Confusion matrix over the three class ids, titled on the axes the display drew on.
disp = ConfusionMatrixDisplay(
    confusion_matrix=confusion_matrix(y_test, y_pred),
    display_labels=[0, 1, 2],
)
disp.plot(cmap='Blues')
disp.ax_.set_title("SVM - Confusion Matrix")
plt.show()


In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive-Bayes classifier with library defaults, trained on the scaled
# training features and evaluated on the held-out rows.
model = GaussianNB().fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Test-set accuracy and weighted-average F1.
nb_accuracy = accuracy_score(y_test, y_pred)
nb_f1 = f1_score(y_test, y_pred, average='weighted')
print("Naive Bayes Accuracy:", nb_accuracy)
print("Naive Bayes F1 Score:", nb_f1)

# Confusion matrix over the three class ids, titled on the axes the display drew on.
disp = ConfusionMatrixDisplay(
    confusion_matrix=confusion_matrix(y_test, y_pred),
    display_labels=[0, 1, 2],
)
disp.plot(cmap='Blues')
disp.ax_.set_title("Naive Bayes - Confusion Matrix")
plt.show()


In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree trained on the scaled training features.
# random_state is fixed (same seed as the train/test split) so the reported
# metrics are reproducible under Restart & Run All; without it the fitted tree
# can differ from run to run.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Test-set accuracy and weighted-average F1.
print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred))
print("Decision Tree F1 Score:", f1_score(y_test, y_pred, average='weighted'))

# Confusion matrix over the three class ids.
ConfusionMatrixDisplay(confusion_matrix(y_test, y_pred), display_labels=[0, 1, 2]).plot(cmap='Blues')
plt.title("Decision Tree - Confusion Matrix")
plt.show()


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest trained on the scaled training features.
# random_state is fixed (same seed as the train/test split) so the randomized
# forest, and therefore the reported metrics, are reproducible across runs.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Test-set accuracy and weighted-average F1.
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred))
print("Random Forest F1 Score:", f1_score(y_test, y_pred, average='weighted'))

# Confusion matrix over the three class ids.
ConfusionMatrixDisplay(confusion_matrix(y_test, y_pred), display_labels=[0, 1, 2]).plot(cmap='Blues')
plt.title("Random Forest - Confusion Matrix")
plt.show()


In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient-boosted trees trained on the scaled training features.
# random_state is fixed (same seed as the train/test split) so the reported
# metrics are reproducible across runs instead of depending on the
# estimator's internal random draws.
model = GradientBoostingClassifier(random_state=42)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Test-set accuracy and weighted-average F1.
print("Gradient Boosting Accuracy:", accuracy_score(y_test, y_pred))
print("Gradient Boosting F1 Score:", f1_score(y_test, y_pred, average='weighted'))

# Confusion matrix over the three class ids.
ConfusionMatrixDisplay(confusion_matrix(y_test, y_pred), display_labels=[0, 1, 2]).plot(cmap='Blues')
plt.title("Gradient Boosting - Confusion Matrix")
plt.show()


In [None]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost ensemble trained on the scaled training features.
# random_state is fixed (same seed as the train/test split) so the reported
# metrics are reproducible across runs instead of depending on the
# estimator's internal random draws.
model = AdaBoostClassifier(random_state=42)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Test-set accuracy and weighted-average F1.
print("AdaBoost Accuracy:", accuracy_score(y_test, y_pred))
print("AdaBoost F1 Score:", f1_score(y_test, y_pred, average='weighted'))

# Confusion matrix over the three class ids.
ConfusionMatrixDisplay(confusion_matrix(y_test, y_pred), display_labels=[0, 1, 2]).plot(cmap='Blues')
plt.title("AdaBoost - Confusion Matrix")
plt.show()
