In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
df = pd.read_csv("winequality-red.csv", sep=";")
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [3]:
def quality_to_class(q):
    if q <= 4:
        return 0  # Low
    elif q <= 6:
        return 1  # Medium
    else:
        return 2  # High

df["quality_class"] = df["quality"].apply(quality_to_class)
df[["quality", "quality_class"]].head()

Unnamed: 0,quality,quality_class
0,5,1
1,5,1
2,5,1
3,6,1
4,5,1


In [4]:
X = df.drop(["quality", "quality_class"], axis=1)
y = df["quality_class"]

print("X shape:", X.shape)
print("y shape:", y.shape)

X shape: (1599, 11)
y shape: (1599,)


In [5]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("Train-test split completed.")

Train-test split completed.


In [6]:
knn = KNeighborsClassifier(n_neighbors=5)

knn.fit(X_train, y_train)

print("KNN model trained successfully!")

KNN model trained successfully!


In [7]:
y_pred = knn.predict(X_test)
y_pred[:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int64)

In [8]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.815625

Confusion Matrix:
 [[  0  11   0]
 [  2 251   9]
 [  0  37  10]]

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        11
           1       0.84      0.96      0.89       262
           2       0.53      0.21      0.30        47

    accuracy                           0.82       320
   macro avg       0.46      0.39      0.40       320
weighted avg       0.76      0.82      0.78       320

