In [None]:
# Import necessary libraries
import matplotlib.pyplot as plt  # For data visualization
import pandas as pd  # For data manipulation
import seaborn as sns  # For data visualization
from sklearn.metrics import accuracy_score, confusion_matrix  # For model evaluation
from sklearn.model_selection import train_test_split, cross_val_score  # For data splitting and cross-validation
from sklearn.neighbors import KNeighborsClassifier  # K-Nearest Neighbors classifier
from sklearn.pipeline import make_pipeline  # For chaining preprocessing and a classifier
from sklearn.preprocessing import LabelEncoder, StandardScaler  # For data preprocessing

# Read "wine.csv" and keep only the rows that contain no missing values
wine_df = pd.read_csv("wine.csv").dropna(axis=0)

# Replace the categorical "type" column with integer codes
label_encoder = LabelEncoder()
wine_df['type'] = label_encoder.fit_transform(wine_df['type'])

# Target is the "quality" column; every other column is used as a feature
y = wine_df['quality']
features = wine_df.columns.drop('quality')
X = wine_df.loc[:, features]

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=5,
)

# Learn the scaling statistics from the training split only,
# then apply that same transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate neighbourhood sizes (k = 3 .. 49) and the mean CV accuracy of each
k_values = list(range(3, 50))
mean_accuracies = []

# Score every k with 5-fold cross-validation on the training split.
# The scaler lives inside the pipeline and receives the *unscaled* training
# data, so it is re-fitted on the training part of each fold. Cross-validating
# a matrix that was scaled beforehand would let every validation fold
# contribute to the scaling statistics used to evaluate it (data leakage).
for k in k_values:
    knn = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=k))
    scores = cross_val_score(knn, X_train, y_train, cv=5)
    mean_accuracies.append(scores.mean())

# Draw the mean cross-validated accuracy as a function of k
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(k_values, mean_accuracies, marker='o', linestyle='-')
ax.set_title('KNN Accuracy vs. Number of Neighbors (K)')
ax.set_xlabel('Number of Neighbors (K)')
ax.set_ylabel('Mean Accuracy')
ax.set_xticks(k_values)  # one tick per evaluated k
ax.grid(True)

# Pick the k with the highest mean accuracy (the earliest k wins on ties)
best_k = max(zip(k_values, mean_accuracies), key=lambda pair: pair[1])[0]

# Fit a KNN classifier with that k on the scaled training split
knn = KNeighborsClassifier(n_neighbors=best_k).fit(X_train_scaled, y_train)

# Predict the held-out test split and measure how often the prediction is right
y_pred = knn.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)

# Report the test accuracy together with the chosen k
print(f"Test Accuracy with K={best_k}: {accuracy}")

# Classes to show on the heatmap: every label that occurs in the true values
# or in the predictions, in ascending order. Passing them explicitly keeps the
# matrix rows/columns and the axis tick labels in agreement; without tick
# labels the heatmap would number the axes 0..n-1 instead of showing the
# actual class values.
class_labels = sorted(set(y_test) | set(y_pred))

# Build the confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

# Plot it as an annotated heatmap with the real class values on both axes
plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    cmap='Blues',
    fmt='d',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix (Heatmap)')

# Display the open figures
plt.show()
