## Multi-class Classification using k-Nearest Neighbors (KNN)

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
# Read the training split and the test split from their CSV files.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ('multi_classification_train.csv', 'multi_classification_test.csv')
)

In [7]:
# Feature matrix: every training column except "ID" and "Class".
X_train = train_df.drop(["ID", "Class"], axis=1).values
# Target vector: the "Class" column as an array.
y_train = train_df.loc[:, "Class"].values

In [9]:
# Test feature matrix: every test column except "ID".
X_test = test_df.drop("ID", axis=1).values

In [11]:
def knn_predict(X_train, y_train, X_test, k):
    """Classify each test point by majority vote among its k nearest training points.

    Distances are Euclidean (L2 norm of the row-wise difference). When several
    labels share the highest vote count, the smallest label (in ``np.unique``
    sort order) wins. Training rows at equal distance are ranked by their
    position in ``X_train``.

    Parameters
    ----------
    X_train : array-like, shape (n_train, n_features)
        Training feature rows.
    y_train : array-like, shape (n_train,)
        Label of each training row.
    X_test : array-like, shape (n_test, n_features)
        Rows to classify.
    k : int
        Number of neighbours that vote; must be at least 1. Values larger
        than ``n_train`` simply use every training row.

    Returns
    -------
    numpy.ndarray
        One predicted label per row of ``X_test``.

    Raises
    ------
    ValueError
        If ``k < 1``, the training set is empty, or ``X_train`` and
        ``y_train`` contain a different number of rows.
    """
    # Accept plain lists as well as arrays; fancy indexing below needs ndarrays.
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    X_test = np.asarray(X_test)

    # A negative k would otherwise be accepted silently: ``[:k]`` slices from
    # the end and would vote with n_train - |k| neighbours.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")
    if X_train.shape[0] == 0:
        raise ValueError("X_train must contain at least one row")
    if X_train.shape[0] != y_train.shape[0]:
        raise ValueError(
            f"X_train has {X_train.shape[0]} rows but y_train has {y_train.shape[0]} labels"
        )

    predictions = []
    for test_point in X_test:
        distances = np.linalg.norm(X_train - test_point, axis=1)
        # Stable sort keeps equidistant rows in training order, so the chosen
        # neighbour set does not depend on the sort implementation.
        k_indices = np.argsort(distances, kind="stable")[:k]
        k_nearest_labels = y_train[k_indices]
        unique, counts = np.unique(k_nearest_labels, return_counts=True)
        # argmax returns the first maximum, i.e. the smallest label on a tie.
        predictions.append(unique[np.argmax(counts)])
    return np.array(predictions)

In [13]:
def calculate_f1(y_true, y_pred):
    """Compute macro-averaged precision, recall and F1 score.

    Each metric is computed one-vs-rest for every distinct label found in
    ``y_true`` and then averaged with equal weight per class. Labels that
    occur only in ``y_pred`` count as false positives for no class of their
    own; they lower recall of the true classes but get no term in the average.
    Any ratio with a zero denominator is taken to be 0.0.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, same shape as ``y_true``.

    Returns
    -------
    tuple
        ``(precision, recall, f1)``. All three are 0.0 when ``y_true`` is empty.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have the same shape.
    """
    # Convert up front: comparing a plain list with a scalar yields a single
    # False rather than an element-wise mask, which would silently score 0.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {y_true.shape} and {y_pred.shape}"
        )

    classes = np.unique(y_true)
    if classes.size == 0:
        # No classes to average over; avoid dividing by zero below.
        return 0.0, 0.0, 0.0

    precision_total = 0.0
    recall_total = 0.0
    f1_total = 0.0

    for cls in classes:
        is_true = y_true == cls
        is_pred = y_pred == cls
        tp = np.sum(is_true & is_pred)
        fp = np.sum(~is_true & is_pred)
        fn = np.sum(is_true & ~is_pred)

        precision_class = tp / (tp + fp) if tp + fp > 0 else 0.0
        recall_class = tp / (tp + fn) if tp + fn > 0 else 0.0
        f1_class = (
            2 * (precision_class * recall_class) / (precision_class + recall_class)
            if precision_class + recall_class > 0
            else 0.0
        )

        precision_total += precision_class
        recall_total += recall_class
        f1_total += f1_class

    n_classes = len(classes)
    precision = precision_total / n_classes
    recall = recall_total / n_classes
    f1 = f1_total / n_classes

    return precision, recall, f1


In [15]:
# Number of neighbours that vote on each prediction.
k = 8
# Predict labels for the training rows, using the training set itself as the neighbour pool.
train_predictions = knn_predict(X_train=X_train, y_train=y_train, X_test=X_train, k=k)

In [16]:
# Score the training-set predictions against the training labels.
precision, recall, f1 = calculate_f1(y_train, train_predictions)

# Report each metric on its own line, rounded to four decimals.
print(
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1 Score: {f1:.4f}",
    sep="\n",
)

Precision: 0.9588
Recall: 0.9430
F1 Score: 0.9505


In [17]:
# Classify the test rows, using the full training set as the neighbour pool.
predictions = knn_predict(X_train=X_train, y_train=y_train, X_test=X_test, k=k)

print("Predicted labels for the test set:", predictions)

Predicted labels for the test set: [3 1 1 ... 3 4 1]


In [18]:
output_path = "knn_predictions.csv"
# Build the output from a new frame instead of adding a "Class" column to
# test_df. X_test is derived from test_df by dropping only "ID", so mutating
# test_df here would make a re-run of that cell treat the predicted labels as
# an extra feature.
submission_df = test_df[["ID"]].assign(Class=predictions)
submission_df.to_csv(output_path, index=False)
print(f"Predictions saved to {output_path}")

Predictions saved to knn_predictions.csv


In [None]:
# Sweep candidate neighbourhood sizes and record the macro F1 obtained when the
# training rows are classified against the training set itself.
# NOTE(review): every training row is at distance 0 from itself and therefore
# always votes for its own label, so these scores are optimistic. Consider
# scoring on a held-out split before trusting the selected k.
k_values = range(3, 12)
f1_scores = []

# Loop-local names are used so the sweep does not overwrite the notebook-level
# `k`, `train_predictions`, `precision`, `recall` and `f1` set by earlier cells.
for candidate_k in k_values:
    candidate_predictions = knn_predict(X_train, y_train, X_train, candidate_k)
    candidate_precision, candidate_recall, candidate_f1 = calculate_f1(
        y_train, candidate_predictions
    )
    f1_scores.append(candidate_f1)

# argmax returns the first maximum, so the smallest k wins a tie.
optimal_k = k_values[int(np.argmax(f1_scores))]
print(f"Optimal value of k: {optimal_k}")

# The plotted quantity is macro F1, not accuracy; label it accordingly.
plt.plot(k_values, f1_scores)
plt.xlabel('k')
plt.ylabel('Macro F1 score')
plt.title('Training-set macro F1 vs k (for k-NN)')
plt.show()