In [241]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Load the data
# The CSV path is resolved relative to the current working directory.
try:
    data = pd.read_csv('Students Performance Prediction.csv')
except FileNotFoundError as err:
    # Raise instead of calling exit(): exit() ends the whole interpreter/kernel
    # session, whereas an exception stops this cell (and a "Run All") with a
    # traceback that points at the real problem.
    raise FileNotFoundError(
        "Error: The data file was not found. Please provide the correct path."
    ) from err

In [243]:
# Step 2: Data Preprocessing

# Remove the identifier column without mutating the frame in place.
# errors='ignore' keeps this cell re-runnable: on a second run 'Student_ID'
# is already gone and the drop is a no-op instead of raising KeyError.
data = data.drop(columns=['Student_ID'], errors='ignore')

X = data.drop(columns=['Grade'])  # Features: every remaining column except the target
y = data['Grade']                 # Target



In [245]:
# Integer-encode every string-typed feature column and the target, keeping one
# fitted LabelEncoder per column so the codes can be mapped back later.
#
# X and y are derived from `data` inside this cell so that it is idempotent:
# re-running it always fits the encoders on the original values. Encoding the
# existing X / y in place would, on a second run, fit the 'Grade' encoder on
# integers (so inverse_transform would stop returning grade labels) and would
# silently discard the feature encoders.
label_encoders = {}

X = data.drop(columns=['Grade'])
for column in X.columns:
    if X[column].dtype == 'object':
        label_encoders[column] = LabelEncoder()
        X[column] = label_encoders[column].fit_transform(X[column])

label_encoders['Grade'] = LabelEncoder()
y = label_encoders['Grade'].fit_transform(data['Grade'])


In [247]:
# Partition features and target into a training half and a held-out test half.
# The fixed random_state makes the shuffle reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=30,
)

In [249]:
# Feature Scaling (important for KNN)
# The scaler is fitted on the training rows only; the same fitted transform is
# then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [251]:
# Step 3: Initialize and Train the KNN model
k = 5  # number of neighbours consulted for each prediction
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X=X_train, y=y_train)

In [253]:
# Step 4: Make Predictions on the Test Set
# y_pred holds the encoded class predicted for each row of the scaled test features.
y_pred = knn.predict(X=X_test)

In [255]:
# Step 5: Evaluate the Model
# Fraction of test rows whose predicted class equals the true class.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy of KNN model: {accuracy:.2f}")


Accuracy of KNN model: 0.12


In [257]:
# Per-class precision / recall / F1; rows are labelled with the encoded class ids.
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")



Classification Report:
              precision    recall  f1-score   support

           0       0.20      0.31      0.24        16
           1       0.00      0.00      0.00        10
           2       0.08      0.33      0.13         6
           3       0.00      0.00      0.00         4
           4       0.50      0.11      0.18         9
           5       0.00      0.00      0.00         9
           6       0.33      0.07      0.12        14
           7       0.00      0.00      0.00         5

    accuracy                           0.12        73
   macro avg       0.14      0.10      0.08        73
weighted avg       0.18      0.12      0.11        73



In [259]:
# Rows are true classes, columns are predicted classes (encoded ids).
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")


Confusion Matrix:
[[5 4 4 1 0 1 1 0]
 [6 0 4 0 0 0 0 0]
 [2 1 2 0 0 0 0 1]
 [2 1 1 0 0 0 0 0]
 [2 1 5 0 1 0 0 0]
 [2 0 3 2 1 0 1 0]
 [4 4 5 0 0 0 1 0]
 [2 3 0 0 0 0 0 0]]


In [261]:
# Map the encoded class ids back to the original grade labels, for both the
# model's predictions and the held-out ground truth.
grade_encoder = label_encoders['Grade']
original_predictions, original_test_grades = (
    grade_encoder.inverse_transform(encoded) for encoded in (y_pred, y_test)
)

In [263]:
print("\nSample Predictions (Original Labels):")
# Show up to the first 10 predictions next to the true grades. Slicing + zip
# (rather than indexing with range(10)) avoids an IndexError when the test set
# has fewer than 10 rows.
for predicted, actual in zip(original_predictions[:10], original_test_grades[:10]):
    print(f"Predicted Grade: {predicted}, Actual Grade: {actual}")


Sample Predictions (Original Labels):
Predicted Grade: BB, Actual Grade: DC
Predicted Grade: BB, Actual Grade: DC
Predicted Grade: BA, Actual Grade: AA
Predicted Grade: BB, Actual Grade: CC
Predicted Grade: AA, Actual Grade: AA
Predicted Grade: AA, Actual Grade: DD
Predicted Grade: DD, Actual Grade: AA
Predicted Grade: AA, Actual Grade: AA
Predicted Grade: AA, Actual Grade: AA
Predicted Grade: BB, Actual Grade: CC
