In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

# ---------------------------------------------------------------------------
# k-NN baseline: load train.csv, hold out 20% of it for validation,
# standardise the features, fit a 5-nearest-neighbour classifier, print
# validation metrics, then predict test.csv and write knn_submission.csv.
# ---------------------------------------------------------------------------

# Load the dataset
train_df = pd.read_csv('train.csv')

# Drop 'id' and split features/target
X = train_df.drop(columns=['id', 'Target'])
y = train_df['Target']

# Encode categorical target labels as integers. `le` is kept so that the
# integer predictions can be mapped back to the original labels further down.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Split into train/validation (80/20; fixed random_state keeps the split
# reproducible across runs).
# NOTE(review): the split is not stratified on the target; consider
# `stratify=y_encoded` if class proportions should be preserved exactly.
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Scale features (important for k-NN!). The scaler is fitted on the training
# split only and then re-used unchanged for the validation and test features.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Train k-NN classifier
knn = KNeighborsClassifier(n_neighbors=5)  # you can tune n_neighbors
knn.fit(X_train_scaled, y_train)

# Predict and evaluate on the held-out validation split
y_pred = knn.predict(X_val_scaled)
print("Validation Accuracy:", accuracy_score(y_val, y_pred))
print("\nClassification Report:\n", classification_report(y_val, y_pred, target_names=le.classes_))

# Load test set and sample submission
test_df = pd.read_csv('test.csv')
submission_df = pd.read_csv('sample_submission.csv')

# Select exactly the columns the scaler/model were fitted on, in the same
# order, instead of "everything except id". A missing feature column raises a
# KeyError here rather than failing later inside the scaler.
X_test = test_df[list(X.columns)]
X_test_scaled = scaler.transform(X_test)

# Predict on test data and map the integer classes back to the original labels
test_preds_encoded = knn.predict(X_test_scaled)
test_preds = le.inverse_transform(test_preds_encoded)

# Predictions are in test.csv row order and are written into the sample
# submission positionally, so make sure both files list the same ids in the
# same order; otherwise every prediction would be attached to the wrong id.
if 'id' in submission_df.columns:
    ids_match = (
        len(submission_df) == len(test_df)
        and bool((submission_df['id'].to_numpy() == test_df['id'].to_numpy()).all())
    )
    if not ids_match:
        raise ValueError(
            "sample_submission.csv ids do not match test.csv ids row-for-row; "
            "refusing to write a misaligned submission"
        )

# Prepare submission
submission_df['Target'] = test_preds
submission_df.to_csv('knn_submission.csv', index=False)
print("Submission saved as 'knn_submission.csv'")


Validation Accuracy: 0.7786199686356509

Classification Report:
               precision    recall  f1-score   support

     Dropout       0.84      0.80      0.82      5028
    Enrolled       0.55      0.48      0.51      3017
    Graduate       0.82      0.89      0.85      7259

    accuracy                           0.78     15304
   macro avg       0.74      0.72      0.73     15304
weighted avg       0.77      0.78      0.77     15304

Submission saved as 'knn_submission.csv'
