In [1]:
# KNN Based Student Club Classifier
# Goal: Predict which club a new student will likely join
# 1) Imports
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

# 2) Load dataset
# The CSV is read from the current working directory.
path = "students_club_dataset.csv"
df = pd.read_csv(path)

# 3) Prepare features
# Target label: the club each student in the dataset belongs to.
y = df["club"]

# Numeric columns are standardized; "interest" is one-hot encoded.
num_cols = ["cgpa", "projects", "sports", "cultural"]
X = pd.get_dummies(df[["interest", *num_cols]], columns=["interest"])

# Fit the scaler on the training data only, and keep it around so the exact
# same scaling can be re-applied to rows that are predicted later.
scaler = StandardScaler().fit(X[num_cols])
X[num_cols] = scaler.transform(X[num_cols])

# 4) Train model
# 3-nearest-neighbours classifier; weights="distance" makes closer neighbours
# count more in the vote than farther ones. fit() returns the estimator itself,
# so construction and training are chained.
knn = KNeighborsClassifier(n_neighbors=3, weights="distance").fit(X, y)

# 5) Describe the new student to classify
new_student = {
    "interest": "technology",
    "cgpa": 8.0,
    "projects": 2,
    "sports": 1,
    "cultural": 0,
}

# Bring the single row into the same layout as the training matrix:
#   - one-hot encode "interest",
#   - reindex to the training columns (dummy columns this row does not
#     produce are filled with 0; columns unknown to X are dropped),
#   - scale the numeric columns with the already-fitted scaler.
X_new = (
    pd.get_dummies(pd.DataFrame([new_student]), columns=["interest"])
    .reindex(columns=X.columns, fill_value=0)
)
X_new[num_cols] = scaler.transform(X_new[num_cols])

# 6) Predict and print the result
# predict() returns one label per input row; take the first entry.
predicted_club = knn.predict(X_new)[0]

# One print call, one line per item.
print(
    "---PREDICTION---",
    f"New student profile: {new_student}",
    f"Predicted club : {predicted_club}",
    sep="\n",
)

---PREDICTION---
New student profile: {'interest': 'technology', 'cgpa': 8.0, 'projects': 2, 'sports': 1, 'cultural': 0}
Predicted club : Robotics Club
