# 🎓 UG Placement Prediction - Model Training (Classification)
This notebook trains and evaluates multiple classification models to predict whether a student is placed or not based on academic and background features.

In [1]:
# 📦 Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
import warnings
warnings.filterwarnings('ignore')

In [None]:
#  Load Data
df = pd.read_csv('data/raw.csv')

In [None]:
#  Preprocessing
df.drop(columns=['sl_no'], inplace=True, errors='ignore')
df = df[df['status'].notna()]
le = LabelEncoder()
for col in df.select_dtypes(include='object').columns:
    df[col] = le.fit_transform(df[col])

#  Features and Target
X = df.drop('status', axis=1)
y = df['status']

In [5]:
#  Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
#  Models to Train
models = {
    'Logistic Regression': LogisticRegression(),
    'KNN': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'SVC': SVC(),
    'AdaBoost': AdaBoostClassifier(),
    'RidgeClassifier': RidgeClassifier(),
    'CatBoost': CatBoostClassifier(verbose=0),
    'XGBoost': XGBClassifier(eval_metric='logloss')
}

In [7]:
#  Train and Evaluate Models
results = []
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    results.append((name, acc, prec, rec, f1))

results_df = pd.DataFrame(results, columns=['Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score'])
results_df.sort_values(by='F1 Score', ascending=False)

Unnamed: 0,Model,Accuracy,Precision,Recall,F1 Score
8,XGBoost,0.860465,0.878788,0.935484,0.90625
1,KNN,0.837209,0.833333,0.967742,0.895522
5,AdaBoost,0.837209,0.852941,0.935484,0.892308
7,CatBoost,0.837209,0.852941,0.935484,0.892308
6,RidgeClassifier,0.837209,0.875,0.903226,0.888889
0,Logistic Regression,0.813953,0.848485,0.903226,0.875
3,Random Forest,0.790698,0.805556,0.935484,0.865672
4,SVC,0.767442,0.783784,0.935484,0.852941
2,Decision Tree,0.767442,0.818182,0.870968,0.84375
