In [1]:
# ============================================================
# 🧾 Student Placement Prediction using ML Techniques
# Author: Chetan Bachchhav
# ============================================================

# 1️⃣ Import Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

# 2️⃣ Load Dataset
df = pd.read_csv("student_placement_data.csv")   # 👈 Upload your CSV file in Colab first
print("✅ Dataset Loaded Successfully!\n")
print(df.head())

# 3️⃣ Drop Irrelevant Columns
df = df.drop(columns=["name"])

✅ Dataset Loaded Successfully!

        name  age  gender      branch  attendance_percentage  current_cgpa  \
0  Student_1   22    Male          HR                   70.7          6.05   
1  Student_2   20    Male          HR                   85.9          6.41   
2  Student_3   22    Male  Management                   67.6          9.80   
3  Student_4   21  Female       Other                   89.1          9.70   
4  Student_5   22    Male         CSE                   75.6          9.11   

   graduation_percentage   hsc   ssc  aptitude_score  communication_skills  \
0                   79.1  92.3  76.0              44                     4   
1                   70.7  66.9  67.6              32                     9   
2                   94.2  78.6  73.8              36                     4   
3                   80.7  79.5  91.9              37                     6   
4                   91.4  66.5  89.8              30                     7   

   technical_skills domain int

In [2]:
# 4️⃣ Encode Categorical Columns
cat_cols = ["gender", "branch", "domain", "internship", "certifications", "placement_status"]
le = LabelEncoder()
for col in cat_cols:
    df[col] = le.fit_transform(df[col])

# 5️⃣ Define Features and Target
X = df.drop(columns=["placement_status"])
y = df["placement_status"]

In [3]:
# 6️⃣ Split into Train and Test Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 7️⃣ Feature Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 8️⃣ Initialize Models
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC(),
    "KNN": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB()
}

In [4]:
# 9️⃣ Train and Evaluate Models
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results[name] = acc
    print(f"\n📊 {name}")
    print("Accuracy:", round(acc*100, 2), "%")
    print(classification_report(y_test, y_pred))


📊 Logistic Regression
Accuracy: 55.0 %
              precision    recall  f1-score   support

           0       0.75      0.27      0.40        11
           1       0.50      0.89      0.64         9

    accuracy                           0.55        20
   macro avg       0.62      0.58      0.52        20
weighted avg       0.64      0.55      0.51        20


📊 Decision Tree
Accuracy: 55.0 %
              precision    recall  f1-score   support

           0       0.62      0.45      0.53        11
           1       0.50      0.67      0.57         9

    accuracy                           0.55        20
   macro avg       0.56      0.56      0.55        20
weighted avg       0.57      0.55      0.55        20


📊 Random Forest
Accuracy: 50.0 %
              precision    recall  f1-score   support

           0       1.00      0.09      0.17        11
           1       0.47      1.00      0.64         9

    accuracy                           0.50        20
   macro avg       0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [5]:
# 🔟 Compare Model Accuracies
print("\n✅ Model Comparison:")
for name, acc in results.items():
    print(f"{name}: {round(acc*100, 2)}%")

# 🎯 Identify Best Model
best_model = max(results, key=results.get)
print(f"\n🏆 Best Performing Model: {best_model} ({round(results[best_model]*100, 2)}%)")


✅ Model Comparison:
Logistic Regression: 55.0%
Decision Tree: 55.0%
Random Forest: 50.0%
SVM: 45.0%
KNN: 50.0%
Naive Bayes: 50.0%

🏆 Best Performing Model: Logistic Regression (55.0%)
