In [1]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


In [2]:

# Load dataset
df = pd.read_csv("student_academic_placement_performance_dataset.csv")
df.head()


Unnamed: 0,student_id,gender,ssc_percentage,hsc_percentage,degree_percentage,cgpa,entrance_exam_score,technical_skill_score,soft_skill_score,internship_count,live_projects,work_experience_months,certifications,attendance_percentage,backlogs,extracurricular_activities,placement_status,salary_package_lpa
0,1,Male,53,79,56,8.87,50,92,90,1,3,23,4,91,4,Yes,0,0.0
1,2,Female,56,54,59,6.78,61,51,99,1,0,6,5,87,3,No,0,0.0
2,3,Male,94,83,83,7.92,91,93,84,1,1,10,2,81,2,No,1,6.92
3,4,Male,84,71,87,6.57,85,60,72,4,2,14,5,87,3,No,0,0.0
4,5,Male,58,88,74,9.01,73,52,88,1,2,20,0,60,1,No,0,0.0


In [3]:

# Drop ID column
df.drop(columns=['student_id'], inplace=True)

# Encode categorical columns
le = LabelEncoder()
df['gender'] = le.fit_transform(df['gender'])
df['extracurricular_activities'] = le.fit_transform(df['extracurricular_activities'])

df.head()


Unnamed: 0,gender,ssc_percentage,hsc_percentage,degree_percentage,cgpa,entrance_exam_score,technical_skill_score,soft_skill_score,internship_count,live_projects,work_experience_months,certifications,attendance_percentage,backlogs,extracurricular_activities,placement_status,salary_package_lpa
0,1,53,79,56,8.87,50,92,90,1,3,23,4,91,4,1,0,0.0
1,0,56,54,59,6.78,61,51,99,1,0,6,5,87,3,0,0,0.0
2,1,94,83,83,7.92,91,93,84,1,1,10,2,81,2,0,1,6.92
3,1,84,71,87,6.57,85,60,72,4,2,14,5,87,3,0,0,0.0
4,1,58,88,74,9.01,73,52,88,1,2,20,0,60,1,0,0,0.0


In [4]:

# Features and target
X = df.drop(columns=['placement_status'])
y = df['placement_status']


In [5]:

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)


In [6]:

# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [7]:

# Train model
model = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
    class_weight='balanced'
)
model.fit(X_train, y_train)


In [8]:

# Evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       827
           1       1.00      1.00      1.00       173

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000

