In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Load and preprocess the data
# Listed lowest-to-highest so each label's list position is its ordinal code.
education_levels = ['High School', 'Bachelors', 'Masters', 'PhD']
yes_no_codes = {'Yes': 1, 'No': 0}

# One chained pipeline: read -> dedupe -> drop -> encode. `.assign` overwrites
# the existing columns in place, so the column order of the frame is unchanged.
# Note: `Series.map` with a dict turns any label missing from the dict into NaN.
df = (
    pd.read_csv('student_performance_dataset.csv')
    # Keep a single row per student (pandas keeps the first occurrence).
    .drop_duplicates(subset='Student_ID')
    # NOTE(review): presumably removed because Pass_Fail is derived from this
    # score (label leakage) -- confirm against the dataset description.
    .drop(columns=['Final_Exam_Score'])
    .assign(
        # Gender: Male=0, Female=1
        Gender=lambda frame: frame['Gender'].map({'Male': 0, 'Female': 1}),
        # Parental_Education_Level (ordinal encoding)
        Parental_Education_Level=lambda frame: frame['Parental_Education_Level'].map(
            dict(zip(education_levels, range(len(education_levels))))
        ),
        # Binary encoding for Yes/No columns
        Internet_Access_at_Home=lambda frame: frame['Internet_Access_at_Home'].map(yes_no_codes),
        Extracurricular_Activities=lambda frame: frame['Extracurricular_Activities'].map(yes_no_codes),
    )
)

# Features are every remaining column except the identifier and the label;
# the target is the label encoded as Fail=0, Pass=1.
X = df.drop(columns=['Student_ID', 'Pass_Fail'])
y = df['Pass_Fail'].map({'Fail': 0, 'Pass': 1})

# Step 2: Split data into train and test sets
# 80/20 hold-out split; the fixed seed makes the partition repeatable.
# No `stratify` argument is given, so the class proportions of the two
# partitions are not controlled.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Initialize AdaBoost with Decision Tree Stumps
base_estimator = DecisionTreeClassifier(max_depth=1)  # Decision Stump
ada_model = AdaBoostClassifier(
    # Hand the stump to the ensemble explicitly. It used to be constructed but
    # never passed, so edits to `base_estimator` silently had no effect.
    # (scikit-learn releases older than 1.2 call this keyword `base_estimator`.)
    estimator=base_estimator,
    n_estimators=50,      # upper bound on the number of boosting rounds
    learning_rate=1.0,    # weight applied to each stump's contribution
    random_state=42
)

# Train the model
ada_model.fit(X_train, y_train)

# Step 4: Evaluate the model
# Score the fitted ensemble on the held-out rows only.
y_pred = ada_model.predict(X_test)

# Metrics
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

report_text = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report_text)

# Feature Importance
# Label each importance with its column name so the pairs are iterated together.
print("\nFeature Importances:")
importance_by_feature = pd.Series(ada_model.feature_importances_, index=X.columns)
for feature, importance in importance_by_feature.items():
    print(f"{feature}: {importance:.4f}")

Accuracy: 0.84

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.89      0.89        75
           1       0.68      0.68      0.68        25

    accuracy                           0.84       100
   macro avg       0.79      0.79      0.79       100
weighted avg       0.84      0.84      0.84       100


Feature Importances:
Gender: 0.0000
Study_Hours_per_Week: 0.2052
Attendance_Rate: 0.3625
Past_Exam_Scores: 0.3811
Parental_Education_Level: 0.0000
Internet_Access_at_Home: 0.0241
Extracurricular_Activities: 0.0271
