In [3]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [4]:
df = pd.read_csv("../data/StudentsPerformance.csv")
df.columns = df.columns.str.replace(" ", "_")

In [5]:
df["average_score"] = (
    df["math_score"] +
    df["reading_score"] +
    df["writing_score"]
) / 3

df["pass"] = df["average_score"].apply(lambda x: 1 if x >= 40 else 0)

In [6]:
X = df.drop(["pass", "average_score"], axis=1)
y = df["pass"]

In [7]:
le = LabelEncoder()

for col in X.select_dtypes(include="object"):
    X[col] = le.fit_transform(X[col])

In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [9]:
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)

y_pred_lr = lr.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred_lr))
print(confusion_matrix(y_test, y_pred_lr))

Accuracy: 1.0
[[ 10   0]
 [  0 190]]


In [10]:
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

y_pred_dt = dt.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred_dt))

Accuracy: 0.995


In [11]:
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

y_pred_rf = rf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred_rf))

Accuracy: 1.0


In [12]:
importances = rf.feature_importances_
features = X.columns

feature_df = pd.DataFrame({
    "Feature": features,
    "Importance": importances
}).sort_values(by="Importance", ascending=False)

feature_df

Unnamed: 0,Feature,Importance
7,writing_score,0.353797
6,reading_score,0.296597
5,math_score,0.291874
3,lunch,0.020723
2,parental_level_of_education,0.010478
1,race/ethnicity,0.009803
4,test_preparation_course,0.009411
0,gender,0.007317
