In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, recall_score,precision_score,f1_score,confusion_matrix,accuracy_score,roc_curve, RocCurveDisplay,roc_auc_score

In [None]:
# Read the study-habits CSV into the working frame and preview the first rows.
# NOTE(review): this is an absolute, machine-specific path; consider a
# configurable data directory so the notebook runs on other machines.
csv_path = r"C:\Users\DELL\Downloads\student_study_habits.csv"
df = pd.read_csv(csv_path, encoding='latin1')
df.head()

In [None]:
# (rows, columns) of the frame as loaded, before any cleaning.
df.shape

In [None]:
# Summary statistics for the columns of the loaded frame.
df.describe()

In [None]:
# Column names, dtypes and non-null counts; a quick check for missing data.
df.info()

In [None]:
# Number of missing values per column.
df.isna().sum()

In [None]:
# Flag every row that exactly repeats an earlier row, then count the flags.
duplicate_mask = df.duplicated()
duplicates = duplicate_mask.sum()
print(duplicates)

In [None]:
# Keep only the first occurrence of each repeated row and report the new size.
df = df.drop_duplicates(keep="first")
print(df.shape)

In [None]:
# Rows with no recorded final grade have no ground-truth outcome. Filling the
# grade with the median would invent a pass/fail label for them, so drop those
# rows instead of imputing the target.
if "final_grade" in df.columns:
    df = df.dropna(subset=["final_grade"])

# Fill the remaining numeric gaps with each column's median. Rebinding `df`
# (rather than inplace=True) keeps the cell safe to re-run and avoids mutating
# a frame that came out of an earlier filtering step.
# NOTE(review): the medians are computed on the full dataset, i.e. before the
# train/test split; move imputation into a pipeline if leakage matters here.
df = df.fillna(df.median(numeric_only=True))

In [None]:
# Binary target: 1 when final_grade is at least 50, otherwise 0.
meets_pass_mark = df['final_grade'].ge(50)
df['pass_fail'] = meets_pass_mark.astype(int)

In [None]:
# Bar chart of how many students fall into each pass_fail class.
fig, ax = plt.subplots(figsize=(6, 5))
sns.countplot(
    data=df,
    x="pass_fail",
    hue="pass_fail",
    palette="Set2",
    legend=False,
    ax=ax,
)
ax.set(
    title="Pass vs Fail Distribution",
    xlabel="Pass_Fail (1 = Pass, 0 = Fail)",
    ylabel="Number of Students",
)
plt.show()

In [None]:
# Pie chart of the pass/fail split.
#
# value_counts() orders its result by frequency, not by label value, so a fixed
# labels=['Pass', 'Fail'] list mislabels the wedges whenever fails outnumber
# passes. Translate 0/1 to names *before* counting and take both the labels and
# the colours from the resulting index so each wedge is always tagged correctly.
outcome_names = {1: 'Pass', 0: 'Fail'}
outcome_colors = {'Pass': 'skyblue', 'Fail': 'salmon'}
outcome_counts = df['pass_fail'].map(outcome_names).value_counts()

plt.figure(figsize=(6,6))
outcome_counts.plot(
    kind='pie',
    labels=list(outcome_counts.index),
    autopct='%1.1f%%',
    colors=[outcome_colors[name] for name in outcome_counts.index],
    startangle=90,
    wedgeprops={'edgecolor':'black'}
)
plt.title("Pass vs Fail Distribution (Pie Chart)")
plt.ylabel("")  # hide the default y-axis label pandas adds for the series
plt.show()

In [None]:
# Recompute the binary target, but only when the grade column is present:
# 1 for a final_grade of 50 or more, 0 otherwise.
if "final_grade" in df.columns:
    df["pass_fail"] = df["final_grade"].ge(50).astype(int)

In [None]:
# Predictors used by both classifiers, and the binary target column.
feature_columns = [
    "study_hours_per_week",
    "attendance_percentage",
    "assignments_completed",
    "sleep_hours_per_day",
]
X = df[feature_columns]
y = df["pass_fail"]

In [None]:
# Hold out 20% of the rows for evaluation. stratify=y keeps the pass/fail ratio
# the same in the train and test partitions, so the reported metrics are not
# skewed by an unlucky split of an imbalanced target.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Two baseline classifiers trained on the same split.
rf = RandomForestClassifier(random_state=42)
nb = GaussianNB()

rf.fit(X_train, y_train)
nb.fit(X_train, y_train)

# Hard class predictions, used for accuracy and the classification reports.
rf_pred = rf.predict(X_test)
nb_pred = nb.predict(X_test)

# Second probability column, used later for the ROC curves.
# Presumably this is the probability of class 1 (pass); it is only that when
# both classes occur in y_train.
rf_proba = rf.predict_proba(X_test)[:,1]
nb_proba = nb.predict_proba(X_test)[:,1]

print("Random Forest Accuracy:", accuracy_score(y_test, rf_pred))
print("Naive Bayes Accuracy:", accuracy_score(y_test, nb_pred))
print("\nRandom Forest Report:\n", classification_report(y_test, rf_pred))
print("Naive Bayes Report:\n", classification_report(y_test, nb_pred))

In [None]:
# Draw both ROC curves on ONE set of axes. Without an explicit `ax`, each
# RocCurveDisplay.from_predictions call opens its own figure, which leaves an
# empty 6x6 canvas plus two separate plots instead of a single comparison.
fig, ax = plt.subplots(figsize=(6, 6))
RocCurveDisplay.from_predictions(y_test, rf_proba, name="Random Forest", color="blue", ax=ax)
RocCurveDisplay.from_predictions(y_test, nb_proba, name="Naive Bayes", color="green", ax=ax)
ax.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal for reference
ax.set_title("ROC Curve - Random Forest vs Naive Bayes")
plt.show()
