In [9]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Load dataset
df = pd.read_csv("student_data.csv")

# Split columns positionally: the last column is the target, the rest are features.
# NOTE(review): this cell assumes a Pass / Fail target, but the run recorded
# with it prints an 18x18 confusion matrix, so the last column of that CSV was
# evidently not binary -- confirm which column / encoding is intended.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Encode a textual Pass/Fail target as 1/0.
# The check covers both object-dtype columns and pandas' dedicated string
# dtype, so the encoding does not depend on how the CSV reader stored the text.
if pd.api.types.is_object_dtype(y) or pd.api.types.is_string_dtype(y):
    label_map = {'Fail': 0, 'Pass': 1}
    y_encoded = y.map(label_map)
    # Series.map yields NaN for every value that is not a key of label_map
    # (including missing values). Fail here with the offending labels instead
    # of letting NaN targets reach model.fit().
    unmapped = y_encoded.isna()
    if unmapped.any():
        bad_labels = y[unmapped].unique().tolist()
        raise ValueError(
            f"Target column {y.name!r} has values outside {list(label_map)}: {bad_labels}"
        )
    y = y_encoded.astype('int64')

# Identify categorical columns in X and apply one-hot encoding
# drop_first=True drops the first level of each encoded column.
categorical_cols = X.select_dtypes(include=['object']).columns
X = pd.get_dummies(X, columns=categorical_cols, drop_first=True)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# Fit a decision tree with default depth settings (fit() returns the estimator)
model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predicted labels for the held-out rows
y_pred = model.predict(X_test)

# -------------------------
# MODEL EVALUATION METRICS
# -------------------------

# Confusion Matrix
# Rows are true labels and columns are predicted labels, both in the order of
# `labels` (the sorted union of the labels seen in y_test and y_pred). Printing
# that order makes a multi-class matrix readable.
labels = np.union1d(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred, labels=labels)
print("Confusion Matrix labels (row/column order):", labels)
print("Confusion Matrix:\n", cm)

# Accuracy, Precision, Recall, F1
# Some labels are never predicted (or never occur in y_test), which makes their
# per-class precision/recall a 0/0 division. zero_division=0 scores those
# classes as 0 explicitly, so the metrics are reported without the
# UndefinedMetricWarning (`_warn_prf`) seen in the recorded output.
weighted = dict(average='weighted', zero_division=0)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, **weighted))
print("Recall:", recall_score(y_test, y_pred, **weighted))
print("F1 Score:", f1_score(y_test, y_pred, **weighted))

# -------------------------
# CROSS-VALIDATION
# -------------------------
# Score `model` by accuracy with 5 folds over the complete X / y
# (not just the training split), then report each fold and the average.
cv_scores = cross_val_score(estimator=model, X=X, y=y, scoring="accuracy", cv=5)
mean_cv_accuracy = cv_scores.mean()
print("\nCross-Validation Scores:", cv_scores)
print("Mean CV Accuracy:", mean_cv_accuracy)

# -------------------------
# BIAS–VARIANCE (TREE DEPTH)
# -------------------------
# Refit a tree for every max_depth from 1 to 10 and compare its accuracy on
# the training split with its accuracy on the held-out split.
print("\nBias–Variance Analysis (Tree Depth):")
max_depths = range(1, 11)
for depth in max_depths:
    dt = DecisionTreeClassifier(random_state=42, max_depth=depth).fit(X_train, y_train)
    train_acc, test_acc = (
        dt.score(features, target)
        for features, target in ((X_train, y_train), (X_test, y_test))
    )
    print(f"Depth {depth}: Train Acc={train_acc:.2f}, Test Acc={test_acc:.2f}")

Confusion Matrix:
 [[2 0 0 0 0 1 2 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 2 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 1 1 1 2 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
 [2 0 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0]
 [2 0 0 0 0 2 3 3 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 2 2 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 3 0 0 1 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 1 1 2 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 4 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 2 5 2 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 3 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
Accuracy: 0.3037974683544304
Precision: 0.33851215591721917
Recall: 0.3037974683544304
F1 Score: 0.3095520365340485

Cross-Validation Scores: [0.36708861 0.37974684 0.37974684 0.36708861 0.36708861]
Mean CV Accuracy: 0.3721518987341772

Bias–Variance Analysis (Tree Depth):
Dept

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Depth 5: Train Acc=0.66, Test Acc=0.33
Depth 6: Train Acc=0.74, Test Acc=0.30
Depth 7: Train Acc=0.85, Test Acc=0.33
Depth 8: Train Acc=0.93, Test Acc=0.33
Depth 9: Train Acc=0.98, Test Acc=0.27
Depth 10: Train Acc=1.00, Test Acc=0.29
