In [1]:
import pandas as pd
import numpy as np
import joblib

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Training cell: fit a depth-limited decision tree on the student dataset,
# reusing the scaler and label encoder already stored under models/, then
# persist the fitted tree next to them.

# File locations and column names used by this cell.
DATASET_PATH = "data/students_dataset.csv"
TARGET_COLUMN = "Target_Class"
SCALER_PATH = "models/scaler.pkl"
ENCODER_PATH = "models/label_encoder.pkl"
MODEL_PATH = "models/decision_tree_model.pkl"

# One seed for both the split and the tree so reruns are repeatable.
SEED = 42

# Read the data; every column except the target is treated as a feature.
df = pd.read_csv(DATASET_PATH)
X = df.drop(columns=TARGET_COLUMN)
y = df[TARGET_COLUMN]

# The scaler and encoder are loaded from disk and only applied here
# (transform), never refitted in this cell.
scaler = joblib.load(SCALER_PATH)
label_encoder = joblib.load(ENCODER_PATH)

X_scaled = scaler.transform(X)
y_encoded = label_encoder.transform(y)

# Hold out 20% of the rows. The held-out part (X_test, y_test) is not used
# further in this cell.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y_encoded,
    test_size=0.2,
    random_state=SEED,
)

# fit() returns the estimator itself, so construction and training chain.
model = DecisionTreeClassifier(max_depth=5, random_state=SEED).fit(
    X_train, y_train
)

# Persist the trained tree.
joblib.dump(model, MODEL_PATH)

print("Decision Tree model saved successfully!")


Decision Tree model saved successfully!
