# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Point this path at your own copy of the dataset.
df = pd.read_csv("../Data/combine-feature.csv")

# Six feature columns are the model inputs; the "Class" column is the target.
feature_columns = ["LF", "HF", "LF/HF Ratio", "SDNN", "RMSSD", "pNN50"]
X = df[feature_columns]
y = df["Class"]

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both partitions; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Learn the scaling statistics from the training rows only, then apply that
# same fitted transform to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Decision-tree hyperparameters, gathered in one place:
#   criterion         - split quality measure ('entropy' here; 'gini' is the alternative)
#   max_depth         - cap on tree depth, to keep the tree from overfitting
#   min_samples_split - minimum split size handed to the classifier
#   random_state      - fixed seed
tree_params = {
    "criterion": "entropy",
    "max_depth": 5,
    "min_samples_split": 4,
    "random_state": 42,
}
dt = DecisionTreeClassifier(**tree_params)

# ------------------------------------------------
# 5. Train the model
# ------------------------------------------------
dt.fit(X_train, y_train)

# ------------------------------------------------
# 6. Evaluate the model
# ------------------------------------------------
# Predict on the held-out partition, compute each metric once, then report.
y_pred = dt.predict(X_test)

holdout_accuracy = accuracy_score(y_test, y_pred)
holdout_confusion = confusion_matrix(y_test, y_pred)
holdout_report = classification_report(y_test, y_pred)

print("Akurasi:", holdout_accuracy)
print("\nConfusion Matrix:\n", holdout_confusion)
print("\nClassification Report:\n", holdout_report)

# ------------------------------------------------
# 7. Cross-validation (10-fold, stratified)
# ------------------------------------------------
# Shuffled stratified folds with a fixed seed, so the fold assignment is
# repeatable between runs.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
# Scored on the full, unscaled X and y rather than on the train/test
# partitions, so the folds also contain the rows that were held out as X_test.
# NOTE(review): cross_val_score is documented to fit clones of the estimator,
# so the already-fitted `dt` should be left untouched - confirm against the
# scikit-learn docs.
cv_scores = cross_val_score(dt, X, y, cv=skf, scoring='accuracy')
# Report the mean accuracy over the 10 folds.
print("\nAkurasi rata-rata CV (10-Fold):", cv_scores.mean())

# ------------------------------------------------
# 8. Visualize the decision tree
# ------------------------------------------------
# plot_tree matches class_names to classes by position, in the sorted order
# the fitted estimator stores in dt.classes_. Iterating set(y) has no
# guaranteed order, so labels could end up attached to the wrong class;
# take the names from dt.classes_ instead.
class_labels = [str(label) for label in dt.classes_]

# Pass a plain list: some scikit-learn releases reject a pandas Index for
# feature_names.
feature_labels = list(X.columns)

plt.figure(figsize=(16, 10))
plot_tree(
    dt,
    feature_names=feature_labels,
    class_names=class_labels,
    filled=True,
    rounded=True,
    fontsize=10,
)
plt.title("Decision Tree - Klasifikasi Kanker")
plt.show()
