In [1]:
import itertools

import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, StratifiedKFold, LeaveOneOut, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# === 1. Load dataset ===
df = pd.read_csv("../Data/DataCancer/combine-feature.csv")

# Columns used as model inputs; adjust this list to match your own features.
feature_columns = ["SDNN", "RMSSD", "pNN50", "LF", "HF", "LF/HF Ratio"]

# Target labels.
y = df["Class"]

# Standardize the features (not required for a decision tree, kept for tidiness).
# NOTE(review): the scaler is fit on the complete dataset, i.e. before any
# cross-validation split is made further down.
scaler = StandardScaler()
X = scaler.fit_transform(df[feature_columns])

# === 2. Generate the parameter combinations automatically ===
criterions = ["gini", "entropy", "log_loss"]
# NOTE(review): `splitters` and `max_features_values` are defined but are not
# part of the grid built below; confirm whether they were meant to be searched.
splitters = ["best", "random"]
max_features_values = [None, "sqrt", "log2"]
max_depth_values = [None, *range(2, 11)]
min_samples_split_values = [2, 3, 4, 5, 10]
min_samples_leaf_values = [1, 2, 3, 4, 5]
ccp_alpha_values = [0.0, 0.001, 0.01, 0.1]
class_weights = [None, "balanced"]

# Cartesian product of the searched axes. itertools.product varies the last
# axis fastest, so the list is ordered criterion-major / class_weight-minor.
# Each dict's keys are DecisionTreeClassifier keyword-argument names.
param_list = [
    {
        "criterion": criterion,
        "max_depth": depth,
        "min_samples_split": split,
        "min_samples_leaf": leaf,
        "ccp_alpha": alpha,
        "class_weight": weight,
    }
    for criterion, depth, split, leaf, alpha, weight in itertools.product(
        criterions,
        max_depth_values,
        min_samples_split_values,
        min_samples_leaf_values,
        ccp_alpha_values,
        class_weights,
    )
]

print(f"Total kombinasi parameter: {len(param_list)}")

# === 3. Evaluate every combination ===
# Each parameter combination is scored under five cross-validation schemes;
# one result row is recorded per (combination, scheme) pair.
results = []

# The splitters do not depend on the tree hyper-parameters, so they are built
# once instead of on every iteration. Because random_state is an integer,
# every split() call reproduces the same shuffled folds.
kf5 = KFold(n_splits=5, shuffle=True, random_state=42)
skf5 = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
loo = LeaveOneOut()
kf10 = KFold(n_splits=10, shuffle=True, random_state=42)
skf10 = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Label -> splitter; insertion order determines the row order in `results`.
cv_strategies = {
    "K-Fold (5)": kf5,
    "Stratified K-Fold (5)": skf5,
    "Leave-One-Out": loo,
    "K-Fold (10)": kf10,
    "Stratified K-Fold (10)": skf10,
}

total = len(param_list)

for i, p in enumerate(param_list, start=1):
    # The keys of `p` are DecisionTreeClassifier keyword arguments.
    dt = DecisionTreeClassifier(**p, random_state=42)

    # Mean of the per-fold scores returned by cross_val_score for each scheme.
    scores = {
        cv_name: cross_val_score(dt, X, y, cv=cv).mean()
        for cv_name, cv in cv_strategies.items()
    }

    for cv_name, acc in scores.items():
        results.append({
            "No": i,
            **p,
            "Cross-Val": cv_name,
            "Acc": round(acc, 3),
        })

    # Progress report every 10 combinations.
    if i % 10 == 0:
        print(f"Progress: {i}/{total} kombinasi dievaluasi...")

# === 4. Store the results in a DataFrame ===
output_path = "combine-params.csv"
df_results = pd.DataFrame(results)

# Write the table to CSV without the index column.
df_results.to_csv(output_path, index=False)
print(f"\n✅ Hasil evaluasi disimpan ke: {output_path}")

# Show the first 10 rows.
print(df_results.iloc[:10])

Total kombinasi parameter: 6000
Progress: 10/6000 kombinasi dievaluasi...
Progress: 20/6000 kombinasi dievaluasi...
Progress: 30/6000 kombinasi dievaluasi...
Progress: 40/6000 kombinasi dievaluasi...
Progress: 50/6000 kombinasi dievaluasi...
Progress: 60/6000 kombinasi dievaluasi...
Progress: 70/6000 kombinasi dievaluasi...
Progress: 80/6000 kombinasi dievaluasi...
Progress: 90/6000 kombinasi dievaluasi...
Progress: 100/6000 kombinasi dievaluasi...
Progress: 110/6000 kombinasi dievaluasi...
Progress: 120/6000 kombinasi dievaluasi...
Progress: 130/6000 kombinasi dievaluasi...
Progress: 140/6000 kombinasi dievaluasi...
Progress: 150/6000 kombinasi dievaluasi...
Progress: 160/6000 kombinasi dievaluasi...
Progress: 170/6000 kombinasi dievaluasi...
Progress: 180/6000 kombinasi dievaluasi...
Progress: 190/6000 kombinasi dievaluasi...
Progress: 200/6000 kombinasi dievaluasi...
Progress: 210/6000 kombinasi dievaluasi...
Progress: 220/6000 kombinasi dievaluasi...
Progress: 230/6000 kombinasi di