In [1]:
import pandas as pd
from sklearn.model_selection import KFold, StratifiedKFold, LeaveOneOut, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# === 1. Load Dataset ===
# Replace DATA_PATH with the path to your dataset.
DATA_PATH = "../Data/combine-feature-augmented.csv"
FEATURE_COLUMNS = ["SDNN", "RMSSD", "pNN50"]
TARGET_COLUMN = "Class"
SEED = 42  # shared by every shuffled splitter so runs are repeatable

df = pd.read_csv(DATA_PATH)

# Fail early, with a readable message, if the expected columns are absent.
missing_columns = [c for c in FEATURE_COLUMNS + [TARGET_COLUMN] if c not in df.columns]
if missing_columns:
    raise KeyError(f"Dataset is missing required columns: {missing_columns}")

# X stays unscaled here on purpose: scaling happens inside the pipeline below.
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# LDA model.
# Alternative priors that were tried: [0.3, 0.7], [0.5, 0.5], [0.2, 0.8].
lda = LinearDiscriminantAnalysis(
    solver='lsqr',
    shrinkage=None,
    priors=None,
    n_components=1,  # per the scikit-learn docs this only affects transform(), not predict()
)

# Feature normalisation is part of the model, not a preprocessing step done
# once on the whole dataset. cross_val_score clones and refits the pipeline on
# each training fold, so the held-out fold never contributes to the scaler's
# mean/variance (fitting the scaler on all rows first leaks test information).
scaler = StandardScaler()
model = make_pipeline(scaler, lda)

# === 2. Cross-Validation ===
# Each entry: (heading printed before the scores,
#              label used in the "Mean accuracy" line,
#              splitter,
#              how many scores to print; None prints all of them).
cv_plan = [
    ("K-Fold (5-split) scores:", "K-Fold",
     KFold(n_splits=5, shuffle=True, random_state=SEED), None),
    ("Stratified K-Fold (5-split) scores:", "Stratified K-Fold",
     StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED), None),
    # Leave-One-Out yields one score per sample, so only a preview is printed.
    ("Leave-One-Out scores (first 10 shown):", "Leave-One-Out",
     LeaveOneOut(), 10),
    ("K-Fold (10-split) scores:", "K-Fold",
     KFold(n_splits=10, shuffle=True, random_state=SEED), None),
    ("Stratified K-Fold (10-split) scores:", "Stratified K-Fold",
     StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED), None),
]

# Per-scheme score arrays, keyed by heading, so nothing is overwritten.
cv_scores = {}
for position, (heading, mean_label, splitter, n_shown) in enumerate(cv_plan):
    fold_scores = cross_val_score(model, X, y, cv=splitter)
    cv_scores[heading] = fold_scores

    # Blank line between reports, but not before the first one.
    separator = "" if position == 0 else "\n"
    print(separator + heading, fold_scores[:n_shown])
    print(f"Mean accuracy ({mean_label}):", fold_scores.mean())

K-Fold (5-split) scores: [0.5        0.59210526 0.47368421 0.39473684 0.63157895]
Mean accuracy (K-Fold): 0.5184210526315789

Stratified K-Fold (5-split) scores: [0.44736842 0.40789474 0.47368421 0.46052632 0.55263158]
Mean accuracy (Stratified K-Fold): 0.46842105263157896

Leave-One-Out scores (first 10 shown): [1. 0. 0. 1. 1. 0. 0. 1. 0. 0.]
Mean accuracy (Leave-One-Out): 0.4868421052631579

K-Fold (10-split) scores: [0.55263158 0.39473684 0.55263158 0.57894737 0.42105263 0.52631579
 0.42105263 0.36842105 0.57894737 0.60526316]
Mean accuracy (K-Fold): 0.5000000000000001

Stratified K-Fold (10-split) scores: [0.44736842 0.52631579 0.5        0.36842105 0.47368421 0.5
 0.5        0.44736842 0.60526316 0.57894737]
Mean accuracy (Stratified K-Fold): 0.4947368421052632


In [None]:
import pandas as pd
import numpy as np
from itertools import product
from sklearn.model_selection import KFold, StratifiedKFold, LeaveOneOut, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# === 1. Load Dataset ===
DATA_PATH = "../Data/time-domain-cancer.csv"
FEATURE_COLUMNS = ["SDNN", "RMSSD", "pNN50"]
TARGET_COLUMN = "Class"
SEED = 42  # shared by every shuffled splitter so runs are repeatable

df = pd.read_csv(DATA_PATH)

# Fail early, with a readable message, if the expected columns are absent.
missing_columns = [c for c in FEATURE_COLUMNS + [TARGET_COLUMN] if c not in df.columns]
if missing_columns:
    raise KeyError(f"Dataset is missing required columns: {missing_columns}")

# X stays unscaled here on purpose: scaling happens inside the per-fold pipeline.
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# === 2. Generate the parameter combinations ===
# None, "auto", 0.95, then 0.1 .. 1.0 in steps of 0.1. Integer stepping avoids
# the floating-point length ambiguity of np.arange with a fractional step.
shrinkage_values = [None, "auto", 0.95] + [k / 10 for k in range(1, 11)]
# Two-element priors presuppose a binary Class column -- TODO confirm.
priors_values = [None, [0.3, 0.7], [0.5, 0.5], [0.2, 0.8], [0.4, 0.6]]
solvers = ["svd", "lsqr", "eigen"]

# product() iterates solver -> shrinkage -> priors, the same order as three
# nested loops. The 'svd' solver does not support shrinkage, so it is only
# paired with shrinkage=None; 'lsqr' and 'eigen' take every shrinkage value.
param_list = [
    {
        "solver": solver,
        "shrinkage": shrinkage,
        "priors": priors,
        "n_components": 1,
    }
    for solver, shrinkage, priors in product(solvers, shrinkage_values, priors_values)
    if not (solver == "svd" and shrinkage is not None)
]

print(f"Total kombinasi parameter: {len(param_list)}")

# === 3. Evaluate every combination ===
# The splitters do not depend on the hyper-parameters, so build them once.
# With an integer random_state each split() call reproduces the same folds.
cv_schemes = {
    "K-Fold (5)": KFold(n_splits=5, shuffle=True, random_state=SEED),
    "Stratified K-Fold (5)": StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED),
    "Leave-One-Out": LeaveOneOut(),
    "K-Fold (10)": KFold(n_splits=10, shuffle=True, random_state=SEED),
    "Stratified K-Fold (10)": StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED),
}

results = []

for i, p in enumerate(param_list, start=1):
    lda = LinearDiscriminantAnalysis(
        solver=p["solver"],
        shrinkage=p["shrinkage"],
        priors=p["priors"],
        n_components=p["n_components"],
    )
    # Scale inside the pipeline so the scaler is refit on each training fold.
    # Fitting it once on the full dataset leaks held-out statistics into the
    # model, which matters in particular for the fixed-shrinkage settings.
    model = make_pipeline(StandardScaler(), lda)

    # One result row per (parameter combination, CV scheme).
    for cv_name, splitter in cv_schemes.items():
        acc = cross_val_score(model, X, y, cv=splitter).mean()
        results.append({
            "No": i,
            "solver": p["solver"],
            "shrinkage": p["shrinkage"],
            "priors": p["priors"],
            "n_components": p["n_components"],
            "Cross-Val": cv_name,
            "Acc": round(acc, 3),
        })

# === 4. Collect the results in a DataFrame ===
df_results = pd.DataFrame(results)

# Save to CSV.
# NOTE(review): the file name says "augmented" but this cell reads
# time-domain-cancer.csv -- confirm the name is intended.
output_path = "selection2-augmented-param.csv"
df_results.to_csv(output_path, index=False)
print(f"\n✅ Hasil evaluasi disimpan ke: {output_path}")

# Show the first 10 rows.
print(df_results.head(10))


Total kombinasi parameter: 135

✅ Hasil evaluasi disimpan ke: selection2-augmented-param.csv
   No solver shrinkage      priors  n_components               Cross-Val  \
0   1    svd      None        None             1              K-Fold (5)   
1   1    svd      None        None             1   Stratified K-Fold (5)   
2   1    svd      None        None             1           Leave-One-Out   
3   1    svd      None        None             1             K-Fold (10)   
4   1    svd      None        None             1  Stratified K-Fold (10)   
5   2    svd      None  [0.3, 0.7]             1              K-Fold (5)   
6   2    svd      None  [0.3, 0.7]             1   Stratified K-Fold (5)   
7   2    svd      None  [0.3, 0.7]             1           Leave-One-Out   
8   2    svd      None  [0.3, 0.7]             1             K-Fold (10)   
9   2    svd      None  [0.3, 0.7]             1  Stratified K-Fold (10)   

     Acc  
0  0.516  
1  0.532  
2  0.500  
3  0.508  
4  0.513  
5  0