# **Model for LightGBM**


In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from lightgbm import LGBMClassifier
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.base import clone
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline


In [None]:

features_disease = np.load('Inhouse_health.npy')
features_no_disease = np.load('Inhouse_unhealth.npy')

random_indices_disease = np.random.choice(features_disease.shape[0], 1000, replace=False)
random_indices_no_disease = np.random.choice(features_no_disease.shape[0], 1000, replace=False)

features_disease_random = features_disease[random_indices_disease]
features_no_disease_random = features_no_disease[random_indices_no_disease]

features_no_disease_random = features_no_disease_random[0:features_disease_random.shape[0]]

labels_disease = np.ones(features_disease_random.shape[0])
labels_no_disease = np.zeros(features_no_disease_random.shape[0])

features = np.concatenate([features_disease_random, features_no_disease_random], axis=0)
labels = np.concatenate([labels_disease, labels_no_disease], axis=0)



plt.hist(features_disease, bins=100, alpha=0.5, label='disease')
plt.hist(features_no_disease, bins=100, alpha=0.5, label='no disease')
plt.show()

data = pd.DataFrame(features)
data['label'] = labels

X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, :-1].values, data.iloc[:, -1].values, random_state=42, stratify=data.iloc[:, -1].values, test_size=0.2)

clf = LGBMClassifier(boosting_type='gbdt', objective='binary', metric='binary_logloss', num_leaves=31, n_estimators=100, learning_rate=0.1, feature_fraction=1,bagging_fraction=0.8, bagging_freq=10)

clf.fit(X_train, y_train)


y_pred = clf.predict(X_test)

 ## **In-house Experiments**

In [2]:
np.set_printoptions(suppress=True)

def _metrics_from_cm(cm):
    tn, fp, fn, tp = cm.ravel()
    acc = (tp + tn) / cm.sum()
    fpr = fp / (fp + tn) if (fp + tn) > 0 else np.nan
    fnr = fn / (fn + tp) if (fn + tp) > 0 else np.nan
    return acc, fpr, fnr

def _bootstrap_ci(model, X, y, n_boot=500, seed=2025):
    rng = np.random.RandomState(seed)
    y_hat_all = model.predict(X)
    accs, fprs, fnrs = [], [], []
    for _ in range(n_boot):
        idx = rng.choice(len(y), len(y), replace=True)
        cm = confusion_matrix(y[idx], y_hat_all[idx], labels=[0,1])
        acc, fpr, fnr = _metrics_from_cm(cm)
        accs.append(acc); fprs.append(fpr); fnrs.append(fnr)
    def ci(arr):
        arr = np.asarray(arr)
        lo, hi = np.nanpercentile(arr, [2.5, 97.5])
        return float(np.nanmean(arr)), float(lo), float(hi)
    return ci(accs), ci(fprs), ci(fnrs)

def _fmt_triplet(t):
    mean, lo, hi = t
    return f"{mean:.3f} ({lo:.3f}–{hi:.3f})"

def evaluate_pair(name_dist, name_feat, X_tr, y_tr, X_te, y_te,
                  model_norm, model_fusion):
    model_norm.fit(X_tr, y_tr)
    cm_norm = confusion_matrix(y_te, model_norm.predict(X_te), labels=[0,1])
    acc_n, fpr_n, fnr_n = _metrics_from_cm(cm_norm)
    (acc_n_m, acc_n_lo, acc_n_hi), (fpr_n_m, fpr_n_lo, fpr_n_hi), (fnr_n_m, fnr_n_lo, fnr_n_hi) = _bootstrap_ci(model_norm, X_te, y_te)

    model_fusion.fit(X_tr, y_tr)
    cm_fu = confusion_matrix(y_te, model_fusion.predict(X_te), labels=[0,1])
    acc_f, fpr_f, fnr_f = _metrics_from_cm(cm_fu)
    (acc_f_m, acc_f_lo, acc_f_hi), (fpr_f_m, fpr_f_lo, fpr_f_hi), (fnr_f_m, fnr_f_lo, fnr_f_hi) = _bootstrap_ci(model_fusion, X_te, y_te)

    row = {
        "Distribution": name_dist,
        "Feature": name_feat,
        "Normalization Accuracy": f"{acc_n_m:.3f} ({acc_n_lo:.3f}–{acc_n_hi:.3f})",
        "Normalization FPR / FNR": f"{fpr_n_m:.3f} / {fnr_n_m:.3f} ({fpr_n_lo:.3f}–{fpr_n_hi:.3f} / {fnr_n_lo:.3f}–{fnr_n_hi:.3f})",
        "Fusion Accuracy": f"{acc_f_m:.3f} ({acc_f_lo:.3f}–{acc_f_hi:.3f})",
        "Fusion FPR / FNR": f"{fpr_f_m:.3f} / {fnr_f_m:.3f} ({fpr_f_lo:.3f}–{fpr_f_hi:.3f} / {fnr_f_lo:.3f}–{fnr_f_hi:.3f})",
    }
    return row


feature_groups = {
    "Gaussian": {
        "Approximate Entropy": slice(0, 1),
        "Detrended Fractal Dimension": slice(1, 2),
        "Higuchi Fractal Dimension": slice(2, 3),
        "Katz Fractal Dimension": slice(3, 4),
        "Sample Entropy": slice(4, 5),
        "Spectral Entropy": slice(5, 6),
    },
    "Non-Gaussian": {
        "Petrosian Fractal Dimension": slice(6, 7),
        "Permutation Entropy": slice(7, 8),
        "Singular Value Decomposition Entropy": slice(8, 9),
    },
    "Mixed": {
        "Combined": slice(None),
    },
}

rows = []
for dist_name, groups in feature_groups.items():
    for feat_name, cols in groups.items():
        Xtr = X_train[:, cols]
        Xte = X_test[:, cols]
        norm_model = make_pipeline(StandardScaler(), clone(clf))
        fusion_model = clone(clf)
        rows.append(
            evaluate_pair(dist_name, feat_name, Xtr, y_train, Xte, y_test,
                          norm_model, fusion_model)
        )

df = pd.DataFrame(rows, columns=[
    "Distribution","Feature",
    "Normalization Accuracy","Normalization FPR / FNR",
    "Fusion Accuracy","Fusion FPR / FNR"
])


Distribution                              Feature Normalization Accuracy                   Normalization FPR / FNR     Fusion Accuracy                          Fusion FPR / FNR
    Gaussian                  Approximate Entropy    0.347 (0.327–0.365) 0.715 / 0.615 (0.700–0.734 / 0.597–0.630) 0.992 (0.987–0.997) 0.008 / 0.003 (0.005–0.013 / 0.002–0.003)
    Gaussian          Detrended Fractal Dimension    0.331 (0.320–0.351) 0.689 / 0.658 (0.667–0.710 / 0.642–0.674) 0.562 (0.538–0.588) 0.397 / 0.482 (0.381–0.417 / 0.462–0.504)
    Gaussian            Higuchi Fractal Dimension    0.357 (0.339–0.375) 0.622 / 0.685 (0.599–0.643 / 0.666–0.709) 0.713 (0.692–0.736) 0.286 / 0.294 (0.269–0.305 / 0.280–0.312)
    Gaussian               Katz Fractal Dimension    0.351 (0.333–0.372) 0.710 / 0.597 (0.692–0.732 / 0.579–0.620) 0.676 (0.654–0.700) 0.293 / 0.360 (0.279–0.312 / 0.343–0.383)
    Gaussian                       Sample Entropy    0.358 (0.340–0.380) 0.706 / 0.597 (0.686–0.729 / 0.580–0.621) 

## **PTBXL**

In [None]:

features_disease = np.load('PTXBL_unhealth.npy')
features_no_disease = np.load('PTBXL_health.npy')

random_indices_disease = np.random.choice(features_disease.shape[0], 1000, replace=False)
random_indices_no_disease = np.random.choice(features_no_disease.shape[0], 1000, replace=False)

features_disease_random = features_disease[random_indices_disease]
features_no_disease_random = features_no_disease[random_indices_no_disease]

features_no_disease_random = features_no_disease_random[0:features_disease_random.shape[0]]

labels_disease = np.ones(features_disease_random.shape[0])
labels_no_disease = np.zeros(features_no_disease_random.shape[0])

features = np.concatenate([features_disease_random, features_no_disease_random], axis=0)
labels = np.concatenate([labels_disease, labels_no_disease], axis=0)



plt.hist(features_disease, bins=100, alpha=0.5, label='disease')
plt.hist(features_no_disease, bins=100, alpha=0.5, label='no disease')
plt.show()

data = pd.DataFrame(features)
data['label'] = labels

X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, :-1].values, data.iloc[:, -1].values, random_state=42, stratify=data.iloc[:, -1].values, test_size=0.2)

clf = LGBMClassifier(boosting_type='gbdt', objective='binary', metric='binary_logloss', num_leaves=31, n_estimators=100, learning_rate=0.1, feature_fraction=1,bagging_fraction=0.8, bagging_freq=10)

clf.fit(X_train, y_train)


y_pred = clf.predict(X_test)

In [3]:
np.set_printoptions(suppress=True)

def _metrics_from_cm(cm):
    tn, fp, fn, tp = cm.ravel()
    acc = (tp + tn) / cm.sum()
    fpr = fp / (fp + tn) if (fp + tn) > 0 else np.nan
    fnr = fn / (fn + tp) if (fn + tp) > 0 else np.nan
    return acc, fpr, fnr

def _bootstrap_ci(model, X, y, n_boot=500, seed=2025):
    rng = np.random.RandomState(seed)
    y_hat_all = model.predict(X)
    accs, fprs, fnrs = [], [], []
    for _ in range(n_boot):
        idx = rng.choice(len(y), len(y), replace=True)
        cm = confusion_matrix(y[idx], y_hat_all[idx], labels=[0,1])
        acc, fpr, fnr = _metrics_from_cm(cm)
        accs.append(acc); fprs.append(fpr); fnrs.append(fnr)
    def ci(arr):
        arr = np.asarray(arr)
        lo, hi = np.nanpercentile(arr, [2.5, 97.5])
        return float(np.nanmean(arr)), float(lo), float(hi)
    return ci(accs), ci(fprs), ci(fnrs)

def _fmt_triplet(t):
    mean, lo, hi = t
    return f"{mean:.3f} ({lo:.3f}–{hi:.3f})"

def evaluate_pair(name_dist, name_feat, X_tr, y_tr, X_te, y_te,
                  model_norm, model_fusion):
    model_norm.fit(X_tr, y_tr)
    cm_norm = confusion_matrix(y_te, model_norm.predict(X_te), labels=[0,1])
    acc_n, fpr_n, fnr_n = _metrics_from_cm(cm_norm)
    (acc_n_m, acc_n_lo, acc_n_hi), (fpr_n_m, fpr_n_lo, fpr_n_hi), (fnr_n_m, fnr_n_lo, fnr_n_hi) = _bootstrap_ci(model_norm, X_te, y_te)

    model_fusion.fit(X_tr, y_tr)
    cm_fu = confusion_matrix(y_te, model_fusion.predict(X_te), labels=[0,1])
    acc_f, fpr_f, fnr_f = _metrics_from_cm(cm_fu)
    (acc_f_m, acc_f_lo, acc_f_hi), (fpr_f_m, fpr_f_lo, fpr_f_hi), (fnr_f_m, fnr_f_lo, fnr_f_hi) = _bootstrap_ci(model_fusion, X_te, y_te)

    row = {
        "Distribution": name_dist,
        "Feature": name_feat,
        "Normalization Accuracy": f"{acc_n_m:.3f} ({acc_n_lo:.3f}–{acc_n_hi:.3f})",
        "Normalization FPR / FNR": f"{fpr_n_m:.3f} / {fnr_n_m:.3f} ({fpr_n_lo:.3f}–{fpr_n_hi:.3f} / {fnr_n_lo:.3f}–{fnr_n_hi:.3f})",
        "Fusion Accuracy": f"{acc_f_m:.3f} ({acc_f_lo:.3f}–{acc_f_hi:.3f})",
        "Fusion FPR / FNR": f"{fpr_f_m:.3f} / {fnr_f_m:.3f} ({fpr_f_lo:.3f}–{fpr_f_hi:.3f} / {fnr_f_lo:.3f}–{fnr_f_hi:.3f})",
    }
    return row


feature_groups = {
    "Gaussian": {
        "Approximate Entropy": slice(0, 1),
        "Detrended Fractal Dimension": slice(1, 2),
        "Higuchi Fractal Dimension": slice(2, 3),
        "Katz Fractal Dimension": slice(3, 4),
        "Sample Entropy": slice(4, 5),
        "Spectral Entropy": slice(5, 6),
    },
    "Non-Gaussian": {
        "Petrosian Fractal Dimension": slice(6, 7),
        "Permutation Entropy": slice(7, 8),
        "Singular Value Decomposition Entropy": slice(8, 9),
    },
    "Mixed": {
        "Combined": slice(None),
    },
}

rows = []
for dist_name, groups in feature_groups.items():
    for feat_name, cols in groups.items():
        Xtr = X_train[:, cols]
        Xte = X_test[:, cols]
        norm_model = make_pipeline(StandardScaler(), clone(clf))
        fusion_model = clone(clf)
        rows.append(
            evaluate_pair(dist_name, feat_name, Xtr, y_train, Xte, y_test,
                          norm_model, fusion_model)
        )

df = pd.DataFrame(rows, columns=[
    "Distribution","Feature",
    "Normalization Accuracy","Normalization FPR / FNR",
    "Fusion Accuracy","Fusion FPR / FNR"
])


Distribution                              Feature Normalization Accuracy                   Normalization FPR / FNR     Fusion Accuracy                          Fusion FPR / FNR
    Gaussian                  Approximate Entropy    0.524 (0.509–0.539) 0.463 / 0.494 (0.446–0.490 / 0.472–0.516) 0.654 (0.638–0.673) 0.352 / 0.259 (0.334–0.373 / 0.239–0.279)
    Gaussian          Detrended Fractal Dimension    0.604 (0.589–0.624) 0.505 / 0.339 (0.487–0.527 / 0.320–0.359) 0.610 (0.594–0.628) 0.476 / 0.380 (0.457–0.499 / 0.361–0.404)
    Gaussian            Higuchi Fractal Dimension    0.528 (0.514–0.545) 0.568 / 0.429 (0.551–0.590 / 0.411–0.448) 0.536 (0.520–0.555) 0.510 / 0.446 (0.491–0.532 / 0.428–0.466)
    Gaussian               Katz Fractal Dimension    0.573 (0.557–0.590) 0.372 / 0.435 (0.363–0.395 / 0.418–0.453) 0.570 (0.554–0.586) 0.434 / 0.436 (0.416–0.457 / 0.419–0.456)
    Gaussian                       Sample Entropy    0.608 (0.592–0.624) 0.442 / 0.375 (0.424–0.464 / 0.357–0.397) 

## **InHouse and PTBXL**

In [None]:

features_disease = np.load('Inhouse_PTBXL_unhealth.npy')
features_no_disease = np.load('Inhouse_PTBXL_health.npy')

random_indices_disease = np.random.choice(features_disease.shape[0], 1000, replace=False)
random_indices_no_disease = np.random.choice(features_no_disease.shape[0], 1000, replace=False)

features_disease_random = features_disease[random_indices_disease]
features_no_disease_random = features_no_disease[random_indices_no_disease]

features_no_disease_random = features_no_disease_random[0:features_disease_random.shape[0]]

labels_disease = np.ones(features_disease_random.shape[0])
labels_no_disease = np.zeros(features_no_disease_random.shape[0])

features = np.concatenate([features_disease_random, features_no_disease_random], axis=0)
labels = np.concatenate([labels_disease, labels_no_disease], axis=0)



plt.hist(features_disease, bins=100, alpha=0.5, label='disease')
plt.hist(features_no_disease, bins=100, alpha=0.5, label='no disease')
plt.show()

data = pd.DataFrame(features)
data['label'] = labels

X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, :-1].values, data.iloc[:, -1].values, random_state=42, stratify=data.iloc[:, -1].values, test_size=0.2)

clf = LGBMClassifier(boosting_type='gbdt', objective='binary', metric='binary_logloss', num_leaves=31, n_estimators=100, learning_rate=0.1, feature_fraction=1,bagging_fraction=0.8, bagging_freq=10)

clf.fit(X_train, y_train)


y_pred = clf.predict(X_test)

In [4]:
np.set_printoptions(suppress=True)

def _metrics_from_cm(cm):
    tn, fp, fn, tp = cm.ravel()
    acc = (tp + tn) / cm.sum()
    fpr = fp / (fp + tn) if (fp + tn) > 0 else np.nan
    fnr = fn / (fn + tp) if (fn + tp) > 0 else np.nan
    return acc, fpr, fnr

def _bootstrap_ci(model, X, y, n_boot=500, seed=2025):
    rng = np.random.RandomState(seed)
    y_hat_all = model.predict(X)
    accs, fprs, fnrs = [], [], []
    for _ in range(n_boot):
        idx = rng.choice(len(y), len(y), replace=True)
        cm = confusion_matrix(y[idx], y_hat_all[idx], labels=[0,1])
        acc, fpr, fnr = _metrics_from_cm(cm)
        accs.append(acc); fprs.append(fpr); fnrs.append(fnr)
    def ci(arr):
        arr = np.asarray(arr)
        lo, hi = np.nanpercentile(arr, [2.5, 97.5])
        return float(np.nanmean(arr)), float(lo), float(hi)
    return ci(accs), ci(fprs), ci(fnrs)

def _fmt_triplet(t):
    mean, lo, hi = t
    return f"{mean:.3f} ({lo:.3f}–{hi:.3f})"

def evaluate_pair(name_dist, name_feat, X_tr, y_tr, X_te, y_te,
                  model_norm, model_fusion):
    model_norm.fit(X_tr, y_tr)
    cm_norm = confusion_matrix(y_te, model_norm.predict(X_te), labels=[0,1])
    acc_n, fpr_n, fnr_n = _metrics_from_cm(cm_norm)
    (acc_n_m, acc_n_lo, acc_n_hi), (fpr_n_m, fpr_n_lo, fpr_n_hi), (fnr_n_m, fnr_n_lo, fnr_n_hi) = _bootstrap_ci(model_norm, X_te, y_te)

    model_fusion.fit(X_tr, y_tr)
    cm_fu = confusion_matrix(y_te, model_fusion.predict(X_te), labels=[0,1])
    acc_f, fpr_f, fnr_f = _metrics_from_cm(cm_fu)
    (acc_f_m, acc_f_lo, acc_f_hi), (fpr_f_m, fpr_f_lo, fpr_f_hi), (fnr_f_m, fnr_f_lo, fnr_f_hi) = _bootstrap_ci(model_fusion, X_te, y_te)

    row = {
        "Distribution": name_dist,
        "Feature": name_feat,
        "Normalization Accuracy": f"{acc_n_m:.3f} ({acc_n_lo:.3f}–{acc_n_hi:.3f})",
        "Normalization FPR / FNR": f"{fpr_n_m:.3f} / {fnr_n_m:.3f} ({fpr_n_lo:.3f}–{fpr_n_hi:.3f} / {fnr_n_lo:.3f}–{fnr_n_hi:.3f})",
        "Fusion Accuracy": f"{acc_f_m:.3f} ({acc_f_lo:.3f}–{acc_f_hi:.3f})",
        "Fusion FPR / FNR": f"{fpr_f_m:.3f} / {fnr_f_m:.3f} ({fpr_f_lo:.3f}–{fpr_f_hi:.3f} / {fnr_f_lo:.3f}–{fnr_f_hi:.3f})",
    }
    return row


feature_groups = {
    "Gaussian": {
        "Approximate Entropy": slice(0, 1),
        "Detrended Fractal Dimension": slice(1, 2),
        "Higuchi Fractal Dimension": slice(2, 3),
        "Katz Fractal Dimension": slice(3, 4),
        "Sample Entropy": slice(4, 5),
        "Spectral Entropy": slice(5, 6),
    },
    "Non-Gaussian": {
        "Petrosian Fractal Dimension": slice(6, 7),
        "Permutation Entropy": slice(7, 8),
        "Singular Value Decomposition Entropy": slice(8, 9),
    },
    "Mixed": {
        "Combined": slice(None),
    },
}

rows = []
for dist_name, groups in feature_groups.items():
    for feat_name, cols in groups.items():
        Xtr = X_train[:, cols]
        Xte = X_test[:, cols]
        norm_model = make_pipeline(StandardScaler(), clone(clf))
        fusion_model = clone(clf)
        rows.append(
            evaluate_pair(dist_name, feat_name, Xtr, y_train, Xte, y_test,
                          norm_model, fusion_model)
        )

df = pd.DataFrame(rows, columns=[
    "Distribution","Feature",
    "Normalization Accuracy","Normalization FPR / FNR",
    "Fusion Accuracy","Fusion FPR / FNR"
])


Distribution                              Feature Normalization Accuracy                   Normalization FPR / FNR     Fusion Accuracy                          Fusion FPR / FNR
    Gaussian                  Approximate Entropy    0.694 (0.676–0.714) 0.315 / 0.300 (0.292–0.334 / 0.289–0.317) 0.701 (0.685–0.724) 0.318 / 0.298 (0.296–0.339 / 0.278–0.316)
    Gaussian          Detrended Fractal Dimension    0.593 (0.575–0.616) 0.429 / 0.389 (0.410–0.452 / 0.367–0.414) 0.562 (0.545–0.583) 0.456 / 0.436 (0.435–0.479 / 0.421–0.462)
    Gaussian            Higuchi Fractal Dimension    0.724 (0.709–0.744) 0.310 / 0.259 (0.294–0.328 / 0.241–0.279) 0.587 (0.571–0.609) 0.490 / 0.406 (0.471–0.516 / 0.383–0.429)
    Gaussian               Katz Fractal Dimension    0.669 (0.652–0.688) 0.319 / 0.357 (0.300–0.377 / 0.337–0.377) 0.570 (0.552–0.590) 0.428 / 0.402 (0.410–0.450 / 0.384–0.424)
    Gaussian                       Sample Entropy    0.750 (0.732–0.768) 0.226 / 0.316 (0.205–0.245 / 0.296–0.339) 

# Model for XGBoost

In [None]:
from xgboost import XGBClassifier

features_disease = np.load('Inhouse_unhealth.npy')
features_no_disease = np.load('Inhouse_health.npy')

random_indices_disease = np.random.choice(features_disease.shape[0], 1000, replace=False)
random_indices_no_disease = np.random.choice(features_no_disease.shape[0], 1000, replace=False)

features_disease_random = features_disease[random_indices_disease]
features_no_disease_random = features_no_disease[random_indices_no_disease]

features_no_disease_random = features_no_disease_random[0:features_disease_random.shape[0]]

labels_disease = np.ones(features_disease_random.shape[0])
labels_no_disease = np.zeros(features_no_disease_random.shape[0])

features = np.concatenate([features_disease_random, features_no_disease_random], axis=0)
labels = np.concatenate([labels_disease, labels_no_disease], axis=0)



plt.hist(features_disease, bins=100, alpha=0.5, label='disease')
plt.hist(features_no_disease, bins=100, alpha=0.5, label='no disease')
plt.show()

data = pd.DataFrame(features)
data['label'] = labels

X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, :-1].values, data.iloc[:, -1].values, random_state=42, stratify=data.iloc[:, -1].values, test_size=0.2)


clf = XGBClassifier(
    booster="gbtree",
    objective="binary:logistic",
    eval_metric="logloss",
    n_estimators=100,
    learning_rate=0.1,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=1.0,
    random_state=42,
    n_jobs=-1,
)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)


y_pred = clf.predict(X_test)


## **In-house**


In [5]:
np.set_printoptions(suppress=True)

def _metrics_from_cm(cm):
    tn, fp, fn, tp = cm.ravel()
    acc = (tp + tn) / cm.sum()
    fpr = fp / (fp + tn) if (fp + tn) > 0 else np.nan
    fnr = fn / (fn + tp) if (fn + tp) > 0 else np.nan
    return acc, fpr, fnr

def _bootstrap_ci(model, X, y, n_boot=500, seed=2025):
    rng = np.random.RandomState(seed)
    y_hat_all = model.predict(X)
    accs, fprs, fnrs = [], [], []
    for _ in range(n_boot):
        idx = rng.choice(len(y), len(y), replace=True)
        cm = confusion_matrix(y[idx], y_hat_all[idx], labels=[0,1])
        acc, fpr, fnr = _metrics_from_cm(cm)
        accs.append(acc); fprs.append(fpr); fnrs.append(fnr)
    def ci(arr):
        arr = np.asarray(arr)
        lo, hi = np.nanpercentile(arr, [2.5, 97.5])
        return float(np.nanmean(arr)), float(lo), float(hi)
    return ci(accs), ci(fprs), ci(fnrs)

def _fmt_triplet(t):
    mean, lo, hi = t
    return f"{mean:.3f} ({lo:.3f}–{hi:.3f})"

def evaluate_pair(name_dist, name_feat, X_tr, y_tr, X_te, y_te,
                  model_norm, model_fusion):
    model_norm.fit(X_tr, y_tr)
    cm_norm = confusion_matrix(y_te, model_norm.predict(X_te), labels=[0,1])
    acc_n, fpr_n, fnr_n = _metrics_from_cm(cm_norm)
    (acc_n_m, acc_n_lo, acc_n_hi), (fpr_n_m, fpr_n_lo, fpr_n_hi), (fnr_n_m, fnr_n_lo, fnr_n_hi) = _bootstrap_ci(model_norm, X_te, y_te)

    model_fusion.fit(X_tr, y_tr)
    cm_fu = confusion_matrix(y_te, model_fusion.predict(X_te), labels=[0,1])
    acc_f, fpr_f, fnr_f = _metrics_from_cm(cm_fu)
    (acc_f_m, acc_f_lo, acc_f_hi), (fpr_f_m, fpr_f_lo, fpr_f_hi), (fnr_f_m, fnr_f_lo, fnr_f_hi) = _bootstrap_ci(model_fusion, X_te, y_te)

    row = {
        "Distribution": name_dist,
        "Feature": name_feat,
        "Normalization Accuracy": f"{acc_n_m:.3f} ({acc_n_lo:.3f}–{acc_n_hi:.3f})",
        "Normalization FPR / FNR": f"{fpr_n_m:.3f} / {fnr_n_m:.3f} ({fpr_n_lo:.3f}–{fpr_n_hi:.3f} / {fnr_n_lo:.3f}–{fnr_n_hi:.3f})",
        "Fusion Accuracy": f"{acc_f_m:.3f} ({acc_f_lo:.3f}–{acc_f_hi:.3f})",
        "Fusion FPR / FNR": f"{fpr_f_m:.3f} / {fnr_f_m:.3f} ({fpr_f_lo:.3f}–{fpr_f_hi:.3f} / {fnr_f_lo:.3f}–{fnr_f_hi:.3f})",
    }
    return row


feature_groups = {
    "Gaussian": {
        "Approximate Entropy": slice(0, 1),
        "Detrended Fractal Dimension": slice(1, 2),
        "Higuchi Fractal Dimension": slice(2, 3),
        "Katz Fractal Dimension": slice(3, 4),
        "Sample Entropy": slice(4, 5),
        "Spectral Entropy": slice(5, 6),
    },
    "Non-Gaussian": {
        "Petrosian Fractal Dimension": slice(6, 7),
        "Permutation Entropy": slice(7, 8),
        "Singular Value Decomposition Entropy": slice(8, 9),
    },
    "Mixed": {
        "Combined": slice(None),
    },
}

rows = []
for dist_name, groups in feature_groups.items():
    for feat_name, cols in groups.items():
        Xtr = X_train[:, cols]
        Xte = X_test[:, cols]
        norm_model = make_pipeline(StandardScaler(), clone(clf))
        fusion_model = clone(clf)
        rows.append(
            evaluate_pair(dist_name, feat_name, Xtr, y_train, Xte, y_test,
                          norm_model, fusion_model)
        )

df = pd.DataFrame(rows, columns=[
    "Distribution","Feature",
    "Normalization Accuracy","Normalization FPR / FNR",
    "Fusion Accuracy","Fusion FPR / FNR"
])


Distribution                              Feature Normalization Accuracy                   Normalization FPR / FNR     Fusion Accuracy                          Fusion FPR / FNR
    Gaussian                  Approximate Entropy    0.353 (0.330–0.376) 0.703 / 0.598 (0.679–0.732 / 0.583–0.619) 0.993 (0.989–0.996) 0.132 / 0.003 (0.111–0.146 / 0.001–0.006)
    Gaussian          Detrended Fractal Dimension    0.360 (0.349–0.379) 0.653 / 0.632 (0.631–0.678 / 0.611–0.654) 0.596 (0.578–0.614) 0.415 / 0.407 (0.396–0.433 / 0.387–0.428)
    Gaussian            Higuchi Fractal Dimension    0.393 (0.375–0.410) 0.800 / 0.200 (0.780–0.830 / 0.185–0.225) 0.703 (0.683–0.721) 0.296 / 0.305 (0.277–0.313 / 0.289–0.327)
    Gaussian               Katz Fractal Dimension    0.322 (0.308–0.336) 0.741 / 0.628 (0.717–0.769 / 0.605–0.651) 0.666 (0.648–0.685) 0.303 / 0.369 (0.290–0.319 / 0.347–0.390)
    Gaussian                       Sample Entropy    0.333 (0.320–0.347) 0.726 / 0.618 (0.703–0.752 / 0.600–0.637) 

## **PTBXL**

In [None]:
from xgboost import XGBClassifier

features_disease = np.load('PTXBL_unhealth.npy')
features_no_disease = np.load('PTBXL_health.npy')

random_indices_disease = np.random.choice(features_disease.shape[0], 1000, replace=False)
random_indices_no_disease = np.random.choice(features_no_disease.shape[0], 1000, replace=False)

features_disease_random = features_disease[random_indices_disease]
features_no_disease_random = features_no_disease[random_indices_no_disease]

features_no_disease_random = features_no_disease_random[0:features_disease_random.shape[0]]

labels_disease = np.ones(features_disease_random.shape[0])
labels_no_disease = np.zeros(features_no_disease_random.shape[0])

features = np.concatenate([features_disease_random, features_no_disease_random], axis=0)
labels = np.concatenate([labels_disease, labels_no_disease], axis=0)



plt.hist(features_disease, bins=100, alpha=0.5, label='disease')
plt.hist(features_no_disease, bins=100, alpha=0.5, label='no disease')
plt.show()

data = pd.DataFrame(features)
data['label'] = labels

X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, :-1].values, data.iloc[:, -1].values, random_state=42, stratify=data.iloc[:, -1].values, test_size=0.2)


clf = XGBClassifier(
    booster="gbtree",
    objective="binary:logistic",
    eval_metric="logloss",
    n_estimators=100,
    learning_rate=0.1,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=1.0,
    random_state=42,
    n_jobs=-1,
)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)


y_pred = clf.predict(X_test)

In [6]:
np.set_printoptions(suppress=True)

def _metrics_from_cm(cm):
    tn, fp, fn, tp = cm.ravel()
    acc = (tp + tn) / cm.sum()
    fpr = fp / (fp + tn) if (fp + tn) > 0 else np.nan
    fnr = fn / (fn + tp) if (fn + tp) > 0 else np.nan
    return acc, fpr, fnr

def _bootstrap_ci(model, X, y, n_boot=500, seed=2025):
    rng = np.random.RandomState(seed)
    y_hat_all = model.predict(X)
    accs, fprs, fnrs = [], [], []
    for _ in range(n_boot):
        idx = rng.choice(len(y), len(y), replace=True)
        cm = confusion_matrix(y[idx], y_hat_all[idx], labels=[0,1])
        acc, fpr, fnr = _metrics_from_cm(cm)
        accs.append(acc); fprs.append(fpr); fnrs.append(fnr)
    def ci(arr):
        arr = np.asarray(arr)
        lo, hi = np.nanpercentile(arr, [2.5, 97.5])
        return float(np.nanmean(arr)), float(lo), float(hi)
    return ci(accs), ci(fprs), ci(fnrs)

def _fmt_triplet(t):
    mean, lo, hi = t
    return f"{mean:.3f} ({lo:.3f}–{hi:.3f})"

def evaluate_pair(name_dist, name_feat, X_tr, y_tr, X_te, y_te,
                  model_norm, model_fusion):
    model_norm.fit(X_tr, y_tr)
    cm_norm = confusion_matrix(y_te, model_norm.predict(X_te), labels=[0,1])
    acc_n, fpr_n, fnr_n = _metrics_from_cm(cm_norm)
    (acc_n_m, acc_n_lo, acc_n_hi), (fpr_n_m, fpr_n_lo, fpr_n_hi), (fnr_n_m, fnr_n_lo, fnr_n_hi) = _bootstrap_ci(model_norm, X_te, y_te)

    model_fusion.fit(X_tr, y_tr)
    cm_fu = confusion_matrix(y_te, model_fusion.predict(X_te), labels=[0,1])
    acc_f, fpr_f, fnr_f = _metrics_from_cm(cm_fu)
    (acc_f_m, acc_f_lo, acc_f_hi), (fpr_f_m, fpr_f_lo, fpr_f_hi), (fnr_f_m, fnr_f_lo, fnr_f_hi) = _bootstrap_ci(model_fusion, X_te, y_te)

    row = {
        "Distribution": name_dist,
        "Feature": name_feat,
        "Normalization Accuracy": f"{acc_n_m:.3f} ({acc_n_lo:.3f}–{acc_n_hi:.3f})",
        "Normalization FPR / FNR": f"{fpr_n_m:.3f} / {fnr_n_m:.3f} ({fpr_n_lo:.3f}–{fpr_n_hi:.3f} / {fnr_n_lo:.3f}–{fnr_n_hi:.3f})",
        "Fusion Accuracy": f"{acc_f_m:.3f} ({acc_f_lo:.3f}–{acc_f_hi:.3f})",
        "Fusion FPR / FNR": f"{fpr_f_m:.3f} / {fnr_f_m:.3f} ({fpr_f_lo:.3f}–{fpr_f_hi:.3f} / {fnr_f_lo:.3f}–{fnr_f_hi:.3f})",
    }
    return row


feature_groups = {
    "Gaussian": {
        "Approximate Entropy": slice(0, 1),
        "Detrended Fractal Dimension": slice(1, 2),
        "Higuchi Fractal Dimension": slice(2, 3),
        "Katz Fractal Dimension": slice(3, 4),
        "Sample Entropy": slice(4, 5),
        "Spectral Entropy": slice(5, 6),
    },
    "Non-Gaussian": {
        "Petrosian Fractal Dimension": slice(6, 7),
        "Permutation Entropy": slice(7, 8),
        "Singular Value Decomposition Entropy": slice(8, 9),
    },
    "Mixed": {
        "Combined": slice(None),
    },
}

rows = []
for dist_name, groups in feature_groups.items():
    for feat_name, cols in groups.items():
        Xtr = X_train[:, cols]
        Xte = X_test[:, cols]
        norm_model = make_pipeline(StandardScaler(), clone(clf))
        fusion_model = clone(clf)
        rows.append(
            evaluate_pair(dist_name, feat_name, Xtr, y_train, Xte, y_test,
                          norm_model, fusion_model)
        )

df = pd.DataFrame(rows, columns=[
    "Distribution","Feature",
    "Normalization Accuracy","Normalization FPR / FNR",
    "Fusion Accuracy","Fusion FPR / FNR"
])


Distribution                              Feature Normalization Accuracy                   Normalization FPR / FNR     Fusion Accuracy                          Fusion FPR / FNR
    Gaussian                  Approximate Entropy    0.510 (0.492–0.528) 0.437 / 0.549 (0.419–0.456 / 0.530–0.571) 0.450 (0.434–0.467) 0.565 / 0.547 (0.546–0.585 / 0.527–0.571)
    Gaussian          Detrended Fractal Dimension    0.424 (0.407–0.440) 0.555 / 0.600 (0.536–0.578 / 0.579–0.622) 0.401 (0.384–0.419) 0.701 / 0.487 (0.681–0.723 / 0.469–0.508)
    Gaussian            Higuchi Fractal Dimension    0.357 (0.341–0.375) 0.597 / 0.699 (0.577–0.619 / 0.680–0.721) 0.462 (0.444–0.480) 0.574 / 0.499 (0.555–0.595 / 0.480–0.520)
    Gaussian               Katz Fractal Dimension    0.537 (0.521–0.556) 0.454 / 0.480 (0.437–0.473 / 0.461–0.500) 0.473 (0.457–0.490) 0.480 / 0.581 (0.461–0.545 / 0.561–0.603)
    Gaussian                       Sample Entropy    0.483 (0.466–0.500) 0.575 / 0.457 (0.557–0.597 / 0.438–0.478) 

## **Aggregated**



In [None]:
from xgboost import XGBClassifier

features_disease = np.load('Aggregated_unhealth.npy')
features_no_disease = np.load('Aggregated_health.npy')

random_indices_disease = np.random.choice(features_disease.shape[0], 1000, replace=False)
random_indices_no_disease = np.random.choice(features_no_disease.shape[0], 1000, replace=False)

features_disease_random = features_disease[random_indices_disease]
features_no_disease_random = features_no_disease[random_indices_no_disease]

features_no_disease_random = features_no_disease_random[0:features_disease_random.shape[0]]

labels_disease = np.ones(features_disease_random.shape[0])
labels_no_disease = np.zeros(features_no_disease_random.shape[0])

features = np.concatenate([features_disease_random, features_no_disease_random], axis=0)
labels = np.concatenate([labels_disease, labels_no_disease], axis=0)



plt.hist(features_disease, bins=100, alpha=0.5, label='disease')
plt.hist(features_no_disease, bins=100, alpha=0.5, label='no disease')
plt.show()

data = pd.DataFrame(features)
data['label'] = labels

X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, :-1].values, data.iloc[:, -1].values, random_state=42, stratify=data.iloc[:, -1].values, test_size=0.2)


clf = XGBClassifier(
    booster="gbtree",
    objective="binary:logistic",
    eval_metric="logloss",
    n_estimators=100,
    learning_rate=0.1,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=1.0,
    random_state=42,
    n_jobs=-1,
)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)


y_pred = clf.predict(X_test)

In [7]:
np.set_printoptions(suppress=True)

def _metrics_from_cm(cm):
    tn, fp, fn, tp = cm.ravel()
    acc = (tp + tn) / cm.sum()
    fpr = fp / (fp + tn) if (fp + tn) > 0 else np.nan
    fnr = fn / (fn + tp) if (fn + tp) > 0 else np.nan
    return acc, fpr, fnr

def _bootstrap_ci(model, X, y, n_boot=500, seed=2025):
    rng = np.random.RandomState(seed)
    y_hat_all = model.predict(X)
    accs, fprs, fnrs = [], [], []
    for _ in range(n_boot):
        idx = rng.choice(len(y), len(y), replace=True)
        cm = confusion_matrix(y[idx], y_hat_all[idx], labels=[0,1])
        acc, fpr, fnr = _metrics_from_cm(cm)
        accs.append(acc); fprs.append(fpr); fnrs.append(fnr)
    def ci(arr):
        arr = np.asarray(arr)
        lo, hi = np.nanpercentile(arr, [2.5, 97.5])
        return float(np.nanmean(arr)), float(lo), float(hi)
    return ci(accs), ci(fprs), ci(fnrs)

def _fmt_triplet(t):
    mean, lo, hi = t
    return f"{mean:.3f} ({lo:.3f}–{hi:.3f})"

def evaluate_pair(name_dist, name_feat, X_tr, y_tr, X_te, y_te,
                  model_norm, model_fusion):
    model_norm.fit(X_tr, y_tr)
    cm_norm = confusion_matrix(y_te, model_norm.predict(X_te), labels=[0,1])
    acc_n, fpr_n, fnr_n = _metrics_from_cm(cm_norm)
    (acc_n_m, acc_n_lo, acc_n_hi), (fpr_n_m, fpr_n_lo, fpr_n_hi), (fnr_n_m, fnr_n_lo, fnr_n_hi) = _bootstrap_ci(model_norm, X_te, y_te)

    model_fusion.fit(X_tr, y_tr)
    cm_fu = confusion_matrix(y_te, model_fusion.predict(X_te), labels=[0,1])
    acc_f, fpr_f, fnr_f = _metrics_from_cm(cm_fu)
    (acc_f_m, acc_f_lo, acc_f_hi), (fpr_f_m, fpr_f_lo, fpr_f_hi), (fnr_f_m, fnr_f_lo, fnr_f_hi) = _bootstrap_ci(model_fusion, X_te, y_te)

    row = {
        "Distribution": name_dist,
        "Feature": name_feat,
        "Normalization Accuracy": f"{acc_n_m:.3f} ({acc_n_lo:.3f}–{acc_n_hi:.3f})",
        "Normalization FPR / FNR": f"{fpr_n_m:.3f} / {fnr_n_m:.3f} ({fpr_n_lo:.3f}–{fpr_n_hi:.3f} / {fnr_n_lo:.3f}–{fnr_n_hi:.3f})",
        "Fusion Accuracy": f"{acc_f_m:.3f} ({acc_f_lo:.3f}–{acc_f_hi:.3f})",
        "Fusion FPR / FNR": f"{fpr_f_m:.3f} / {fnr_f_m:.3f} ({fpr_f_lo:.3f}–{fpr_f_hi:.3f} / {fnr_f_lo:.3f}–{fnr_f_hi:.3f})",
    }
    return row


feature_groups = {
    "Gaussian": {
        "Approximate Entropy": slice(0, 1),
        "Detrended Fractal Dimension": slice(1, 2),
        "Higuchi Fractal Dimension": slice(2, 3),
        "Katz Fractal Dimension": slice(3, 4),
        "Sample Entropy": slice(4, 5),
        "Spectral Entropy": slice(5, 6),
    },
    "Non-Gaussian": {
        "Petrosian Fractal Dimension": slice(6, 7),
        "Permutation Entropy": slice(7, 8),
        "Singular Value Decomposition Entropy": slice(8, 9),
    },
    "Mixed": {
        "Combined": slice(None),
    },
}

rows = []
for dist_name, groups in feature_groups.items():
    for feat_name, cols in groups.items():
        Xtr = X_train[:, cols]
        Xte = X_test[:, cols]
        norm_model = make_pipeline(StandardScaler(), clone(clf))
        fusion_model = clone(clf)
        rows.append(
            evaluate_pair(dist_name, feat_name, Xtr, y_train, Xte, y_test,
                          norm_model, fusion_model)
        )

df = pd.DataFrame(rows, columns=[
    "Distribution","Feature",
    "Normalization Accuracy","Normalization FPR / FNR",
    "Fusion Accuracy","Fusion FPR / FNR"
])


Distribution                              Feature Normalization Accuracy                 Normalization FPR / FNR     Fusion Accuracy                          Fusion FPR / FNR
    Gaussian                  Approximate Entropy    0.664 (0.635–0.693) 0.45 / 0.26 (0.401–0.499 / 0.225–0.295) 0.647 (0.617–0.677)   0.48 / 0.27 (0.431–0.529 / 0.234–0.306)
    Gaussian          Detrended Fractal Dimension    0.526 (0.495–0.557) 0.65 / 0.36 (0.603–0.697 / 0.322–0.398) 0.557 (0.526–0.588)   0.58 / 0.36 (0.532–0.628 / 0.322–0.398)
    Gaussian            Higuchi Fractal Dimension    0.669 (0.640–0.698) 0.40 / 0.29 (0.352–0.448 / 0.254–0.326) 0.554 (0.523–0.585)   0.53 / 0.39 (0.481–0.579 / 0.351–0.429)
    Gaussian               Katz Fractal Dimension    0.652 (0.622–0.682) 0.45 / 0.28 (0.401–0.499 / 0.244–0.316) 0.531 (0.500–0.562)   0.50 / 0.45 (0.451–0.549 / 0.410–0.490)
    Gaussian                       Sample Entropy    0.689 (0.660–0.718) 0.43 / 0.24 (0.381–0.479 / 0.206–0.274) 0.724 (0.696

# **Model for Random Forest**

In [None]:
from xgboost import XGBClassifier

features_disease = np.load('Inhouse_unhealth.npy')
features_no_disease = np.load('Inhouse_health.npy')

random_indices_disease = np.random.choice(features_disease.shape[0], 1000, replace=False)
random_indices_no_disease = np.random.choice(features_no_disease.shape[0], 1000, replace=False)

features_disease_random = features_disease[random_indices_disease]
features_no_disease_random = features_no_disease[random_indices_no_disease]

features_no_disease_random = features_no_disease_random[0:features_disease_random.shape[0]]

labels_disease = np.ones(features_disease_random.shape[0])
labels_no_disease = np.zeros(features_no_disease_random.shape[0])

features = np.concatenate([features_disease_random, features_no_disease_random], axis=0)
labels = np.concatenate([labels_disease, labels_no_disease], axis=0)



plt.hist(features_disease, bins=100, alpha=0.5, label='disease')
plt.hist(features_no_disease, bins=100, alpha=0.5, label='no disease')
plt.show()

data = pd.DataFrame(features)
data['label'] = labels

X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, :-1].values, data.iloc[:, -1].values, random_state=42, stratify=data.iloc[:, -1].values, test_size=0.2)


from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(
    n_estimators=100,
    max_depth=12,
    max_features=1.0,
    bootstrap=True,
    n_jobs=-1,
    random_state=42
)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

## **Inhouse**

In [8]:
np.set_printoptions(suppress=True)

def _metrics_from_cm(cm):
    tn, fp, fn, tp = cm.ravel()
    acc = (tp + tn) / cm.sum()
    fpr = fp / (fp + tn) if (fp + tn) > 0 else np.nan
    fnr = fn / (fn + tp) if (fn + tp) > 0 else np.nan
    return acc, fpr, fnr

def _bootstrap_ci(model, X, y, n_boot=500, seed=2025):
    rng = np.random.RandomState(seed)
    y_hat_all = model.predict(X)
    accs, fprs, fnrs = [], [], []
    for _ in range(n_boot):
        idx = rng.choice(len(y), len(y), replace=True)
        cm = confusion_matrix(y[idx], y_hat_all[idx], labels=[0,1])
        acc, fpr, fnr = _metrics_from_cm(cm)
        accs.append(acc); fprs.append(fpr); fnrs.append(fnr)
    def ci(arr):
        arr = np.asarray(arr)
        lo, hi = np.nanpercentile(arr, [2.5, 97.5])
        return float(np.nanmean(arr)), float(lo), float(hi)
    return ci(accs), ci(fprs), ci(fnrs)

def _fmt_triplet(t):
    mean, lo, hi = t
    return f"{mean:.3f} ({lo:.3f}–{hi:.3f})"

def evaluate_pair(name_dist, name_feat, X_tr, y_tr, X_te, y_te,
                  model_norm, model_fusion):
    model_norm.fit(X_tr, y_tr)
    cm_norm = confusion_matrix(y_te, model_norm.predict(X_te), labels=[0,1])
    acc_n, fpr_n, fnr_n = _metrics_from_cm(cm_norm)
    (acc_n_m, acc_n_lo, acc_n_hi), (fpr_n_m, fpr_n_lo, fpr_n_hi), (fnr_n_m, fnr_n_lo, fnr_n_hi) = _bootstrap_ci(model_norm, X_te, y_te)

    model_fusion.fit(X_tr, y_tr)
    cm_fu = confusion_matrix(y_te, model_fusion.predict(X_te), labels=[0,1])
    acc_f, fpr_f, fnr_f = _metrics_from_cm(cm_fu)
    (acc_f_m, acc_f_lo, acc_f_hi), (fpr_f_m, fpr_f_lo, fpr_f_hi), (fnr_f_m, fnr_f_lo, fnr_f_hi) = _bootstrap_ci(model_fusion, X_te, y_te)

    row = {
        "Distribution": name_dist,
        "Feature": name_feat,
        "Normalization Accuracy": f"{acc_n_m:.3f} ({acc_n_lo:.3f}–{acc_n_hi:.3f})",
        "Normalization FPR / FNR": f"{fpr_n_m:.3f} / {fnr_n_m:.3f} ({fpr_n_lo:.3f}–{fpr_n_hi:.3f} / {fnr_n_lo:.3f}–{fnr_n_hi:.3f})",
        "Fusion Accuracy": f"{acc_f_m:.3f} ({acc_f_lo:.3f}–{acc_f_hi:.3f})",
        "Fusion FPR / FNR": f"{fpr_f_m:.3f} / {fnr_f_m:.3f} ({fpr_f_lo:.3f}–{fpr_f_hi:.3f} / {fnr_f_lo:.3f}–{fnr_f_hi:.3f})",
    }
    return row


feature_groups = {
    "Gaussian": {
        "Approximate Entropy": slice(0, 1),
        "Detrended Fractal Dimension": slice(1, 2),
        "Higuchi Fractal Dimension": slice(2, 3),
        "Katz Fractal Dimension": slice(3, 4),
        "Sample Entropy": slice(4, 5),
        "Spectral Entropy": slice(5, 6),
    },
    "Non-Gaussian": {
        "Petrosian Fractal Dimension": slice(6, 7),
        "Permutation Entropy": slice(7, 8),
        "Singular Value Decomposition Entropy": slice(8, 9),
    },
    "Mixed": {
        "Combined": slice(None),
    },
}

rows = []
for dist_name, groups in feature_groups.items():
    for feat_name, cols in groups.items():
        Xtr = X_train[:, cols]
        Xte = X_test[:, cols]
        norm_model = make_pipeline(StandardScaler(), clone(clf))
        fusion_model = clone(clf)
        rows.append(
            evaluate_pair(dist_name, feat_name, Xtr, y_train, Xte, y_test,
                          norm_model, fusion_model)
        )

df = pd.DataFrame(rows, columns=[
    "Distribution","Feature",
    "Normalization Accuracy","Normalization FPR / FNR",
    "Fusion Accuracy","Fusion FPR / FNR"
])


Distribution                              Feature Normalization Accuracy                   Normalization FPR / FNR     Fusion Accuracy                          Fusion FPR / FNR
    Gaussian                  Approximate Entropy    0.474 (0.446–0.502) 0.633 / 0.420 (0.586–0.680 / 0.372–0.468) 0.893 (0.876–0.910) 0.130 / 0.085 (0.097–0.163 / 0.058–0.112)
    Gaussian          Detrended Fractal Dimension    0.613 (0.585–0.641) 0.465 / 0.310 (0.416–0.514 / 0.265–0.355) 0.559 (0.531–0.587) 0.528 / 0.353 (0.479–0.577 / 0.306–0.400)
    Gaussian            Higuchi Fractal Dimension    0.494 (0.466–0.522) 0.608 / 0.405 (0.560–0.656 / 0.357–0.453) 0.545 (0.517–0.573) 0.545 / 0.365 (0.496–0.594 / 0.318–0.412)
    Gaussian               Katz Fractal Dimension    0.483 (0.455–0.511) 0.620 / 0.415 (0.573–0.667 / 0.367–0.463) 0.576 (0.548–0.604) 0.508 / 0.340 (0.459–0.557 / 0.293–0.387)
    Gaussian                       Sample Entropy    0.536 (0.508–0.564) 0.558 / 0.370 (0.510–0.606 / 0.322–0.418) 

## **PTBXL**

In [None]:

features_disease = np.load('PTBXL_unhealth.npy')
features_no_disease = np.load('PTBXL_health.npy')

random_indices_disease = np.random.choice(features_disease.shape[0], 1000, replace=False)
random_indices_no_disease = np.random.choice(features_no_disease.shape[0], 1000, replace=False)

features_disease_random = features_disease[random_indices_disease]
features_no_disease_random = features_no_disease[random_indices_no_disease]

features_no_disease_random = features_no_disease_random[0:features_disease_random.shape[0]]

labels_disease = np.ones(features_disease_random.shape[0])
labels_no_disease = np.zeros(features_no_disease_random.shape[0])

features = np.concatenate([features_disease_random, features_no_disease_random], axis=0)
labels = np.concatenate([labels_disease, labels_no_disease], axis=0)



plt.hist(features_disease, bins=100, alpha=0.5, label='disease')
plt.hist(features_no_disease, bins=100, alpha=0.5, label='no disease')
plt.show()

data = pd.DataFrame(features)
data['label'] = labels

X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, :-1].values, data.iloc[:, -1].values, random_state=42, stratify=data.iloc[:, -1].values, test_size=0.2)


from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(
    n_estimators=100,
    max_depth=12,
    max_features=1.0,
    bootstrap=True,
    n_jobs=-1,
    random_state=42
)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [9]:
np.set_printoptions(suppress=True)

def _metrics_from_cm(cm):
    tn, fp, fn, tp = cm.ravel()
    acc = (tp + tn) / cm.sum()
    fpr = fp / (fp + tn) if (fp + tn) > 0 else np.nan
    fnr = fn / (fn + tp) if (fn + tp) > 0 else np.nan
    return acc, fpr, fnr

def _bootstrap_ci(model, X, y, n_boot=500, seed=2025):
    rng = np.random.RandomState(seed)
    y_hat_all = model.predict(X)
    accs, fprs, fnrs = [], [], []
    for _ in range(n_boot):
        idx = rng.choice(len(y), len(y), replace=True)
        cm = confusion_matrix(y[idx], y_hat_all[idx], labels=[0,1])
        acc, fpr, fnr = _metrics_from_cm(cm)
        accs.append(acc); fprs.append(fpr); fnrs.append(fnr)
    def ci(arr):
        arr = np.asarray(arr)
        lo, hi = np.nanpercentile(arr, [2.5, 97.5])
        return float(np.nanmean(arr)), float(lo), float(hi)
    return ci(accs), ci(fprs), ci(fnrs)

def _fmt_triplet(t):
    mean, lo, hi = t
    return f"{mean:.3f} ({lo:.3f}–{hi:.3f})"

def evaluate_pair(name_dist, name_feat, X_tr, y_tr, X_te, y_te,
                  model_norm, model_fusion):
    model_norm.fit(X_tr, y_tr)
    cm_norm = confusion_matrix(y_te, model_norm.predict(X_te), labels=[0,1])
    acc_n, fpr_n, fnr_n = _metrics_from_cm(cm_norm)
    (acc_n_m, acc_n_lo, acc_n_hi), (fpr_n_m, fpr_n_lo, fpr_n_hi), (fnr_n_m, fnr_n_lo, fnr_n_hi) = _bootstrap_ci(model_norm, X_te, y_te)

    model_fusion.fit(X_tr, y_tr)
    cm_fu = confusion_matrix(y_te, model_fusion.predict(X_te), labels=[0,1])
    acc_f, fpr_f, fnr_f = _metrics_from_cm(cm_fu)
    (acc_f_m, acc_f_lo, acc_f_hi), (fpr_f_m, fpr_f_lo, fpr_f_hi), (fnr_f_m, fnr_f_lo, fnr_f_hi) = _bootstrap_ci(model_fusion, X_te, y_te)

    row = {
        "Distribution": name_dist,
        "Feature": name_feat,
        "Normalization Accuracy": f"{acc_n_m:.3f} ({acc_n_lo:.3f}–{acc_n_hi:.3f})",
        "Normalization FPR / FNR": f"{fpr_n_m:.3f} / {fnr_n_m:.3f} ({fpr_n_lo:.3f}–{fpr_n_hi:.3f} / {fnr_n_lo:.3f}–{fnr_n_hi:.3f})",
        "Fusion Accuracy": f"{acc_f_m:.3f} ({acc_f_lo:.3f}–{acc_f_hi:.3f})",
        "Fusion FPR / FNR": f"{fpr_f_m:.3f} / {fnr_f_m:.3f} ({fpr_f_lo:.3f}–{fpr_f_hi:.3f} / {fnr_f_lo:.3f}–{fnr_f_hi:.3f})",
    }
    return row


feature_groups = {
    "Gaussian": {
        "Approximate Entropy": slice(0, 1),
        "Detrended Fractal Dimension": slice(1, 2),
        "Higuchi Fractal Dimension": slice(2, 3),
        "Katz Fractal Dimension": slice(3, 4),
        "Sample Entropy": slice(4, 5),
        "Spectral Entropy": slice(5, 6),
    },
    "Non-Gaussian": {
        "Petrosian Fractal Dimension": slice(6, 7),
        "Permutation Entropy": slice(7, 8),
        "Singular Value Decomposition Entropy": slice(8, 9),
    },
    "Mixed": {
        "Combined": slice(None),
    },
}

rows = []
for dist_name, groups in feature_groups.items():
    for feat_name, cols in groups.items():
        Xtr = X_train[:, cols]
        Xte = X_test[:, cols]
        norm_model = make_pipeline(StandardScaler(), clone(clf))
        fusion_model = clone(clf)
        rows.append(
            evaluate_pair(dist_name, feat_name, Xtr, y_train, Xte, y_test,
                          norm_model, fusion_model)
        )

df = pd.DataFrame(rows, columns=[
    "Distribution","Feature",
    "Normalization Accuracy","Normalization FPR / FNR",
    "Fusion Accuracy","Fusion FPR / FNR"
])


Distribution                              Feature Normalization Accuracy                   Normalization FPR / FNR     Fusion Accuracy                          Fusion FPR / FNR
    Gaussian                  Approximate Entropy    0.644 (0.616–0.672) 0.700 / 0.298 (0.655–0.745 / 0.253–0.343) 0.683 (0.653–0.713) 0.383 / 0.253 (0.339–0.427 / 0.209–0.297)
    Gaussian          Detrended Fractal Dimension    0.687 (0.659–0.715) 0.762 / 0.347 (0.721–0.803 / 0.309–0.385) 0.672 (0.643–0.701) 0.625 / 0.347 (0.578–0.672 / 0.309–0.385)
    Gaussian            Higuchi Fractal Dimension    0.618 (0.590–0.646) 0.468 / 0.312 (0.430–0.506 / 0.275–0.349) 0.638 (0.610–0.666) 0.445 / 0.295 (0.401–0.489 / 0.260–0.330)
    Gaussian               Katz Fractal Dimension    0.653 (0.625–0.681) 0.407 / 0.273 (0.369–0.445 / 0.229–0.317) 0.663 (0.635–0.691) 0.395 / 0.265 (0.351–0.439 / 0.223–0.307)
    Gaussian                       Sample Entropy    0.596 (0.568–0.624) 0.492 / 0.328 (0.453–0.531 / 0.288–0.368) 

## **Aggregated**

In [None]:

features_disease = np.load('Aggregated_unhealth.npy')
features_no_disease = np.load('Aggregated_health.npy')

random_indices_disease = np.random.choice(features_disease.shape[0], 1000, replace=False)
random_indices_no_disease = np.random.choice(features_no_disease.shape[0], 1000, replace=False)

features_disease_random = features_disease[random_indices_disease]
features_no_disease_random = features_no_disease[random_indices_no_disease]

features_no_disease_random = features_no_disease_random[0:features_disease_random.shape[0]]

labels_disease = np.ones(features_disease_random.shape[0])
labels_no_disease = np.zeros(features_no_disease_random.shape[0])

features = np.concatenate([features_disease_random, features_no_disease_random], axis=0)
labels = np.concatenate([labels_disease, labels_no_disease], axis=0)



plt.hist(features_disease, bins=100, alpha=0.5, label='disease')
plt.hist(features_no_disease, bins=100, alpha=0.5, label='no disease')
plt.show()

data = pd.DataFrame(features)
data['label'] = labels

X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, :-1].values, data.iloc[:, -1].values, random_state=42, stratify=data.iloc[:, -1].values, test_size=0.2)


from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(
    n_estimators=100,
    max_depth=12,
    max_features=1.0,
    bootstrap=True,
    n_jobs=-1,
    random_state=42
)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [10]:
np.set_printoptions(suppress=True)

def _metrics_from_cm(cm):
    tn, fp, fn, tp = cm.ravel()
    acc = (tp + tn) / cm.sum()
    fpr = fp / (fp + tn) if (fp + tn) > 0 else np.nan
    fnr = fn / (fn + tp) if (fn + tp) > 0 else np.nan
    return acc, fpr, fnr

def _bootstrap_ci(model, X, y, n_boot=500, seed=2025):
    rng = np.random.RandomState(seed)
    y_hat_all = model.predict(X)
    accs, fprs, fnrs = [], [], []
    for _ in range(n_boot):
        idx = rng.choice(len(y), len(y), replace=True)
        cm = confusion_matrix(y[idx], y_hat_all[idx], labels=[0,1])
        acc, fpr, fnr = _metrics_from_cm(cm)
        accs.append(acc); fprs.append(fpr); fnrs.append(fnr)
    def ci(arr):
        arr = np.asarray(arr)
        lo, hi = np.nanpercentile(arr, [2.5, 97.5])
        return float(np.nanmean(arr)), float(lo), float(hi)
    return ci(accs), ci(fprs), ci(fnrs)

def _fmt_triplet(t):
    mean, lo, hi = t
    return f"{mean:.3f} ({lo:.3f}–{hi:.3f})"

def evaluate_pair(name_dist, name_feat, X_tr, y_tr, X_te, y_te,
                  model_norm, model_fusion):
    model_norm.fit(X_tr, y_tr)
    cm_norm = confusion_matrix(y_te, model_norm.predict(X_te), labels=[0,1])
    acc_n, fpr_n, fnr_n = _metrics_from_cm(cm_norm)
    (acc_n_m, acc_n_lo, acc_n_hi), (fpr_n_m, fpr_n_lo, fpr_n_hi), (fnr_n_m, fnr_n_lo, fnr_n_hi) = _bootstrap_ci(model_norm, X_te, y_te)

    model_fusion.fit(X_tr, y_tr)
    cm_fu = confusion_matrix(y_te, model_fusion.predict(X_te), labels=[0,1])
    acc_f, fpr_f, fnr_f = _metrics_from_cm(cm_fu)
    (acc_f_m, acc_f_lo, acc_f_hi), (fpr_f_m, fpr_f_lo, fpr_f_hi), (fnr_f_m, fnr_f_lo, fnr_f_hi) = _bootstrap_ci(model_fusion, X_te, y_te)

    row = {
        "Distribution": name_dist,
        "Feature": name_feat,
        "Normalization Accuracy": f"{acc_n_m:.3f} ({acc_n_lo:.3f}–{acc_n_hi:.3f})",
        "Normalization FPR / FNR": f"{fpr_n_m:.3f} / {fnr_n_m:.3f} ({fpr_n_lo:.3f}–{fpr_n_hi:.3f} / {fnr_n_lo:.3f}–{fnr_n_hi:.3f})",
        "Fusion Accuracy": f"{acc_f_m:.3f} ({acc_f_lo:.3f}–{acc_f_hi:.3f})",
        "Fusion FPR / FNR": f"{fpr_f_m:.3f} / {fnr_f_m:.3f} ({fpr_f_lo:.3f}–{fpr_f_hi:.3f} / {fnr_f_lo:.3f}–{fnr_f_hi:.3f})",
    }
    return row


feature_groups = {
    "Gaussian": {
        "Approximate Entropy": slice(0, 1),
        "Detrended Fractal Dimension": slice(1, 2),
        "Higuchi Fractal Dimension": slice(2, 3),
        "Katz Fractal Dimension": slice(3, 4),
        "Sample Entropy": slice(4, 5),
        "Spectral Entropy": slice(5, 6),
    },
    "Non-Gaussian": {
        "Petrosian Fractal Dimension": slice(6, 7),
        "Permutation Entropy": slice(7, 8),
        "Singular Value Decomposition Entropy": slice(8, 9),
    },
    "Mixed": {
        "Combined": slice(None),
    },
}

rows = []
for dist_name, groups in feature_groups.items():
    for feat_name, cols in groups.items():
        Xtr = X_train[:, cols]
        Xte = X_test[:, cols]
        norm_model = make_pipeline(StandardScaler(), clone(clf))
        fusion_model = clone(clf)
        rows.append(
            evaluate_pair(dist_name, feat_name, Xtr, y_train, Xte, y_test,
                          norm_model, fusion_model)
        )

df = pd.DataFrame(rows, columns=[
    "Distribution","Feature",
    "Normalization Accuracy","Normalization FPR / FNR",
    "Fusion Accuracy","Fusion FPR / FNR"
])


Distribution                     Feature Normalization Accuracy                   Normalization FPR / FNR     Fusion Accuracy                          Fusion FPR / FNR
    Gaussian         Approximate Entropy    0.541 (0.516–0.566) 0.700 / 0.298 (0.655–0.745 / 0.262–0.335) 0.778 (0.748–0.808) 0.333 / 0.148 (0.286–0.379 / 0.120–0.177)
    Gaussian Detrended Fractal Dimension    0.492 (0.456–0.528) 0.763 / 0.338 (0.719–0.807 / 0.300–0.376) 0.583 (0.547–0.619) 0.625 / 0.278 (0.578–0.672 / 0.242–0.313)
    Gaussian   Higuchi Fractal Dimension    0.693 (0.658–0.728) 0.450 / 0.205 (0.401–0.499 / 0.173–0.237) 0.665 (0.630–0.700) 0.502 / 0.223 (0.460–0.544 / 0.191–0.255)
    Gaussian      Katz Fractal Dimension    0.598 (0.564–0.632) 0.603 / 0.268 (0.555–0.650 / 0.233–0.304) 0.697 (0.663–0.731) 0.455 / 0.202 (0.413–0.497 / 0.170–0.234)
    Gaussian              Sample Entropy    0.691 (0.656–0.726) 0.525 / 0.165 (0.476–0.574 / 0.135–0.195) 0.792 (0.757–0.827) 0.325 / 0.165 (0.279–0.371 / 0.135

# **Model for SVM**

In [None]:
from xgboost import XGBClassifier

features_disease = np.load('Inhouse_unhealth.npy')
features_no_disease = np.load('Inhouse_health.npy')

random_indices_disease = np.random.choice(features_disease.shape[0], 1000, replace=False)
random_indices_no_disease = np.random.choice(features_no_disease.shape[0], 1000, replace=False)

features_disease_random = features_disease[random_indices_disease]
features_no_disease_random = features_no_disease[random_indices_no_disease]

features_no_disease_random = features_no_disease_random[0:features_disease_random.shape[0]]

labels_disease = np.ones(features_disease_random.shape[0])
labels_no_disease = np.zeros(features_no_disease_random.shape[0])

features = np.concatenate([features_disease_random, features_no_disease_random], axis=0)
labels = np.concatenate([labels_disease, labels_no_disease], axis=0)



plt.hist(features_disease, bins=100, alpha=0.5, label='disease')
plt.hist(features_no_disease, bins=100, alpha=0.5, label='no disease')
plt.show()

data = pd.DataFrame(features)
data['label'] = labels

X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, :-1].values, data.iloc[:, -1].values, random_state=42, stratify=data.iloc[:, -1].values, test_size=0.2)

from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

clf = make_pipeline(
    StandardScaler(),
    SVC(
        kernel="linear",
        C=1.0,
        probability=False,
        class_weight=None,
        random_state=42
    )
)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)


# **Inhouse**

In [11]:
np.set_printoptions(suppress=True)

def _metrics_from_cm(cm):
    tn, fp, fn, tp = cm.ravel()
    acc = (tp + tn) / cm.sum()
    fpr = fp / (fp + tn) if (fp + tn) > 0 else np.nan
    fnr = fn / (fn + tp) if (fn + tp) > 0 else np.nan
    return acc, fpr, fnr

def _bootstrap_ci(model, X, y, n_boot=500, seed=2025):
    rng = np.random.RandomState(seed)
    y_hat_all = model.predict(X)
    accs, fprs, fnrs = [], [], []
    for _ in range(n_boot):
        idx = rng.choice(len(y), len(y), replace=True)
        cm = confusion_matrix(y[idx], y_hat_all[idx], labels=[0,1])
        acc, fpr, fnr = _metrics_from_cm(cm)
        accs.append(acc); fprs.append(fpr); fnrs.append(fnr)
    def ci(arr):
        arr = np.asarray(arr)
        lo, hi = np.nanpercentile(arr, [2.5, 97.5])
        return float(np.nanmean(arr)), float(lo), float(hi)
    return ci(accs), ci(fprs), ci(fnrs)

def _fmt_triplet(t):
    mean, lo, hi = t
    return f"{mean:.3f} ({lo:.3f}–{hi:.3f})"

def evaluate_pair(name_dist, name_feat, X_tr, y_tr, X_te, y_te,
                  model_norm, model_fusion):
    model_norm.fit(X_tr, y_tr)
    cm_norm = confusion_matrix(y_te, model_norm.predict(X_te), labels=[0,1])
    acc_n, fpr_n, fnr_n = _metrics_from_cm(cm_norm)
    (acc_n_m, acc_n_lo, acc_n_hi), (fpr_n_m, fpr_n_lo, fpr_n_hi), (fnr_n_m, fnr_n_lo, fnr_n_hi) = _bootstrap_ci(model_norm, X_te, y_te)

    model_fusion.fit(X_tr, y_tr)
    cm_fu = confusion_matrix(y_te, model_fusion.predict(X_te), labels=[0,1])
    acc_f, fpr_f, fnr_f = _metrics_from_cm(cm_fu)
    (acc_f_m, acc_f_lo, acc_f_hi), (fpr_f_m, fpr_f_lo, fpr_f_hi), (fnr_f_m, fnr_f_lo, fnr_f_hi) = _bootstrap_ci(model_fusion, X_te, y_te)

    row = {
        "Distribution": name_dist,
        "Feature": name_feat,
        "Normalization Accuracy": f"{acc_n_m:.3f} ({acc_n_lo:.3f}–{acc_n_hi:.3f})",
        "Normalization FPR / FNR": f"{fpr_n_m:.3f} / {fnr_n_m:.3f} ({fpr_n_lo:.3f}–{fpr_n_hi:.3f} / {fnr_n_lo:.3f}–{fnr_n_hi:.3f})",
        "Fusion Accuracy": f"{acc_f_m:.3f} ({acc_f_lo:.3f}–{acc_f_hi:.3f})",
        "Fusion FPR / FNR": f"{fpr_f_m:.3f} / {fnr_f_m:.3f} ({fpr_f_lo:.3f}–{fpr_f_hi:.3f} / {fnr_f_lo:.3f}–{fnr_f_hi:.3f})",
    }
    return row


feature_groups = {
    "Gaussian": {
        "Approximate Entropy": slice(0, 1),
        "Detrended Fractal Dimension": slice(1, 2),
        "Higuchi Fractal Dimension": slice(2, 3),
        "Katz Fractal Dimension": slice(3, 4),
        "Sample Entropy": slice(4, 5),
        "Spectral Entropy": slice(5, 6),
    },
    "Non-Gaussian": {
        "Petrosian Fractal Dimension": slice(6, 7),
        "Permutation Entropy": slice(7, 8),
        "Singular Value Decomposition Entropy": slice(8, 9),
    },
    "Mixed": {
        "Combined": slice(None),
    },
}

rows = []
for dist_name, groups in feature_groups.items():
    for feat_name, cols in groups.items():
        Xtr = X_train[:, cols]
        Xte = X_test[:, cols]
        norm_model = make_pipeline(StandardScaler(), clone(clf))
        fusion_model = clone(clf)
        rows.append(
            evaluate_pair(dist_name, feat_name, Xtr, y_train, Xte, y_test,
                          norm_model, fusion_model)
        )

df = pd.DataFrame(rows, columns=[
    "Distribution","Feature",
    "Normalization Accuracy","Normalization FPR / FNR",
    "Fusion Accuracy","Fusion FPR / FNR"
])


Distribution                              Feature Normalization Accuracy                   Normalization FPR / FNR     Fusion Accuracy                          Fusion FPR / FNR
    Gaussian                  Approximate Entropy    0.496 (0.461–0.531) 0.347 / 0.661 (0.300–0.394 / 0.615–0.707) 0.778 (0.749–0.807) 0.319 / 0.125 (0.273–0.365 / 0.093–0.157)
    Gaussian          Detrended Fractal Dimension    0.457 (0.423–0.492) 0.707 / 0.379 (0.662–0.752 / 0.331–0.427) 0.563 (0.529–0.597) 0.603 / 0.271 (0.555–0.651 / 0.227–0.315)
    Gaussian            Higuchi Fractal Dimension    0.431 (0.397–0.465) 0.517 / 0.621 (0.468–0.566 / 0.573–0.669) 0.516 (0.481–0.551) 0.623 / 0.345 (0.575–0.671 / 0.298–0.392)
    Gaussian               Katz Fractal Dimension    0.464 (0.429–0.499) 0.613 / 0.459 (0.565–0.661 / 0.410–0.508) 0.566 (0.532–0.600) 0.547 / 0.321 (0.498–0.596 / 0.275–0.367)
    Gaussian                       Sample Entropy    0.499 (0.464–0.534) 0.298 / 0.704 (0.253–0.343 / 0.659–0.749) 

## **PTBXL**

In [None]:
from xgboost import XGBClassifier

features_disease = np.load('PTBXL_unhealth.npy')
features_no_disease = np.load('PTBXL_health.npy')

random_indices_disease = np.random.choice(features_disease.shape[0], 1000, replace=False)
random_indices_no_disease = np.random.choice(features_no_disease.shape[0], 1000, replace=False)

features_disease_random = features_disease[random_indices_disease]
features_no_disease_random = features_no_disease[random_indices_no_disease]

features_no_disease_random = features_no_disease_random[0:features_disease_random.shape[0]]

labels_disease = np.ones(features_disease_random.shape[0])
labels_no_disease = np.zeros(features_no_disease_random.shape[0])

features = np.concatenate([features_disease_random, features_no_disease_random], axis=0)
labels = np.concatenate([labels_disease, labels_no_disease], axis=0)



plt.hist(features_disease, bins=100, alpha=0.5, label='disease')
plt.hist(features_no_disease, bins=100, alpha=0.5, label='no disease')
plt.show()

data = pd.DataFrame(features)
data['label'] = labels

X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, :-1].values, data.iloc[:, -1].values, random_state=42, stratify=data.iloc[:, -1].values, test_size=0.2)

from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

clf = make_pipeline(
    StandardScaler(),
    SVC(
        kernel="linear",
        C=1.0,
        probability=False,
        class_weight=None,
        random_state=42
    )
)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)


In [12]:
np.set_printoptions(suppress=True)

def _metrics_from_cm(cm):
    tn, fp, fn, tp = cm.ravel()
    acc = (tp + tn) / cm.sum()
    fpr = fp / (fp + tn) if (fp + tn) > 0 else np.nan
    fnr = fn / (fn + tp) if (fn + tp) > 0 else np.nan
    return acc, fpr, fnr

def _bootstrap_ci(model, X, y, n_boot=500, seed=2025):
    rng = np.random.RandomState(seed)
    y_hat_all = model.predict(X)
    accs, fprs, fnrs = [], [], []
    for _ in range(n_boot):
        idx = rng.choice(len(y), len(y), replace=True)
        cm = confusion_matrix(y[idx], y_hat_all[idx], labels=[0,1])
        acc, fpr, fnr = _metrics_from_cm(cm)
        accs.append(acc); fprs.append(fpr); fnrs.append(fnr)
    def ci(arr):
        arr = np.asarray(arr)
        lo, hi = np.nanpercentile(arr, [2.5, 97.5])
        return float(np.nanmean(arr)), float(lo), float(hi)
    return ci(accs), ci(fprs), ci(fnrs)

def _fmt_triplet(t):
    mean, lo, hi = t
    return f"{mean:.3f} ({lo:.3f}–{hi:.3f})"

def evaluate_pair(name_dist, name_feat, X_tr, y_tr, X_te, y_te,
                  model_norm, model_fusion):
    model_norm.fit(X_tr, y_tr)
    cm_norm = confusion_matrix(y_te, model_norm.predict(X_te), labels=[0,1])
    acc_n, fpr_n, fnr_n = _metrics_from_cm(cm_norm)
    (acc_n_m, acc_n_lo, acc_n_hi), (fpr_n_m, fpr_n_lo, fpr_n_hi), (fnr_n_m, fnr_n_lo, fnr_n_hi) = _bootstrap_ci(model_norm, X_te, y_te)

    model_fusion.fit(X_tr, y_tr)
    cm_fu = confusion_matrix(y_te, model_fusion.predict(X_te), labels=[0,1])
    acc_f, fpr_f, fnr_f = _metrics_from_cm(cm_fu)
    (acc_f_m, acc_f_lo, acc_f_hi), (fpr_f_m, fpr_f_lo, fpr_f_hi), (fnr_f_m, fnr_f_lo, fnr_f_hi) = _bootstrap_ci(model_fusion, X_te, y_te)

    row = {
        "Distribution": name_dist,
        "Feature": name_feat,
        "Normalization Accuracy": f"{acc_n_m:.3f} ({acc_n_lo:.3f}–{acc_n_hi:.3f})",
        "Normalization FPR / FNR": f"{fpr_n_m:.3f} / {fnr_n_m:.3f} ({fpr_n_lo:.3f}–{fpr_n_hi:.3f} / {fnr_n_lo:.3f}–{fnr_n_hi:.3f})",
        "Fusion Accuracy": f"{acc_f_m:.3f} ({acc_f_lo:.3f}–{acc_f_hi:.3f})",
        "Fusion FPR / FNR": f"{fpr_f_m:.3f} / {fnr_f_m:.3f} ({fpr_f_lo:.3f}–{fpr_f_hi:.3f} / {fnr_f_lo:.3f}–{fnr_f_hi:.3f})",
    }
    return row


feature_groups = {
    "Gaussian": {
        "Approximate Entropy": slice(0, 1),
        "Detrended Fractal Dimension": slice(1, 2),
        "Higuchi Fractal Dimension": slice(2, 3),
        "Katz Fractal Dimension": slice(3, 4),
        "Sample Entropy": slice(4, 5),
        "Spectral Entropy": slice(5, 6),
    },
    "Non-Gaussian": {
        "Petrosian Fractal Dimension": slice(6, 7),
        "Permutation Entropy": slice(7, 8),
        "Singular Value Decomposition Entropy": slice(8, 9),
    },
    "Mixed": {
        "Combined": slice(None),
    },
}

rows = []
for dist_name, groups in feature_groups.items():
    for feat_name, cols in groups.items():
        Xtr = X_train[:, cols]
        Xte = X_test[:, cols]
        norm_model = make_pipeline(StandardScaler(), clone(clf))
        fusion_model = clone(clf)
        rows.append(
            evaluate_pair(dist_name, feat_name, Xtr, y_train, Xte, y_test,
                          norm_model, fusion_model)
        )

df = pd.DataFrame(rows, columns=[
    "Distribution","Feature",
    "Normalization Accuracy","Normalization FPR / FNR",
    "Fusion Accuracy","Fusion FPR / FNR"
])


Distribution                              Feature Normalization Accuracy                   Normalization FPR / FNR     Fusion Accuracy                          Fusion FPR / FNR
    Gaussian                  Approximate Entropy    0.619 (0.585–0.653) 0.489 / 0.273 (0.440–0.538 / 0.229–0.317) 0.662 (0.629–0.695) 0.389 / 0.287 (0.341–0.437 / 0.243–0.331)
    Gaussian          Detrended Fractal Dimension    0.653 (0.620–0.686) 0.372 / 0.322 (0.325–0.419 / 0.276–0.368) 0.637 (0.604–0.670) 0.397 / 0.329 (0.349–0.445 / 0.283–0.375)
    Gaussian            Higuchi Fractal Dimension    0.589 (0.555–0.623) 0.513 / 0.309 (0.464–0.562 / 0.264–0.354) 0.585 (0.551–0.619) 0.487 / 0.342 (0.438–0.536 / 0.296–0.389)
    Gaussian               Katz Fractal Dimension    0.572 (0.538–0.606) 0.473 / 0.383 (0.424–0.522 / 0.335–0.431) 0.619 (0.585–0.653) 0.482 / 0.299 (0.433–0.531 / 0.254–0.344)
    Gaussian                       Sample Entropy    0.556 (0.521–0.591) 0.521 / 0.367 (0.472–0.570 / 0.320–0.414) 

## **Aggregated**

In [None]:
from xgboost import XGBClassifier

features_disease = np.load('Aggregated_unhealth.npy')
features_no_disease = np.load('Aggregated_health.npy')

random_indices_disease = np.random.choice(features_disease.shape[0], 1000, replace=False)
random_indices_no_disease = np.random.choice(features_no_disease.shape[0], 1000, replace=False)

features_disease_random = features_disease[random_indices_disease]
features_no_disease_random = features_no_disease[random_indices_no_disease]

features_no_disease_random = features_no_disease_random[0:features_disease_random.shape[0]]

labels_disease = np.ones(features_disease_random.shape[0])
labels_no_disease = np.zeros(features_no_disease_random.shape[0])

features = np.concatenate([features_disease_random, features_no_disease_random], axis=0)
labels = np.concatenate([labels_disease, labels_no_disease], axis=0)



plt.hist(features_disease, bins=100, alpha=0.5, label='disease')
plt.hist(features_no_disease, bins=100, alpha=0.5, label='no disease')
plt.show()

data = pd.DataFrame(features)
data['label'] = labels

X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, :-1].values, data.iloc[:, -1].values, random_state=42, stratify=data.iloc[:, -1].values, test_size=0.2)

from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

clf = make_pipeline(
    StandardScaler(),
    SVC(
        kernel="linear",
        C=1.0,
        probability=False,
        class_weight=None,
        random_state=42
    )
)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)


In [13]:
np.set_printoptions(suppress=True)

def _metrics_from_cm(cm):
    tn, fp, fn, tp = cm.ravel()
    acc = (tp + tn) / cm.sum()
    fpr = fp / (fp + tn) if (fp + tn) > 0 else np.nan
    fnr = fn / (fn + tp) if (fn + tp) > 0 else np.nan
    return acc, fpr, fnr

def _bootstrap_ci(model, X, y, n_boot=500, seed=2025):
    rng = np.random.RandomState(seed)
    y_hat_all = model.predict(X)
    accs, fprs, fnrs = [], [], []
    for _ in range(n_boot):
        idx = rng.choice(len(y), len(y), replace=True)
        cm = confusion_matrix(y[idx], y_hat_all[idx], labels=[0,1])
        acc, fpr, fnr = _metrics_from_cm(cm)
        accs.append(acc); fprs.append(fpr); fnrs.append(fnr)
    def ci(arr):
        arr = np.asarray(arr)
        lo, hi = np.nanpercentile(arr, [2.5, 97.5])
        return float(np.nanmean(arr)), float(lo), float(hi)
    return ci(accs), ci(fprs), ci(fnrs)

def _fmt_triplet(t):
    mean, lo, hi = t
    return f"{mean:.3f} ({lo:.3f}–{hi:.3f})"

def evaluate_pair(name_dist, name_feat, X_tr, y_tr, X_te, y_te,
                  model_norm, model_fusion):
    model_norm.fit(X_tr, y_tr)
    cm_norm = confusion_matrix(y_te, model_norm.predict(X_te), labels=[0,1])
    acc_n, fpr_n, fnr_n = _metrics_from_cm(cm_norm)
    (acc_n_m, acc_n_lo, acc_n_hi), (fpr_n_m, fpr_n_lo, fpr_n_hi), (fnr_n_m, fnr_n_lo, fnr_n_hi) = _bootstrap_ci(model_norm, X_te, y_te)

    model_fusion.fit(X_tr, y_tr)
    cm_fu = confusion_matrix(y_te, model_fusion.predict(X_te), labels=[0,1])
    acc_f, fpr_f, fnr_f = _metrics_from_cm(cm_fu)
    (acc_f_m, acc_f_lo, acc_f_hi), (fpr_f_m, fpr_f_lo, fpr_f_hi), (fnr_f_m, fnr_f_lo, fnr_f_hi) = _bootstrap_ci(model_fusion, X_te, y_te)

    row = {
        "Distribution": name_dist,
        "Feature": name_feat,
        "Normalization Accuracy": f"{acc_n_m:.3f} ({acc_n_lo:.3f}–{acc_n_hi:.3f})",
        "Normalization FPR / FNR": f"{fpr_n_m:.3f} / {fnr_n_m:.3f} ({fpr_n_lo:.3f}–{fpr_n_hi:.3f} / {fnr_n_lo:.3f}–{fnr_n_hi:.3f})",
        "Fusion Accuracy": f"{acc_f_m:.3f} ({acc_f_lo:.3f}–{acc_f_hi:.3f})",
        "Fusion FPR / FNR": f"{fpr_f_m:.3f} / {fnr_f_m:.3f} ({fpr_f_lo:.3f}–{fpr_f_hi:.3f} / {fnr_f_lo:.3f}–{fnr_f_hi:.3f})",
    }
    return row


feature_groups = {
    "Gaussian": {
        "Approximate Entropy": slice(0, 1),
        "Detrended Fractal Dimension": slice(1, 2),
        "Higuchi Fractal Dimension": slice(2, 3),
        "Katz Fractal Dimension": slice(3, 4),
        "Sample Entropy": slice(4, 5),
        "Spectral Entropy": slice(5, 6),
    },
    "Non-Gaussian": {
        "Petrosian Fractal Dimension": slice(6, 7),
        "Permutation Entropy": slice(7, 8),
        "Singular Value Decomposition Entropy": slice(8, 9),
    },
    "Mixed": {
        "Combined": slice(None),
    },
}

rows = []
for dist_name, groups in feature_groups.items():
    for feat_name, cols in groups.items():
        Xtr = X_train[:, cols]
        Xte = X_test[:, cols]
        norm_model = make_pipeline(StandardScaler(), clone(clf))
        fusion_model = clone(clf)
        rows.append(
            evaluate_pair(dist_name, feat_name, Xtr, y_train, Xte, y_test,
                          norm_model, fusion_model)
        )

df = pd.DataFrame(rows, columns=[
    "Distribution","Feature",
    "Normalization Accuracy","Normalization FPR / FNR",
    "Fusion Accuracy","Fusion FPR / FNR"
])


Distribution                     Feature Normalization Accuracy                   Normalization FPR / FNR     Fusion Accuracy                          Fusion FPR / FNR
    Gaussian         Approximate Entropy    0.562 (0.528–0.596) 0.365 / 0.487 (0.318–0.412 / 0.447–0.527) 0.763 (0.737–0.789) 0.283 / 0.207 (0.239–0.327 / 0.175–0.239)
    Gaussian Detrended Fractal Dimension    0.518 (0.487–0.549) 0.542 / 0.442 (0.493–0.590 / 0.402–0.482) 0.571 (0.540–0.602) 0.336 / 0.491 (0.290–0.382 / 0.451–0.531)
    Gaussian   Higuchi Fractal Dimension    0.667 (0.638–0.696) 0.525 / 0.205 (0.476–0.574 / 0.173–0.237) 0.674 (0.645–0.703) 0.489 / 0.217 (0.440–0.538 / 0.184–0.250)
    Gaussian      Katz Fractal Dimension    0.564 (0.533–0.595) 0.481 / 0.406 (0.432–0.530 / 0.367–0.445) 0.619 (0.589–0.649) 0.421 / 0.354 (0.373–0.469 / 0.316–0.392)
    Gaussian              Sample Entropy    0.679 (0.650–0.708) 0.452 / 0.234 (0.403–0.501 / 0.200–0.268) 0.747 (0.720–0.774) 0.355 / 0.205 (0.279–0.371 / 0.173