## General

This section is for imports, getting an overview of all experiments, and loading the most suitable model.

In [1]:
import sys
import os
import warnings
import logging
from absl import logging as absl_logging

# ✅ TensorFlow / TFDS environment variables (assigned before TensorFlow is imported further down)
os.environ.update({
    'TFDS_DATA_DIR': r"/data/newc6477/VAE/Single_Beat/5_percent_Physionet/",
    'TF_ENABLE_ONEDNN_OPTS': "0",
    "TF_CPP_MIN_LOG_LEVEL": "1",
})

# ✅ Resolve the project root (parent of the current working directory) and its `src/` folder
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
SRC_DIR = os.path.join(PROJECT_ROOT, "src")

# ✅ Make both locations importable, without adding duplicates on re-run
for _import_root in (PROJECT_ROOT, SRC_DIR):
    if _import_root not in sys.path:
        sys.path.append(_import_root)

print(f"📂 Project Root: {PROJECT_ROOT}")
print(f"✅ Updated sys.path:\n{sys.path}")

# ✅ Silence Python warnings and absl log messages below ERROR
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore', FutureWarning)
absl_logging.set_verbosity(absl_logging.ERROR)

# ✅ Import project modules; on failure only a hint is printed and execution continues
try:
    from src.utils.helper import Helper
    from src.evaluate.visualizations import Visualizations
except ModuleNotFoundError as e:
    print(f"❌ Import Error: {e}")
    print("🔍 Check if `src/` has `__init__.py` and is in `sys.path`.")
else:
    print("✅ Successfully imported `Helper` and `Visualizations`")

# ✅ Import Other Required Libraries
import tensorflow as tf
import pandas as pd
import numpy as np
import glob
from neurokit2.signal import signal_smooth

from sklearn.manifold import TSNE
from sklearn.model_selection import train_test_split

import ipywidgets as widgets
from matplotlib import pyplot as plt
import seaborn as sns



📂 Project Root: /users/newc6477/VAE/12_Lead_VECG
✅ Updated sys.path:
['/users/newc6477/Benchmark_ISIBrno/ENTER/envs/my_env/lib/python310.zip', '/users/newc6477/Benchmark_ISIBrno/ENTER/envs/my_env/lib/python3.10', '/users/newc6477/Benchmark_ISIBrno/ENTER/envs/my_env/lib/python3.10/lib-dynload', '', '/users/newc6477/Benchmark_ISIBrno/ENTER/envs/my_env/lib/python3.10/site-packages', '/users/newc6477/VAE/12_Lead_VECG', '/users/newc6477/VAE/12_Lead_VECG/src']


2025-04-04 16:59:58.863065: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-04 16:59:58.863292: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-04 16:59:58.865587: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


✅ Successfully imported `Helper` and `Visualizations`


In [2]:
# The resolution (dots per inch) for saving images
DPI = 300

# The source path of the experiments and models.
# Lead-specific sub-directories are joined onto this path later in the notebook.
# NOTE(review): absolute, user-specific path — adjust before running on another machine.
PATH = r"/users/newc6477/VAE/12_Lead_VECG/results/Hope/test_is_split1"

# Some operations take some time in computation.
# Therefore, the stored intermediate results can be used to skip the respective computation.
# NOTE(review): this flag is not read by any of the cells visible in this section — confirm where it is consumed.
USE_PRECOMPUTED = True

# 2. Anomaly Detection

## 🔹 Funcs used to train and test

In [3]:
import json
from sklearn.multioutput import MultiOutputClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, f1_score
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.multioutput import MultiOutputClassifier
import numpy as np
import pandas as pd

def evaluate_models(df_physionet_train, df_physionet_test, hyperparams_list):
    """
    Evaluate a list of models using KNN classification and return a summary table.

    For each model a multi-output KNN classifier is fitted on the train
    embeddings. ``n_neighbors`` is picked from [3, 5, 7, 9, 11] by a 3-fold
    grid search scored on accuracy, and the best estimator is scored on the
    test embeddings. Metrics and a classification report are printed per model.

    Args:
        df_physionet_train (List[pd.DataFrame]): List of train embedding DataFrames (1 per model).
            The first ``latent_dimension`` columns are used as features and the
            ``'diagnostic'`` column as the label vector of each row.
        df_physionet_test (List[pd.DataFrame]): List of test embedding DataFrames (1 per model),
            same layout as the train frames.
        hyperparams_list (List[dict]): Corresponding list of hyperparameter dictionaries.
            ``"latent_dimension"`` is read from each dict (default 8 when absent).

    Returns:
        pd.DataFrame: Summary table with one row per model holding ``model_index``,
        ``best_k``, ``accuracy``, ``f1_micro``, ``f1_macro`` and the model's
        hyperparameters. Empty when no models are given.

    Raises:
        ValueError: If the three input lists differ in length. Index ``i`` must
            refer to the same model in all of them, otherwise metrics would be
            reported next to the wrong hyperparameters.
    """
    n_models = len(df_physionet_train)
    if not (len(df_physionet_test) == n_models == len(hyperparams_list)):
        raise ValueError(
            "Inputs must be aligned one-to-one per model, got "
            f"{n_models} train frames, {len(df_physionet_test)} test frames and "
            f"{len(hyperparams_list)} hyperparameter dicts."
        )

    results = []

    for model_idx, (train_df, test_df, hparams) in enumerate(
        zip(df_physionet_train, df_physionet_test, hyperparams_list)
    ):
        print(f"\n🔍 Evaluating model {model_idx + 1}/{n_models}...")

        # Latent dimension decides how many leading columns are features
        latent_dim = hparams.get("latent_dimension", 8)

        # Prepare feature matrices and labels
        X_train = train_df.iloc[:, :latent_dim].values
        X_test = test_df.iloc[:, :latent_dim].values
        # Each 'diagnostic' cell holds a label vector; stacking them gives a 2-D int array
        y_train = np.array(train_df['diagnostic'].tolist(), dtype=int)
        y_test = np.array(test_df['diagnostic'].tolist(), dtype=int)

        # KNN classification with hyperparameter tuning
        knn = KNeighborsClassifier()
        multi_knn = MultiOutputClassifier(knn, n_jobs=-1)

        param_grid = {"estimator__n_neighbors": [3, 5, 7, 9, 11]}
        grid = GridSearchCV(multi_knn, param_grid, scoring="accuracy", cv=3, n_jobs=-1)
        grid.fit(X_train, y_train)

        best_model = grid.best_estimator_
        y_pred = best_model.predict(X_test)

        # Evaluation metrics.
        # zero_division=0 gives the same scores as the default ("warn" also
        # scores 0) but is explicit instead of relying on muted warnings.
        overall_acc = accuracy_score(y_test, y_pred)
        f1_micro = f1_score(y_test, y_pred, average="micro", zero_division=0)
        f1_macro = f1_score(y_test, y_pred, average="macro", zero_division=0)
        accuracy_per_label = (y_pred == y_test).mean(axis=0)

        # Print evaluation
        print("✅ Overall Accuracy:", overall_acc)
        print("✅ F1 Score (Macro):", f1_macro)
        print("✅ F1 Score (Micro):", f1_micro)
        print("✅ Accuracy per label:", accuracy_per_label)

        # Classification report (labels are named by their column position)
        print("✅ Classification Report:\n")
        label_names = [str(label_idx) for label_idx in range(y_test.shape[1])]
        print(classification_report(
            y_test, y_pred,
            target_names=label_names,
            zero_division=0,
        ))

        # Collect results
        results.append({
            "model_index": model_idx,
            "best_k": grid.best_params_["estimator__n_neighbors"],
            "accuracy": overall_acc,
            "f1_micro": f1_micro,
            "f1_macro": f1_macro,
            **hparams
        })

    summary_df = pd.DataFrame(results)
    return summary_df


def extract_hyperparams_from_json(json_path):
    """
    Read a run's parameter JSON file and return its key hyperparameters.

    Args:
        json_path (str): Path to the JSON file. It must contain a
            ``"coefficients"`` object with ``alpha``, ``beta`` and ``gamma``,
            plus top-level ``latent_dimension``, ``learning_rate`` and ``epochs``.

    Returns:
        dict: Flat dictionary with the keys ``alpha``, ``beta``, ``gamma``,
        ``latent_dimension``, ``learning_rate`` and ``epochs`` (in that order).

    Raises:
        KeyError: If any of the expected entries is missing from the file.
    """
    with open(json_path, 'r') as handle:
        config = json.load(handle)

    # Loss coefficients live in a nested object ...
    coefficients = config["coefficients"]
    selected = {name: coefficients[name] for name in ("alpha", "beta", "gamma")}

    # ... while the remaining settings sit at the top level.
    for name in ("latent_dimension", "learning_rate", "epochs"):
        selected[name] = config[name]

    return selected

from IPython.display import display
def summarize_by_latent_dimension(results_df):
    """
    Split a results table by latent dimension and rank each group by macro F1.

    For every distinct value of the ``latent_dimension`` column (in ascending
    order) the matching rows are sorted by ``f1_macro`` in descending order,
    printed under a heading and displayed.

    Args:
        results_df (pd.DataFrame): Table with at least the columns
            ``latent_dimension`` and ``f1_macro``.

    Returns:
        dict: Maps each latent dimension to its sorted sub-table.
    """
    dim_column = results_df["latent_dimension"]
    tables_by_dim = {}

    for latent_dim in sorted(dim_column.unique()):
        ranked = results_df.loc[dim_column == latent_dim].sort_values(
            by="f1_macro", ascending=False
        )
        tables_by_dim[latent_dim] = ranked

        print(f"\n📏 Latent Dimension: {latent_dim}")
        display(ranked)

    return tables_by_dim


## 🔹 Lead I Evaluation

In [4]:
import os
import tensorflow as tf

BASE = os.path.join(PATH, 'I')  # Directory holding the Lead I runs (one sub-folder per run)
models = []
hyperparams_list = []

# Get all run folders in BASE. os.listdir returns entries in arbitrary order,
# so sort them to keep model indices reproducible between executions.
folders = sorted(f for f in os.listdir(BASE) if os.path.isdir(os.path.join(BASE, f)))

for folder in folders:
    model_path = os.path.join(BASE, folder, 'model_best.keras')
    hyperparams_path = os.path.join(BASE, folder, 'params.json')

    # Skip runs without a saved model BEFORE touching hyperparams_list, so that
    # models[i] and hyperparams_list[i] always describe the same run.
    if not os.path.exists(model_path):
        print(f"Warning: Model file not found at {model_path}")
        continue

    hyperparams = extract_hyperparams_from_json(hyperparams_path)
    print(f"Loading model from: {model_path}")
    model = tf.keras.models.load_model(model_path, compile=False)

    models.append(model)
    hyperparams_list.append(hyperparams)

# Downstream evaluation indexes both lists in parallel.
assert len(models) == len(hyperparams_list), "models and hyperparams_list are out of sync"

print(f"Loaded {len(models)} models successfully.")

Loading model from: /users/newc6477/VAE/12_Lead_VECG/results/Hope/test_is_split1/I/2025-03-25_22-44-31/model_best.keras
Loading model from: /users/newc6477/VAE/12_Lead_VECG/results/Hope/test_is_split1/I/2025-03-25_22-58-08/model_best.keras
Loading model from: /users/newc6477/VAE/12_Lead_VECG/results/Hope/test_is_split1/I/2025-03-25_23-11-36/model_best.keras
Loading model from: /users/newc6477/VAE/12_Lead_VECG/results/Hope/test_is_split1/I/2025-03-25_23-25-03/model_best.keras
Loading model from: /users/newc6477/VAE/12_Lead_VECG/results/Hope/test_is_split1/I/2025-03-25_23-37-01/model_best.keras
Loading model from: /users/newc6477/VAE/12_Lead_VECG/results/Hope/test_is_split1/I/2025-03-25_23-46-05/model_best.keras
Loading model from: /users/newc6477/VAE/12_Lead_VECG/results/Hope/test_is_split1/I/2025-03-25_23-59-37/model_best.keras
Loading model from: /users/newc6477/VAE/12_Lead_VECG/results/Hope/test_is_split1/I/2025-03-26_00-13-04/model_best.keras
Loading model from: /users/newc6477/VAE/

In [5]:
# Splits 2-5 are used for fitting, split 1 is held out for testing.
train_splits = ['split2', 'split3', 'split4', 'split5']
test_splits = ['split1']

# Loader configuration for the training embeddings
dataset_config = dict(
    name=['physionet'],
    split=train_splits,
    shuffle_size=1024,
    batch_size=1024,
)

# Loader configuration for the test embeddings (same settings, different split)
dataset_test = dict(
    name=['physionet'],
    split=test_splits,
    shuffle_size=1024,
    batch_size=1024,
)

In [6]:
# Compute embeddings for all loaded models on lead 'I': first with the training
# dataset configuration, then with the test dataset configuration.
# NOTE(review): the first return value is presumably one DataFrame per model (that is
# how evaluate_models indexes it) — confirm against Helper.get_embeddings_multiple_model.
# NOTE(review): `ld` is overwritten by the second call, so only the value from the
# test call remains available afterwards.
df_physionet_train, ld = Helper.get_embeddings_multiple_model(models, datasets=dataset_config, lead='I')
df_physionet_test, ld = Helper.get_embeddings_multiple_model(models, datasets=dataset_test, lead='I')

# Disabled single-model debugging leftovers (would replace the lists with their first element):
#df_physionet_train = df_physionet_train[0]
#df_physionet_test = df_physionet_test[0]
#print(type(df_physionet_train))



📦 Loading dataset 'physionet' with splits: ['split2', 'split3', 'split4', 'split5']
🔍 Processing lead: I
     43/Unknown - 2s 35ms/step✅ Generator exhausted normally.
     41/Unknown - 1s 31ms/step✅ Generator exhausted normally.
     41/Unknown - 1s 32ms/step✅ Generator exhausted normally.
     41/Unknown - 1s 33ms/step✅ Generator exhausted normally.
     41/Unknown - 1s 34ms/step✅ Generator exhausted normally.
     41/Unknown - 1s 33ms/step✅ Generator exhausted normally.
     41/Unknown - 1s 32ms/step✅ Generator exhausted normally.
     41/Unknown - 1s 33ms/step✅ Generator exhausted normally.
     41/Unknown - 1s 33ms/step✅ Generator exhausted normally.
     41/Unknown - 1s 33ms/step✅ Generator exhausted normally.
     41/Unknown - 1s 34ms/step✅ Generator exhausted normally.
     41/Unknown - 1s 33ms/step✅ Generator exhausted normally.
     42/Unknown - 1s 31ms/step✅ Generator exhausted normally.
     43/Unknown - 1s 32ms/step✅ Generator exhausted normally.
     41/Unknown - 1s 33ms/

In [7]:

# Run the KNN evaluation for every model; results_df gets one summary row per model.
results_df = evaluate_models(df_physionet_train,df_physionet_test, hyperparams_list)


🔍 Evaluating model 1/18...


✅ Overall Accuracy: 0.2704626334519573
✅ F1 Score (Macro): 0.08984689221908834
✅ F1 Score (Micro): 0.23345764517847628
✅ Accuracy per label: [0.92686833 0.89519573 0.97437722 0.99928826 0.98505338 0.95542705
 0.97046263 0.98096085 0.93131673 0.97758007 0.99724199 0.98122776
 0.97989324 0.98816726 0.72758007 0.94741993 0.99297153 0.99119217
 0.98451957 0.97642349 0.9730427  0.96530249 0.88585409 0.87241993
 0.88763345 0.9725089 ]
✅ Classification Report:

              precision    recall  f1-score   support

           0       0.18      0.01      0.02       791
           1       0.42      0.05      0.09      1155
           2       0.00      0.00      0.00       272
           3       0.00      0.00      0.00         6
           4       0.21      0.13      0.16       124
           5       0.64      0.31      0.41       580
           6       0.07      0.00      0.01       319
           7       0.00      0.00      0.00       186
           8       0.12      0.01      0.01       740


In [8]:
# Remove the positional helper column before showing the ranking.
# errors="ignore" keeps this cell re-runnable: on a second execution
# "model_index" is already gone and a plain drop would raise KeyError.
results_df = results_df.drop(columns=["model_index"], errors="ignore")

# Rank all models by macro F1, best first
display(results_df.sort_values(by="f1_macro", ascending=False))


Unnamed: 0,best_k,accuracy,f1_micro,f1_macro,alpha,beta,gamma,latent_dimension,learning_rate,epochs
12,5,0.240302,0.263931,0.126812,0.05,0.02,0.005,16,0.002,100
6,7,0.274555,0.264767,0.109891,0.05,0.02,0.005,12,0.002,100
11,9,0.261477,0.254712,0.098732,0.3,1.2,0.3,12,0.002,100
14,7,0.240925,0.246673,0.096523,0.05,0.2,0.05,16,0.002,100
8,9,0.265925,0.241418,0.092228,0.05,0.2,0.05,12,0.002,100
0,11,0.270463,0.233458,0.089847,0.05,0.02,0.005,8,0.002,100
15,5,0.236833,0.239177,0.088478,0.1,0.4,0.1,16,0.002,100
5,11,0.240125,0.232246,0.083682,0.3,1.2,0.3,8,0.002,100
17,9,0.238968,0.228496,0.077251,0.3,1.2,0.3,16,0.002,100
10,11,0.262633,0.239958,0.074551,0.2,0.8,0.2,12,0.002,100
