In [None]:
import os 
import numpy as np
import pandas as pd

from huggingface_hub import snapshot_download

# Hugging Face access token, read from the environment so that no secret is
# ever typed into (and committed with) the notebook. Export HF_TOKEN before
# starting Jupyter.
# When HF_TOKEN is unset or empty this is None, in which case huggingface_hub
# falls back to the credentials stored by `huggingface-cli login`.
hugging_face_token = os.environ.get("HF_TOKEN") or None

# Download SSL Pretrained Model

In [24]:
# Fetch the SSL-pretrained checkpoint repository from the Hugging Face Hub
# into the local `ssl_pretrained_model` folder and report where it landed.
ssl_pretrained_model = snapshot_download(
    repo_type="model",
    repo_id="heartwise/ssl_pretrained_model",
    local_dir="ssl_pretrained_model",
    token=hugging_face_token,
)
print(f"SSL Pretrained Model downloaded to {ssl_pretrained_model}")


Fetching 3 files: 100%|██████████| 3/3 [00:26<00:00,  8.69s/it]

SSL Pretrained Model downloaded to /home/jdelfrates/DeepECG_Docker/notebooks/ssl_pretrained_model





# Prepare Finetuning Dataset

In [4]:
# Fetch the PTB-XL lite dataset repository from the Hugging Face Hub into the
# local `ptb-xl_lite` folder; `dataset_dir` is the path used by later cells.
dataset_dir = snapshot_download(
    repo_type="dataset",
    repo_id="heartwise/PTB-XL_lite",
    local_dir="ptb-xl_lite",
    token=hugging_face_token,
)

print(f"Dataset downloaded to {dataset_dir}")

Fetching 6 files: 100%|██████████| 6/6 [00:00<00:00, 25090.55it/s]

Dataset downloaded to /home/jdelfrates/DeepECG_Docker/notebooks/ptb-xl_lite





##### For binary classification, fairseq-signals expects a CSV file that lists the path to each ECG file together with its label

In [49]:
# Load the training signals; the first axis indexes the individual ECGs.
x_train_aray = np.load(f'{dataset_dir}/ptb_xl_afib_2y_train_subset.npy', allow_pickle=False)
print(x_train_aray.shape)

# Write every ECG to its own .npy file and collect the paths in order, so
# that row i of the labels CSV refers to ECG i.
ecg_signals_path = f'{dataset_dir}/ecg_files'
os.makedirs(ecg_signals_path, exist_ok=True)
list_ecg_path = []
for i, ecg_signal in enumerate(x_train_aray):
    ecg_file_path = f'{ecg_signals_path}/ecg_{i}.npy'
    list_ecg_path.append(ecg_file_path)
    np.save(ecg_file_path, ecg_signal, allow_pickle=False)

# Read the `label_2y` column of the labels parquet as a NumPy array.
y_train_labels = pd.read_parquet(f'{dataset_dir}/ptb_xl_afib_2y_train_subset_labels.parquet')
labels = y_train_labels['label_2y'].to_numpy()
print(labels.shape)

# Pair each saved ECG path with its label and write the result as CSV.
labels_df = pd.DataFrame({'npy_path': list_ecg_path, 'afib_2y': labels})
labels_df.to_csv(f'{dataset_dir}/afib_2y_labels.csv', index=False)

(2000, 2500, 12)
(2000,)


##### Generate SSL manifests

In [50]:
# Sanity check: list the labels CSV (ptb-xl_lite/afib_2y_labels.csv) and the
# train manifest under ssl_manifests/finetuning/afib_2y, the directory that is
# later passed to fairseq as task.data. `ls` reports an error for any file
# that does not exist.
# NOTE(review): no cell in this notebook creates train.tsv; presumably it is
# prepared by hand and points at afib_2y_labels.csv — confirm.
!ls -l $(pwd)/ptb-xl_lite/afib_2y_labels.csv
!ls -l $(pwd)/ssl_manifests/finetuning/afib_2y/train.tsv

-rw-rw-r--. 1 jdelfrates jdelfrates 156907 Jan 27 07:19 /home/jdelfrates/DeepECG_Docker/notebooks/ptb-xl_lite/afib_2y_labels.csv
-rw-r--r--. 1 jdelfrates jdelfrates 133 Jan 27 07:04 /home/jdelfrates/DeepECG_Docker/notebooks/ssl_manifests/finetuning/afib_2y/train.tsv


# Setup Project configuration

In [28]:
def generate_train_cli(
    devices=1,
    encoder='_last',
    task = 'afib_2y',
    num_labels = 77,
    mode='ft', # possible values are 'ft', 'le', 'e2e'
    is_df=True,
    cls='',
    wd=0,
    criterion='binary_cross_entropy_with_logits'
):
    """Build the shell command that launches a fairseq-signals fine-tuning run.

    Nothing is executed here. The returned string contains ``$(pwd)`` and an
    environment-variable prefix, so it has to be run through a shell.

    Args:
        devices: Value exported as ``CUDA_VISIBLE_DEVICES`` for the command
            (i.e. which GPU id(s) are visible, not a GPU count).
        encoder: Tag placed right after ``checkpoint`` in the save directory
            name. It is replaced by ``'_e2e'`` when ``mode == 'e2e'``.
        task: Name of the manifest directory under
            ``$(pwd)/ssl_manifests/finetuning``; also part of the save
            directory name.
        num_labels: Value passed as ``model.num_labels``.
        mode: ``'e2e'`` adds ``model.no_pretrained_weights=true``; every other
            value points ``model.model_path`` at the SSL pretrained
            checkpoint. ``'le'`` additionally adds
            ``model.linear_evaluation=true``. The value is not validated and
            is also part of the save directory name.
        is_df: When truthy adds ``+task.df_dataset=true``, otherwise
            ``+task.npy_dataset=true``.
        cls: ``'attn'`` selects ``ecg_transformer_attn_classifier`` and
            appends ``-attn`` to the save directory name; any other value is
            appended to that name unchanged.
        wd: Weight decay; the ``optimizer.weight_decay`` override is only
            emitted when this is truthy.
        criterion: One of ``'asymmetric'``, ``'binary_focal'``, ``'mse'``,
            ``'binary_cross_entropy_with_logits'`` or ``'mlsml'``.

    Returns:
        str: The complete command line.

    Raises:
        AssertionError: If ``criterion`` is not one of the supported names.
    """
    # Short tag of each supported criterion, used in the checkpoint directory
    # name. Matched with ``==`` (not hashing) so any value that compares equal
    # to one of the names is accepted.
    loss_tags = (
        ('asymmetric', 'as'),
        ('binary_focal', 'bf'),
        ('mse', 'mse'),
        ('binary_cross_entropy_with_logits', 'bce'),
        ('mlsml', 'mlsml'),
    )
    loss_tag = next((tag for name, tag in loss_tags if criterion == name), None)
    if loss_tag is None:
        # Raised explicitly instead of `assert False`: an assert statement is
        # removed under `python -O`, which would silently produce a
        # "...-None" checkpoint name. AssertionError is kept so the exception
        # type seen by existing callers does not change.
        supported = ', '.join(repr(name) for name, _ in loss_tags)
        raise AssertionError(
            f'Invalid criterion {criterion!r}; expected one of: {supported}'
        )

    parts = [f'CUDA_VISIBLE_DEVICES={devices} fairseq-hydra-train']
    # fp16 is switched off only for the mse criterion.
    parts.append('common.fp16=false' if criterion == 'mse' else 'common.fp16=true')
    parts.append(f'task.data=$(pwd)/ssl_manifests/finetuning/{task}')
    if mode == 'e2e':
        parts.append('model.no_pretrained_weights=true')
        encoder = '_e2e'
    else:
        parts.append('model.model_path=$(pwd)/ssl_pretrained_model/SSL_pretrained.pt')
    if cls == 'attn':
        parts.append('model._name=ecg_transformer_attn_classifier')
    if wd:
        parts.append(f'optimizer.weight_decay={wd}')
    if mode == 'le':
        parts.append('model.linear_evaluation=true')

    parts.append('+task.df_dataset=true' if is_df else '+task.npy_dataset=true')
    parts.append(f'model.num_labels={num_labels}')
    parts.append(f'criterion._name={criterion}')

    cls_tag = '-attn' if cls == 'attn' else cls
    parts.append(
        f'checkpoint.save_dir=checkpoint{encoder}-{mode}-{task}-{loss_tag}{cls_tag}'
    )
    parts.append(
        '--config-dir ../fairseq-signals/examples/w2v_cmsc/config/finetuning/ecg_transformer --config-name diagnosis'
    )

    return ' '.join(parts)

In [29]:
# Build the fairseq-signals fine-tuning command for the binary AFib task
# (a single output label, BCE-with-logits criterion) and show it.
train_cli = generate_train_cli(
    num_labels=1,
    criterion='binary_cross_entropy_with_logits',
    is_df=True,
)
print(train_cli)

CUDA_VISIBLE_DEVICES=1 fairseq-hydra-train common.fp16=true task.data=$(pwd)/ssl_manifests/finetuning/afib_2y model.model_path=$(pwd)/ssl_pretrained_model/SSL_pretrained.pt +task.df_dataset=true model.num_labels=1 criterion._name=binary_cross_entropy_with_logits checkpoint.save_dir=checkpoint_last-ft-afib_2y-bce --config-dir ../fairseq-signals/examples/w2v_cmsc/config/finetuning/ecg_transformer --config-name diagnosis


In [30]:
# Launch fine-tuning through the shell (the command relies on $(pwd) expansion).
# os.system only hands back a status code, so check it explicitly: otherwise a
# failed training run would go unnoticed and the following cells would still
# run on a "Run All".
train_exit_status = os.system(train_cli)
if train_exit_status != 0:
    raise RuntimeError(
        f"fairseq-hydra-train failed (os.system returned {train_exit_status})"
    )
train_exit_status

[2025-01-27 11:30:42,983][fairseq_cli.train][INFO] - {'_name': None,
 'checkpoint': {'_name': None, 'save_dir': 'checkpoint_last-ft-afib_2y-bce', 'restore_file': 'checkpoint_last.pt', 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 1, 'save_interval_updates': 0, 'keep_interval_updates': -1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': 1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': False, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'loss', 'maximize_best_checkpoint_metric': False, 'patience': 30, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False},
 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 10, 'log_format': 'json', 'log_file': None, 'wandb_project': 'diagnosis-prediction', 'wandb_entity': None, 'seed': 1



[2025-01-27 11:30:44,580][fairseq_signals.models.transformer][INFO] - Loaded pre-trained model parameters from /home/jdelfrates/DeepECG_Docker/notebooks/ssl_pretrained_model/SSL_pretrained.pt
########## {'_name': 'ecg_transformer_classifier', 'all_gather': False, 'normalize': False, 'filter': False, 'data': '/home/jdelfrates/DeepECG_Docker/notebooks/ssl_manifests/finetuning/afib_2y', 'args': {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 25, 'log_format': 'json', 'log_file': None, 'wandb_project': 'wav2vec2-pretraining', 'wandb_entity': None, 'seed': 1, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'profile': False, 'reset_logging': False, 'suppress_crashes': False}, 'com

0

# Prepare Validation Dataset

In [31]:
# Load the validation signals. The variable names below are the ones already
# used by the training-data cell and are kept unchanged; after this cell they
# hold validation data.
x_train_aray = np.load(dataset_dir + '/ptb_xl_afib_2y_val_subset.npy', allow_pickle=False)
print(x_train_aray.shape)

# Validation ECGs get their own directory. The training-data cell writes
# ecg_files/ecg_{i}.npy with the same indices and afib_2y_labels.csv keeps
# pointing at those files, so saving here into ecg_files would silently
# replace the training signals with validation ones.
list_ecg_path = []
ecg_signals_path = dataset_dir + '/ecg_files_val'
os.makedirs(ecg_signals_path, exist_ok=True)
for i in range(x_train_aray.shape[0]):
    ecg_file_path = ecg_signals_path + f'/ecg_{i}.npy'
    list_ecg_path.append(ecg_file_path)
    np.save(ecg_file_path, x_train_aray[i], allow_pickle=False)

# Read the `label_2y` column of the validation labels parquet.
y_train_labels = pd.read_parquet(dataset_dir + '/ptb_xl_afib_2y_val_subset_labels.parquet')
labels = y_train_labels['label_2y'].to_numpy()
print(labels.shape)

# Save the path/label pairs to a separate validation CSV.
labels_df = pd.DataFrame({'npy_path': list_ecg_path, 'afib_2y': labels})
labels_df.to_csv(dataset_dir + '/afib_2y_val_labels.csv', index=False)


(2000, 2500, 12)
(2000,)


In [32]:
def generate_test_cli(
    devices=1,
    checkpoint_type='_best',
    task='afib_2y',
    subset=None,
    eval_path='',
    is_df=True,
    num_labels=1
):
    """Build the shell command that runs fairseq-signals inference.

    Nothing is executed here; the command is only assembled and returned.

    Args:
        devices: Value exported as ``CUDA_VISIBLE_DEVICES`` for the command.
        checkpoint_type: Suffix of the checkpoint file name; the file used is
            ``checkpoint{checkpoint_type}.pt`` inside ``eval_path``.
        task: Name of the manifest directory under
            ``$(pwd)/ssl_manifests/finetuning``.
        subset: When not ``None``, passed as ``dataset.valid_subset``.
        eval_path: Directory holding the checkpoint; also passed as
            ``common_eval.results_path``.
        is_df: When truthy adds ``task.df_dataset=true``, otherwise
            ``task.npy_dataset=true``.
        num_labels: Value passed as ``model.num_labels``.

    Returns:
        str: The complete command line.
    """
    checkpoint_file = os.path.join(eval_path, f'checkpoint{checkpoint_type}.pt')
    dataset_flag = 'task.df_dataset=true' if is_df else 'task.npy_dataset=true'

    pieces = [
        f'CUDA_VISIBLE_DEVICES={devices} fairseq-hydra-inference',
        f'task.data=$(pwd)/ssl_manifests/finetuning/{task}',
        f'common_eval.path={checkpoint_file}',
        f'common_eval.results_path={eval_path}',
        dataset_flag,
        f'model.num_labels={num_labels}',
    ]
    if subset is not None:
        pieces.append(f'dataset.valid_subset={subset}')
    pieces.append(
        '--config-dir ../fairseq-signals/examples/w2v_cmsc/config/finetuning/ecg_transformer --config-name eval'
    )
    return ' '.join(pieces)


In [33]:

# Build the inference command for the `test` subset, using the `_best`
# checkpoint found in eval_path.
# NOTE: eval_path is hard-coded to one specific run directory
# (outputs/2025-01-27/07-21-43); point it at the directory of your own run.
testcli = generate_test_cli(
    eval_path='$(pwd)/outputs/2025-01-27/07-21-43/checkpoint_last-ft-afib_2y-bce',
    checkpoint_type='_best',
    subset='test',
    task='afib_2y',
    num_labels=1,
    is_df=True,
    devices=0,
)

print(testcli)


CUDA_VISIBLE_DEVICES=0 fairseq-hydra-inference task.data=$(pwd)/ssl_manifests/finetuning/afib_2y common_eval.path=$(pwd)/outputs/2025-01-27/07-21-43/checkpoint_last-ft-afib_2y-bce/checkpoint_best.pt common_eval.results_path=$(pwd)/outputs/2025-01-27/07-21-43/checkpoint_last-ft-afib_2y-bce task.df_dataset=true model.num_labels=1 dataset.valid_subset=test --config-dir ../fairseq-signals/examples/w2v_cmsc/config/finetuning/ecg_transformer --config-name eval


In [34]:
# Run inference through the shell (the command relies on $(pwd) expansion).
# os.system only hands back a status code, so check it explicitly: otherwise a
# failed inference run would go unnoticed and the result files inspected
# afterwards could be missing or left over from an earlier run.
test_exit_status = os.system(testcli)
if test_exit_status != 0:
    raise RuntimeError(
        f"fairseq-hydra-inference failed (os.system returned {test_exit_status})"
    )
test_exit_status

---------- {'task': {'data': '/home/jdelfrates/DeepECG_Docker/notebooks/ssl_manifests/finetuning/afib_2y', 'npy_dataset': False}, 'df_dataset': True} None {'_name': 'ecg_classification', 'data': '/home/jdelfrates/DeepECG_Docker/notebooks/ssl_manifests/finetuning/afib_2y', 'leads_to_load': None, 'leads_bucket': None, 'bucket_selection': 'uniform', 'sample_rate': None, 'filter': False, 'normalize': False, 'mean_path': None, 'std_path': None, 'enable_padding': True, 'enable_padding_leads': False, 'max_sample_size': None, 'min_sample_size': None, 'num_batch_buckets': 0, 'precompute_mask_indices': False, 'perturbation_mode': None, 'p': [1.0], 'max_amplitude': 0.1, 'min_amplitude': 0.0, 'dependency': True, 'shift_ratio': 0.2, 'num_segment': 1, 'max_freq': 0.2, 'min_freq': 0.01, 'k': 3, 'mask_leads_selection': 'random', 'mask_leads_prob': 0.5, 'mask_leads_condition': [4, 5], 'inferred_w2v_config': None, 'inferred_3kg_config': None, 'criterion_name': 'binary_cross_entropy_with_logits', 'model_



[2025-01-27 11:32:48,790][fairseq_signals.models.transformer][INFO] - Loaded pre-trained model parameters from /mnt/data1/achilsowa/results/fairseq/outputs/2024-09-22/03-16-32/checkpoints-all/checkpoint_last.pt
########## {'_name': 'ecg_transformer_classifier', 'all_gather': False, 'normalize': False, 'filter': False, 'data': '/home/jdelfrates/DeepECG_Docker/notebooks/ssl_manifests/finetuning/afib_2y', 'args': {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 25, 'log_format': 'json', 'log_file': None, 'wandb_project': 'wav2vec2-pretraining', 'wandb_entity': None, 'seed': 1, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'profile': False, 'reset_logging': False, 'suppress_cra

0

In [35]:
# Check the results: list the two files looked for in the evaluation results
# directory for the `test` subset (outputs_test_header.pkl and
# outputs_test.npy). `ls` reports an error for any file that does not exist.
# NOTE(review): the run directory (outputs/2025-01-27/07-21-43) is hard-coded
# here and presumably has to match the eval_path used for inference — confirm.
!ls -l $(pwd)/outputs/2025-01-27/07-21-43/checkpoint_last-ft-afib_2y-bce/outputs_test_header.pkl
!ls -l $(pwd)/outputs/2025-01-27/07-21-43/checkpoint_last-ft-afib_2y-bce/outputs_test.npy

-rw-rw-r--. 1 jdelfrates jdelfrates 49 Jan 27 11:32 /home/jdelfrates/DeepECG_Docker/notebooks/outputs/2025-01-27/07-21-43/checkpoint_last-ft-afib_2y-bce/outputs_test_header.pkl
-rw-rw-r--. 1 jdelfrates jdelfrates 4000 Jan 27 11:32 /home/jdelfrates/DeepECG_Docker/notebooks/outputs/2025-01-27/07-21-43/checkpoint_last-ft-afib_2y-bce/outputs_test.npy
