In [15]:
!python3 run_vqnsp_training.py \
    --log_dir ./log/vqnsp/ \
    --model vqnsp_encoder_base_decoder_3x200x12 \
    --codebook_n_emd 8192 \
    --codebook_emd_dim 64 \
    --quantize_kmeans_init \
    --batch_size 4 \
    --device cpu \
    --eval \
    --pretrained-weight checkpoints/vqnsp.pth \
    --dataset-config-path "configs/vqnsp/data_config.json"

Not using distributed mode
Namespace(batch_size=4, epochs=100, save_ckpt_freq=20, model='vqnsp_encoder_base_decoder_3x200x12', codebook_n_emd=8192, codebook_emd_dim=64, ema_decay=0.99, quantize_kmeans_init=True, input_size=1600, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=None, weight_decay=0.0001, weight_decay_end=None, lr=5e-05, warmup_lr=1e-06, min_lr=1e-05, warmup_epochs=5, warmup_steps=-1, output_dir='', log_dir='./log/vqnsp/', device='cpu', seed=0, resume='', auto_resume=True, dist_eval=True, disable_eval=False, eval=True, pretrained_weight='checkpoints/vqnsp.pth', calculate_codebook_usage=False, start_epoch=0, num_workers=10, pin_mem=True, dataset_config_path='configs/vqnsp/data_config.json', distributed=False, world_size=1, local_rank=-1, dist_on_itp=False, dist_url='env://')
{'pretrained_cfg': None, 'pretrained_cfg_overlay': None}
Final encoder config {'EEG_size': 1600, 'patch_size': 200, 'in_chans': 1, 'num_classes': 0, 'embed_dim': 200, 'depth': 12, 'num_heads': 10

In [1]:
import torch
from timm.models import create_model
import os
from pathlib import Path
import contextlib
from modeling_vqnsp import vqnsp_encoder_base_decoder_3x200x12
from data_processor.dataset_new import SingleEDFDataset


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def load_model(pretrained_weight_path, device='cuda'):
    model = create_model(
        'vqnsp_encoder_base_decoder_3x200x12',
        pretrained=True,
        pretrained_weight="checkpoints/vqnsp.pth",
        as_tokenzer=True,
        n_code=8192,
        code_dim=64,
        EEG_size=1600,
        decay=0.99,
        quantize_kmeans_init=True
    )
    model.to('cpu')
    model.eval()
    return model

In [3]:
def get_embeddings(model, file_path, device='cuda'):
    dataset = SingleEDFDataset(file_path, window_size=16.0, step_size=16.0, threshold_std=5, mask_percentage=0.1)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, num_workers=1, pin_memory=True)

    embeddings = []
    with torch.no_grad():
        for data in data_loader:
            data = data.to('cpu').float()
            x = rearrange(x, 'B N (A T) -> B N A T', T=200)
            quantize, embed_ind, emb_loss = model.encode(data, input_chans=dataset.get_ch_names())
            embeddings.append(quantize.cpu())

    return torch.cat(embeddings, dim=0)

In [4]:
def compare_embeddings(embedding1, embedding2):
    embedding1_mean = embedding1.mean(dim=0)
    embedding2_mean = embedding2.mean(dim=0)
    
    embedding1_mean_flat = embedding1_mean.view(-1)
    embedding2_mean_flat = embedding2_mean.view(-1)
    
    cosine_similarity = torch.nn.functional.cosine_similarity(embedding1_mean_flat, embedding2_mean_flat, dim=0).item()
    euclidean_distance = torch.nn.functional.pairwise_distance(embedding1_mean_flat.unsqueeze(0), embedding2_mean_flat.unsqueeze(0)).item()
    
    return cosine_similarity, euclidean_distance

In [8]:
dir_path = "../../DiagnosisDataset/DiagnosisDataset/"
project_path = "/home/dmedvedeva/denis/LaBraM/"
pretrained_weight_path = project_path + 'checkpoints/vqnsp.pth'
device = 'cuda'

In [9]:
def compare_eeg_embeds(file1, file2):
    model = load_model(pretrained_weight_path, device)
    file1 = os.path.join(dir_path, file1) 
    file2 = os.path.join(dir_path, file2)
    
    with open(os.devnull, 'w') as fnull:
        with contextlib.redirect_stdout(fnull), contextlib.redirect_stderr(fnull):
            embedding1 = get_embeddings(model, file1, device)
            embedding2 = get_embeddings(model, file2, device)

    cosine_similarity, euclidean_distance = compare_embeddings(embedding1, embedding2)

    print(f"\nCosine similarity: {cosine_similarity}")
    print(f"Euclidean distance: {euclidean_distance}")

In [10]:
print("---------------------------------------------------------------------")
compare_eeg_embeds("D0000026.edf", "D0000062.edf") # здоровая+молодая+ж и нездоровый+немолодой+м
print("Should be different.")
print("---------------------------------------------------------------------")

print("---------------------------------------------------------------------")
compare_eeg_embeds("D0000097.edf", "D0000178.edf") # оба здоровые, молодые м
print("Should be similar.")
print("---------------------------------------------------------------------")

---------------------------------------------------------------------
{'pretrained_cfg': None, 'pretrained_cfg_overlay': None}
Final encoder config {'EEG_size': 1600, 'patch_size': 200, 'in_chans': 1, 'num_classes': 0, 'embed_dim': 200, 'depth': 12, 'num_heads': 10, 'mlp_ratio': 4.0, 'qkv_bias': True, 'qk_scale': None, 'drop_rate': 0.0, 'attn_drop_rate': 0.0, 'drop_path_rate': 0.0, 'norm_layer': functools.partial(<class 'torch.nn.modules.normalization.LayerNorm'>, eps=1e-06), 'init_values': 0.0, 'use_abs_pos_emb': True, 'use_rel_pos_bias': False, 'use_shared_rel_pos_bias': False, 'use_mean_pooling': True, 'init_scale': 0.001}
Final decoder config {'EEG_size': 8, 'patch_size': 1, 'in_chans': 64, 'num_classes': 0, 'embed_dim': 200, 'depth': 3, 'num_heads': 10, 'mlp_ratio': 4.0, 'qkv_bias': True, 'qk_scale': None, 'drop_rate': 0.0, 'attn_drop_rate': 0.0, 'drop_path_rate': 0.0, 'norm_layer': functools.partial(<class 'torch.nn.modules.normalization.LayerNorm'>, eps=1e-06), 'init_values': 0.

RuntimeError: torch.cat(): expected a non-empty list of Tensors