In [1]:
import os 
from omegaconf import OmegaConf
import pandas as pd
import numpy as np
import torch

# Seed PyTorch's global RNG so that random draws in this notebook are repeatable.
torch.manual_seed(0)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [2]:
# Decoder configuration of this project
decoder_cfg = OmegaConf.load("../configs/config.yaml")

# The encoder's training configuration is read from the `.hydra` sub-folder
# of the directory of the model being decoded
encoder_config_path = os.path.join(decoder_cfg.model_to_decode_path, ".hydra", "config.yaml")
encoder_cfg = OmegaConf.load(encoder_config_path)

# The dataset_folder stored in the encoder config contains a mistake, so it is
# replaced by the value from the decoder config
encoder_cfg["dataset_folder"] = decoder_cfg["dataset_folder"]

# Resolve only the dataset entry of the first region, to avoid errors in
# unrelated keys of the encoder config
region = list(encoder_cfg.dataset.keys())[0]
print("Region:", region)
dataset_info = OmegaConf.to_container(encoder_cfg.dataset[region], resolve=True)

# Show the dataset fields that the rest of the notebook relies on
for label, field in (
    ("Numpy path:", "numpy_all"),
    ("Subjects path:", "subjects_all"),
    ("subject column name:", "subject_column_name"),
    ("Input size:", "input_size"),
):
    print(label, dataset_info[field])

Region: CINGULATE_right
Numpy path: /neurospin/dico/data/deep_folding/current/datasets/UkBioBank40/crops/2mm/CINGULATE./mask/Rskeleton.npy
Subjects path: /neurospin/dico/data/deep_folding/current/datasets/UkBioBank40/crops/2mm/CINGULATE./mask/Rskeleton_subject.csv
subject column name: Subject
Input size: (1, 16, 37, 37)


In [3]:
# Load the full array of volumes and the CSV listing the subjects, both located
# through the resolved dataset entry.
skeleton_npy_path = dataset_info['numpy_all']
subjects_csv_path = dataset_info['subjects_all']

skels = np.load(skeleton_npy_path)
list_sub = pd.read_csv(subjects_csv_path)

# The array's first dimension and the subject count are printed side by side
# so they can be compared.
print(region, skels.shape, len(list_sub))

CINGULATE_right (42433, 16, 37, 37, 1) 42433


In [4]:
# Both CSV files live inside the directory of the model being decoded; their
# file names come from the decoder config.
rootpath = os.path.join(decoder_cfg["model_to_decode_path"])
train_path, val_test_path = (
    os.path.join(rootpath, decoder_cfg[csv_key])
    for csv_key in ("train_csv", "val_test_csv")
)

In [5]:
train_data = pd.read_csv(train_path)

# The second CSV holds validation and test subjects together. They are split
# on the parity of the integer parsed from the ID after its first four
# characters: even -> validation, odd -> test.
a = pd.read_csv(val_test_path)
a['IID'] = a['ID'].map(lambda subject_id: int(subject_id[4:]))
is_even = a['IID'] % 2 == 0
val_data = a.loc[is_even].drop(columns='IID')
test_data = a.loc[~is_even].drop(columns='IID')

print(f"Train set size: {len(train_data)}")
print(f"Validation set size: {len(val_data)}")
print(f"Test set size: {len(test_data)}")

Train set size: 38190
Validation set size: 2092
Test set size: 2151


In [6]:
# Display the first three rows of the table read from the training CSV.
train_data.head(3)

Unnamed: 0,ID,dim1,dim2,dim3,dim4,dim5,dim6,dim7,dim8,dim9,...,dim23,dim24,dim25,dim26,dim27,dim28,dim29,dim30,dim31,dim32
0,sub-1000021,23.591688,-34.179787,-13.138007,-5.852693,-17.080248,2.921948,7.086022,11.879954,2.124793,...,-47.075867,34.400513,-16.015812,-8.731407,26.657213,-21.295017,7.269122,-29.090183,-18.034906,-2.400088
1,sub-1000325,43.94501,-39.637356,2.319227,-27.171787,62.515137,28.131823,3.678916,-7.92497,13.552408,...,30.171541,-38.58324,-11.15136,3.457533,-18.338762,-14.182381,10.836757,-8.103463,13.514843,-26.685957
2,sub-1000575,20.67688,-5.10188,8.129474,-18.579628,36.006264,-2.904237,-30.171307,-25.44316,-49.99496,...,-25.555567,12.490367,-9.33281,-8.427656,2.542956,-5.423203,33.596405,11.949966,36.58783,-6.631192


In [7]:
# Display the resolved path of the .npy file that is loaded into `skels`.
dataset_info['numpy_all']

'/neurospin/dico/data/deep_folding/current/datasets/UkBioBank40/crops/2mm/CINGULATE./mask/Rskeleton.npy'

In [8]:
from dataloader import LatentTargetDataset

# Sanity check of LatentTargetDataset on a handful of training subjects.
# `list_indi` holds hand-picked row indices into `skels`, one per entry of
# `sbu_list` (same order); they provide the reference volume for each subject.
sbu_list = ["sub-1003534", "sub-1000715", "sub-1006724", "sub-1004594", "sub-1002539" , "sub-1005312", ]
list_indi = [29, 5, 58, 36, 14, 45]

dataset = LatentTargetDataset(
    latent_csv_path=train_path,
    target_npy_path=dataset_info['numpy_all'],
    subjects_all_path=dataset_info['subjects_all'],
    subject_list=sbu_list
)

# Element count of one 16 x 37 x 37 volume: a fully matching volume produces
# exactly this number in the "Volume shape" line below.
print(16*37*37)

# Test values: every element returned by the dataset must equal the reference
# taken directly from `train_data` / `skels`. The counts are still printed, and
# additionally asserted so that a mismatch fails the cell instead of relying on
# a visual comparison of the output.
for i in range(len(dataset)):
    latent, volume = dataset[i]
    print(f"Sample {i}")

    # Reference latent: the subject's row of train_data without its 'ID' column.
    expected_latent = torch.tensor(
        train_data[train_data['ID'] == sbu_list[i]].drop('ID', axis=1).values,
        dtype=torch.float32,
    )
    latent_matches = (latent == expected_latent).sum()
    print(f"  Latent: {latent_matches}")
    assert int(latent_matches) == expected_latent.numel(), (
        f"latent vector of {sbu_list[i]} differs from its row in train_data"
    )

    # NOTE(review): skels[k] has shape (16, 37, 37, 1), so permute(3, 2, 1, 0)
    # yields (1, 37, 37, 16), whereas the DataModule batches shown at the end of
    # the notebook are (N, 1, 16, 37, 37) -- confirm which axis order
    # LatentTargetDataset is meant to return.
    expected_volume = torch.tensor(skels[list_indi[i]], dtype=torch.float32).permute(3, 2, 1, 0)
    volume_matches = (volume == expected_volume).sum()
    # NOTE(review): despite the label, this prints the number of matching
    # elements, not a shape; the label is kept so recorded outputs stay valid.
    print(f"  Volume shape: {volume_matches}")
    assert int(volume_matches) == expected_volume.numel(), (
        f"volume of {sbu_list[i]} differs from skels[{list_indi[i]}]"
    )


21904
Sample 0
  Latent: 32
  Volume shape: 21904
Sample 1
  Latent: 32
  Volume shape: 21904
Sample 2
  Latent: 32
  Volume shape: 21904
Sample 3
  Latent: 32
  Volume shape: 21904
Sample 4
  Latent: 32
  Volume shape: 21904
Sample 5
  Latent: 32
  Volume shape: 21904


In [110]:
from dataloader import DataModule_Learning

# Reload the decoder and encoder configs from disk. These are the same steps
# as in the configuration cell near the top of the notebook, repeated here.
decoder_cfg = OmegaConf.load("../configs/config.yaml")
encoder_config_path = os.path.join(
    decoder_cfg.model_to_decode_path,
    ".hydra",
    "config.yaml",
)
encoder_cfg = OmegaConf.load(encoder_config_path)
encoder_cfg["dataset_folder"] = decoder_cfg["dataset_folder"]
region = list(encoder_cfg.dataset.keys())[0]
dataset_info = OmegaConf.to_container(encoder_cfg.dataset[region], resolve=True)

# Build the datamodule and let it prepare its datasets
dm = DataModule_Learning(decoder_cfg, dataset_info)
dm.setup()

# One loader per split
train_loader, val_loader, test_loader = (
    dm.train_dataloader(),
    dm.val_dataloader(),
    dm.test_dataloader(),
)

# Pull a single batch from the training loader and show its tensor shapes
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    latent_vector, target_volume = first_batch
    print(latent_vector.shape, target_volume.shape)

torch.Size([32, 32]) torch.Size([32, 1, 16, 37, 37])
