In [1]:
import wandb
from src.data import DataModule
from src.config import radiomics_folder, lesion_level_labels_csv


wandb.init()
wandb.config.roi_selection_method = "crop"
wandb.config.aggregation_function = "min"
wandb.config.roi_size = 150
wandb.config.optimizer = "adamw"
wandb.config.weight_decay = 0.00001
wandb.config.model = "SEResNet50"
wandb.config.dropout = 0.07292136035956572
wandb.config.momentum = 0
wandb.config.pretrained = False
wandb.config.learning_rate_max = 0.000023059510738335888
wandb.config.sampler = "label_organ_stratified"
wandb.config.dim = 2
wandb.config.size = 128 if wandb.config.dim == 3 else 256
wandb.config.test_center = None  # "amphia"
wandb.config.lesion_target = "lesion_response"
wandb.config.patient_target = "response"
wandb.config.max_batch_size = 6 if wandb.config.dim == 3 else 32
wandb.config.seed = 0
wandb.config.max_epochs = 100
wandb.config.patience = 10
wandb.config.lr_min = 1e-7
wandb.config.T_0 = 10

dm = DataModule(
    radiomics_folder,
    lesion_level_labels_csv,
    wandb.config,
)

2023-01-17 10:55:03,569 - Created a temporary directory at /tmp/tmp1j4y1_kv
2023-01-17 10:55:03,571 - Writing /tmp/tmp1j4y1_kv/_remote_module_non_scriptable.py
2023-01-17 10:55:03,793 - Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Currently logged in as: [33mrenstermaat[0m ([33mpremium[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
dm.setup()

In [23]:
from pathlib import Path
import numpy as np

dl = dm.train_dataloader()

batches = []
for batch in dl:
    batches.append(np.stack([
        batch['label'].numpy(),
        batch['organ'],
        [Path(f).name for f in batch['img_meta_dict']['filename_or_obj']]
    ], axis=1))

Loading dataset: 100%|██████████| 1359/1359 [00:00<00:00, 1072245.89it/s]


In [34]:
batches

[array([['0.0', 'adrenal', 'PREM_IS_097_0.nii.gz'],
        ['0.0', 'bone', 'PREM_VU_101abdomen_0.nii.gz'],
        ['0.0', 'liver', 'PREM_IS_124_0.nii.gz'],
        ['0.0', 'liver', 'IM_075_2.nii.gz'],
        ['0.0', 'liver', 'PREM_VU_110_4.nii.gz'],
        ['0.0', 'lung', 'PREM_IS_120_3.nii.gz'],
        ['0.0', 'lung', 'MAX_260_3.nii.gz'],
        ['0.0', 'lung', 'PREM_VU_128_2.nii.gz'],
        ['0.0', 'lymph', 'PREM_IS_066_0.nii.gz'],
        ['0.0', 'lymph', 'PREM_LU_768_4.nii.gz'],
        ['0.0', 'lymph', 'PREM_MS_236_3.nii.gz'],
        ['0.0', 'lymph', 'PREM_VU_077_1.nii.gz'],
        ['0.0', 'other', 'IM_042_3.nii.gz'],
        ['0.0', 'subcutis', 'PREM_ZU_003_4.nii.gz'],
        ['1.0', 'abdomen', 'PREM_VU_093_2.nii.gz'],
        ['1.0', 'bone', 'UNI_186_4.nii.gz'],
        ['1.0', 'liver', 'IM_062_4.nii.gz'],
        ['1.0', 'lung', 'PREM_AM_070_0.nii.gz'],
        ['1.0', 'lung', 'PREM_IS_060_1.nii.gz'],
        ['1.0', 'lung', 'PREM_LU_503_2.nii.gz'],
        ['1.0', '

In [32]:
possible = [case['img'].split('/')[-1] for case in dm.train_data]

In [20]:
batch['organ']

['adrenal',
 'bone',
 'liver',
 'liver',
 'liver',
 'lung',
 'lung',
 'lung',
 'lymph',
 'lymph',
 'lymph',
 'lymph',
 'other',
 'abdomen',
 'abdomen',
 'bone',
 'liver',
 'lung',
 'lung',
 'lung',
 'lung',
 'lung',
 'lung',
 'lung',
 'lymph',
 'lymph',
 'lymph',
 'lymph',
 'lymph',
 'other',
 'subcutis',
 'subcutis']

In [16]:

filenames = np.array([Path(f).name for f in batch['img_meta_dict']['filename_or_obj']])
filenames

['PREM_LU_516_3.nii.gz',
 'PREM_IS_140_1.nii.gz',
 'PREM_IS_067_2.nii.gz',
 'MAX_051_3.nii.gz',
 'PREM_VU_025_4.nii.gz',
 'PREM_AM_095_2.nii.gz',
 'PREM_LU_572_4.nii.gz',
 'UNI_067thorax_0.nii.gz',
 'PREM_AM_101_0.nii.gz',
 'PREM_LU_313_1.nii.gz',
 'MAX_269_3.nii.gz',
 'PREM_VU_025_0.nii.gz',
 'PREM_ZU_016_1.nii.gz',
 'PREM_IS_136_4.nii.gz',
 'IM_214abdomen_3.nii.gz',
 'PREM_VU_055abdomen_1.nii.gz',
 'PREM_LU_277_0.nii.gz',
 'PREM_VU_148abdomen_4.nii.gz',
 'PREM_IS_021_0.nii.gz',
 'PREM_LU_025_1.nii.gz',
 'MAX_201_1.nii.gz',
 'IM_101_0.nii.gz',
 'UNI_160_3.nii.gz',
 'PREM_VU_123_2.nii.gz',
 'PREM_ZU_025_3.nii.gz',
 'PREM_IS_060_3.nii.gz',
 'PREM_LU_521_4.nii.gz',
 'PREM_MS_223_3.nii.gz',
 'PREM_VU_095abdomen_2.nii.gz',
 'PREM_ZU_027_0.nii.gz',
 'PREM_AM_084_2.nii.gz',
 'PREM_VU_055abdomen_3.nii.gz']

In [10]:
x['img_meta_dict']['filename_or_obj']

['/mnt/c/Users/user/data/dl_radiomics/dim-2_size-256_method-crop_roi_size-150/maxima/MAX_029_2.nii.gz',
 '/mnt/c/Users/user/data/dl_radiomics/dim-2_size-256_method-crop_roi_size-150/maxima/MAX_089_0.nii.gz',
 '/mnt/c/Users/user/data/dl_radiomics/dim-2_size-256_method-crop_roi_size-150/isala/PREM_IS_078_4.nii.gz',
 '/mnt/c/Users/user/data/dl_radiomics/dim-2_size-256_method-crop_roi_size-150/maxima/MAX_198_0.nii.gz',
 '/mnt/c/Users/user/data/dl_radiomics/dim-2_size-256_method-crop_roi_size-150/vumc/PREM_VU_051_4.nii.gz',
 '/mnt/c/Users/user/data/dl_radiomics/dim-2_size-256_method-crop_roi_size-150/amphia/PREM_AM_121_4.nii.gz',
 '/mnt/c/Users/user/data/dl_radiomics/dim-2_size-256_method-crop_roi_size-150/maxima/MAX_017_2.nii.gz',
 '/mnt/c/Users/user/data/dl_radiomics/dim-2_size-256_method-crop_roi_size-150/umcu/UNI_083_3.nii.gz',
 '/mnt/c/Users/user/data/dl_radiomics/dim-2_size-256_method-crop_roi_size-150/amphia/PREM_AM_118_3.nii.gz',
 '/mnt/c/Users/user/data/dl_radiomics/dim-2_size-256_

In [46]:
import numpy as np

data = dm.val_data
groups = [(x['label'], x['organ']) for x in data]



In [47]:
groups = np.array(['_'.join([str(el) for el in x]) for x in groups])
unique_groups = np.unique(groups, axis=0)

In [48]:
group_indices = []
for unique_group in unique_groups:
    indices = np.where(groups == unique_group)[0]
    group_indices.append(indices)

In [41]:
np.concatenate(group_indices).reshape(15,-1)

ValueError: cannot reshape array of size 565 into shape (15,newaxis)

In [None]:
import numpy as np

labels = [(case['label'], case['patient']) for case in data]
np.unique(labels, axis=0)

In [22]:
import pandas as pd

df = pd.read_csv("/mnt/c/Users/user/data/tables/lesion_followup_curated_v4.csv", sep=';')

In [28]:


organs = []
for name in df['name'].tolist():
    name = name.lower()

    if 'lung' in name or 'pleur' in name:
        organ = 'lung'
    elif 'liver' in name:
        organ = 'liver'
    elif 'lymph' in name:
        organ = 'lymph'
    elif 'subcut' in name:
        organ = 'subcutis'
    elif 'adren' in name:
        organ = 'adrenal'
    elif 'abdomen' in name:
        organ = 'abdomen'
    elif 'bone' in name:
        organ = 'bone'
    elif 'spleen' in name:
        organ = 'spleen'
    else:
        organ = 'other'

    organs.append((name, organ))

In [35]:
organ_synonyms = {
    'lung':['lung','pleur','pulm'],
    'liver':['liver'],
    'lymph':['lymph', 'lymoh', 'inguin', 'supraclav'],
    'subcutis':['subcut'],
    'adrenal':['adren'],
    'abdomen':['abd', 'mesent', 'intest', 'perit','mesent','adbom'],
    'bone':['bone', 'os', 'th5', 'ileum'],
    'spleen':['spleen']
}

organs = []
for name in df['name'].tolist():
    name = name.lower()

    found = False
    for organ, synonyms in organ_synonyms.items():
        if any([synonym in name for synonym in synonyms]):
            organs.append(organ)
            found = True
            break

    if not found:
        organs.append('other')
        
df['organ'] = organs

In [38]:
df.drop(columns=['Unnamed: 0']).to_csv("/mnt/c/Users/user/data/tables/lesion_followup_curated_master.csv")

In [24]:
val_dl = dm.val_dataloader()

val_batches = []
for batch in val_dl:
    val_batches.append(
        list(zip(batch['patient'], batch['label'].numpy().tolist()))
    )

Loading dataset: 100%|██████████| 568/568 [00:00<00:00, 596485.90it/s]


In [43]:
from collections import defaultdict

def count_per_patient(batches):
    count = defaultdict(int)
    for batch in batches:
        for case in batch:
            count[case[0]] += 1

    assert max(list(count.values())) <= 5

def no_overlap(train, val):
    train_patients = set()
    for batch in train:
        for case in batch:
            train_patients.add(case[0])

    val_patients = set()
    for batch in val:
        for case in batch:
            val_patients.add(case[0])

    assert train_patients.isdisjoint(val_patients)

count_per_patient(batches)
no_overlap(batches, val_batches)