In [5]:
import sys
sys.path.append("..")
from config import *
import nibabel as nib
from scipy import ndimage
import pandas as pd
from tqdm import tqdm

In [2]:
def analyze_patient(dataset_id, subject):
    """Find ground-truth lesions that share one connected component of the segmentation.

    The segmentation volume is split into connected components with
    ``ndimage.label``. For every component, the distinct non-zero IDs of the
    ground-truth mask found inside it are collected; when a component contains
    more than one ID, those lesions are reported as confluent with each other.

    Parameters
    ----------
    dataset_id : int
        Index into ``AVAILABLE_DATASETS``.
    subject : int
        Subject number; formatted as a zero-padded 3-digit label for the query.

    Returns
    -------
    pandas.DataFrame
        One row per confluent lesion, with columns ``database``, ``patient``,
        ``lesion`` (ground-truth ID) and ``confluences`` (list with the other
        ground-truth IDs found in the same segmentation component).

    Raises
    ------
    ValueError
        If the segmentation and the ground-truth mask have different shapes.
    """
    dataset = AVAILABLE_DATASETS[dataset_id]
    subject_label = f"{subject:03d}"

    # The segmentation is loaded untouched and split into connected components.
    segm_fn = dataset.get(return_type="filename", subject=subject_label, **CONTRASTS["SEGMENTATION"])[0]
    segm = nib.load(segm_fn).get_fdata()
    labeled_mask_segm, _ = ndimage.label(segm)

    # Ground-truth mask: every lesion carries its own integer ID.
    mask_fn = dataset.get(return_type="filename", subject=subject_label, **CONTRASTS["MASK"])[0]
    mask = nib.load(mask_fn).get_fdata().astype(int)

    if mask.shape != labeled_mask_segm.shape:
        raise ValueError(
            f"Segmentation shape {labeled_mask_segm.shape} and mask shape {mask.shape} differ "
            f"for dataset {dataset_id}, subject {subject_label}."
        )

    # Match components against ground-truth IDs. find_objects yields one bounding
    # box per component (position i holds label i + 1), so each comparison only
    # touches the component's neighbourhood instead of the whole volume.
    confluent_lesions = []
    for label, bbox in enumerate(ndimage.find_objects(labeled_mask_segm), start=1):
        if bbox is None:
            continue
        component = labeled_mask_segm[bbox] == label
        coincident = np.unique(mask[bbox][component])
        # Drop the background explicitly rather than assuming it is the first value.
        coincident = coincident[coincident != 0]
        if len(coincident) > 1:
            for lesion in coincident:
                others = [other for other in coincident if other != lesion]
                confluent_lesions.append((dataset_id, subject, lesion, others))

    return pd.DataFrame(confluent_lesions, columns=["database", "patient", "lesion", "confluences"])

def analyze_patients():
    """Run ``analyze_patient`` on every subject of every available dataset.

    Returns the per-patient DataFrames concatenated into a single one. The
    original per-patient row indices are kept, so index values repeat across
    patients. A progress bar is shown for each dataset.
    """
    per_patient_frames = []
    for db_id in range(len(AVAILABLE_DATASETS)):
        subjects = AVAILABLE_DATASETS[db_id].get_subjects()
        for pat in tqdm(subjects):
            per_patient_frames.append(analyze_patient(db_id, int(pat)))
    return pd.concat(per_patient_frames)

In [3]:
# Full run over every dataset and patient. The recorded progress bars below show
# this took roughly three hours in total, so avoid re-running it needlessly.
final_df = analyze_patients()


def _touches_class_1(conf):
    """True when any ID in `conf` lies in the 1000-1999 range."""
    return any(other_id // 1000 == 1 for other_id in conf)


# Flag rows whose lesion is confluent with at least one lesion whose ID is in the 1000s.
final_df["rim_pos_confluent"] = final_df["confluences"].map(_touches_class_1)
final_df

100%|██████████| 69/69 [1:52:31<00:00, 97.85s/it]   
100%|██████████| 55/55 [1:04:11<00:00, 70.02s/it] 


Unnamed: 0,database,patient,lesion,confluences,rim_pos_confluent
0,0,60,2012,[2013],False
1,0,60,2013,[2012],False
2,0,60,1000,"[2025, 2029]",False
3,0,60,2025,"[1000, 2029]",True
4,0,60,2029,"[1000, 2025]",True
...,...,...,...,...,...
2,1,54,2021,"[1000, 2010]",True
3,1,54,2001,[2040],False
4,1,54,2040,[2001],False
5,1,54,2035,[2036],False


In [4]:
# NOTE(review): absolute, machine-specific output path; consider a configurable directory.
results_csv_path = "/Users/german/Desktop/results_confluent.csv"
final_df.to_csv(results_csv_path)

In [97]:

# Rows whose lesion ID is in the 1000-1999 range and that list more than five confluent lesions.
has_many_confluences = final_df["confluences"].str.len().gt(5)
lesion_in_1000s = final_df["lesion"].floordiv(1000).eq(1)
final_df.loc[has_many_confluences & lesion_in_1000s]

Unnamed: 0,database,patient,lesion,confluences,confluences_2,rim_pos_confluent
0,0,64,1001,"[1003, 1004, 1007, 1009, 1010, 2055, 2068, 207...","[1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]",True
1,0,64,1003,"[1001, 1004, 1007, 1009, 1010, 2055, 2068, 207...","[1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]",True
2,0,64,1004,"[1001, 1003, 1007, 1009, 1010, 2055, 2068, 207...","[1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]",True
3,0,64,1007,"[1001, 1003, 1004, 1009, 1010, 2055, 2068, 207...","[1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]",True
4,0,64,1009,"[1001, 1003, 1004, 1007, 1010, 2055, 2068, 207...","[1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]",True
...,...,...,...,...,...,...
31,1,51,1015,"[1001, 1002, 1007, 1008, 1011, 2002, 2007, 201...","[1, 1, 1, 1, 1, 2, 2, 2, 2]",True
36,1,51,1022,"[1024, 1031, 1032, 2014, 2015, 2022, 2052, 205...","[1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]",True
37,1,51,1024,"[1022, 1031, 1032, 2014, 2015, 2022, 2052, 205...","[1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]",True
38,1,51,1031,"[1022, 1024, 1032, 2014, 2015, 2022, 2052, 205...","[1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]",True


In [15]:
# The rest of this notebook classifies a lesion by the thousands digit of its ID
# (`lesion // 1000`). Use the same test for both counts: the previous
# `lesion // 2000 == 1` also matched IDs in the 3000-3999 range.
lesion_class = final_df["lesion"] // 1000
num_pos_conf = int((lesion_class == 1).sum())  # confluent rows with an ID in 1000-1999
num_neg_conf = int((lesion_class == 2).sum())  # confluent rows with an ID in 2000-2999

num_neg_conf

391

In [None]:
# Among rows whose lesion ID is in the 1000s: how many are flagged
# `rim_pos_confluent`, followed by how many there are in total.
lesion_in_1000s = final_df["lesion"] // 1000 == 1
flagged_in_1000s = final_df["rim_pos_confluent"] & lesion_in_1000s
print(final_df.loc[flagged_in_1000s].shape[0])
print(final_df.loc[lesion_in_1000s].shape[0])

In [None]:
from utils import read_patients_metadata

def update_confluent_lesions(patch_size: np.array):
    """Rewrite each patient's lesions JSON, setting the "ignore" fields on selected lesions.

    For every subject of every dataset in ``AVAILABLE_DATASETS``, the lesions
    JSON file of the derivatives pipeline for ``patch_size`` is looked up. If it
    exists, it is loaded, the entries whose integer key is found in
    ``lesions[db_id][subject]`` get their ``ignore``, ``reasons_to_ignore`` and
    ``reasons_to_ignore_ext`` fields overwritten, and the file is deleted and
    written again. Otherwise an error line is printed for that patient.

    NOTE(review): this function looks unfinished. ``lesions``, ``to_ignore``,
    ``reason`` and ``comment`` are not defined here nor in any visible cell, and
    ``os`` / ``json`` are not imported in the visible import cell (they may come
    from ``from config import *`` -- confirm).

    Parameters
    ----------
    patch_size
        Indexable with at least three elements; they are substituted into the
        ``DERIVATIVES["LESIONS"]["pipeline"]`` format string.

    Returns
    -------
    None
    """
    meta = read_patients_metadata(patch_size)  # NOTE(review): `meta` is never used below
    for db_id in range(len(AVAILABLE_DATASETS)):
        dataset = AVAILABLE_DATASETS[db_id]
        for pat in dataset.get_subjects():
            subject = int(pat)
    
            # NOTE(review): `pipeline` is computed but the same expression is repeated in the query below.
            pipeline = DERIVATIVES["LESIONS"]["pipeline"].format(patch_size[0], patch_size[1], patch_size[2])
            # We retrieve the lesions of the patient
            # NOTE(review): if the query returns an empty list, `[0]` presumably raises
            # before the "no json file found" branch can run -- confirm.
            json_lesions_path = dataset.get(return_type="filename", subject=f"{subject:03d}", scope=DERIVATIVES["LESIONS"]["pipeline"].format(patch_size[0], patch_size[1], patch_size[2]), extension='json')[0]
            
            if os.path.exists(json_lesions_path):
                # READ
                with open(json_lesions_path) as inp:
                    pat_metadata = json.load(inp)
                  
                # UPDATE
                for les_id in pat_metadata.keys():
                    # if in the dataframe => update
                    # NOTE(review): `lesions`, `to_ignore`, `reason` and `comment` are undefined in the visible code.
                    if int(les_id) in lesions[db_id][subject]:
                        pat_metadata[les_id]["ignore"] = to_ignore
                        pat_metadata[les_id]["reasons_to_ignore"] = reason
                        pat_metadata[les_id]["reasons_to_ignore_ext"] = comment
                    
                # SAVE
                # The file is removed first and then recreated with the updated content.
                os.remove(json_lesions_path)
                with open(json_lesions_path, "w") as outfile:
                    json.dump(pat_metadata, outfile)
                # all the logic goes here
                # IMPORTANT: update dataset_description with the strategy used for cleaning,
                # so that every time the cleaner is run it starts from scratch.
                # Also fix the deformed versions so that they carry a cleaner flag,
                # because deformed versions will only be generated for the lesions that passed the cleaner.
                # TODO: modify the reader by adding a flag so that it only reads the lesions that
                # passed the cleaner.
            else:         
                print(f"[ERROR] Patient {pat} skipped: no json file found.")

In [36]:

# Scratch check: for each connected component of the segmentation, print the
# ground-truth IDs found inside it (the first unique value is dropped).
# Relies on `labels_segm`, `labeled_mask_segm` and `mask` already being in the kernel.
# A dilation-based retry for components with no coincident ID was tried and disabled.
for component_id in labels_segm:
    component_voxels = (labeled_mask_segm == component_id).astype(int)
    ids_in_component = np.unique(mask * component_voxels)
    print(ids_in_component[1:])



[2008]
[1000 2021]
[1003]
[2024]
[2017]
[2019]
[2005]
[2012]
[2022]
[2000]
[2014]
[2010]
[2006]
[2007]
[2013]
[1001 2004 2015]
[2011]
[2001]
[2020]
[2018]
[1002]
[2009]
[2002]
[2003]


In [None]:
# Map each segmented lesion label to the rim IDs that fall inside it, and collect
# a warning for every rim that matches no segmented lesion or more than one.
# Relies on `i`, `gt_rim` and `labeled_mask` already being in the kernel.
rim_dic = {}
messages = []

# DEBUGGING
print(f"> CASE {i}")

# The first unique value is dropped (presumably the background 0 -- confirm).
rims_ids = np.unique(gt_rim)[1:]

for rim_id in tqdm(rims_ids):
    rim_voxels = (gt_rim == rim_id).astype(int)
    # Segmented-lesion labels overlapping this rim, background excluded.
    coincidences = [seg_label for seg_label in np.unique(rim_voxels * labeled_mask) if seg_label != 0]

    if not coincidences:
        messages.append(f"[WARNING] Rim {rim_id} is not segmented.")
    elif len(coincidences) > 1:
        messages.append(f"[WARNING] Rim {rim_id} intersects with more than one independent lesion segmentations.")
    else:
        rim_dic.setdefault(coincidences[0], []).append(rim_id)

# Any warning marks the case as unsuccessful.
for message in messages:
    print(message)
successful = not messages
for key in rim_dic.keys():
    if len(rim_dic[key]) > 1:
        print(f"[WARNING] Rims {rim_dic[key]} are confluent.")
        successful = Fals