In [1]:
from config import *

import json
import os
import numpy as np
from nibabel import load as load_nii
import nibabel as nib
from scipy import ndimage
from scipy.ndimage.morphology import generate_binary_structure as gbs
from scipy.ndimage.measurements import label
import matplotlib.pyplot as plt
from skimage.feature import hessian_matrix, hessian_matrix_eigvals
from tqdm import tqdm
%matplotlib inline
import sys
sys.path.append("..")
from config import *
from utils import generate_BIDS_path, get_dataframe_from_metadata, normalize_patch, load_lesions

from confluent_split import load_patient_split_lesions
import cv2
import pandas as pd

Loading configuration...
Configuration loaded successfully!
_____________________________



In [2]:
# Key into SPLIT_LESIONS_METADATA selecting the folder name, pipeline and suffix of the
# generated files; it is also written as the pipeline "version" in dataset_description.json.
VERSION = "annotated_7T_rimpos"
# Minimum size (in voxels) a segmented lesion that does not overlap the experts' annotations
# must have to be recorded by match_lesions_with_GT.
MIN_VOLUME = 45

In [3]:
def create_folder_for_lesions(dataset_id, pat, der_folder, pipeline, session=1):
    """Make sure the derivatives folder for one patient/session exists and return it.

    Three things are created when missing:
      1. ``<dataset root>/derivatives/<der_folder>``
      2. ``dataset_description.json`` inside that folder
      3. the ``sub-XXX/ses-YY`` sub-folder for the given patient and session

    Args:
        dataset_id: key/index into ``AVAILABLE_DATASETS_ROOTS``.
        pat: patient number (int, formatted as ``sub-{pat:03d}``).
        der_folder: name of the derivatives folder.
        pipeline: pipeline name stored in the description file.
        session: session number (int, formatted as ``ses-{session:02d}``).

    Returns:
        Path of the patient/session folder inside the derivatives folder.

    Raises:
        OSError: if a folder or the description file cannot be created
            (e.g. missing permissions).
    """
    der_path = os.path.join(AVAILABLE_DATASETS_ROOTS[dataset_id], "derivatives", der_folder)
    if not os.path.exists(der_path):
        # exist_ok=True keeps this safe when several processes pass the check above at
        # the same time, while genuine failures are no longer silently swallowed.
        os.makedirs(der_path, exist_ok=True)
        print(f"[INFO] Derivatives folder for '{der_folder}' successfully created.")

    # Description of the derivatives, written only once.
    dataset_description_path = os.path.join(der_path, "dataset_description.json")
    if not os.path.exists(dataset_description_path):
        descriptor = {
            "Name": der_folder,
            "BIDSVersion": BIDS_VERSION,
            "PipelineDescription": {
                "Name": pipeline,
                "version": VERSION,
            }
        }
        with open(dataset_description_path, "w") as outfile:
            json.dump(descriptor, outfile)
        print(f"[INFO] Description file for '{der_folder}' successfully created.")

    # Folder that will hold the files generated for this patient/session.
    folder = os.path.join(der_path, f"sub-{pat:03d}", f"ses-{session:02d}")
    os.makedirs(folder, exist_ok=True)

    return folder
    

In [4]:
def match_lesions_with_GT(dataset_id, pat, replace=False):
    """Build and save the lesion metadata table of one patient for ``VERSION``.

    Connected components of the experts' annotation volume are recorded as Rim+
    lesions (ids ``1000 + label``). Connected components of the segmentation that
    do not overlap any annotated voxel and contain at least ``MIN_VOLUME`` voxels
    are recorded as Rim- lesions (ids ``2000 + running counter``). The table is
    written as CSV to the path given by ``generate_BIDS_path``.

    Args:
        dataset_id: key/index into ``AVAILABLE_DATASETS``.
        pat: patient number (int).
        replace: when False, a patient whose CSV already exists is skipped.

    Returns:
        The saved ``pandas.DataFrame``, or ``None`` when the patient is skipped
        (CSV already present, or annotations/segmentation not available).
    """
    version_meta = SPLIT_LESIONS_METADATA[VERSION]
    folder_name, pipeline = version_meta["folder_name"], version_meta["pipeline"]
    # Called for its side effect only: it creates the derivatives folder structure.
    create_folder_for_lesions(dataset_id, pat, folder_name, pipeline)

    # Loading of paths.
    dataset = AVAILABLE_DATASETS[dataset_id]
    subject = f"{pat:03d}"
    rimpos_annotations_paths = dataset.get(return_type="filename", subject=subject, **CONTRASTS["EXPERTS_ANNOTATIONS"])
    segmentations_paths = dataset.get(return_type="filename", subject=subject, **CONTRASTS["SEGMENTATION"])
    meta_lesions_path = generate_BIDS_path(dataset_id, subject=subject, scope=pipeline, suffix=version_meta["suffix"], acquisition=None, extension="csv")

    if os.path.exists(meta_lesions_path) and not replace:
        print(f"Patient {dataset_id}.{pat} skipped because metadata of split lesions for version {VERSION} already exists.")
        return

    if len(rimpos_annotations_paths) > 0:
        gt_lesions = nib.load(rimpos_annotations_paths[0]).get_fdata()
    else:
        print(f"[{dataset_id}-{pat}] Experts annotations not available.")
        return

    if len(segmentations_paths) > 0:
        seg_lesions = nib.load(segmentations_paths[0]).get_fdata()
    else:
        print(f"[{dataset_id}-{pat}] Segmentation not available.")
        return

    # where we will append all lesions
    result_data = []

    # RIM+ matching: one row per connected component of the experts' annotations.
    # ndimage.label numbers components 1..num_rimpos, so iterating that range does
    # not depend on a background (0) voxel being present in the volume.
    gt_labels, num_rimpos = ndimage.label(gt_lesions, structure=gbs(3, 2))
    for lab in range(1, num_rimpos + 1):
        lesion_mask = gt_labels == lab
        c = [int(el) for el in ndimage.center_of_mass(lesion_mask)]
        vol = int(lesion_mask.sum())
        result_data.append((dataset_id, pat, 1000 + lab, c[0], c[1], c[2], 100, vol, True))

    # RIM- matching: segmented components (default connectivity) with no annotated voxel.
    seg_labels, num_seg = ndimage.label(seg_lesions)
    num_rimneg = 0
    for lab in range(1, num_seg + 1):
        lesion_mask = seg_labels == lab
        # Only the voxels inside the lesion are inspected for an overlap.
        if (gt_lesions[lesion_mask] != 0).any():
            continue  # we already included it as Rim+
        # Only Rim- lesions are filtered by volume.
        if lesion_mask.sum() >= MIN_VOLUME:
            c = [int(el) for el in ndimage.center_of_mass(lesion_mask)]
            result_data.append((dataset_id, pat, 2000 + num_rimneg, c[0], c[1], c[2], "", "", True))
            num_rimneg += 1

    # *real* column is meant for those cases where data augmentation is applied in this phase => "False" to avoid using them in testing
    df = pd.DataFrame(result_data, columns=["dataset_id", "patient", "lesion", "x", "y", "z", "percentage_rims", "voxels_rims", "real"])
    df.to_csv(meta_lesions_path, index=False)
    # Summary "<Rim+ count>/<Rim- count>", counted directly instead of derived from the ids.
    print(f'{dataset_id}- Pat {pat:02d}: {num_rimpos}/{num_rimneg}')
    return df


In [5]:
# RUN: (re)generate the lesion metadata CSV of every subject of the NIH7T dataset.
# replace=True overwrites CSVs that already exist. `df` is rebound on every iteration,
# so after the loop it only holds the result of the last patient (None if it was skipped).
for pat in tqdm(DATASET_NIH7T.get_subjects()):
    df = match_lesions_with_GT(DATASET_NIH7T_ID, int(pat), replace=True)

2- Pat 20: 7/18


In [5]:
def read_all_patients_metadata():
    """Load the lesion metadata CSVs of every patient of every available dataset.

    For each subject, the CSV matching the pipeline and suffix configured for
    ``VERSION`` in ``SPLIT_LESIONS_METADATA`` is read. Subjects for which the
    query does not return exactly one file are ignored.

    Returns:
        A ``pandas.DataFrame`` with all the tables concatenated (original row
        indices are kept). When no CSV is found, a warning is printed and an
        empty DataFrame with the expected columns is returned.
    """
    version_meta = SPLIT_LESIONS_METADATA[VERSION]
    to_concat = []
    for dataset_id in range(len(AVAILABLE_DATASETS)):
        dataset = AVAILABLE_DATASETS[dataset_id]
        for pat in dataset.get_subjects():
            paths = dataset.get(return_type="filename", subject=f"{pat}", scope=version_meta["pipeline"], suffix=version_meta["suffix"], acquisition=None, extension="csv")
            if len(paths) == 1:
                to_concat.append(pd.read_csv(paths[0]))

    if not to_concat:
        # pd.concat raises "No objects to concatenate" on an empty list.
        print(f"[WARN] No lesion metadata found for version '{VERSION}'.")
        return pd.DataFrame(columns=["dataset_id", "patient", "lesion", "x", "y", "z", "percentage_rims", "voxels_rims", "real"])

    df = pd.concat(to_concat)
    return df

In [29]:
# Gather the lesion metadata of all patients (all available datasets) in a single DataFrame.
df = read_all_patients_metadata()

In [37]:
# Print, for each patient, the number of lesion rows found in the metadata table.
for pat, grouped in df.groupby("patient"):
    print(f"{pat} - {len(grouped.index)}")

1 - 1
2 - 3
3 - 22
4 - 6
5 - 11
6 - 12
7 - 4
8 - 5
9 - 5
10 - 1
11 - 4
12 - 9
13 - 27
14 - 41
15 - 21
16 - 3
17 - 8
18 - 18
19 - 4
20 - 7


### Thresholding of PMaps

In [3]:
# Voxels whose PMap value is >= TH are set to 1 in the segmentation, the rest to 0.
TH = 0.3

dataset = DATASET_NIH7T
# Currently restricted to one subject; iterate over dataset.get_subjects() to process all.
for patient in ("010",):
    print(f"Generating segmentation for patient {patient}...")
    pmap_path = dataset.get(return_type="filename", subject=f"{patient}", **CONTRASTS["PMAP"])
    if len(pmap_path) > 0:
        pmap_path = pmap_path[0]

        im = nib.load(pmap_path)
        image = im.get_fdata()

        # Binary mask stored with the same dtype as the loaded PMap data.
        thresholded = (image >= TH).astype(image.dtype)

        # The segmentation reuses the affine and header of the PMap.
        new_image = nib.Nifti1Image(thresholded, im.affine, im.header)

        segmentation_path = generate_BIDS_path(DATASET_NIH7T_ID, subject=patient, **CONTRASTS["SEGMENTATION"])
        nib.save(new_image, segmentation_path)
    else:
        # No PMap file found for this subject: report it and move on.
        print(f"PROBLEM: {patient}")

Generating segmentation for patient 010...
