# Libraries

In [1]:
import os
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import SimpleITK as sitk

In [3]:
# Resolve the folders this notebook relies on, assuming it is launched from
# <repo>/notebooks/<subfolder>.
subfolder_path = Path.cwd()
notebooks_path = subfolder_path.parent
repo_path = notebooks_path.parent
os.chdir(str(subfolder_path))
#print current working directory
print(f'Current folder is: {os.getcwd()}\n')
thispath = Path.cwd().resolve()

# Put the repository root first on sys.path (only if it is not already there)
# so that project-local packages can be imported from this notebook.
import sys
if sys.path[0] != str(thispath.parent.parent):
    sys.path.insert(0, str(thispath.parent.parent))

#Import paths and patients classes
from notebooks.info import path_label

Current folder is: /home/ricardino/Documents/MAIA/tercer_semestre/Extra_activities/INCan/Added_value_feature_uncertainty/notebooks/initials



# Functions and classes

In [4]:
# Valid patients in the dataset: IDs 1..45, skipping the IDs listed below.
pat_list = [
    pat_id
    for pat_id in range(1, 46)
    if pat_id not in (1, 6, 10, 13, 17, 18, 19, 22, 23, 27, 30, 32)
]

## Metadata creation

In [5]:
def get_PathsAndNums():
    """Returns SET image files and their patient numbers, NOT sorted

    Walks ``<repo_path>/data/images/SET`` recursively. The patient number is
    read from characters 4-5 of the file name with any underscore removed
    (e.g. ``Pat_2_SET_...`` -> 2, ``Pat_14_SET_...`` -> 14), so only one- or
    two-digit patient numbers are parsed correctly.

    Returns:
        2 lists: files (paths relative to ``repo_path``) and patient numbers,
        in matching order
    """
    folder_path = Path(repo_path) / 'data' / 'images' / 'SET'
    files, pat_num = [], []
    # One single directory walk fills both lists, so they always stay aligned
    for entry in folder_path.glob('**/*'):
        if not entry.is_file():
            continue
        files.append(entry.relative_to(repo_path))
        pat_num.append(int(entry.name[4:6].replace('_', ''))) #Get patient number

    return files, pat_num

def get_PathsAndNums_seg(radiologist, time, stype):
    """Returns segmentation files and their patient numbers, NOT sorted

    Looks (non-recursively) in
    ``<repo_path>/data/segmentations/<radiologist>_<time>_seg`` for files named
    ``*_<stype>_<radiologist>.seg.nrrd``. The patient number is read from the
    first two characters of the file name with any underscore removed
    (e.g. ``2_G_L.seg.nrrd`` -> 2, ``14_G_L.seg.nrrd`` -> 14).

    Args:
        radiologist (str): radiologist identifier used in folder and file names
        time (str): annotation round used in the folder name
        stype (str): segmentation type used in the file name

    Returns:
        2 lists: files (paths relative to ``repo_path``) and patient numbers,
        in matching order
    """
    folder_path = Path(repo_path) / 'data' / 'segmentations' / f'{radiologist}_{time}_seg'
    files, pat_num = [], []
    # One single glob fills both lists, so they always stay aligned
    for entry in folder_path.glob(f'*_{stype}_{radiologist}.seg.nrrd'):
        if not entry.is_file():
            continue
        files.append(entry.relative_to(repo_path))
        pat_num.append(int(entry.name[:2].replace('_', ''))) #Get patient number

    return files, pat_num

def get_pathsAndNums_preSet(sequence, t = 't1'):
    """Returns pre-SET image files and their patient numbers, NOT sorted

    Looks (non-recursively) in ``<repo_path>/data/images/pre-SET`` for
    ``*_SMC.tif`` when ``sequence == 'SMC'`` and for ``*_<sequence>_<t>.tif``
    otherwise. The patient number is read from characters 4-5 of the file name
    with any underscore removed (e.g. ``Pat_02_RCC_SMC.tif`` -> 2).

    Args:
        sequence (str): sequence name used in the file name ('SMC' has no time suffix)
        t (str): time suffix, ignored when sequence is 'SMC'

    Returns:
        2 lists: files (paths relative to ``repo_path``) and patient numbers,
        in matching order
    """
    folder_path = Path(repo_path) / 'data' / 'images' / 'pre-SET'
    # SMC files carry no time suffix; every other sequence does
    pattern = f'*_{sequence}.tif' if sequence == 'SMC' else f'*_{sequence}_{t}.tif'
    files, pat_num = [], []
    # One single glob fills both lists, so they always stay aligned
    for entry in folder_path.glob(pattern):
        if not entry.is_file():
            continue
        files.append(entry.relative_to(repo_path))
        pat_num.append(int(entry.name[4:6].replace('_', ''))) #Get patient number

    return files, pat_num
    

def info_SET_dataframe(pat_list):
    """returns dataframe with the SET image path of every requested patient

    Args:
        pat_list (list): patient numbers to keep

    Returns:
        dataframe: columns 'path_SET' and 'pat_num', sorted by patient number,
        restricted to ``pat_list`` and with a fresh 0..n-1 index
    """
    paths_list, pat_nums = get_PathsAndNums() #Not sorted paths and nums
    df = pd.DataFrame({ #save in frame
        'path_SET': paths_list,
        'pat_num': pat_nums
    })
    df = df.sort_values('pat_num') #Sort rows by patient number (numeric order)
    df = df[df['pat_num'].isin(pat_list)].reset_index(drop=True) #Filter by patient list

    return df

In [6]:
def add_segColumns(df, radiologist, time, stype):
    """get dataframe with column of segmentation paths added

    The new column is named ``<radiologist>_<time>_<stype>``. Each row receives
    the segmentation whose patient number equals the row's 'pat_num'; patients
    without a matching file get NaN instead of shifting the rows below them.
    The input dataframe is modified in place and also returned.

    Args:
        df (dataframe): input dataframe, must contain a 'pat_num' column
        radiologist (str): radiologist first letter
        time (str): first or second time (1,2)
        stype (str): General (G) or focal (F)

    Returns:
        dataframe: output dataframe
    """
    paths_list, pat_nums = get_PathsAndNums_seg(radiologist, time, stype)
    column_name = f'{radiologist}_{time}_{stype}'
    # Align on the patient number rather than on row position, so the result
    # does not depend on file order, on a global patient list or on every
    # patient having a segmentation.
    seg_by_patient = dict(zip(pat_nums, paths_list))
    df[column_name] = df['pat_num'].map(seg_by_patient) #add to main df

    return df

def add_preSETColumns(df, sequence, t='t1'):
    """get dataframe adding the column of the pre-set sequence

    The new column is named 'path_SMC' when ``sequence == 'SMC'`` and
    ``path_CMC_<t>`` otherwise. Each row receives the image whose patient
    number equals the row's 'pat_num'; patients without a matching file get
    NaN instead of shifting the rows below them. The input dataframe is
    modified in place and also returned.

    Args:
        df (dataframe): input dataframe, must contain a 'pat_num' column
        sequence (str): contrast or no contrast image
        t (str): time of the CMC sequence (ignored for SMC)

    Returns:
        dataframe: output dataframe
    """
    paths_list, pat_nums = get_pathsAndNums_preSet(sequence, t)
    column_name = 'path_SMC' if sequence=='SMC' else f'path_CMC_{t}'
    # Align on the patient number rather than on row position, so the result
    # does not depend on file order, on a global patient list or on every
    # patient having an image.
    image_by_patient = dict(zip(pat_nums, paths_list))
    df[column_name] = df['pat_num'].map(image_by_patient) #add to main df

    return df
    

In [7]:
def get_metadata(pat_list, save = False):
    """Go through all radiologists, times and ROI types to get all metadata

    Builds one row per patient with the SET image path, one segmentation
    column per (radiologist, time, ROI type) combination, the SMC pre-SET
    path and the four CMC pre-SET paths (t1..t4), in that column order.

    Args:
        pat_list (list): patient numbers to include
        save (bool): when True, also write the table to
            ``<repo_path>/data/metadata.csv`` (without the index)

    Returns:
        dataframe: the metadata table
    """
    #Get SET images paths and pat num
    df = info_SET_dataframe(pat_list)

    #Add segmentation paths to dataframe
    for radiologist in ('L', 'V', 'M'):
        for time in ('1', '2'):
            for stype in ('G', 'F'):
                df = add_segColumns(df, radiologist, time, stype)

    #Add pre-SET paths to dataframe: SMC has a single image, CMC one per time
    df = add_preSETColumns(df, 'SMC')
    for t in ('t1', 't2', 't3', 't4'):
        df = add_preSETColumns(df, 'CMC', t)

    #Save metadata as csv
    if save:
        df.to_csv(str(repo_path) + '/data/metadata.csv', index=False)

    return df

# IMP

## Get primary metadata
From the list of permissible patients

In [7]:
# Build the metadata table for the valid patients without writing the csv
# (save=False); the bare `meta` on the last line displays it in the notebook.
meta = get_metadata(pat_list, save = False)
meta

Unnamed: 0,path_SET,pat_num,L_1_G,L_1_F,L_2_G,L_2_F,V_1_G,V_1_F,V_2_G,V_2_F,M_1_G,M_1_F,M_2_G,M_2_F,path_SMC,path_CMC_t1,path_CMC_t2,path_CMC_t3,path_CMC_t4
0,data/images/SET/Pat_2_SET_SMC_to_CMC_1min.tif,2,data/segmentations/L_1_seg/2_G_L.seg.nrrd,data/segmentations/L_1_seg/2_F_L.seg.nrrd,data/segmentations/L_2_seg/2_G_L.seg.nrrd,data/segmentations/L_2_seg/2_F_L.seg.nrrd,data/segmentations/V_1_seg/2_G_V.seg.nrrd,data/segmentations/V_1_seg/2_F_V.seg.nrrd,data/segmentations/V_2_seg/2_G_V.seg.nrrd,data/segmentations/V_2_seg/2_F_V.seg.nrrd,data/segmentations/M_1_seg/2_G_M.seg.nrrd,data/segmentations/M_1_seg/2_F_M.seg.nrrd,data/segmentations/M_2_seg/2_G_M.seg.nrrd,data/segmentations/M_2_seg/2_F_M.seg.nrrd,data/images/pre-SET/Pat_02_RCC_SMC.tif,data/images/pre-SET/Pat_02_RCC_CMC_t1.tif,data/images/pre-SET/Pat_02_RCC_CMC_t2.tif,data/images/pre-SET/Pat_02_RCC_CMC_t3.tif,data/images/pre-SET/Pat_02_RCC_CMC_t4.tif
1,data/images/SET/Pat_3_SET_SMC_to_CMC_1min.tif,3,data/segmentations/L_1_seg/3_G_L.seg.nrrd,data/segmentations/L_1_seg/3_F_L.seg.nrrd,data/segmentations/L_2_seg/3_G_L.seg.nrrd,data/segmentations/L_2_seg/3_F_L.seg.nrrd,data/segmentations/V_1_seg/3_G_V.seg.nrrd,data/segmentations/V_1_seg/3_F_V.seg.nrrd,data/segmentations/V_2_seg/3_G_V.seg.nrrd,data/segmentations/V_2_seg/3_F_V.seg.nrrd,data/segmentations/M_1_seg/3_G_M.seg.nrrd,data/segmentations/M_1_seg/3_F_M.seg.nrrd,data/segmentations/M_2_seg/3_G_M.seg.nrrd,data/segmentations/M_2_seg/3_F_M.seg.nrrd,data/images/pre-SET/Pat_03_LCC_SMC.tif,data/images/pre-SET/Pat_03_LCC_CMC_t1.tif,data/images/pre-SET/Pat_03_LCC_CMC_t2.tif,data/images/pre-SET/Pat_03_LCC_CMC_t3.tif,data/images/pre-SET/Pat_03_LCC_CMC_t4.tif
2,data/images/SET/Pat_4_SET_SMC_to_CMC_1min.tif,4,data/segmentations/L_1_seg/4_G_L.seg.nrrd,data/segmentations/L_1_seg/4_F_L.seg.nrrd,data/segmentations/L_2_seg/4_G_L.seg.nrrd,data/segmentations/L_2_seg/4_F_L.seg.nrrd,data/segmentations/V_1_seg/4_G_V.seg.nrrd,data/segmentations/V_1_seg/4_F_V.seg.nrrd,data/segmentations/V_2_seg/4_G_V.seg.nrrd,data/segmentations/V_2_seg/4_F_V.seg.nrrd,data/segmentations/M_1_seg/4_G_M.seg.nrrd,data/segmentations/M_1_seg/4_F_M.seg.nrrd,data/segmentations/M_2_seg/4_G_M.seg.nrrd,data/segmentations/M_2_seg/4_F_M.seg.nrrd,data/images/pre-SET/Pat_04_LCC_SMC.tif,data/images/pre-SET/Pat_04_LCC_CMC_t1.tif,data/images/pre-SET/Pat_04_LCC_CMC_t2.tif,data/images/pre-SET/Pat_04_LCC_CMC_t3.tif,data/images/pre-SET/Pat_04_LCC_CMC_t4.tif
3,data/images/SET/Pat_5_SET_SMC_to_CMC_1min.tif,5,data/segmentations/L_1_seg/5_G_L.seg.nrrd,data/segmentations/L_1_seg/5_F_L.seg.nrrd,data/segmentations/L_2_seg/5_G_L.seg.nrrd,data/segmentations/L_2_seg/5_F_L.seg.nrrd,data/segmentations/V_1_seg/5_G_V.seg.nrrd,data/segmentations/V_1_seg/5_F_V.seg.nrrd,data/segmentations/V_2_seg/5_G_V.seg.nrrd,data/segmentations/V_2_seg/5_F_V.seg.nrrd,data/segmentations/M_1_seg/5_G_M.seg.nrrd,data/segmentations/M_1_seg/5_F_M.seg.nrrd,data/segmentations/M_2_seg/5_G_M.seg.nrrd,data/segmentations/M_2_seg/5_F_M.seg.nrrd,data/images/pre-SET/Pat_05_RCC_SMC.tif,data/images/pre-SET/Pat_05_RCC_CMC_t1.tif,data/images/pre-SET/Pat_05_RCC_CMC_t2.tif,data/images/pre-SET/Pat_05_RCC_CMC_t3.tif,data/images/pre-SET/Pat_05_RCC_CMC_t4.tif
4,data/images/SET/Pat_7_SET_SMC_to_CMC_1min.tif,7,data/segmentations/L_1_seg/7_G_L.seg.nrrd,data/segmentations/L_1_seg/7_F_L.seg.nrrd,data/segmentations/L_2_seg/7_G_L.seg.nrrd,data/segmentations/L_2_seg/7_F_L.seg.nrrd,data/segmentations/V_1_seg/7_G_V.seg.nrrd,data/segmentations/V_1_seg/7_F_V.seg.nrrd,data/segmentations/V_2_seg/7_G_V.seg.nrrd,data/segmentations/V_2_seg/7_F_V.seg.nrrd,data/segmentations/M_1_seg/7_G_M.seg.nrrd,data/segmentations/M_1_seg/7_F_M.seg.nrrd,data/segmentations/M_2_seg/7_G_M.seg.nrrd,data/segmentations/M_2_seg/7_F_M.seg.nrrd,data/images/pre-SET/Pat_07_RCC_SMC.tif,data/images/pre-SET/Pat_07_RCC_CMC_t1.tif,data/images/pre-SET/Pat_07_RCC_CMC_t2.tif,data/images/pre-SET/Pat_07_RCC_CMC_t3.tif,data/images/pre-SET/Pat_07_RCC_CMC_t4.tif
5,data/images/SET/Pat_8_SET_SMC_to_CMC_1min.tif,8,data/segmentations/L_1_seg/8_G_L.seg.nrrd,data/segmentations/L_1_seg/8_F_L.seg.nrrd,data/segmentations/L_2_seg/8_G_L.seg.nrrd,data/segmentations/L_2_seg/8_F_L.seg.nrrd,data/segmentations/V_1_seg/8_G_V.seg.nrrd,data/segmentations/V_1_seg/8_F_V.seg.nrrd,data/segmentations/V_2_seg/8_G_V.seg.nrrd,data/segmentations/V_2_seg/8_F_V.seg.nrrd,data/segmentations/M_1_seg/8_G_M.seg.nrrd,data/segmentations/M_1_seg/8_F_M.seg.nrrd,data/segmentations/M_2_seg/8_G_M.seg.nrrd,data/segmentations/M_2_seg/8_F_M.seg.nrrd,data/images/pre-SET/Pat_08_LCC_SMC.tif,data/images/pre-SET/Pat_08_LCC_CMC_t1.tif,data/images/pre-SET/Pat_08_LCC_CMC_t2.tif,data/images/pre-SET/Pat_08_LCC_CMC_t3.tif,data/images/pre-SET/Pat_08_LCC_CMC_t4.tif
6,data/images/SET/Pat_9_SET_SMC_to_CMC_1min.tif,9,data/segmentations/L_1_seg/9_G_L.seg.nrrd,data/segmentations/L_1_seg/9_F_L.seg.nrrd,data/segmentations/L_2_seg/9_G_L.seg.nrrd,data/segmentations/L_2_seg/9_F_L.seg.nrrd,data/segmentations/V_1_seg/9_G_V.seg.nrrd,data/segmentations/V_1_seg/9_F_V.seg.nrrd,data/segmentations/V_2_seg/9_G_V.seg.nrrd,data/segmentations/V_2_seg/9_F_V.seg.nrrd,data/segmentations/M_1_seg/9_G_M.seg.nrrd,data/segmentations/M_1_seg/9_F_M.seg.nrrd,data/segmentations/M_2_seg/9_G_M.seg.nrrd,data/segmentations/M_2_seg/9_F_M.seg.nrrd,data/images/pre-SET/Pat_09_RCC_SMC.tif,data/images/pre-SET/Pat_09_RCC_CMC_t1.tif,data/images/pre-SET/Pat_09_RCC_CMC_t2.tif,data/images/pre-SET/Pat_09_RCC_CMC_t3.tif,data/images/pre-SET/Pat_09_RCC_CMC_t4.tif
7,data/images/SET/Pat_11_SET_SMC_to_CMC_1min.tif,11,data/segmentations/L_1_seg/11_G_L.seg.nrrd,data/segmentations/L_1_seg/11_F_L.seg.nrrd,data/segmentations/L_2_seg/11_G_L.seg.nrrd,data/segmentations/L_2_seg/11_F_L.seg.nrrd,data/segmentations/V_1_seg/11_G_V.seg.nrrd,data/segmentations/V_1_seg/11_F_V.seg.nrrd,data/segmentations/V_2_seg/11_G_V.seg.nrrd,data/segmentations/V_2_seg/11_F_V.seg.nrrd,data/segmentations/M_1_seg/11_G_M.seg.nrrd,data/segmentations/M_1_seg/11_F_M.seg.nrrd,data/segmentations/M_2_seg/11_G_M.seg.nrrd,data/segmentations/M_2_seg/11_F_M.seg.nrrd,data/images/pre-SET/Pat_11_LCC_SMC.tif,data/images/pre-SET/Pat_11_LCC_CMC_t1.tif,data/images/pre-SET/Pat_11_LCC_CMC_t2.tif,data/images/pre-SET/Pat_11_LCC_CMC_t3.tif,data/images/pre-SET/Pat_11_LCC_CMC_t4.tif
8,data/images/SET/Pat_12_SET_SMC_to_CMC_1min.tif,12,data/segmentations/L_1_seg/12_G_L.seg.nrrd,data/segmentations/L_1_seg/12_F_L.seg.nrrd,data/segmentations/L_2_seg/12_G_L.seg.nrrd,data/segmentations/L_2_seg/12_F_L.seg.nrrd,data/segmentations/V_1_seg/12_G_V.seg.nrrd,data/segmentations/V_1_seg/12_F_V.seg.nrrd,data/segmentations/V_2_seg/12_G_V.seg.nrrd,data/segmentations/V_2_seg/12_F_V.seg.nrrd,data/segmentations/M_1_seg/12_G_M.seg.nrrd,data/segmentations/M_1_seg/12_F_M.seg.nrrd,data/segmentations/M_2_seg/12_G_M.seg.nrrd,data/segmentations/M_2_seg/12_F_M.seg.nrrd,data/images/pre-SET/Pat_12_LCC_SMC.tif,data/images/pre-SET/Pat_12_LCC_CMC_t1.tif,data/images/pre-SET/Pat_12_LCC_CMC_t2.tif,data/images/pre-SET/Pat_12_LCC_CMC_t3.tif,data/images/pre-SET/Pat_12_LCC_CMC_t4.tif
9,data/images/SET/Pat_14_SET_SMC_to_CMC_1min.tif,14,data/segmentations/L_1_seg/14_G_L.seg.nrrd,data/segmentations/L_1_seg/14_F_L.seg.nrrd,data/segmentations/L_2_seg/14_G_L.seg.nrrd,data/segmentations/L_2_seg/14_F_L.seg.nrrd,data/segmentations/V_1_seg/14_G_V.seg.nrrd,data/segmentations/V_1_seg/14_F_V.seg.nrrd,data/segmentations/V_2_seg/14_G_V.seg.nrrd,data/segmentations/V_2_seg/14_F_V.seg.nrrd,data/segmentations/M_1_seg/14_G_M.seg.nrrd,data/segmentations/M_1_seg/14_F_M.seg.nrrd,data/segmentations/M_2_seg/14_G_M.seg.nrrd,data/segmentations/M_2_seg/14_F_M.seg.nrrd,data/images/pre-SET/Pat_14_RCC_SMC.tif,data/images/pre-SET/Pat_14_RCC_CMC_t1.tif,data/images/pre-SET/Pat_14_RCC_CMC_t2.tif,data/images/pre-SET/Pat_14_RCC_CMC_t3.tif,data/images/pre-SET/Pat_14_RCC_CMC_t4.tif


## Testing classes for accessing information using meta

In [11]:
#Access to general info of the whole dataset
# path_label is imported from notebooks.info (not shown in this notebook).
# Judging by the output below, path(sequence='SMC') yields the pre-SET SMC
# image paths relative to the repository root -- TODO confirm against the class.
info = path_label()
paths = info.path(sequence='SMC')
paths

['data/images/pre-SET/Pat_02_RCC_SMC.tif',
 'data/images/pre-SET/Pat_03_LCC_SMC.tif',
 'data/images/pre-SET/Pat_04_LCC_SMC.tif',
 'data/images/pre-SET/Pat_05_RCC_SMC.tif',
 'data/images/pre-SET/Pat_07_RCC_SMC.tif',
 'data/images/pre-SET/Pat_08_LCC_SMC.tif',
 'data/images/pre-SET/Pat_09_RCC_SMC.tif',
 'data/images/pre-SET/Pat_11_LCC_SMC.tif',
 'data/images/pre-SET/Pat_12_LCC_SMC.tif',
 'data/images/pre-SET/Pat_14_RCC_SMC.tif',
 'data/images/pre-SET/Pat_15_LCC_SMC.tif',
 'data/images/pre-SET/Pat_16_RCC_SMC.tif',
 'data/images/pre-SET/Pat_20_LCC_SMC.tif',
 'data/images/pre-SET/Pat_21_LCC_SMC.tif',
 'data/images/pre-SET/Pat_24_LCC_SMC.tif',
 'data/images/pre-SET/Pat_25_LCC_SMC.tif',
 'data/images/pre-SET/Pat_26_LCC_SMC.tif',
 'data/images/pre-SET/Pat_28_RCC_SMC.tif',
 'data/images/pre-SET/Pat_29_LCC_SMC.tif',
 'data/images/pre-SET/Pat_31_LCC_SMC.tif',
 'data/images/pre-SET/Pat_33_LCC_SMC.tif',
 'data/images/pre-SET/Pat_34_RCC_SMC.tif',
 'data/images/pre-SET/Pat_35_LCC_SMC.tif',
 'data/imag

In [12]:
#examples of how to access the segmentation
# Arguments presumably mirror add_segColumns: radiologist 'L', time '2',
# segmentation type 'F' (focal) -- the printed paths below are consistent
# with that (L_2_seg/<pat>_F_L.seg.nrrd).
seg_paths = info.seg('L', '2', 'F')
print(seg_paths)
print(len(seg_paths))

['data/segmentations/L_2_seg/2_F_L.seg.nrrd', 'data/segmentations/L_2_seg/3_F_L.seg.nrrd', 'data/segmentations/L_2_seg/4_F_L.seg.nrrd', 'data/segmentations/L_2_seg/5_F_L.seg.nrrd', 'data/segmentations/L_2_seg/7_F_L.seg.nrrd', 'data/segmentations/L_2_seg/8_F_L.seg.nrrd', 'data/segmentations/L_2_seg/9_F_L.seg.nrrd', 'data/segmentations/L_2_seg/11_F_L.seg.nrrd', 'data/segmentations/L_2_seg/12_F_L.seg.nrrd', 'data/segmentations/L_2_seg/14_F_L.seg.nrrd', 'data/segmentations/L_2_seg/15_F_L.seg.nrrd', 'data/segmentations/L_2_seg/16_F_L.seg.nrrd', 'data/segmentations/L_2_seg/20_F_L.seg.nrrd', 'data/segmentations/L_2_seg/21_F_L.seg.nrrd', 'data/segmentations/L_2_seg/24_F_L.seg.nrrd', 'data/segmentations/L_2_seg/25_F_L.seg.nrrd', 'data/segmentations/L_2_seg/26_F_L.seg.nrrd', 'data/segmentations/L_2_seg/28_F_L.seg.nrrd', 'data/segmentations/L_2_seg/29_F_L.seg.nrrd', 'data/segmentations/L_2_seg/31_F_L.seg.nrrd', 'data/segmentations/L_2_seg/33_F_L.seg.nrrd', 'data/segmentations/L_2_seg/34_F_L.seg.n

## Visualize

In [10]:
# Absolute path of the first SET image (info.paths_SET holds paths relative
# to the repository root, presumably as strings -- TODO confirm).
im_path = str(repo_path) + '/' +  info.paths_SET[0]
#read tiff image
im = sitk.ReadImage(im_path)
# A SimpleITK Image has no `pixel_array` attribute; convert it to a NumPy
# array explicitly before handing it to matplotlib.
# NOTE(review): assumes a 2D single-channel image -- confirm for these tifs.
im_array = sitk.GetArrayFromImage(im)
#show images
fig, axs = plt.subplots(1, 2, figsize=(10, 10))
axs[0].imshow(im_array, cmap='gray')
axs[1].imshow(im_array, cmap='gray')