# Libraries

In [1]:
import os
from pathlib import Path
from glob import glob
import pandas as pd
from pydicom import dcmread
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Resolve the folder layout: <repo>/notebooks/<subfolder>/ (this notebook lives in <subfolder>)
subfolder_path = Path.cwd()
notebooks_path = subfolder_path.parent
repo_path = notebooks_path.parent
os.chdir(str(subfolder_path))

# Report the working directory
print(f'Current folder is: {os.getcwd()}\n')
thispath = Path.cwd().resolve()

import sys

# Make the repository root the first entry of sys.path (unless it already is)
# so that the project's packages can be imported from this notebook.
if sys.path[0] != str(thispath.parent.parent):
    sys.path.insert(0, str(thispath.parent.parent))

#Import paths and patients classes
from notebooks.info import path_label

Current folder is: /home/ricardino/Documents/MAIA/tercer_semestre/Extra_activities/INCan/Added_value_feature_uncertainty/notebooks/initials



# Functions and classes

In [3]:
# Valid patients in the dataset: IDs 1..45 minus the excluded ones
pat_list = [
    pat_id
    for pat_id in range(1, 46)
    if pat_id not in {1, 6, 10, 13, 17, 18, 19, 22, 23, 27, 30, 32}
]

## Metadata creation

In [4]:
def get_PathsAndNums():
    """Collect every image file under ``<repo_path>/data/images/SET`` with its patient number.

    The folder is searched recursively. The patient number is read from
    characters 4-5 of the file name with any underscore removed.
    NOTE(review): this assumes names shaped like ``Pat_2_...`` / ``Pat_12_...``
    (one- or two-digit IDs) -- confirm against the actual data.

    Returns:
        tuple: ``(files, pat_num)`` -- paths relative to ``repo_path`` and the
        matching patient numbers as ints, in directory-traversal order (NOT sorted).

    Raises:
        ValueError: if those characters of a file name do not form an integer.
    """
    folder_path = str(repo_path) + '/data/images/SET'
    # Walk the tree once: both lists are derived from the same sequence, so each
    # path is guaranteed to be paired with the number parsed from that very file.
    image_files = [x for x in Path(folder_path).glob('**/*') if x.is_file()]
    files = [x.relative_to(repo_path) for x in image_files]
    pat_num = [int(x.name[4:6].replace('_', '')) for x in image_files]

    return files, pat_num

def get_PathsAndNums_seg(radiologist, time, stype):
    """Collect the segmentation files of one radiologist/time/type with their patient numbers.

    Looks in ``<repo_path>/data/segmentations/{radiologist}_{time}_seg`` (not
    recursive) for files matching ``*_{stype}_{radiologist}.seg.nrrd``. The
    patient number is read from the first two characters of the file name with
    any underscore removed (e.g. ``2_F_L.seg.nrrd`` -> 2, ``11_F_L.seg.nrrd`` -> 11).

    Args:
        radiologist (str): radiologist identifier used in folder and file names.
        time (str): reading-session identifier used in the folder name.
        stype (str): segmentation type identifier used in the file name.

    Returns:
        tuple: ``(files, pat_num)`` -- paths relative to ``repo_path`` and the
        matching patient numbers as ints, in directory-listing order (NOT sorted).

    Raises:
        ValueError: if the leading characters of a file name do not form an integer.
    """
    folder_path = str(repo_path) + f'/data/segmentations/{radiologist}_{time}_seg'
    pattern = f'*_{stype}_{radiologist}.seg.nrrd'
    # List the folder once: both lists are derived from the same sequence, so each
    # path is guaranteed to be paired with the number parsed from that very file.
    seg_files = [x for x in Path(folder_path).glob(pattern) if x.is_file()]
    files = [x.relative_to(repo_path) for x in seg_files]
    pat_num = [int(x.name[:2].replace('_', '')) for x in seg_files]

    return files, pat_num

def info_SET_dataframe(pat_list):
    """Build the image metadata table restricted to the requested patients.

    Args:
        pat_list (list): patient numbers to keep.

    Returns:
        pandas.DataFrame: columns ``path_SET`` (image path) and ``pat_num``,
        ordered by patient number, with a fresh 0..n-1 index.
    """
    image_paths, patient_ids = get_PathsAndNums()  # unsorted paths and numbers
    table = pd.DataFrame({'path_SET': image_paths, 'pat_num': patient_ids})

    # Order rows by patient number, then keep only the requested patients
    ordered = table.sort_values('pat_num')
    selected = ordered[ordered['pat_num'].isin(pat_list)]

    return selected.reset_index(drop=True)

In [5]:
def add_segColumns(df, radiologist, time, stype):
    """Add a column of segmentation paths to the dataframe.

    The new column is named ``{radiologist}_{time}_{stype}``. Each row receives
    the segmentation path whose patient number equals that row's ``pat_num``;
    rows whose patient has no matching segmentation file get a missing value.
    Matching is done by patient number rather than by row position, so a
    missing segmentation cannot shift paths onto the wrong patient.

    Args:
        df (dataframe): input dataframe; must contain a ``pat_num`` column.
            It is modified in place and also returned.
        radiologist (str): radiologist first letter
        time (str): first or second time (1,2)
        stype (str): General (G) or focal (F)

    Returns:
        dataframe: the same dataframe with the new column added
    """
    paths_list, pat_nums = get_PathsAndNums_seg(radiologist, time, stype)
    column_name = f'{radiologist}_{time}_{stype}'

    # Lookup keyed by patient number; only patients present in df are used,
    # so no separate filtering against a global patient list is needed.
    seg_by_patient = dict(zip(pat_nums, paths_list))
    df[column_name] = df['pat_num'].map(seg_by_patient)

    return df
    

In [6]:
def get_metadata(pat_list, save = False):
    """Build the full metadata table: image paths plus one segmentation column
    per radiologist / time / ROI-type combination.

    Args:
        pat_list (list): patient numbers to include.
        save (bool): when True, also write the table to
            ``<repo_path>/data/metadata.csv`` (without the index).

    Returns:
        dataframe: image metadata with the twelve segmentation columns added.
    """
    # Every (radiologist, time, ROI type) combination, radiologist varying slowest
    combos = [
        (rad, session, roi)
        for rad in ['L', 'V', 'M']
        for session in ['1', '2']
        for roi in ['G', 'F']
    ]

    df = info_SET_dataframe(pat_list)
    for rad, session, roi in combos:
        df = add_segColumns(df, rad, session, roi)

    if save:
        df.to_csv(str(repo_path) + '/data/metadata.csv', index=False)

    return df

# IMP

## Get primary metadata
From the list of permissible patients

In [7]:
# Build the metadata table for the valid patients; save=True also writes it to <repo>/data/metadata.csv
meta = get_metadata(pat_list, save = True)

In [8]:
#Access to general info of the whole dataset
info = path_label()

In [22]:
#Example of how to access the segmentation paths
#(judging by the printed paths, the arguments are radiologist, time and segmentation type -- confirm in notebooks.info)
seg_paths = info.seg('L', '2', 'F')
print(seg_paths)
print(len(seg_paths))

['data/segmentations/L_2_seg/2_F_L.seg.nrrd', 'data/segmentations/L_2_seg/3_F_L.seg.nrrd', 'data/segmentations/L_2_seg/4_F_L.seg.nrrd', 'data/segmentations/L_2_seg/5_F_L.seg.nrrd', 'data/segmentations/L_2_seg/7_F_L.seg.nrrd', 'data/segmentations/L_2_seg/8_F_L.seg.nrrd', 'data/segmentations/L_2_seg/9_F_L.seg.nrrd', 'data/segmentations/L_2_seg/11_F_L.seg.nrrd', 'data/segmentations/L_2_seg/12_F_L.seg.nrrd', 'data/segmentations/L_2_seg/14_F_L.seg.nrrd', 'data/segmentations/L_2_seg/15_F_L.seg.nrrd', 'data/segmentations/L_2_seg/16_F_L.seg.nrrd', 'data/segmentations/L_2_seg/20_F_L.seg.nrrd', 'data/segmentations/L_2_seg/21_F_L.seg.nrrd', 'data/segmentations/L_2_seg/24_F_L.seg.nrrd', 'data/segmentations/L_2_seg/25_F_L.seg.nrrd', 'data/segmentations/L_2_seg/26_F_L.seg.nrrd', 'data/segmentations/L_2_seg/28_F_L.seg.nrrd', 'data/segmentations/L_2_seg/29_F_L.seg.nrrd', 'data/segmentations/L_2_seg/31_F_L.seg.nrrd', 'data/segmentations/L_2_seg/33_F_L.seg.nrrd', 'data/segmentations/L_2_seg/34_F_L.seg.n

## Visualize

In [10]:
# path_SC = str(repo_path) + '/data/images/pre-SET/Pat_2_RCC_SMC.tif' 
# path_CM = str(repo_path) + '/data/images/pre-SET/Pat_2_RCC_CMC_t1.tif'
# ds_SC = dcmread(path_SC)
# ds_CM = dcmread(path_CM)
# #show images
# fig, axs = plt.subplots(1, 2, figsize=(10, 10))
# axs[0].imshow(ds_SC.pixel_array, cmap='gray')
# axs[1].imshow(ds_CM.pixel_array, cmap='gray')

In [11]:
# function to translate the segmentation into a mask
def get_mask(path):
    """Returns a mask from a segmentation file

    Args:
        path (str): path to the segmentation file

    Returns:
        numpy.array: float array with the same shape as the segmentation data,
        1 where the data equals 1 and 0 everywhere else
    """
    # Imported here because the notebook's import cell does not bring `nrrd`
    # into scope; without this the call below raises NameError on a fresh kernel.
    import nrrd

    data, header = nrrd.read(path)
    mask = np.zeros(data.shape)
    mask[data == 1] = 1  # only label value 1 is kept; any other label stays 0
    return mask