In [1]:
import os

import numpy as np
import pandas as pd
import pydicom
import matplotlib.pyplot as plt
from PIL import Image

See original code in /lspseg/scripts/dicom_file_selection_and_png_conversion.ipynb

See also /lspseg/scripts/more_than_12_slices.ipynb for how "all_mid_slice_filepaths.csv" was generated.

In [2]:
# Table of mid-slice DICOM file paths; later cells read its `project_ID`
# and `FullPathDICOM` columns.
MID_SLICE_CSV = "../datasets/all_T2_SAG_mid-slice_filepaths.csv"
df_filepaths = pd.read_csv(MID_SLICE_CSV)

In [3]:
# Annotation selection table; it is joined to the file-path table on
# `project_ID` and filtered on `Iteration` in the following cells.
ANNOTATION_CSV = "../datasets/annotation_filepaths.csv"
selection = pd.read_csv(ANNOTATION_CSV)

In [16]:
# Attach the file-path columns to each selected project (pandas' default
# inner join on `project_ID`).
# NOTE: this rebinds `selection`, so run the cell once per fresh kernel;
# re-running it merges the already-merged frame again.
selection = selection.merge(df_filepaths, on='project_ID')

In [4]:
# Keep only the rows whose `Iteration` column equals 2.
# The explicit copy makes `selection` an independent frame, so adding or
# overwriting columns on it later does not raise SettingWithCopyWarning.
selection = selection[selection['Iteration'] == 2].copy()

In [5]:
# Display the rows that remain after the Iteration == 2 filter.
selection

Unnamed: 0,Notes,Iteration,Annotated,project_ID,Phenotype,FullPathDICOM,FileName,SeriesDescription,sub_folder
217,,2,,P0741_889,,/2013/NEAGENIM/PAT00470/STU00000/SER00001/IMG0...,,,
218,,2,,P0741_889,,/2013/NEAGENIM/PAT00470/STU00000/SER00001/IMG0...,,,
219,,2,,P0741_988,,/2011-2012/NEAGENIM/PAT00196/STU00000/SER00001...,,,
220,,2,,P0741_988,,/2011-2012/NEAGENIM/PAT00196/STU00000/SER00001...,,,
221,,2,,P0741_1025,,/2014-2015/NEAGENIM/PAT00320/STU00000/SER00001...,,,
222,,2,,P0741_1142,,/2014-2015/NEAGENIM/PAT00491/STU00000/SER00001...,,,
223,,2,,P0741_13311,,/2011-2012/NEAGENIM/PAT00034/STU00000/SER00001...,,,
224,,2,,P0741_1348,,/2014-2015/NEAGENIM/PAT00185/STU00000/SER00001...,,,
225,,2,,P0741_1381,,/2013/NEAGENIM/PAT00166/STU00000/SER00001/IMG0...,,,
226,,2,,P0741_1494,,/2014-2015/NEAGENIM/PAT00341/STU00000/SER00001...,,,


In [6]:
# Derive the DICOM file name (e.g. "IMG00005") from the last component of
# `FullPathDICOM`. Taking the last component, rather than a fixed position,
# keeps this correct if a path has a different directory depth.
# `assign` returns a new frame, so nothing is written into a filtered slice.
selection = selection.assign(
    FileName=selection['FullPathDICOM'].str.rsplit(pat='/', n=1).str[-1]
)
selection

Unnamed: 0,Notes,Iteration,Annotated,project_ID,Phenotype,FullPathDICOM,FileName,SeriesDescription,sub_folder
217,,2,,P0741_889,,/2013/NEAGENIM/PAT00470/STU00000/SER00001/IMG0...,IMG00005,,
218,,2,,P0741_889,,/2013/NEAGENIM/PAT00470/STU00000/SER00001/IMG0...,IMG00006,,
219,,2,,P0741_988,,/2011-2012/NEAGENIM/PAT00196/STU00000/SER00001...,IMG00005,,
220,,2,,P0741_988,,/2011-2012/NEAGENIM/PAT00196/STU00000/SER00001...,IMG00006,,
221,,2,,P0741_1025,,/2014-2015/NEAGENIM/PAT00320/STU00000/SER00001...,IMG00005,,
222,,2,,P0741_1142,,/2014-2015/NEAGENIM/PAT00491/STU00000/SER00001...,IMG00006,,
223,,2,,P0741_13311,,/2011-2012/NEAGENIM/PAT00034/STU00000/SER00001...,IMG00005,,
224,,2,,P0741_1348,,/2014-2015/NEAGENIM/PAT00185/STU00000/SER00001...,IMG00006,,
225,,2,,P0741_1381,,/2013/NEAGENIM/PAT00166/STU00000/SER00001/IMG0...,IMG00005,,
226,,2,,P0741_1494,,/2014-2015/NEAGENIM/PAT00341/STU00000/SER00001...,IMG00005,,


### Updated segmentation selection

These are the filepaths of the PNGs selected for annotation, updated so that sequences with more than 12 slices use the correct slices.

In [31]:
#selection.to_csv("updated_segmentation_selection.csv")
# old_selection = pd.read_csv("segmentation_selection.csv")
# missed = selection[~selection.FullPathDICOM.isin(old_selection.FullPathDICOM)]
#missed.to_csv("missed_segmentation_selection.csv")

In [7]:
def read_dicom_image(full_path) -> np.ndarray:
    """
    Read a DICOM file and return its pixel data as a numpy array.

    :param full_path: Full path of the DICOM file on disk.
    :return: The image's pixel data as a new numpy array.
    :raises: Whatever pydicom raises when the file cannot be opened or its
        pixel data cannot be decoded.
    """
    # `dcmread` is pydicom's documented reader; `read_file` was only a
    # deprecated alias of it and is not available in newer pydicom releases.
    # force=True lets pydicom read files that lack the standard DICOM
    # preamble / file-meta header.
    dataset = pydicom.dcmread(full_path, force=True)
    return np.array(dataset.pixel_array)

### Re-saving annotation pngs

Given these updated filepaths, which now include the correct slices, the PNGs are saved again here so that missing ones can be added to CVAT and incorrect ones replaced.

In [8]:
data_dir = "/mnt/sda1/nfbc" # Location of the DICOM files (see on work machine).
output_dir = "/mnt/sda1/nfbc/annotation_3_pngs/"
filepaths = selection["FullPathDICOM"].values.tolist()
project_ID = selection["project_ID"].values.tolist()
filenames = selection["FileName"].values.tolist()

# plt.imsave does not create missing folders, so make sure the target exists.
os.makedirs(output_dir, exist_ok=True)

# Write one grayscale PNG per selected slice, named <project_ID>_<FileName>.png.
for rel_path, pid, fname in zip(filepaths, project_ID, filenames):
    # FullPathDICOM values start with "/", so they are appended to data_dir by
    # plain string concatenation (a path join would discard data_dir).
    array = read_dicom_image(data_dir + rel_path)
    png_path = output_dir + pid + '_' + fname + ".png"
    # imsave accepts the numpy array directly; no PIL conversion is needed.
    plt.imsave(png_path, array, cmap="gray")

### Ensuring pngs are available for all slices in the dataset

In [7]:
filepaths = df_filepaths["FullPathDICOM"].values.tolist()
#data_dir = "/mnt/sda1/nfbc" # Location of the DICOM files on work machine.
data_dir = "/mnt/d/NFBC" # Location from external hard drive on laptop.

# Save every listed DICOM slice as a grayscale PNG in the same location as the
# original file (the original path with ".png" appended).
for rel_path in filepaths:
    # FullPathDICOM values start with "/", so they are appended to data_dir by
    # plain string concatenation.
    dicom_path = data_dir + rel_path
    array = read_dicom_image(dicom_path)
    # imsave accepts the numpy array directly; no PIL conversion is needed.
    plt.imsave(dicom_path + ".png", array, cmap="gray")

In [5]:
pwd

'/home/traolach/lspseg/scripts'