# Loading available images from the filtered parquet files

## First things first: install the package available on ...

In [7]:
import os
import importlib.util
 
package_name = 'ProCanLoad'

# Fail fast with an actionable message when the package is missing.
# The previous fallback called `os.exec('')`, which is not a function of the
# `os` module, so a missing package surfaced as an unrelated AttributeError.
if importlib.util.find_spec(package_name) is None:
    raise ModuleNotFoundError(
        f"The '{package_name}' package is not installed in this kernel's "
        "environment. Install it (see the instructions above), restart the "
        "kernel and re-run this cell.",
        name=package_name,
    )

# Reached only when the package is available.
from ProCanLoad.ImageLoader import ImageLoader

## Inputs:

    1. You may load the filtered parquet files as pandas DataFrames, or just give the path to each parquet file.

    2. Define the location of your files. The directory should be organized as follows:

        {your-directory}
        ├─  patient_id
        |   ├─  study_uid
        │   |   ├─  series_uid
        |   |   |   ├─  *.dcm
        |   |   |   .
        |   |   |   └─    ...
        |   |   .
        |   |   └─ ...
        |   .
        |   └─  ...
        .
        └─  ...

    3. You can select which columns you want to keep as metadata for each patient. (See the default columns below.)

    4. You can choose to extract all the .dcm files to .nii.gz now, OR you may perform this action later.

In [8]:
# Example: read a filtered parquet file into a pandas DataFrame to use as input
import pandas as pd
UC1_df = pd.read_parquet(path='data/UseCase1-v1.parquet')

In [9]:
loader = ImageLoader(
    # Path to the directory holding the files fetched with rsync, AFTER they were extracted
    images_directory_path='DICOM_images',
    # The series parquet can be passed as an already-loaded pd.DataFrame ...
    parquet_series=UC1_df,
    # ... or simply as the path to the parquet file
    parquet_segmentations='data/Seg_UseCase1-v1.parquet',
    # Default is True (assumes the user also wants to keep this column);
    # a list of column names may be given as well
    add_columns='carcinoma_classification',
    # Default is True: extract to NIfTI after reading the dcm files.
    # Set to False to check the images first and extract later.
    extract_nii=True,
    # Default is True: creates "./issues/image_loader.json" (an empty dict) that logs
    # the issues and warnings found. Resetting empties that .json file.
    reset_logger=True,
)

Default values 

In [5]:
# Run the loader configured above. The captured output below shows a "Reading ..."
# progress bar followed by an "Extract to .nii.gz" one (extract_nii=True was set).
loader.GetImageLoader()

Reading ... :   0%|          | 0/440 [00:00<?, ?it/s]

Extract to .nii.gz :   0%|          | 0/440 [00:00<?, ?it/s]

## Outputs:

    1. image_loader.json:

        {patient_id:
            study_id:
                "image_series":  #where image_series is (T2, ADC, DWI, SEG)
                    "key_series": #where key_series is 'N/A' for T2, ADC, b-value for DWI, label segmentation for SEG
                        meta: ... #metadata for series
                        (Only MR sequences)
                        dcm_path: 
                            "image slice location": #str(float) slice location 
                                path: ... #path to .dcm file
                                ImagePositionPatient: ... #dicom tag  Image Position (Patient) (0020,0032)
                        (Only SEG)
                        nii_path: #path to extracted *.nii.gz segmentation file
        }

    2. issues/image_loader.json: issues/warnings upon reading the dcm files

    3. (if selected) nifty_files.json: keys are the MR sequences or the name of the segmentation, and values are the paths to the .nii.gz files


# If you want to extract .nii.gz after checking "image_loader.json"

Set ImageLoader's extract_nii = False

And call:

In [1]:
from ProCanLoad.ImageLoader import DICOM2NII

In [3]:
extractor = DICOM2NII(
    image_loader='image_loader.json',
    # Default is True: from the DWI sequences keep only the largest b-value
    keep_max_bvalue=True,
)
extractor.Excecute()

  0%|          | 0/440 [00:00<?, ?it/s]