In [1]:
import subprocess
from pathlib import Path

import numpy as np
from fmcib.utils import download_LUNG1

from readii_idc_notebooks import index_and_submit_saves

# SETUP 

In [2]:
# Name of the imaging collection; doubles as the per-collection folder name.
COLLECTION_ID = "nsclc_radiomics"

# Local root directory for this collection's data.
DATA_DIR = Path("data", "images", COLLECTION_ID)

# NIfTI conversions of the sorted images are saved under this directory.
NIFTI_OUTPUT_DIR = DATA_DIR.joinpath("niftis")

# Modalities of interest.
MODALITIES = ["CT", "RTSTRUCT"]

# Single seed shared by every randomised step in the notebook.
RANDOM_SEED = 10

# Image types passed on as the negative-control list: each transform combined
# with each region, in transform-major order.
IMAGE_TYPES = [
    f"{transform}_{region}"
    for transform in ("shuffled", "randomized", "randomized_sampled")
    for region in ("full", "roi", "non_roi")
]

# Relative path template for each written NIfTI file, one path level per line.
NIFTI_FILENAME_FORMAT = (
    "SubjectID-{PatientID}/"
    "{Modality}_SeriesUID-{SeriesInstanceUID}/"
    "{IMAGE_ID}.nii.gz"
)

# Label for the region of interest, mapped to a regex so that numbered
# variants such as "GTV 1" or "GTV 2" are matched as well.
ROI_NAME = "GTV"
roi_match_pattern = {ROI_NAME: "^(GTV.*)$"}

In [3]:
# Seed NumPy's global random generator so steps that draw from it are repeatable.
np.random.seed(RANDOM_SEED)

# Download the LUNG1 DICOM data into DATA_DIR. The recorded run of this cell
# fetched five patients, so the second argument presumably limits the number
# of samples -- TODO confirm against the fmcib documentation.
download_LUNG1(str(DATA_DIR), 5)

print("New Directory Structure: ")
try:
    # Bind the result instead of leaving the call as the cell's last
    # expression; otherwise the notebook also displays the CompletedProcess
    # repr underneath the directory listing.
    tree_result = subprocess.run(["tree", "-d", DATA_DIR], check=False)
except FileNotFoundError:
    # The listing is informational only; a missing `tree` executable should
    # not abort the notebook after the download has already completed.
    print("`tree` executable not found; skipping directory listing.")

2025-01-06 18:11:02.493 | INFO     | fmcib.utils.idc_helper:download_LUNG1:155 - Downloading LUNG1 manifest from Dropbox ...
2025-01-06 18:11:04.232 | INFO     | fmcib.utils.idc_helper:download_from_manifest:99 - Downloading DICOM data from IDC (Imaging Data Commons) ...
100%|██████████| 593/593 [00:03<00:00, 173.67it/s]

New Directory Structure: 
[01;34mdata/images/nsclc_radiomics[00m
├── [01;34mdicom[00m
│   ├── [01;34mLUNG1-100[00m
│   │   └── [01;34m1.3.6.1.4.1.32722.99.99.73012855591005609709750757985153279317[00m
│   ├── [01;34mLUNG1-241[00m
│   │   └── [01;34m1.3.6.1.4.1.32722.99.99.9749522862227904459662052993782066003[00m
│   ├── [01;34mLUNG1-254[00m
│   │   └── [01;34m1.3.6.1.4.1.32722.99.99.101385144304667221218962368127740145808[00m
│   ├── [01;34mLUNG1-261[00m
│   │   └── [01;34m1.3.6.1.4.1.32722.99.99.315921065862774785801495962476548705528[00m
│   └── [01;34mLUNG1-289[00m
│       └── [01;34m1.3.6.1.4.1.32722.99.99.113571361328418150338693333135534997557[00m
└── [01;34mniftis[00m
    └── [01;34mSubjectID-0_LUNG1-254[00m
        ├── [01;34mCT_SeriesUID-65670[00m
        └── [01;34mRTSTRUCT_SeriesUID-44321[00m

15 directories





CompletedProcess(args=['tree', '-d', PosixPath('data/images/nsclc_radiomics')], returncode=0)

In [4]:
# Run the indexing/saving step over the downloaded data. The comments below
# only group the keyword arguments by apparent role; the exact semantics live
# in readii_idc_notebooks.index_and_submit_saves.
csv_path = index_and_submit_saves(
    # Inputs: where the data lives and which modalities / ROI names to select.
    input_dir=DATA_DIR.absolute(),
    modalities=MODALITIES,
    roi_match_pattern=roi_match_pattern,
    # Outputs: destination directory and relative path template for the files.
    nifti_output_dir=NIFTI_OUTPUT_DIR,
    filename_format=NIFTI_FILENAME_FORMAT,
    overwrite=True,
    # Negative controls to generate, with the seed supplied alongside them.
    negative_control_list=IMAGE_TYPES,
    random_seed=RANDOM_SEED,
    # Execution options; n_jobs=-1 presumably means "use all cores" -- confirm.
    update_imgtools_index=True,
    n_jobs=-1,
)

print(f"Saved dataset index to {csv_path}")

  self.df_new.edge_type.str.contains(f'(?:{regex_term})', regex=True),


Loading data for subject_ID 1/5: 0_LUNG1-254 (PatientID : LUNG1-254)
Generating negative control 1/9 shuffled_full
Generating negative control 2/9 shuffled_roi
Generating negative control 3/9 shuffled_non_roi
Generating negative control 4/9 randomized_full
Generating negative control 5/9 randomized_roi
Generating negative control 6/9 randomized_non_roi
Generating negative control 7/9 randomized_sampled_full
Generating negative control 8/9 randomized_sampled_roi
Generating negative control 9/9 randomized_sampled_non_roi
Loading data for subject_ID 2/5: 1_LUNG1-289 (PatientID : LUNG1-289)
Generating negative control 1/9 shuffled_full
Generating negative control 2/9 shuffled_roi
Generating negative control 3/9 shuffled_non_roi
Generating negative control 4/9 randomized_full
Generating negative control 5/9 randomized_roi
Generating negative control 6/9 randomized_non_roi
Generating negative control 7/9 randomized_sampled_full
Generating negative control 8/9 randomized_sampled_roi
Generatin

In [5]:
# Print the directory tree rooted at the parent of NIFTI_OUTPUT_DIR (IPython
# expands the `$NIFTI_OUTPUT_DIR.parent` expression before running the shell
# command). `-F` appends a type indicator such as "/" to entries, and
# `-I "*.dcm"` hides the individual DICOM files to avoid clutter.
! tree -F $NIFTI_OUTPUT_DIR.parent -I "*.dcm"

[01;34mdata/images/nsclc_radiomics[00m
├── [01;34mdicom[00m/
│   ├── [01;34mLUNG1-100[00m/
│   │   └── [01;34m1.3.6.1.4.1.32722.99.99.73012855591005609709750757985153279317[00m/
│   ├── [01;34mLUNG1-241[00m/
│   │   └── [01;34m1.3.6.1.4.1.32722.99.99.9749522862227904459662052993782066003[00m/
│   ├── [01;34mLUNG1-254[00m/
│   │   └── [01;34m1.3.6.1.4.1.32722.99.99.101385144304667221218962368127740145808[00m/
│   ├── [01;34mLUNG1-261[00m/
│   │   └── [01;34m1.3.6.1.4.1.32722.99.99.315921065862774785801495962476548705528[00m/
│   └── [01;34mLUNG1-289[00m/
│       └── [01;34m1.3.6.1.4.1.32722.99.99.113571361328418150338693333135534997557[00m/
├── [01;34mniftis[00m/
│   ├── dataset_index.csv
│   ├── [01;34mSubjectID-0_LUNG1-254[00m/
│   │   ├── [01;34mCT_SeriesUID-65670[00m/
│   │   │   ├── [01;31moriginal.nii.gz[00m
│   │   │   ├── [01;31mrandomized_full.nii.gz[00m
│   │   │   ├── [01;31mrandomized_non_roi.nii.gz[00m
│   │   │   ├── [01;31mrandomized_r