In [1]:
# !pip install tciaclient pydicom matplotlib numpy
import os
import zipfile
from tqdm import tqdm
from pathlib import Path
from tciaclient.core import TCIAClient
import matplotlib.pyplot as plt
import pydicom
import numpy as np

# Create the TCIA API client with default constructor arguments; the cells
# below issue their queries through a variable named `client`.
client = TCIAClient()


In [None]:
# --- Configuration -----------------------------------------------------------
# Name of the TCIA collection to mirror, and the local folder that receives it.
COLLECTION = "4D-Lung"
DATA_DIR = Path("data/raw", COLLECTION)

# Make sure the target folder (and any missing parents) exists before use.
DATA_DIR.mkdir(exist_ok=True, parents=True)

# --- TCIA access -------------------------------------------------------------
client = TCIAClient()

# Ask the API for every patient record that belongs to the collection.
patients = client.get_patient(collection=COLLECTION)
print(f"Found {len(patients)} patients.")

# Sanity check: show the IDs of (at most) the first three patients.
PREVIEW_COUNT = 3
for record in patients[:PREVIEW_COUNT]:
    print("-", record["PatientID"])

In [3]:
# Loop over all patients and download all CT series.
#
# Layout on disk: DATA_DIR / <PatientID> / <SeriesInstanceUID> / extracted files.
# A series is considered "already downloaded" when its folder contains at least
# one *.dcm file, which makes the cell safe to re-run after an interruption.


def _download_series(series_uid, out_dir):
    """Download one series as ``series.zip`` into ``out_dir`` and unpack it there.

    Uses the module-level ``client``. The archive is always removed afterwards,
    whether or not the download/extraction succeeded. If anything goes wrong,
    any ``*.dcm`` files already extracted into ``out_dir`` are deleted so that a
    half-extracted series is not mistaken for a complete one on the next run.

    Args:
        series_uid: SeriesInstanceUID of the series to fetch.
        out_dir: ``pathlib.Path`` of the destination folder (created if missing).

    Raises:
        Whatever the client or ``zipfile`` raises (e.g. ``FileNotFoundError`` if
        no archive was written, ``zipfile.BadZipFile`` for a corrupt archive).
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    zip_file = out_dir / "series.zip"
    try:
        client.get_image(seriesInstanceUid=series_uid, downloadPath=out_dir, zipFileName="series.zip")
        with zipfile.ZipFile(zip_file, "r") as zip_ref:
            zip_ref.extractall(out_dir)
    except BaseException:
        # BaseException (not Exception) so a manual kernel interrupt during a
        # long download also cleans up; the error is re-raised unchanged.
        for partial in out_dir.glob("*.dcm"):
            partial.unlink()
        raise
    finally:
        # Never leave a (possibly truncated) archive behind.
        if zip_file.exists():
            zip_file.unlink()


for p in tqdm(patients, desc=f"Downloading {COLLECTION}"):
    patient_id = p["PatientID"]
    try:
        studies = client.get_patient_study(collection=COLLECTION, patientId=patient_id)
        for study in studies:
            study_uid = study["StudyInstanceUID"]
            series_list = client.get_series(collection=COLLECTION, studyInstanceUid=study_uid, modality="CT")

            for series in series_list:
                series_uid = series["SeriesInstanceUID"]
                out_dir = DATA_DIR / patient_id / series_uid

                if out_dir.exists() and any(out_dir.glob("*.dcm")):
                    continue  # already exists

                # Isolate failures per series: one bad download must not skip
                # the remaining series/studies of the same patient.
                try:
                    _download_series(series_uid, out_dir)
                except Exception as e:
                    print(f"[ERROR] {patient_id} / {series_uid}: {e}")

    except Exception as e:
        # Metadata queries (studies / series listing) failed for this patient;
        # report it and move on to the next one.
        print(f"[ERROR] {patient_id}: {e}")


Downloading RIDER Lung CT: 100%|██████████| 64/64 [4:22:25<00:00, 246.02s/it]  
