In [2]:
# !pip install tciaclient pydicom matplotlib numpy
import os
import zipfile
from tqdm import tqdm
from pathlib import Path
from tciaclient.core import TCIAClient
import matplotlib.pyplot as plt
import pydicom
import numpy as np

client = TCIAClient()


In [16]:
collections = client.get_collection_values()
MAX_PATIENTS = 3  # for fast scan

print(f"{'Idx':>4} | {'Collection':<35} | {'#Patients':>8} | {'~#Studies':>9}")
print("-" * 65)

for idx, c in enumerate(collections):
    col_name = c['Collection']
    try:
        patients = client.get_patient(collection=col_name)
        num_patients = len(patients)
        # Sample up to MAX_PATIENTS for study count
        sample_patients = patients[:MAX_PATIENTS]
        studies_per_patient = []
        for p in sample_patients:
            studies = client.get_patient_study(collection=col_name, patientId=p['PatientID'])
            studies_per_patient.append(len(studies))
        # Estimate total studies by average * num_patients
        avg_studies = sum(studies_per_patient) / max(len(studies_per_patient),1)
        est_total_studies = int(avg_studies * num_patients)
        print(f"{idx:4} | {col_name:<35} | {num_patients:8} | {est_total_studies:9}")
    except Exception as e:
        print(f"{idx:4} | {col_name:<35} | {'ERROR':>8} | {'ERROR':>9}")


 Idx | Collection                          | #Patients | ~#Studies
-----------------------------------------------------------------
   0 | 4D-Lung                             |       20 |       413
   1 | ACRIN-FLT-Breast                    |       83 |       221
   2 | ACRIN-NSCLC-FDG-PET                 |      242 |      1129
   3 | APOLLO                              |       32 |         0
   4 | Anti-PD-1_Lung                      |       46 |        76
   5 | BREAST-DIAGNOSIS                    |      138 |       644
   6 | Breast-MRI-NACT-Pilot               |      128 |       512
   7 | C4KC-KiTS                           |      210 |       210
   8 | CBIS-DDSM                           |     6671 |      6671
   9 | CC-Radiomics-Phantom                |       17 |        17
  10 | CC-Radiomics-Phantom-2              |      251 |       251
  11 | CC-Radiomics-Phantom-3              |       95 |        95
  12 | CPTAC-CCRCC                         |      130 |       433
  13 | CP

In [None]:
# Set constants
COLLECTION = "RIDER Lung CT"
DATA_DIR = Path("/mnt/tcia_data/raw") / COLLECTION
DATA_DIR.mkdir(parents=True, exist_ok=True)

# Initialize TCIA API client
client = TCIAClient()

# List all patients in the collection
patients = client.get_patient(collection=COLLECTION)
print(f"Found {len(patients)} patients.")

# Optional: preview a few
for p in patients[:3]:
    print("-", p["PatientID"])

Found 64 patients.
- RIDER-1129164940
- RIDER-1129164940
- RIDER-1225316081


In [12]:
# Pick first patient
PATIENT_ID = patients[1]["PatientID"]

# Get studies
studies = client.get_patient_study(collection=COLLECTION, patientId=PATIENT_ID)
print(f"Found {len(studies)} studies for patient {PATIENT_ID}")
for s in studies:
    print("-", s["StudyInstanceUID"])

# Get CT series from the first study
STUDY_UID = studies[0]["StudyInstanceUID"]
series_list = client.get_series(collection=COLLECTION, studyInstanceUid=STUDY_UID, modality="CT")
print(f"Found {len(series_list)} CT series in study {STUDY_UID}")

for s in series_list:
    desc = s.get("SeriesDescription", "[No Description]")
    print("-", desc, "| UID:", s["SeriesInstanceUID"])

Found 18 studies for patient RIDER-1129164940
- 1.3.6.1.4.1.9328.50.1.216116555221814778114703363464001196508
- 1.2.276.0.7230010.3.1.2.1565948079.2848.1345144152.475
- 1.2.276.0.7230010.3.1.2.505801377.3020.1345983492.611
- 1.2.276.0.7230010.3.1.2.1231426215.1548.1345734062.488
- 1.2.276.0.7230010.3.1.2.4148549909.1376.1344859631.573
- 1.2.276.0.7230010.3.1.2.1231426215.1056.1345551432.403
- 1.3.6.1.4.1.14519.5.2.1.295526028989915648257590762384148204592
- 1.3.6.1.4.1.14519.5.2.1.216887537006644446680797968072501840065
- 1.3.6.1.4.1.14519.5.2.1.143105562673914638086740723575109688216
- 1.3.6.1.4.1.9328.50.1.216116555221814778114703363464001196508
- 1.2.276.0.7230010.3.1.2.1565948079.2848.1345144152.475
- 1.2.276.0.7230010.3.1.2.505801377.3020.1345983492.611
- 1.2.276.0.7230010.3.1.2.1231426215.1548.1345734062.488
- 1.2.276.0.7230010.3.1.2.4148549909.1376.1344859631.573
- 1.2.276.0.7230010.3.1.2.1231426215.1056.1345551432.403
- 1.3.6.1.4.1.14519.5.2.1.2955260289899156482575907623841482

In [6]:
# Loop over all patients and download all CT series
for p in tqdm(patients, desc=f"Downloading {COLLECTION}"):
    patient_id = p["PatientID"]
    try:
        studies = client.get_patient_study(collection=COLLECTION, patientId=patient_id)
        for study in studies:
            study_uid = study["StudyInstanceUID"]
            series_list = client.get_series(collection=COLLECTION, studyInstanceUid=study_uid, modality="CT")

            for series in series_list:
                series_uid = series["SeriesInstanceUID"]
                out_dir = DATA_DIR / patient_id / series_uid
                zip_file = out_dir / "series.zip"

                if out_dir.exists() and any(out_dir.glob("*.dcm")):
                    continue  # already exists

                os.makedirs(out_dir, exist_ok=True)
                client.get_image(seriesInstanceUid=series_uid, downloadPath=out_dir, zipFileName="series.zip")

                with zipfile.ZipFile(zip_file, "r") as zip_ref:
                    zip_ref.extractall(out_dir)
                os.remove(zip_file)

    except Exception as e:
        print(f"[ERROR] {patient_id}: {e}")


Downloading 4D-Lung:   0%|          | 0/20 [03:34<?, ?it/s]


KeyboardInterrupt: 