In [23]:
# %% [markdown]
# # LongiTumorSense Model Training
# **Training on MU-Glioma-Post Dataset**
# - Segmentation: nnUNet
# - Classification: 3D DenseNet
# - Survival: CoxPH Model

In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import os
import shutil


raw_root = "/content/drive/My Drive/MU-Glioma-Post"


output_root = "/content/nnUNet_raw_data_base/Task001_MU-Glioma-Post"


imagesTr = os.path.join(output_root, "imagesTr")
labelsTr = os.path.join(output_root, "labelsTr")
os.makedirs(imagesTr, exist_ok=True)
os.makedirs(labelsTr, exist_ok=True)

print("Raw dataset path:", raw_root)
print("nnU-Net dataset path:", output_root)


progress_file = os.path.join(output_root, "converted_cases.txt")

if os.path.exists(progress_file):
    with open(progress_file, "r") as f:
        converted_cases = set(line.strip() for line in f)
else:
    converted_cases = set()
print(f"Found {len(converted_cases)} cases already processed.")

Raw dataset path: /content/drive/My Drive/MU-Glioma-Post
nnU-Net dataset path: /content/nnUNet_raw_data_base/Task001_MU-Glioma-Post
Found 0 cases already processed.


In [3]:
def is_nifti(fname):
  return fname.endswith(".nii") or fname.endswith(".nii.gz")

In [4]:
mod_priority=[
      't1c','t1gd','t1ce',  # contrast-enhanced T1 variants
    't1n','t1',           # native T1
    'flair','t2f','t2flair','t2w','t2' # T2 /flair variants
]

In [5]:
def file_priority(fname):
  lf=fname.lower()
  for i,k in enumerate(mod_priority):
    if k in lf:
      return i
  return len(mod_priority) + hash(lf) % 1000

In [6]:
import re
skipped = []
canonical_modalities = None
for patient_id in sorted(os.listdir(raw_root)):
    patient_path = os.path.join(raw_root, patient_id)
    if not os.path.isdir(patient_path):
        continue

    for tp in sorted(os.listdir(patient_path)):
        tp_path = os.path.join(patient_path, tp)
        if not os.path.isdir(tp_path):
            continue

        tp_clean = re.sub(r"\s+", "_", tp)
        tp_clean = re.sub(r"[^A-Za-z0-9_-]", "_", tp_clean)
        case_id = f"{patient_id}_{tp_clean}"


        if case_id in converted_cases:
            print(f"Skipping {case_id}, already processed.")
            continue

        files = [f for f in os.listdir(tp_path) if is_nifti(f)]
        if not files:
            skipped.append((patient_id, tp, "no nifti files"))
            continue

        label_candidates = [
            f for f in files if any(x in f.lower() for x in ["mask", "tumor", "seg", "label"])
        ]
        if len(label_candidates) == 0:
            skipped.append((patient_id, tp, "no label found"))
            continue

        label_file = label_candidates[0]
        image_files = [f for f in files if f != label_file]
        if len(image_files) == 0:
            skipped.append((patient_id, tp, "no image files"))
            continue

        image_files_sorted = sorted(image_files, key=file_priority)
        if canonical_modalities is None:
            canonical_modalities = image_files_sorted.copy()
            print("Detected modality order (from first sample)")
            for idx, nm in enumerate(canonical_modalities):
                print(f"{idx}:{nm}")
            print("If this order is wrong adjust mod_priority list in the script.")
        else:
            if len(image_files_sorted) != len(canonical_modalities):
                skipped.append(
                    (patient_id, tp, f"modality count mismatch: {len(image_files_sorted)} vs {len(canonical_modalities)}")
                )
                continue

        for i, fname in enumerate(image_files_sorted):
            src = os.path.join(tp_path, fname)
            destination = os.path.join(imagesTr, f"{case_id}_{i:04d}.nii.gz")
            shutil.copy(src, destination)

        shutil.copy2(
            os.path.join(tp_path, label_file),
            os.path.join(labelsTr, f"{case_id}.nii.gz")
        )


        converted_cases.add(case_id)
        with open(progress_file, "a") as f:
            f.write(case_id + "\n")

print(f"\nConversion finished. {len(converted_cases)} total cases processed so far.")

if skipped:
    print(f"{len(skipped)} timepoints skipped (see sample):")
    for s in skipped[:10]:
        print(" ", s)

print(
    f"imagesTr files: {len(os.listdir(imagesTr))}, labelsTr files: {len(os.listdir(labelsTr))}"
)


Detected modality order (from first sample)
0:PatientID_0003_Timepoint_1_brain_t1c.nii.gz
1:PatientID_0003_Timepoint_1_brain_t1n.nii.gz
2:PatientID_0003_Timepoint_1_brain_t2f.nii.gz
3:PatientID_0003_Timepoint_1_brain_t2w.nii.gz
If this order is wrong adjust mod_priority list in the script.

Conversion finished. 594 total cases processed so far.
2 timepoints skipped (see sample):
  ('PatientID_0187', 'Timepoint_3', 'no label found')
  ('PatientID_0191', 'Timepoint_1', 'no label found')
imagesTr files: 2376, labelsTr files: 594


In [7]:
import re
import os
import shutil
from tqdm import tqdm
canonical_modalities = None

skipped = []
new_cases_count = 0


total_timepoints = sum(
    1 for p in sorted(os.listdir(raw_root))
    if os.path.isdir(os.path.join(raw_root, p))
    for tp in sorted(os.listdir(os.path.join(raw_root, p)))
    if os.path.isdir(os.path.join(raw_root, p, tp))
)



with tqdm(total=total_timepoints, desc="Processing cases") as pbar:
    for patient_id in sorted(os.listdir(raw_root)):
        patient_path = os.path.join(raw_root, patient_id)
        if not os.path.isdir(patient_path):
            pbar.update(1)
            continue



        for tp in sorted(os.listdir(patient_path)):
            tp_path = os.path.join(patient_path, tp)
            if not os.path.isdir(tp_path):
                pbar.update(1)
                continue


            tp_clean = re.sub(r"\s+", "_", tp)
            tp_clean = re.sub(r"[^A-Za-z0-9_-]", "_", tp_clean)
            case_id = f"{patient_id}_{tp_clean}"


            if case_id in converted_cases:
                pbar.update(1)
                continue

            files = [f for f in os.listdir(tp_path) if is_nifti(f)]
            if not files:
                skipped.append((patient_id, tp, "no nifti files"))
                pbar.update(1)
                continue

            label_candidates = [f for f in files if any(x in f.lower() for x in ["mask", "tumor", "seg", "label"])]
            if len(label_candidates) == 0:
                skipped.append((patient_id, tp, "no label found"))
                pbar.update(1)
                continue


            label_file = label_candidates[0]

            image_files = [f for f in files if f != label_file]
            if len(image_files) == 0:
                skipped.append((patient_id, tp, "no image files"))
                pbar.update(1)
                continue

            image_files_sorted = sorted(image_files, key=file_priority)
            if canonical_modalities is None:
                canonical_modalities = image_files_sorted.copy()
                print("\nDetected modality order (from first sample)")
                for idx, nm in enumerate(canonical_modalities):
                    print(f"{idx}: {nm}")
                print("If this order is wrong adjust mod_priority list in the script.")

            else:
                if len(image_files_sorted) != len(canonical_modalities):
                    skipped.append(
                        (patient_id, tp, f"modality count mismatch {len(image_files_sorted)} vs {len(canonical_modalities)}")
                    )
                    pbar.update(1)
                    continue


            for i, fname in enumerate(image_files_sorted):
                src = os.path.join(tp_path, fname)
                destination = os.path.join(imagesTr, f"{case_id}_{i:04d}.nii.gz")
                shutil.copy(src, destination)

            shutil.copy2(os.path.join(tp_path, label_file), os.path.join(labelsTr, f"{case_id}.nii.gz"))

            # Save progress immediately
            converted_cases.add(case_id)
            with open(progress_file, "a") as f:
                f.write(case_id + "\n")

            new_cases_count += 1
            pbar.update(1)

print(f"\nConversion finished. {len(converted_cases)} total cases processed so far.")
if skipped:
    print(f"{len(skipped)} timepoints skipped (see sample):")
    for s in skipped[:10]:
        print(" ", s)

print(f"imagesTr files: {len(os.listdir(imagesTr))}, labelsTr files: {len(os.listdir(labelsTr))}")
print(f"Newly processed this run: {new_cases_count}")

Processing cases: 100%|██████████| 596/596 [00:00<00:00, 2379.86it/s]


Conversion finished. 594 total cases processed so far.
2 timepoints skipped (see sample):
  ('PatientID_0187', 'Timepoint_3', 'no label found')
  ('PatientID_0191', 'Timepoint_1', 'no label found')
imagesTr files: 2376, labelsTr files: 594
Newly processed this run: 0





In [19]:

PROGRESS_PATH = "/content/drive/MyDrive/nnUNet_progress"
os.makedirs(PROGRESS_PATH, exist_ok=True)

In [28]:
import re
import os
import shutil
from glob import glob
import json
from datetime import datetime

def process_dataset(raw_root, output_root, task_name="Task001_MU-Glioma-Post"):

    PROGRESS_FILE = f"{PROGRESS_PATH}/{task_name}_progress.json"
    CONVERTED_FILE = f"{output_root}/converted_cases.txt"


    if os.path.exists(PROGRESS_FILE):
        with open(PROGRESS_FILE) as f:
            progress = json.load(f)
        converted_cases = set(progress['converted_cases'])
        skipped = progress['skipped']
        print(f"Resuming with {len(converted_cases)} pre-processed cases")
    else:
        converted_cases = set()
        skipped = []


    imagesTr = os.path.join(output_root, "imagesTr")
    labelsTr = os.path.join(output_root, "labelsTr")
    os.makedirs(imagesTr, exist_ok=True)
    os.makedirs(labelsTr, exist_ok=True)


    def is_case_processed(case_id):
        """Check if files actually exist"""
        has_images = any(f.startswith(case_id) for f in os.listdir(imagesTr))
        has_label = os.path.exists(os.path.join(labelsTr, f"{case_id}.nii.gz"))
        return has_images and has_label

    for patient_id in sorted(os.listdir(raw_root)):
        patient_path = os.path.join(raw_root, patient_id)
        if not os.path.isdir(patient_path):
            continue

        for tp in sorted(os.listdir(patient_path)):
            tp_path = os.path.join(patient_path, tp)
            if not os.path.isdir(tp_path):
                continue


            tp_clean = re.sub(r"\s+", "_", tp)
            tp_clean = re.sub(r"[^A-Za-z0-9_-]", "_", tp_clean)
            case_id = f"{patient_id}_{tp_clean}"


            if case_id in converted_cases and is_case_processed(case_id):
                print(f"✓ {case_id} (already processed)")
                continue



            converted_cases.add(case_id)


            if len(converted_cases) % 5 == 0:
                with open(PROGRESS_FILE, 'w') as f:
                    json.dump({
                        'converted_cases': list(converted_cases),
                        'skipped': skipped,
                        'last_update': str(datetime.now())
                    }, f)
                print(f"Saved progress after {len(converted_cases)} cases")


    with open(PROGRESS_FILE, 'w') as f:
        json.dump({
            'converted_cases': list(converted_cases),
            'skipped': skipped
        }, f)

    print(f"Processing complete. Total cases: {len(converted_cases)}")

**After Disconnect:**

In [None]:
process_dataset(
    raw_root="/content/drive/My Drive/MU-Glioma-Post",
    output_root="/content/nnUNet_raw_data_base/Task001_MU-Glioma-Post"
)

In [None]:
!pip install monai torch torchvision nnunet pyradiomics lifelines pydicom nibabel -q

**Install a Python package directly from its GitHub source code, not from the normal package store (PyPI).”**

In [None]:
!pip install git+https://github.com/MIC-DKFZ/nnUNet.git

In [31]:
import nibabel as nib
import numpy  as np
from sklearn.model_selection import train_test_split

**This function loads an MRI file, converts it to a NumPy array, and scales all values to between 0 and 1 for easier analysis.**

In [32]:
import nibabel as nib
import numpy  as np

def load_and_preprocess(patient_path):
    img = nib.load(patient_path)
    data = img.get_fdata()
    data = (data - np.min(data)) / (np.max(data) - np.min(data))
    return data

In [None]:
/content/Task001_MU-Glioma-Post

In [33]:
import os
import json
import re
import shutil
from glob import glob


os.environ['nnUNet_raw_data_base'] = "/content/nnUNet_raw_data_base"
os.environ['nnUNet_preprocessed'] = "/content/nnUNet_preprocessed"
os.environ['RESULTS_FOLDER'] = "/content/nnUNet_results"

os.makedirs("/content/nnUNet_raw_data_base", exist_ok=True)
os.makedirs("/content/nnUNet_preprocessed", exist_ok=True)
os.makedirs("/content/nnUNet_results", exist_ok=True)


In [34]:
output_root = "/content/nnUNet_raw_data_base/Task001_MU-Glioma-Post"
imagesTr = os.path.join(output_root, "imagesTr")
labelsTr = os.path.join(output_root, "labelsTr")
os.makedirs(imagesTr, exist_ok=True)
os.makedirs(labelsTr, exist_ok=True)

In [81]:
from glob import glob
import re
import shutil
import os

for scan_path in glob("/content/Task001_MU-Glioma-Post/imagesTr/*.nii.gz"):

    filename = os.path.basename(scan_path)
    match = re.match(r"(PatientID_\d+)_Timepoint_(\d+)_(\d{4})\.nii\.gz", filename)

    if match:
        patient_id, timepoint, modality_idx = match.groups()
        case_id = f"{patient_id}_Timepoint_{timepoint}"


        shutil.copy(
            scan_path,
            os.path.join(imagesTr, f"{case_id}_{modality_idx}.nii.gz")
        )
for label_path in glob("/content/Task001_MU-Glioma-Post/labelsTr/*.nii.gz"):
    filename = os.path.basename(label_path)
    match = re.match(r"(PatientID_\d+)_Timepoint_(\d+)\.nii\.gz", filename)

    if match:
        patient_id, timepoint = match.groups()
        case_id = f"{patient_id}_Timepoint_{timepoint}"

        shutil.copy(
            label_path,
            os.path.join(labelsTr, f"{case_id}.nii.gz")
        )

# 3. Verify copies
print(f"Copied {len(os.listdir(imagesTr))} scans to {imagesTr}")
print(f"Copied {len(os.listdir(labelsTr))} labels to {labelsTr}")

Copied 2376 scans to /content/nnUNet_raw_data_base/Task001_MU-Glioma-Post/imagesTr
Copied 594 labels to /content/nnUNet_raw_data_base/Task001_MU-Glioma-Post/labelsTr


In [None]:

source_images = "/content/Task001_MU-Glioma-Post/imagesTr/PatientID_0003_Timepoint_1_0000.nii.gz"  # Scans
source_labels = "/content/Task001_MU-Glioma-Post/labelsTr/PatientID_0003_Timepoint_1.nii.gz"  # Labels

In [35]:
import os
import json
import re

# Paths
output_root = "/content/nnUNet_raw_data_base/Task001_MU-Glioma-Post"
imagesTr_path = os.path.join(output_root, "imagesTr")
labelsTr_path = os.path.join(output_root, "labelsTr")

# Count training cases
num_cases = len([f for f in os.listdir(labelsTr_path) if f.endswith(".nii.gz")])

# Detect modalities by looking at first case
first_case_files = sorted([f for f in os.listdir(imagesTr_path) if f.endswith(".nii.gz")])
modality_count = len(set([re.search(r'_(\d{4})\.nii\.gz$', f).group(1) for f in first_case_files]))

# Build dataset.json dictionary
dataset_json = {
    "name": "MU-Glioma-Post",
    "description": "Post-operative glioma segmentation",
    "reference": "Your reference here",
    "licence": "Your license here",
    "release": "1.0",
    "modality": {str(i): f"MRI_modality_{i}" for i in range(modality_count)},
    "labels": {
        "0": "background",
        "1": "tumor"
    },
    "numTraining": num_cases,
    "file_ending": ".nii.gz"
}


with open(os.path.join(output_root, "dataset.json"), 'w') as f:
    json.dump(dataset_json, f, indent=4)

print(f"dataset.json created at: {os.path.join(output_root, 'dataset.json')}")
print(json.dumps(dataset_json, indent=4))


dataset.json created at: /content/nnUNet_raw_data_base/Task001_MU-Glioma-Post/dataset.json
{
    "name": "MU-Glioma-Post",
    "description": "Post-operative glioma segmentation",
    "reference": "Your reference here",
    "licence": "Your license here",
    "release": "1.0",
    "modality": {
        "0": "MRI_modality_0",
        "1": "MRI_modality_1",
        "2": "MRI_modality_2",
        "3": "MRI_modality_3"
    },
    "labels": {
        "0": "background",
        "1": "tumor"
    },
    "numTraining": 594,
    "file_ending": ".nii.gz"
}


In [36]:
import torch
from monai.networks.nets import DenseNet