In [None]:
# 1. Copy the entire code repository from our "code dataset"
#    This path matches your new setup.
!cp -r /kaggle/input/rsna2025/rsna2025 /kaggle/working/rsna_project

# 2. Move into our new project directory
%cd /kaggle/working/rsna_project

# 3. Install system dependencies (dcm2niix)
print("--- Installing dcm2niix ---")
!apt-get update
!apt-get install -y dcm2niix

# 4. Install all Python packages from the requirements file
print("\n--- Installing Python packages ---")
!pip install -r pip_packages/requirements.txt

# 5. Install the custom nnU-Net
print("\n--- Installing local nnU-Net ---")
!pip install -e ./nnUNet

print("\n✅ Setup complete! All code is local and all packages are installed.")

In [1]:
# Later cells call project scripts by relative path (e.g. src/...), so the
# working directory must be the project root for them to resolve.
%cd /kaggle/working/rsna_project

/kaggle/working/rsna_project


In [15]:
import pandas as pd
import numpy as np

# --- Create the directory structure ---
!mkdir -p /workspace/data
!mkdir -p /workspace/data/nnUNet/nnUNet_raw
!mkdir -p /workspace/data/nnUNet/nnUNet_preprocessed
!mkdir -p /workspace/data/nnUNet/nnUNet_results

# --- Link Competition Data into /workspace/data ---
!ln -s /kaggle/input/rsna-intracranial-aneurysm-detection/series /workspace/data/series
!ln -s /kaggle/input/rsna-intracranial-aneurysm-detection/segmentations /workspace/data/segmentations
!ln -s /kaggle/input/rsna-intracranial-aneurysm-detection/train_localizers.csv /workspace/data/train_localizers.csv

# --- Copy the error file the script was looking for ---
!cp /kaggle/working/rsna_project/data/error_data.yaml /workspace/data/error_data.yaml

# --- Set the environment variables to this new location ---
%env nnUNet_raw="/workspace/data/nnUNet/nnUNet_raw"
%env nnUNet_preprocessed="/workspace/data/nnUNet/nnUNet_preprocessed"
%env nnUNet_results="/workspace/data/nnUNet/nnUNet_results"

print("✅ Fixed environment! '/workspace/data' is now set up.")


env: nnUNet_raw="/workspace/data/nnUNet/nnUNet_raw"
env: nnUNet_preprocessed="/workspace/data/nnUNet/nnUNet_preprocessed"
env: nnUNet_results="/workspace/data/nnUNet/nnUNet_results"
✅ Fixed environment! '/workspace/data' is now set up.


In [16]:
import pandas as pd
import numpy as np

# 1. Load the REAL training file from the competition data
full_train_df = pd.read_csv('/kaggle/input/rsna-intracranial-aneurysm-detection/train.csv')

# 2. Get a list of all unique scan IDs
all_scan_ids = full_train_df['SeriesInstanceUID'].unique()
print(f"Total scans in dataset: {len(all_scan_ids)}")

# 3. Select a small sample of scans.
#    N_SAMPLE_SCANS is the single knob for the subset size; it is clamped to
#    the number of available scans because np.random.choice(..., replace=False)
#    raises ValueError when asked for more items than exist.
N_SAMPLE_SCANS = 100
n_scans_to_draw = min(N_SAMPLE_SCANS, len(all_scan_ids))
np.random.seed(42) # for reproducible results
sample_scan_ids = np.random.choice(all_scan_ids, n_scans_to_draw, replace=False)

# 4. Filter the main dataframe to ONLY include the sampled scans
sample_df = full_train_df[full_train_df['SeriesInstanceUID'].isin(sample_scan_ids)]

# 5. Save this SMALL dataframe to the location the code expects.
#    The directory is created here as well so this cell does not depend on the
#    workspace-setup cell having been run in the current session.
import os
os.makedirs('/workspace/data', exist_ok=True)
sample_df.to_csv('/workspace/data/train.csv', index=False)

print(f"✅ Created a sample 'train.csv' with {len(sample_scan_ids)} scans at /workspace/data/.")

Total scans in dataset: 4348
✅ Created a sample 'train.csv' with 5 scans at /workspace/data/.


In [4]:
# Preview the full competition training table; as the last expression of the
# cell the DataFrame is rendered (pandas elides the middle rows).
pd.read_csv('/kaggle/input/rsna-intracranial-aneurysm-detection/train.csv')

Unnamed: 0,SeriesInstanceUID,PatientAge,PatientSex,Modality,Left Infraclinoid Internal Carotid Artery,Right Infraclinoid Internal Carotid Artery,Left Supraclinoid Internal Carotid Artery,Right Supraclinoid Internal Carotid Artery,Left Middle Cerebral Artery,Right Middle Cerebral Artery,Anterior Communicating Artery,Left Anterior Cerebral Artery,Right Anterior Cerebral Artery,Left Posterior Communicating Artery,Right Posterior Communicating Artery,Basilar Tip,Other Posterior Circulation,Aneurysm Present
0,1.2.826.0.1.3680043.8.498.10004044428023505108...,64,Female,MRA,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1.2.826.0.1.3680043.8.498.10004684224894397679...,76,Female,MRA,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1.2.826.0.1.3680043.8.498.10005158603912009425...,58,Male,CTA,0,0,0,0,0,0,0,0,0,0,0,0,1,1
3,1.2.826.0.1.3680043.8.498.10009383108068795488...,71,Male,MRA,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1.2.826.0.1.3680043.8.498.10012790035410518400...,48,Female,MRA,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4343,1.2.826.0.1.3680043.8.498.99915610493694667606...,62,Female,MRI T1post,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4344,1.2.826.0.1.3680043.8.498.99920680741054836990...,76,Female,MRA,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4345,1.2.826.0.1.3680043.8.498.99953513260518059135...,44,Female,CTA,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4346,1.2.826.0.1.3680043.8.498.99982144859397209076...,58,Female,MRI T2,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [5]:
# Sanity check: recursively list /workspace to confirm the data tree exists.
!ls -R /workspace

ls: cannot access '/workspace': No such file or directory


In [None]:
import SimpleITK as sitk
import pydicom
import joblib
import subprocess
from tqdm import tqdm
from pathlib import Path
import numpy as np
import pandas as pd
import os
import json

# --- 0. SETUP & SANITY CHECKS ---
# Ensure we are in the correct directory
%cd /kaggle/working/rsna_project
print(f"Current directory: {os.getcwd()}")

# Force-set environment variables again
os.environ['nnUNet_raw'] = "/workspace/data/nnUNet/nnUNet_raw"
os.environ['nnUNet_preprocessed'] = "/workspace/data/nnUNet/nnUNet_preprocessed"
os.environ['nnUNet_results'] = "/workspace/data/nnUNet/nnUNet_results"
print("✅ Environment variables set.")

# --- 1/8: DICOM CONVERSION (ROBUST VERSION) ---
print("\n--- 1/8: Converting DICOM to NIfTI ---")

# 1. Define the config
class CFG:
    """Filesystem locations and worker count used by the DICOM -> NIfTI step."""

    # Root of the working data tree; the other locations live beneath it.
    csv_dir = Path("/workspace/data")
    # One sub-directory of DICOM files per series (input).
    img_dir = csv_dir / "series"
    # One sub-directory of converted NIfTI files per series (output).
    out_dir = csv_dir / "series_niix"
    error_dir = csv_dir / "error_data"
    # Number of parallel conversion jobs handed to joblib.
    num_workers = os.cpu_count()
    
# 2. Define the conversion functions
def dcm2niix(src_path: Path, dst_path: Path, series_id: str) -> int:
    """Convert one DICOM series to NIfTI with the ``dcm2niix`` command-line tool.

    Args:
        src_path: Directory holding the DICOM files of the series.
        dst_path: Output directory; created (with parents) if missing.
        series_id: Used as the output file name (``-f``).

    Returns:
        0 on success, 1 on any failure (non-zero exit status, timeout after
        300 s, or the executable could not be launched). The failure reason is
        printed so that a failed series can be diagnosed afterwards.
    """
    dst_path.mkdir(parents=True, exist_ok=True)
    # -z y: gzip the output, -b n: no BIDS sidecar, -m 2: dcm2niix merge mode 2.
    cmd = ["dcm2niix", "-o", str(dst_path), "-f", f"{series_id}", "-z", "y", "-b", "n", "-m", "2", str(src_path)]
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True, timeout=300)
    except subprocess.CalledProcessError as exc:
        # Keep only the tail of the tool's output; that is where the error is.
        reason = (exc.stderr or exc.stdout or "").strip()[-500:] or f"exit code {exc.returncode}"
    except subprocess.TimeoutExpired:
        reason = "timed out after 300 s"
    except OSError as exc:
        # e.g. FileNotFoundError when dcm2niix is not installed / not on PATH.
        # Reported as a normal failure so the 0/1 contract always holds.
        reason = f"could not launch dcm2niix: {exc}"
    else:
        return 0
    print(f"dcm2niix failed for {series_id}: {reason}")
    return 1

def run_dcm2niix(series_ids: list[str]):
    """Convert every series in ``series_ids`` in parallel and summarise the result.

    Each series is read from ``CFG.img_dir / <id>`` and written to
    ``CFG.out_dir / <id>`` by ``dcm2niix``; ``CFG.num_workers`` joblib jobs are used.

    Args:
        series_ids: SeriesInstanceUIDs to convert.

    Returns:
        The list of series IDs whose conversion failed (empty when all
        succeeded), so callers can inspect or retry them. Earlier versions
        returned nothing, so existing callers that ignore the result still work.
    """
    print(f"Running dcm2niix for {len(series_ids)} series...")
    # NOTE(review): tqdm wraps the task generator, so the bar appears to track
    # task submission rather than completion — confirm if accurate progress matters.
    results = joblib.Parallel(n_jobs=CFG.num_workers)(
        joblib.delayed(dcm2niix)(
            src_path=CFG.img_dir / series_id,
            dst_path=CFG.out_dir / series_id,
            series_id=series_id,
        )
        for series_id in tqdm(series_ids, desc="Converting")
    )
    # Results come back in input order, so they can be paired with the IDs.
    failed_ids = [series_id for series_id, status in zip(series_ids, results) if status != 0]
    success_count = len(results) - len(failed_ids)
    print(f"Done: {success_count} success, {len(failed_ids)} failure")
    if failed_ids:
        print("Failed series:")
        for series_id in failed_ids:
            print(f"  {series_id}")
    return failed_ids

# 3. Run the conversion on every series listed in the sampled train.csv
#    (the number of scans is whatever that file contains; it is printed below).
# NOTE(review): any exception is only printed, so the shell steps further down
# still run even if the conversion failed.
try:
    sample_df = pd.read_csv('/workspace/data/train.csv')
    series_ids_to_process = sample_df['SeriesInstanceUID'].tolist()
    print(f"Found {len(series_ids_to_process)} scans to process.")
    run_dcm2niix(series_ids_to_process)
    
    print("\n✅ DICOM conversion finished.")
except Exception as e:
    print(f"❌ An error occurred during conversion: {e}")

# --- 2/8: Moving error data ---
print("\n--- 2/8: Moving error data ---")
!python src/my_utils/move_error_data.py

# --- 3/8: Creating nnU-Net Datasets ---
# The script is run twice: once with its default dataset id and once with
# --dataset-id 3.
print("\n--- 3/8: Creating nnU-Net Dataset 001 & 003 ---")
!python src/nnUnet_utils/create_nnunet_dataset.py
!python src/nnUnet_utils/create_nnunet_dataset.py --dataset-id 3

# --- 4/8: Planning & Preprocessing Dataset 001 (RAM FIX) ---
print("\n--- 4/8: Planning and preprocessing Dataset 001 (3d_fullres only) ---")
# We use -c 3d_fullres to skip the 2d config that crashes RAM
# NOTE: the exit status of the shell command is not checked, so the success
# message below is printed unconditionally.
!nnUNetv2_plan_and_preprocess -d 1 --verify_dataset_integrity -pl nnUNetPlannerResEncM -c 3d_fullres -np 2
print("✅ Preprocessing for Dataset 001 complete.")

/kaggle/working/rsna_project
Current directory: /kaggle/working/rsna_project
✅ Environment variables set.

--- 1/8: Converting DICOM to NIfTI ---
Found 5 scans to process.
Running dcm2niix for 5 series...


Converting: 100%|██████████| 5/5 [00:00<00:00, 2944.61it/s]


Done: 4 success, 1 failure

✅ DICOM conversion finished.

--- 2/8: Moving error data ---
2025-11-14 20:32:24,499 - INFO - Error series count: 57
2025-11-14 20:32:24,499 - INFO - Backup root: /workspace/data/series_niix_error_data_backup
Moving error data: 100%|█████████████████████| 57/57 [00:00<00:00, 12679.68it/s]
2025-11-14 20:32:24,514 - INFO - Moved: 57
2025-11-14 20:32:24,514 - INFO - Not found: 0
2025-11-14 20:32:24,515 - INFO - Backup verified: 57/57

Finished moving error data
Total targets: 57
Moved: 57
Not found: 0
Backup root: /workspace/data/series_niix_error_data_backup

--- 3/8: Creating nnU-Net Dataset 001 & 003 ---
Using dataset ID 1 (binarization: Disabled / class remapping: Disabled)
Output directory: /workspace/data/nnUNet/nnUNet_raw/Dataset001_VesselSegmentation
Number of cases to process: 178
Processing: 100%|█████████████████████████████| 178/178 [25:00<00:00,  8.43s/it]

Done:
  Success: 178/178 cases
  Modality distribution: {'CT': 74, 'MR': 103, 'Unknown': 1}


In [None]:
# --- 5/8: Planning & Preprocessing Dataset 003 (CORRECT COMMAND) ---
print("\n--- 5/8: Planning and preprocessing Dataset 003 ---")
# This is the correct command to create the '...ForcedLowres' plan
!nnUNetv2_plan_and_preprocess -d 3 --verify_dataset_integrity -pl nnUNetPlannerResEncMForcedLowres -overwrite_target_spacing 1.0 1.0 1.0 -c 3d_fullres -np 2
print("✅ Planning and Preprocessing for Dataset 003 complete.")

# --- 6/8: JSON Patch (CORRECT FILE NAME) ---
print("\n--- 6/8: Re-running JSON patch with correct file name ---")
plan_file = "/workspace/data/nnUNet/nnUNet_preprocessed/Dataset003_VesselGrouping/nnUNetResEncUNetMPlans_ForcedLowres.json"

try:
    with open(plan_file, 'r') as f: data = json.load(f)
    
    data['plans_per_stage'][0]['patch_size'] = [64, 64, 64]
    
    with open(plan_file, 'w') as f: json.dump(data, f, indent=4)
    print("✅ Patched nnUNet plans.json successfully.")
    
    # 6.2 Re-preprocess Dataset 003 with the new patch size
    print("\n--- 6.2: Re-preprocessing Dataset 003 with new patch size ---")
    !nnUNetv2_preprocess -d 3 -plans_name nnUNetResEncUNetMPlans_ForcedLowres -c 3d_fullres -np 2
    print("✅ Re-preprocessing for Dataset 003 complete.")
except Exception as e:
    print(f"❌ Error patching JSON. The file '{plan_file}' was not found. {e}")

# --- 7/8 & 8/8: Final Steps ---
print("\n--- 7/8: Creating inference dataset ---")
!python src/nnUnet_utils/create_nnunet_inference_dataset.py
print("\n--- 8/8: Creating empty classifier dataset ---")
!mkdir -p /workspace/data/clf_data/imagesTr
!mkdir -p /workspace/data/clf_data/labelsTr

print("\n\n✅✅✅ Final Preprocessing Complete! ✅✅✅")

In [None]:
# Add the local user 'bin' directory to the system PATH
# This is where pip installs the nnU-Net executables.
#
# `%env PATH=$PATH:...` must not be used here: %env only expands *Python*
# variables, so with no Python variable named PATH the value would be stored
# literally as "$PATH:/root/.local/bin" and every later shell command would
# fail to find its executable. Editing os.environ directly is unambiguous.
import os

LOCAL_BIN_DIR = "/root/.local/bin"
_path_entries = [entry for entry in os.environ.get("PATH", "").split(os.pathsep) if entry]
if LOCAL_BIN_DIR not in _path_entries:  # keep the cell idempotent on re-run
    _path_entries.append(LOCAL_BIN_DIR)
os.environ["PATH"] = os.pathsep.join(_path_entries)

In [None]:
# --- 4/8: Planning & Preprocessing Dataset 001 (RAM FIX) ---
print("\n--- 4/8: Planning and preprocessing Dataset 001 (3d_fullres only) ---")
# We use -c 3d_fullres to skip the 2d config that crashes RAM
!nnUNetv2_plan_and_preprocess -d 1 --verify_dataset_integrity -pl nnUNetPlannerResEncM -c 3d_fullres -np 2
print("✅ Preprocessing for Dataset 001 complete.")

# --- 5/8: Planning & Preprocessing Dataset 003 (CORRECT COMMAND) ---
print("\n--- 5/8: Planning and preprocessing Dataset 003 ---")
# This is the correct command to create the '...ForcedLowres' plan
!nnUNetv2_plan_and_preprocess -d 3 --verify_dataset_integrity -pl nnUNetPlannerResEncMForcedLowres -overwrite_target_spacing 1.0 1.0 1.0 -c 3d_fullres -np 2
print("✅ Planning and Preprocessing for Dataset 003 complete.")

# --- 6/8: JSON Patch (CORRECT FILE NAME) ---
print("\n--- 6/8: Re-running JSON patch with correct file name ---")
plan_file = "/workspace/data/nnUNet/nnUNet_preprocessed/Dataset003_VesselGrouping/nnUNetResEncUNetMPlans_ForcedLowres.json"

try:
    with open(plan_file, 'r') as f: data = json.load(f)
    
    data['plans_per_stage'][0]['patch_size'] = [64, 64, 64]
    
    with open(plan_file, 'w') as f: json.dump(data, f, indent=4)
    print("✅ Patched nnUNet plans.json successfully.")
    
    # 6.2 Re-preprocess Dataset 003 with the new patch size
    print("\n--- 6.2: Re-preprocessing Dataset 003 with new patch size ---")
    !nnUNetv2_preprocess -d 3 -plans_name nnUNetResEncUNetMPlans_ForcedLowres -c 3d_fullres -np 2
    print("✅ Re-preprocessing for Dataset 003 complete.")
except Exception as e:
    print(f"❌ Error patching JSON. The file '{plan_file}' was not found. {e}")

# --- 7/8 & 8/8: Final Steps ---
print("\n--- 7/8: Creating inference dataset ---")
!python src/nnUnet_utils/create_nnunet_inference_dataset.py
print("\n--- 8/8: Creating empty classifier dataset ---")
!mkdir -p /workspace/data/clf_data/imagesTr
!mkdir -p /workspace/data/clf_data/labelsTr

print("\n\n✅✅✅ Final Preprocessing Complete! ✅✅✅")

In [None]:
# --- STAGE 1: TRAINING THE nnU-Net SEGMENTATION MODELS ---
# We are training only fold 0, for 15 epochs (which you set manually)

print("--- 1/4: Training Stage 1: Vessel Segmentation (Dataset 001, Fold 0) ---")
!nnUNetv2_train 1 3d_fullres 0 -p nnUNetPlannerResEncM -tr nnUNetTrainerSkeletonRecall_more_DAv3

print("\n--- 2/4: Training Stage 1: Vessel Grouping (Dataset 003, Fold 0) ---")
!nnUNetv2_train 3 3d_fullres 0 -p nnUNetResEncUNetMPlans_ForcedLowres -tr RSNA2025Trainer_moreDAv7

# --- STAGE 1: INFERENCE ---
# Now we use the models we just trained to predict on our 100 scans.
# This creates the ROI (Region of Interest) inputs for the next stage.
print("\n--- 3/4: Running Stage 1 Inference (Creating ROIs) ---")
!python src/my_utils/vessel_segmentation.py

# --- STAGE 2: TRAINING THE CLASSIFIER MODEL ---
# This trains the final model. We override the epochs on the command line.
# trainer.max_epochs=5 tells it to only train for 5 epochs (fast).
print("\n--- 4/4: Training Stage 2: ROI Classifier (Fold 0, 5 Epochs) ---")
!python src/train.py \
  experiment=251013-seg_tf-v4-nnunet_truncate1-preV6_1-ex_dav6w3-m32g64-e25-w01_005_1-s128_256_256 \
  data.fold=0 \
  trainer.max_epochs=5

print("\n\n✅✅✅ Full Training Pipeline Complete! ✅✅✅")

In [None]:
# --- 1/8: DICOM CONVERSION (ROBUST VERSION) ---
# We will define the necessary functions right here in the notebook
# to bypass all import/patching errors.

import SimpleITK as sitk
import pydicom
import joblib
import subprocess
from tqdm import tqdm
from pathlib import Path
import numpy as np
import pandas as pd
import os

print("--- 1/8: Converting DICOM to NIfTI ---")

# 1. Define the config (copied from the script)
class CFG:
    """Paths and parallelism settings for the DICOM -> NIfTI conversion."""

    # Working data root; also where train.csv lives.
    csv_dir = Path("/workspace/data")
    # Input: one DICOM directory per series.
    img_dir = csv_dir.joinpath("series")
    # Output: one NIfTI directory per series.
    out_dir = csv_dir.joinpath("series_niix")
    error_dir = csv_dir.joinpath("error_data")
    # Parallel jobs used by joblib.
    num_workers = os.cpu_count()
    
# 2. Define the conversion functions (copied from the script)
def dcm2niix(
    src_path: Path,
    dst_path: Path,
    series_id: str,
) -> int:
    """Convert a single DICOM series to NIfTI via the ``dcm2niix`` executable.

    The output directory is created if needed. Returns 0 when the tool exits
    successfully and 1 when it exits with a non-zero status or exceeds the
    300-second timeout.
    """
    dst_path.mkdir(parents=True, exist_ok=True)

    # Assemble the command line option by option.
    command = ["dcm2niix"]
    command += ["-o", str(dst_path)]   # output directory
    command += ["-f", f"{series_id}"]  # output file name
    command += ["-z", "y"]
    command += ["-b", "n"]
    command += ["-m", "2"]
    command.append(str(src_path))      # input DICOM directory

    # Both failure modes are reported identically to the caller.
    handled_failures = (subprocess.CalledProcessError, subprocess.TimeoutExpired)
    try:
        subprocess.run(command, check=True, capture_output=True, text=True, timeout=300)
    except handled_failures:
        status = 1  # Error
    else:
        status = 0  # Success
    return status

def run_dcm2niix(series_ids: list[str]):
    """Convert all given series in parallel and print a success/failure tally."""
    print(f"Running dcm2niix for {len(series_ids)} series...")

    # One delayed task per series: read from CFG.img_dir/<id>, write to CFG.out_dir/<id>.
    tasks = (
        joblib.delayed(dcm2niix)(
            src_path=CFG.img_dir / sid,
            dst_path=CFG.out_dir / sid,
            series_id=sid,
        )
        for sid in tqdm(series_ids, desc="Converting")
    )
    statuses = joblib.Parallel(n_jobs=CFG.num_workers)(tasks)

    # A status of 0 marks a successful conversion; anything else is a failure.
    success_count = statuses.count(0)
    failure_count = len(statuses) - success_count
    print(f"Done: {success_count} success, {failure_count} failure")

# 3. Run the conversion on every series listed in the sampled train.csv
# NOTE(review): the except clause only prints the error, so the remaining
# pipeline steps in this cell still execute after a failed conversion.
try:
    # Load the sampled train.csv
    sample_df = pd.read_csv('/workspace/data/train.csv')
    
    # Get the list of SeriesInstanceUIDs to convert
    series_ids_to_process = sample_df['SeriesInstanceUID'].tolist()
    
    print(f"Found {len(series_ids_to_process)} scans to process.")
    
    # Run the conversion
    run_dcm2niix(series_ids_to_process)
    
    print("\n✅ DICOM conversion finished.")

except Exception as e:
    print(f"❌ An error occurred during conversion: {e}")
    print("Please check that Step 4 ran correctly.")


# --- THE REST OF THE PIPELINE ---
# The following steps are identical to before and will now work.

# 2. Move any error data
print("\n--- 2/8: Moving error data ---")
!python src/my_utils/move_error_data.py

# 3. Create nnU-Net training dataset 1 (VesselSegmentation)
print("\n--- 3/8: Creating nnU-Net Dataset 001 ---")
!python src/nnUnet_utils/create_nnunet_dataset.py
# Create dataset 3 (VesselGrouping)
!python src/nnUnet_utils/create_nnunet_dataset.py --dataset-id 3

# 4. Run nnU-Net planning and preprocessing for Dataset 1
print("\n--- 4/8: Planning and preprocessing Dataset 001 ---")
!nnUNetv2_plan_and_preprocess -d 1 --verify_dataset_integrity -pl nnUNetPlannerResEncM

# 5. Run nnU-Net planning and preprocessing for Dataset 3
print("\n--- 5/8: Planning and preprocessing Dataset 003 ---")
!nnUNetv2_plan_and_preprocess -d 3 --verify_dataset_integrity -pl nnUNetPlannerResEncMForcedLowres -overwrite_target_spacing 1.0 1.0 1.0 -c 3d_fullres

# 6. Manually set patch size and re-preprocess (as per README)
print("\n--- 6/8: Patching JSON and re-preprocessing Dataset 003 ---")
import json
plan_file = "/workspace/data/nnUNet/nnUNet_preprocessed/Dataset003_VesselGrouping/nnUNetResEncUNetMPlans_ForcedLowres.json"

try:
    with open(plan_file, 'r') as f:
        data = json.load(f)
    
    # Set the patch size as specified by the winner
    data['plans_per_stage'][0]['patch_size'] = [128, 128, 128]
    
    with open(plan_file, 'w') as f:
        json.dump(data, f, indent=4)
    
    print("✅ Patched nnUNet plans.json successfully.")
    
    # 6.2 Re-preprocess
    !nnUNetv2_preprocess -d 3 -plans_name nnUNetResEncUNetMPlans_ForcedLowres -c 3d_fullres

except Exception as e:
    print(f"Error patching JSON. You may need to do this manually: {e}")


# 7. Create inference set
print("\n--- 7/8: Creating inference dataset ---")
!python src/nnUnet_utils/create_nnunet_inference_dataset.py

# 8. Create empty dataset for classifier (this is a small hack, but needed by the scripts)
print("\n--- 8/8: Creating empty classifier dataset ---")
!mkdir -p /workspace/data/clf_data/imagesTr
!mkdir -p /workspace/data/clf_data/labelsTr

print("\n\n✅✅✅ Preprocessing complete! ✅✅✅")

In [None]:
# --- 0. FORCE-SET ENVIRONMENT VARIABLES ---
# We do this again to be 100% safe
print("--- Force-setting nnU-Net Environment Variables ---")
import os
os.environ['nnUNet_raw'] = "/workspace/data/nnUNet/nnUNet_raw"
os.environ['nnUNet_preprocessed'] = "/workspace/data/nnUNet/nnUNet_preprocessed"
os.environ['nnUNet_results'] = "/workspace/data/nnUNet/nnUNet_results"
print("✅ Environment variables re-set.")

# --- 4/8: RE-RUNNING FAILED STEP 4 (with RAM fix) ---
print("\n--- 4/8: Re-running preprocessing for Dataset 001 (3d_fullres only) ---")
# This time we manually tell it to ONLY process the '3d_fullres' config,
# skipping the '2d' config that crashed the notebook.
!nnUNetv2_preprocess -d 1 -c 3d_fullres -pl nnUNetPlannerResEncM -np 2

# --- 5/8: 
# This is the CORRECT command from the original 60-min cell
# It will create the '...ForcedLowres.json' file we need
!nnUNetv2_plan_and_preprocess -d 3 --verify_dataset_integrity -pl nnUNetPlannerResEncMForcedLowres -overwrite_target_spacing 1.0 1.0 1.0 -c 3d_fullres -np 2

# --- 6/8: RE-RUNNING FAILED STEP 6 (with correct file name) ---
print("\n--- 6/8: Re-running JSON patch with correct file name ---")
import json

# This is the CORRECT file name from your log
plan_file = "/workspace/data/nnUNet/nnUNet_preprocessed/Dataset003_VesselGrouping/nnUNetResEncUNetMPlans.json"

try:
    with open(plan_file, 'r') as f:
        data = json.load(f)
    
    # Set the patch size as specified by the winner
    data['plans_per_stage'][0]['patch_size'] = [128, 128, 128]
    
    with open(plan_file, 'w') as f:
        json.dump(data, f, indent=4)
    
    print("✅ Patched nnUNet plans.json successfully.")
    
    # --- 6.2: Re-preprocess Dataset 003 with the new patch size ---
    print("\n--- 6.2: Re-preprocessing Dataset 003 with new patch size ---")
    # We must run this again so the new patch size is used.
    !nnUNetv2_preprocess -d 3 -plans_name nnUNetResEncUNetMPlans -c 3d_fullres -np 2
    print("✅ Re-preprocessing for Dataset 003 complete.")

except Exception as e:
    print(f"❌ Error patching JSON. This means the steps above failed again: {e}")

# 7. Create inference set
print("\n--- 7/8: Creating inference dataset ---")
!python src/nnUnet_utils/create_nnunet_inference_dataset.py

# 8. Create empty dataset for classifier (this is a small hack, but needed by the scripts)
print("\n--- 8/8: Creating empty classifier dataset ---")
!mkdir -p /workspace/data/clf_data/imagesTr
!mkdir -p /workspace/data/clf_data/labelsTr

print("\n\n✅✅✅ Final Preprocessing Complete! ✅✅✅")

In [None]:
# --- 0. FORCE-SET ENVIRONMENT VARIABLES ---
# This is the fix. We set the variables again in this new cell
# so the following shell commands can find them.
print("--- Force-setting nnU-Net Environment Variables ---")
import os
os.environ['nnUNet_raw'] = "/workspace/data/nnUNet/nnUNet_raw"
os.environ['nnUNet_preprocessed'] = "/workspace/data/nnUNet/nnUNet_preprocessed"
os.environ['nnUNet_results'] = "/workspace/data/nnUNet/nnUNet_results"

# Also export them for the shell commands
!export nnUNet_raw="/workspace/data/nnUNet/nnUNet_raw"
!export nnUNet_preprocessed="/workspace/data/nnUNet/nnUNet_preprocessed"
!export nnUNet_results="/workspace/data/nnUNet/nnUNet_results"
print("✅ Environment variables re-set.")

# Let's verify the datasets were created
print("\n--- Verifying dataset locations ---")
!ls -l /workspace/data/nnUNet/nnUNet_raw/
print("---------------------------------")


# --- 4/8: RE-RUNNING FAILED STEP 4 ---
print("\n--- 4/8: Re-running planning and preprocessing Dataset 001 ---")
!nnUNetv2_plan_and_preprocess -d 1 --verify_dataset_integrity -pl nnUNetPlannerResEncM

# --- 5/8: RE-RUNNING FAILED STEP 5 ---
print("\n--- 5/8: Re-running planning and preprocessing Dataset 003 ---")
!nnUNetv2_plan_and_preprocess -d 3 --verify_dataset_integrity -pl nnUNetPlannerResEncMForcedLowres -overwrite_target_spacing 1.0 1.0 1.0 -c 3d_fullres

# --- 6/8: RE-RUNNING FAILED STEP 6 ---
print("\n--- 6/8: Re-running JSON patch and re-preprocessing ---")
import json
plan_file = "/workspace/data/nnUNet/nnUNet_preprocessed/Dataset003_VesselGrouping/nnUNetResEncUNetMPlans_ForcedLowres.json"

try:
    with open(plan_file, 'r') as f:
        data = json.load(f)
    
    # Set the patch size as specified by the winner
    data['plans_per_stage'][0]['patch_size'] = [128, 128, 128]
    
    with open(plan_file, 'w') as f:
        json.dump(data, f, indent=4)
    
    print("✅ Patched nnUNet plans.json successfully.")
    
    # 6.2 Re-preprocess
    !nnUNetv2_preprocess -d 3 -plans_name nnUNetResEncUNetMPlans_ForcedLowres -c 3d_fullres
    print("✅ Re-preprocessing for Dataset 003 complete.")

except Exception as e:
    print(f"❌ Error patching JSON. This means the steps above failed again: {e}")

print("\n\n✅✅✅ Preprocessing Fix Complete! ✅✅✅")

In [None]:
# --- 0. FORCE-SET ENVIRONMENT VARIABLES ---
# We do this again to be 100% safe
print("--- Force-setting nnU-Net Environment Variables ---")
import os
os.environ['nnUNet_raw'] = "/workspace/data/nnUNet/nnUNet_raw"
os.environ['nnUNet_preprocessed'] = "/workspace/data/nnUNet/nnUNet_preprocessed"
os.environ['nnUNet_results'] = "/workspace/data/nnUNet/nnUNet_results"
print("✅ Environment variables re-set.")

# --- 4/8: RE-RUNNING FAILED STEP 4 (with RAM fix) ---
print("\n--- 4/8: Re-running planning and preprocessing Dataset 001 ---")
# We add -np 2 to use only 2 CPU cores, preventing the RAM crash.
!nnUNetv2_plan_and_preprocess -d 1 --verify_dataset_integrity -pl nnUNetPlannerResEncM -np 2

# --- 5/8: RE-RUNNING FAILED STEP 5 (with correct command) ---
print("\n--- 5/8: Re-running planning and preprocessing Dataset 003 ---")
# This is the CORRECT command from the original 60-min cell
# It will create the '...ForcedLowres.json' file we need
!nnUNetv2_plan_and_preprocess -d 3 --verify_dataset_integrity -pl nnUNetPlannerResEncMForcedLowres -overwrite_target_spacing 1.0 1.0 1.0 -c 3d_fullres -np 2

# --- 6/8: RE-RUNNING FAILED STEP 6 (will now work) ---
print("\n--- 6/8: Re-running JSON patch and re-preprocessing ---")
import json
# This file will now exist
plan_file = "/workspace/data/nnUNet/nnUNet_preprocessed/Dataset003_VesselGrouping/nnUNetResEncUNetMPlans_ForcedLowres.json"

try:
    with open(plan_file, 'r') as f:
        data = json.load(f)
    
    # Set the patch size as specified by the winner
    data['plans_per_stage'][0]['patch_size'] = [128, 128, 128]
    
    with open(plan_file, 'w') as f:
        json.dump(data, f, indent=4)
    
    print("✅ Patched nnUNet plans.json successfully.")
    
    # 6.2 Re-preprocess
    !nnUNetv2_preprocess -d 3 -plans_name nnUNetResEncUNetMPlans_ForcedLowres -c 3d_fullres -np 2
    print("✅ Re-preprocessing for Dataset 003 complete.")

except Exception as e:
    print(f"❌ Error patching JSON. This means the steps above failed again: {e}")

print("\n\n✅✅✅ Preprocessing Fix Complete! ✅✅✅")