In [None]:
# 1. Copy the entire code repository from our "code dataset"
#    This path matches your new setup.
!cp -r /kaggle/input/rsna2025/rsna2025 /kaggle/working/rsna_project

# 2. Move into our new project directory
%cd /kaggle/working/rsna_project

# 3. Install system dependencies (dcm2niix)
print("--- Installing dcm2niix ---")
!apt-get update
!apt-get install -y dcm2niix

# 4. Install all Python packages from the requirements file
print("\n--- Installing Python packages ---")
!pip install -r pip_packages/requirements.txt

# 5. Install the custom nnU-Net
print("\n--- Installing local nnU-Net ---")
!pip install -e ./nnUNet

print("\n✅ Setup complete! All code is local and all packages are installed.")

/kaggle/working/rsna_project
--- Installing dcm2niix ---
Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]      
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease                         
Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]        
Get:6 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]           
Get:7 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [2,138 kB]
Get:8 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]      
Get:9 https://r2u.stat.illinois.edu/ubuntu jammy/main amd64 Packages [2,825 kB]
Get:10 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease [18.1 kB]
Get:11 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1,289

In [None]:
# Change into the project root: later cells invoke the repository's scripts via
# relative paths (e.g. `src/...`), so they only resolve from this directory.
%cd /kaggle/working/rsna_project

In [None]:
import pandas as pd
import numpy as np

# --- Create the directory structure the winner's hardcoded scripts expect ---
!mkdir -p /workspace/data
!mkdir -p /workspace/data/nnUNet/nnUNet_raw
!mkdir -p /workspace/data/nnUNet/nnUNet_preprocessed
!mkdir -p /workspace/data/nnUNet/nnUNet_results

# --- Link Competition Data into /workspace/data ---
!ln -s /kaggle/input/rsna-intracranial-aneurysm-detection/series /workspace/data/series
!ln -s /kaggle/input/rsna-intracranial-aneurysm-detection/segmentations /workspace/data/segmentations
!ln -s /kaggle/input/rsna-intracranial-aneurysm-detection/train_localizers.csv /workspace/data/train_localizers.csv

# --- Copy the error file the script was looking for ---
!cp /kaggle/working/rsna_project/data/error_data.yaml /workspace/data/error_data.yaml

# --- Set the environment variables to this new location ---
%env nnUNet_raw="/workspace/data/nnUNet/nnUNet_raw"
%env nnUNet_preprocessed="/workspace/data/nnUNet/nnUNet_preprocessed"
%env nnUNet_results="/workspace/data/nnUNet/nnUNet_results"

print("✅ Fixed environment! '/workspace/data' is now set up.")

In [None]:
import pandas as pd
import numpy as np

# Tunables for the subsample; every later step only sees the scans chosen here.
N_SAMPLE_SCANS = 50   # upper bound on the number of scans kept
SAMPLE_SEED = 42      # fixed seed so the same scans are selected on every run

# 1. Load the REAL training file from the competition data
full_train_df = pd.read_csv('/kaggle/input/rsna-intracranial-aneurysm-detection/train.csv')

# 2. Get a list of all unique scan IDs
all_scan_ids = full_train_df['SeriesInstanceUID'].unique()
print(f"Total scans in dataset: {len(all_scan_ids)}")

# 3. Select a small sample of scans.
#    Clamp to what is available: sampling without replacement raises a
#    ValueError when more items are requested than exist.
n_sample = min(N_SAMPLE_SCANS, len(all_scan_ids))
np.random.seed(SAMPLE_SEED)  # for reproducible results
sample_scan_ids = np.random.choice(all_scan_ids, n_sample, replace=False)

# 4. Filter the main dataframe to ONLY include the sampled scans
sample_df = full_train_df[full_train_df['SeriesInstanceUID'].isin(sample_scan_ids)]

# 5. Save this SMALL dataframe to the location the code expects
sample_df.to_csv('/workspace/data/train.csv', index=False)
print(f"✅ Created a sample 'train.csv' with {len(sample_scan_ids)} scans at /workspace/data/.")

In [None]:
import SimpleITK as sitk
import pydicom
import joblib
import subprocess
from tqdm import tqdm
from pathlib import Path
import numpy as np
import pandas as pd
import os
import json

# --- 0. SETUP & SANITY CHECKS ---
# Ensure we are in the correct directory
%cd /kaggle/working/rsna_project
print(f"Current directory: {os.getcwd()}")

# Force-set environment variables again
os.environ['nnUNet_raw'] = "/workspace/data/nnUNet/nnUNet_raw"
os.environ['nnUNet_preprocessed'] = "/workspace/data/nnUNet/nnUNet_preprocessed"
os.environ['nnUNet_results'] = "/workspace/data/nnUNet/nnUNet_results"
print("✅ Environment variables set.")

# --- 1/8: DICOM CONVERSION (ROBUST VERSION) ---
print("\n--- 1/8: Converting DICOM to NIfTI ---")

# 1. Define the config 
class CFG:
    """Static settings for the DICOM -> NIfTI conversion step."""

    # Source DICOM tree; run_dcm2niix reads `img_dir / <series id>`.
    img_dir: Path = Path("/workspace/data/series")
    # Conversion output tree; run_dcm2niix writes to `out_dir / <series id>`.
    out_dir: Path = Path("/workspace/data/series_niix")
    # Not referenced by the conversion functions in this cell.
    csv_dir: Path = Path("/workspace/data")
    # Not referenced by the conversion functions in this cell.
    error_dir: Path = Path("/workspace/data/error_data")
    # Passed to joblib as n_jobs: one worker per CPU reported by the OS.
    num_workers = os.cpu_count()
    
# 2. Define the conversion functions 
def dcm2niix(src_path: Path, dst_path: Path, series_id: str) -> int:
    """Convert one DICOM series directory to NIfTI with the `dcm2niix` CLI.

    Args:
        src_path: Directory holding the DICOM files of a single series.
        dst_path: Output directory; created (with parents) if it does not exist.
        series_id: Used as the output file name (`-f`).

    Returns:
        0 when the command exits successfully, 1 on any failure: non-zero exit
        status, exceeding the 300 s timeout, or the executable being missing or
        not runnable.
    """
    dst_path.mkdir(parents=True, exist_ok=True)
    # -z y: gzip the output, -b n: no BIDS sidecar, -m 2: automatic slice merging.
    cmd = ["dcm2niix", "-o", str(dst_path), "-f", f"{series_id}", "-z", "y", "-b", "n", "-m", "2", str(src_path)]
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True, timeout=300)
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError):
        # OSError covers a missing or non-executable binary, so the caller always
        # gets a status code back instead of an exception raised inside a worker.
        return 1
    return 0

def run_dcm2niix(series_ids: list[str]):
    """Convert every listed series in parallel and print a success/failure tally.

    Each series is read from `CFG.img_dir / <id>` and written to
    `CFG.out_dir / <id>` by `dcm2niix`; `CFG.num_workers` joblib workers are used.
    """
    print(f"Running dcm2niix for {len(series_ids)} series...")

    # Lazy generator: the progress bar advances as joblib pulls tasks from it.
    jobs = (
        joblib.delayed(dcm2niix)(
            src_path=CFG.img_dir / sid,
            dst_path=CFG.out_dir / sid,
            series_id=sid,
        )
        for sid in tqdm(series_ids, desc="Converting")
    )
    statuses = joblib.Parallel(n_jobs=CFG.num_workers)(jobs)

    # dcm2niix() reports 0 for success; everything else counts as a failure.
    n_ok = statuses.count(0)
    n_failed = len(statuses) - n_ok
    print(f"Done: {n_ok} success, {n_failed} failure")

# 3. Run the conversion on the sampled scans listed in /workspace/data/train.csv
try:
    sample_df = pd.read_csv('/workspace/data/train.csv')
    # De-duplicate so a series that appears on several rows is converted only once.
    series_ids_to_process = sample_df['SeriesInstanceUID'].drop_duplicates().tolist()
    print(f"Found {len(series_ids_to_process)} scans to process.")
    run_dcm2niix(series_ids_to_process)
    print("\n✅ DICOM conversion finished.")
except Exception as e:
    print(f"❌ An error occurred during conversion: {e}")
    # Every later step consumes the converted volumes; stop the cell here
    # instead of letting the remaining steps run on missing data.
    raise

# NOTE(review): `!` shell commands do not raise when the command exits with an
# error, so each "✅ ... complete." line below is printed unconditionally --
# read the command output above it before trusting the banner.

# --- 2/8: Moving error data ---
# Script is addressed relative to the project root (current working directory).
print("\n--- 2/8: Moving error data ---")
!python src/my_utils/move_error_data.py

# --- 3/8: Creating nnU-Net Datasets ---
# Same script twice: once with its defaults and once with --dataset-id 3.
# Presumably the default run produces Dataset 001 (per the banner) -- TODO confirm.
print("\n--- 3/8: Creating nnU-Net Dataset 001 & 003 ---")
!python src/nnUnet_utils/create_nnunet_dataset.py
!python src/nnUnet_utils/create_nnunet_dataset.py --dataset-id 3

# --- 4/8: Planning & Preprocessing Dataset 001 (RAM FIX) ---
print("\n--- 4/8: Planning and preprocessing Dataset 001 (3d_fullres only) ---")
# We use -c 3d_fullres to skip the 2d config that crashes RAM
# -np 2 requests two preprocessing processes.
!nnUNetv2_plan_and_preprocess -d 1 --verify_dataset_integrity -pl nnUNetPlannerResEncM -c 3d_fullres -np 2
print("✅ Preprocessing for Dataset 001 complete.")

# --- 5/8: Planning & Preprocessing Dataset 003 (CORRECT COMMAND) ---
print("\n--- 5/8: Planning and preprocessing Dataset 003 ---")
# This is the correct command to create the '...ForcedLowres' plan
# (the next step opens nnUNetResEncUNetMPlans_ForcedLowres.json for Dataset 003);
# the target spacing is overridden to 1.0 1.0 1.0.
!nnUNetv2_plan_and_preprocess -d 3 --verify_dataset_integrity -pl nnUNetPlannerResEncMForcedLowres -overwrite_target_spacing 1.0 1.0 1.0 -c 3d_fullres -np 2
print("✅ Planning and Preprocessing for Dataset 003 complete.")

# --- 6/8: JSON Patch (CORRECT FILE NAME) ---
print("\n--- 6/8: Re-running JSON patch with correct file name ---")
plan_file = "/workspace/data/nnUNet/nnUNet_preprocessed/Dataset003_VesselGrouping/nnUNetResEncUNetMPlans_ForcedLowres.json"

try:
    with open(plan_file, 'r') as f: data = json.load(f)
    
    data['plans_per_stage'][0]['patch_size'] = [128, 128, 128]
    
    with open(plan_file, 'w') as f: json.dump(data, f, indent=4)
    print("✅ Patched nnUNet plans.json successfully.")
    
    # 6.2 Re-preprocess Dataset 003 with the new patch size
    print("\n--- 6.2: Re-preprocessing Dataset 003 with new patch size ---")
    !nnUNetv2_preprocess -d 3 -plans_name nnUNetResEncUNetMPlans_ForcedLowres -c 3d_fullres -np 2
    print("✅ Re-preprocessing for Dataset 003 complete.")
except Exception as e:
    print(f"❌ Error patching JSON. The file '{plan_file}' was not found. {e}")

# --- 7/8 & 8/8: Final Steps ---
print("\n--- 7/8: Creating inference dataset ---")
!python src/nnUnet_utils/create_nnunet_inference_dataset.py
print("\n--- 8/8: Creating empty classifier dataset ---")
!mkdir -p /workspace/data/clf_data/imagesTr
!mkdir -p /workspace/data/clf_data/labelsTr

print("\n\n✅✅✅ Final Preprocessing Complete! ✅✅✅")

In [None]:
# 1. Remove /kaggle/working/FINAL_PREPROCESSED_DATA and everything in it.
#    (Described as the leftover of a failed 'cp'; that copy is not part of the
#    cells shown here -- presumably it was run manually. TODO confirm.)
print("--- 1/4: Deleting corrupted destination folder ---")
!rm -rf /kaggle/working/FINAL_PREPROCESSED_DATA

# 2. Re-create the empty, safe directory
# NOTE(review): no command follows this step and the banner above announces
# "1/4" -- this cell looks truncated; steps 2-4 are missing.


In [None]:
# --- STAGE 1: TRAINING THE nnU-Net SEGMENTATION MODELS ---
# We are training only fold 0, for 15 epochs (which you set manually)

print("--- 1/4: Training Stage 1: Vessel Segmentation (Dataset 001, Fold 0) ---")
!nnUNetv2_train 1 3d_fullres 0 -p nnUNetPlannerResEncM -tr nnUNetTrainerSkeletonRecall_more_DAv3

print("\n--- 2/4: Training Stage 1: Vessel Grouping (Dataset 003, Fold 0) ---")
!nnUNetv2_train 3 3d_fullres 0 -p nnUNetResEncUNetMPlans_ForcedLowres -tr RSNA2025Trainer_moreDAv7

# --- STAGE 1: INFERENCE ---
# Now we use the models we just trained to predict on our 100 scans.
# This creates the ROI (Region of Interest) inputs for the next stage.
print("\n--- 3/4: Running Stage 1 Inference (Creating ROIs) ---")
!python src/my_utils/vessel_segmentation.py

# --- STAGE 2: TRAINING THE CLASSIFIER MODEL ---
# This trains the final model. We override the epochs on the command line.
# trainer.max_epochs=5 tells it to only train for 5 epochs (fast).
print("\n--- 4/4: Training Stage 2: ROI Classifier (Fold 0, 5 Epochs) ---")
!python src/train.py \
  experiment=251013-seg_tf-v4-nnunet_truncate1-preV6_1-ex_dav6w3-m32g64-e25-w01_005_1-s128_256_256 \
  data.fold=0 \
  trainer.max_epochs=1

print("\n\n✅✅✅ Full Training Pipeline Complete! ✅✅✅")