In [1]:
import nibabel as nib
import numpy as np
from pathlib import Path

data_dir = Path("/nfs/khan/trainees/apooladi/abeta/data/AD_plaques")

# Check a raw image: report the geometry and on-disk dtype recorded in its header.
img = nib.load(data_dir / "raw" / "patchvolume_000_0000.nii.gz")
print("=== RAW IMAGE ===")
print(f"Shape: {img.shape}")
# pixdim[1:4] are the header entries for the three spatial axes (NIfTI convention).
print(f"Pixdim: {img.header['pixdim'][1:4]}")
print(f"Dtype: {img.header.get_data_dtype()}")
print(f"Affine:\n{img.affine}")

# Check the label volume that belongs to the same patch.
lbl = nib.load(data_dir / "gt" / "patchvolume_000.nii.gz")
lbl_data = np.asarray(lbl.dataobj)
# Tally the label values in a single pass: np.unique has to sort the whole
# volume, so asking separately for the values and for the counts doubles the work.
label_values, label_counts = np.unique(lbl_data, return_counts=True)
print("\n=== LABEL ===")
print(f"Shape: {lbl.shape}")
print(f"Pixdim: {lbl.header['pixdim'][1:4]}")
print(f"Dtype: {lbl.header.get_data_dtype()}")
print(f"Unique values: {label_values}")
# .tolist() yields plain Python ints, so the dict prints as {0: n, 1: m}
# no matter how the installed NumPy version formats its scalar types.
print(f"Value counts: {dict(zip(label_values.tolist(), label_counts.tolist()))}")

=== RAW IMAGE ===
Shape: (300, 300, 300)
Pixdim: [1. 1. 1.]
Dtype: float64
Affine:
[[-1.  0.  0.  0.]
 [ 0. -1.  0.  0.]
 [ 0.  0.  1.  0.]
 [ 0.  0.  0.  1.]]

=== LABEL ===
Shape: (300, 300, 300)
Pixdim: [1. 1. 1.]
Dtype: uint16
Unique values: [0 1]
Value counts: {0: 26992698, 1: 7302}


In [2]:
import json
import os
from pathlib import Path

# ============================================================
# CONFIGURATION - adjust these paths as needed
# ============================================================
# Where the source volumes live, and the root under which the nnUNet tree is built
source_dir = Path("/nfs/khan/trainees/apooladi/abeta/data/AD_plaques")
nnunet_base = Path("/nfs/khan/trainees/apooladi/abeta/nnssl/nnunet_data")

# Dataset ID and the folder name derived from it (ID zero-padded to three digits)
dataset_id = 1
dataset_name = f"Dataset{dataset_id:03d}_ABetaPlaques"

# The three top-level nnUNet folders all sit directly under the base directory
nnunet_raw, nnunet_preprocessed, nnunet_results = (
    nnunet_base / subfolder for subfolder in ("raw", "preprocessed", "results")
)

# Training images and labels go into sub-folders of this dataset's raw folder
dataset_dir = nnunet_raw.joinpath(dataset_name)
imagesTr = dataset_dir.joinpath("imagesTr")
labelsTr = dataset_dir.joinpath("labelsTr")

# ============================================================
# CREATE DIRECTORIES
# ============================================================
# The three nnUNet roots plus both training sub-folders. Missing parents are
# created along the way, and folders that already exist raise no error.
required_dirs = (nnunet_raw, nnunet_preprocessed, nnunet_results, imagesTr, labelsTr)
for folder in required_dirs:
    folder.mkdir(parents=True, exist_ok=True)

print(f"Created dataset structure at: {dataset_dir}")

# ============================================================
# SYMLINK RAW IMAGES (already have _0000 suffix) AND LABELS
# ============================================================
def _case_ids(files, suffix):
    """Return the set of file names in `files` with the trailing `suffix` cut off."""
    return {p.name[: -len(suffix)] for p in files}


def _symlink_into(files, target_dir):
    """Create a symlink in `target_dir` for every path in `files`.

    Each link keeps the source file's name and points at its resolved
    (absolute) location. An entry of the same name already present in
    `target_dir` is removed first, so the cell can be re-run safely.
    """
    for src in files:
        link = target_dir / src.name
        # exists() follows links and is False for a dangling one, so
        # is_symlink() is needed to catch stale links as well.
        if link.exists() or link.is_symlink():
            link.unlink()
        link.symlink_to(src.resolve())


raw_files = sorted(source_dir.glob("raw/*.nii.gz"))
gt_files = sorted(source_dir.glob("gt/*.nii.gz"))

# Validate before creating any links: an empty or mismatched dataset would
# otherwise be written out silently and only fail later inside nnUNet.
if not raw_files or not gt_files:
    raise FileNotFoundError(
        f"Found {len(raw_files)} raw images and {len(gt_files)} labels under {source_dir}"
    )

# dataset.json declares a single channel "0", so every image must carry _0000.
missing_channel = [p.name for p in raw_files if not p.name.endswith("_0000.nii.gz")]
if missing_channel:
    raise ValueError(f"Raw images without the _0000 channel suffix: {missing_channel}")

# Image <case>_0000.nii.gz and label <case>.nii.gz must exist for the same cases.
unpaired = sorted(_case_ids(raw_files, "_0000.nii.gz") ^ _case_ids(gt_files, ".nii.gz"))
if unpaired:
    raise ValueError(f"Cases lacking either an image or a label: {unpaired}")

_symlink_into(raw_files, imagesTr)
print(f"Symlinked {len(raw_files)} raw images to imagesTr/")

_symlink_into(gt_files, labelsTr)
print(f"Symlinked {len(gt_files)} label files to labelsTr/")

# ============================================================
# CREATE dataset.json
# ============================================================
# One input channel: lightsheet fluorescence microscopy
channel_names = {"0": "LSFM"}
# Label name -> integer value used in the label volumes
label_map = {"background": 0, "plaque": 1}

dataset_json = {
    "channel_names": channel_names,
    "labels": label_map,
    "numTraining": len(gt_files),   # one training case per label file
    "file_ending": ".nii.gz",
}

# Serialise first, then write the finished text to disk in one call
json_path = dataset_dir / "dataset.json"
json_path.write_text(json.dumps(dataset_json, indent=2))

# Summary, plus the shell exports that point nnUNet at the folders used here
print(f"\nCreated dataset.json with {len(gt_files)} training cases")
print(f"\n{'='*60}")
print(f"Dataset ready at: {dataset_dir}")
print(f"\nSet these environment variables before running nnUNet:")
print(f"  export nnUNet_raw=\"{nnunet_raw}\"")
print(f"  export nnUNet_preprocessed=\"{nnunet_preprocessed}\"")
print(f"  export nnUNet_results=\"{nnunet_results}\"")

Created dataset structure at: /nfs/khan/trainees/apooladi/abeta/nnssl/nnunet_data/raw/Dataset001_ABetaPlaques
Symlinked 34 raw images to imagesTr/
Symlinked 34 label files to labelsTr/

Created dataset.json with 34 training cases

Dataset ready at: /nfs/khan/trainees/apooladi/abeta/nnssl/nnunet_data/raw/Dataset001_ABetaPlaques

Set these environment variables before running nnUNet:
  export nnUNet_raw="/nfs/khan/trainees/apooladi/abeta/nnssl/nnunet_data/raw"
  export nnUNet_preprocessed="/nfs/khan/trainees/apooladi/abeta/nnssl/nnunet_data/preprocessed"
  export nnUNet_results="/nfs/khan/trainees/apooladi/abeta/nnssl/nnunet_data/results"


In [3]:
import subprocess
# List the final/best checkpoints saved under the results folder of each
# bit-depth variant (16-bit and 8-bit) of the pretraining data.
for bitdepth in ["16", "8"]:
    base = f"/nfs/khan/trainees/apooladi/abeta/nnssl_data/{bitdepth}/nnssl_data/results"
    # With no explicit action, find applies its implicit -print to the whole
    # "-name A -o -name B" expression, so files matching either name are listed.
    result = subprocess.run(
        ["find", base, "-name", "checkpoint_final.pth", "-o", "-name", "checkpoint_best.pth"],
        capture_output=True, text=True
    )
    print(f"\n=== {bitdepth}-bit checkpoints ===")
    found = result.stdout.strip()
    print(found if found else "No checkpoints found")
    # find reports problems (missing results folder, unreadable sub-directories)
    # on stderr with a non-zero exit status. Show them so that a failed search
    # is not mistaken for a results folder that contains no checkpoints.
    if result.returncode != 0 or result.stderr.strip():
        print(f"find exited with status {result.returncode}: {result.stderr.strip()}")


=== 16-bit checkpoints ===
/nfs/khan/trainees/apooladi/abeta/nnssl_data/16/nnssl_data/results/Dataset001_Pretrain/SimCLRTrainer_BS16_256iso__nnsslPlans__noresample/fold_all/checkpoint_best.pth
/nfs/khan/trainees/apooladi/abeta/nnssl_data/16/nnssl_data/results/Dataset001_Pretrain/SimCLRTrainer_BS8_256iso__nnsslPlans__noresample/fold_all/checkpoint_best.pth
/nfs/khan/trainees/apooladi/abeta/nnssl_data/16/nnssl_data/results/Dataset001_Pretrain/SimCLRTrainer_BS8_256iso__nnsslPlans__noresample/fold_all/checkpoint_final.pth
/nfs/khan/trainees/apooladi/abeta/nnssl_data/16/nnssl_data/results/Dataset001_Pretrain/BYOLTrainer_BS24_256iso__nnsslPlans__noresample/fold_all/checkpoint_best.pth
/nfs/khan/trainees/apooladi/abeta/nnssl_data/16/nnssl_data/results/Dataset001_Pretrain/BYOLTrainer_BS16_256iso__nnsslPlans__noresample/fold_all/checkpoint_best.pth
/nfs/khan/trainees/apooladi/abeta/nnssl_data/16/nnssl_data/results/Dataset001_Pretrain/SimCLRTrainer_BS20_256iso__nnsslPlans__noresample/fold_all/c