# nnUNet pre-processing:

Preprocessing the PI-CAI dataset for the centralized and federated learning (FL) experiments.

In [None]:
import os

# Tell nnU-Net where the raw, preprocessed and results folders live.
os.environ.update(
    {
        'nnUNet_raw': './workdir/nnUNet_raw',
        'nnUNet_preprocessed': './workdir/nnUNet_preprocessed',
        'nnUNet_results': './workdir/nnUNet_results',
    }
)

### Converting mha to nnUNet structure
You can run this step either directly in the terminal or from the Jupyter notebook:

```bash
python -m picai_prep mha2nnunet_settings \
    --structure picai_archive \
    --input ./input/images/ \
    --annotations ./input/labels/csPCa_lesion_delineations/human_expert/resampled/ \
    --json ./workdir/nnUNet_raw/mha2nnunet_settings.json
```

In [None]:
from picai_prep.examples.mha2nnunet.picai_archive import generate_mha2nnunet_settings

# Generate the mha2nnunet settings JSON from the image archive and the
# annotation folder.
settings_kwargs = {
    "archive_dir": "./input/images/",
    "annotations_dir": "./input/labels_combined",
    "output_path": "./workdir/nnUNet_raw/mha2nnunet_settings.json",
}
generate_mha2nnunet_settings(**settings_kwargs)

In [None]:
from picai_prep import MHA2nnUNetConverter

# Configure the converter with the scan/annotation folders and the settings
# file generated earlier, then run the conversion and write dataset.json.
converter_kwargs = dict(
    scans_dir="./input/images",
    annotations_dir="./input/labels_combined",
    output_dir="./workdir/nnUNet_raw",
    mha2nnunet_settings="./workdir/nnUNet_raw/mha2nnunet_settings.json",
)
archive = MHA2nnUNetConverter(**converter_kwargs)

archive.convert()
archive.create_dataset_json()

Ensure that the labels are binarized, meaning that voxels belonging to the cancerous lesion are represented as 1, and voxels belonging to the background are represented as 0.

In [None]:
import os
import nibabel as nib
import numpy as np

def process_nifti_labels(input_folder, output_folder):
    """Binarize every ``.nii.gz`` volume found in ``input_folder``.

    Voxels with a value >= 1 become 1 and all other voxels become 0. Each
    result is written to ``output_folder`` under the same filename, with the
    affine of the input volume and a copy of its header whose data type is
    switched to ``uint8``.

    Args:
        input_folder: Directory holding the ``.nii.gz`` files to binarize.
            Files with any other extension are skipped.
        output_folder: Directory the binarized files are written to; it is
            created if it does not exist.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so that files are processed in a deterministic order.
    for filename in sorted(os.listdir(input_folder)):
        if not filename.endswith(".nii.gz"):
            continue

        input_path = os.path.join(input_folder, filename)
        nifti = nib.load(input_path)

        data = nifti.get_fdata()
        binarized = (data >= 1).astype(np.uint8)

        # When a header is passed to Nifti1Image, the header's data type (not
        # the array's) decides the on-disk dtype. Copy the header and set it
        # to uint8 so the saved label really is stored as uint8.
        header = nifti.header.copy()
        header.set_data_dtype(np.uint8)

        output_path = os.path.join(output_folder, filename)
        nib.save(nib.Nifti1Image(binarized, nifti.affine, header), output_path)


# Binarize the label volumes in place. The input must be labelsTr: reading
# imagesTr would threshold the MRI scans and write them into the label folder
# under the image filenames, leaving the real labels untouched.
input_folder = "./workdir/nnUNet_raw/Dataset104_picai/labelsTr"
output_folder = "./workdir/nnUNet_raw/Dataset104_picai/labelsTr"

process_nifti_labels(input_folder, output_folder)

### Run nnU-Net Preprocessing

Before running preprocessing, make sure that the raw data folders are structured correctly:

```
workdir/  
├── nnUNet_raw/  
│   └── Dataset104_picai/  
│       ├── imagesTr/  
│       ├── labelsTr/  
│       └── dataset.json
├── nnUNet_preprocessed/  
└── nnUNet_results/  
```

where `dataset.json` contains:
```json
{
    "channel_names": {
        "0": "T2W",
        "1": "ADC",
        "2": "HBV"
    },
    "labels": {
        "background": 0,
        "lesion": 1
    },
    "numTraining": 1500,
    "file_ending": ".nii.gz",
    "name": "picai_nnunetv2",
    "reference": "none",
    "release": "1.0",
    "description": "bpMRI scans from PI-CAI dataset to train by nnUNetv2",
    "overwrite_image_reader_writer": "SimpleITKIO"
}
```

Then, run the following commands directly in the terminal: first plan and preprocess the dataset, then train the model (shown here for fold 0):

```bash
nnUNetv2_plan_and_preprocess -d 104 -c 3d_fullres --verify_dataset_integrity
```

```bash
nnUNetv2_train Dataset104_picai 3d_fullres 0 -tr nnUNetTrainerCELoss_1000epochs --npz
```

After training the models for all 5 folds, the best configuration (ensemble) can be determined with:

```bash
nnUNetv2_find_best_configuration 104 -c 3d_fullres -tr nnUNetTrainerCELoss_1000epochs
```

Then run the prediction and compute the evaluation metrics following the PI-CAI guidelines.

```bash
nnUNetv2_predict -d Dataset104_picai -i ./imagesTs -o ./workdir/nnUNet_predictions/ -f  0 1 2 3 4 -tr nnUNetTrainerCELoss_1000epochs -c 3d_fullres -p nnUNetPlans
```

In [None]:
from picai_eval import evaluate_folder
from report_guided_annotation import extract_lesion_candidates

pred_softmax = "./workdir/nnUNet_predictions"
annotations = "./labelsTs"


def _to_lesion_candidates(pred):
    """Return the first item produced by the dynamic-threshold candidate extraction."""
    return extract_lesion_candidates(pred, threshold="dynamic")[0]


# Compare the predictions against the annotations, post-processing each
# prediction with the helper above before it is scored.
metrics = evaluate_folder(
    y_det_dir=pred_softmax,
    y_true_dir=annotations,
    y_det_postprocess_func=_to_lesion_candidates,
)

auroc, ap = metrics.auroc, metrics.AP

print("\n")
# The PI-CAI score is the mean of AUROC and AP.
for label, value in (("AUROC", auroc), ("AP", ap), ("PICAI score", .5 * (auroc + ap))):
    print(f"{label}: {round(value, 4)}")