-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #171 from mzenk/task2-code
Add MLCubes for evaluation
- Loading branch information
Showing
30 changed files
with
1,782 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
name: FeTS challenge 2022 (task 2) Medperf Data Preparator Cube | ||
description: MLCube for building data preparators for MedPerf | ||
authors: | ||
- {name: "MLCommons Medical Working Group"} | ||
- {name: "Maximilian Zenk (DKFZ)"} | ||
|
||
platform: | ||
accelerator_count: 0 | ||
|
||
docker: | ||
# Image name. | ||
image: docker.synapse.org/syn31437293/fets22_data-prep | ||
# Docker build context relative to $MLCUBE_ROOT. Default is `build`. | ||
build_context: "../project" | ||
# Docker file name within docker build context, default is `Dockerfile`. | ||
build_file: "Dockerfile" | ||
|
||
tasks: | ||
prepare: | ||
# This task is in charge of transforming the input data into the format expected by the model cubes. | ||
parameters: | ||
inputs: { | ||
data_path: {type: directory, default: data}, # Value must point to a directory containing the raw data inside workspace | ||
labels_path: {type: directory, default: data}, # Not used in this example | ||
parameters_file: parameters.yaml # Not used in this example | ||
} | ||
outputs: { | ||
output_path: prepped_data/, # Indicates where to store the transformed data. Must contain prepared data | ||
output_labels_path: labels/ # Indicates where to store the transformed data. Must contain labels | ||
} | ||
sanity_check: | ||
# This task ensures that the previously transformed data was transformed correctly. | ||
# It runs a set of tests that check the quality of the data. The rigor of those | ||
# tests is determined by the cube author. | ||
parameters: | ||
inputs: { | ||
data_path: {type: directory, default: prepped_data}, # Value should be the first output of the prepare task | ||
labels_path: labels/, # Value should be the second output of the prepare task | ||
parameters_file: parameters.yaml # Not used in this example | ||
} | ||
statistics: | ||
# This task computes statistics on the prepared dataset. Its purpose is to get a high-level | ||
# idea of what is contained inside the data, without providing any specifics of any single entry | ||
parameters: | ||
inputs: { | ||
data_path: {type: directory, default: prepped_data}, # Value should be the first output of the prepare task | ||
labels_path: labels/, # Value should be the second output of the prepare task | ||
parameters_file: parameters.yaml # Not used in this example | ||
} | ||
outputs: { | ||
output_path: { | ||
type: file, default: statistics.yaml | ||
} | ||
} |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
FROM ubuntu:18.04

# Install all system packages in a single layer so that `apt-get update` and
# `apt-get install` always run together (a cached, stale package index can
# otherwise make a later install fail), and remove the package index once at
# the end so it does not remain in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        software-properties-common \
        python3-dev \
        curl && \
    add-apt-repository ppa:deadsnakes/ppa -y && \
    apt-get update && \
    apt-get install -y python3 python3-pip && \
    rm -rf /var/lib/apt/lists/*

# Copy the requirements first so the dependency layers below are only rebuilt
# when requirements.txt changes, not on every source change.
COPY ./requirements.txt /project/requirements.txt

RUN pip3 install --upgrade pip

RUN pip3 install --no-cache-dir -r /project/requirements.txt

# Set the locale
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8

COPY . /project

WORKDIR /project

ENTRYPOINT ["python3", "/project/mlcube.py"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
# MLCube Entrypoint | ||
# | ||
# This script shows how you can bridge your app with an MLCube interface. | ||
# MLCubes expect the entrypoint to behave like a CLI, where tasks are | ||
# commands, and input/output parameters are command-line arguments. | ||
# You can provide that interface to MLCube in any way you prefer. | ||
# Here, we show a way that requires minimal intrusion to the original code, | ||
# by calling the application's functions directly from the CLI commands. | ||
|
||
import typer | ||
from prepare import run_preparation | ||
from sanity_check import run_sanity_check | ||
from statistics import run_statistics | ||
|
||
|
||
app = typer.Typer() | ||
|
||
|
||
@app.command("prepare")
def prepare(
    data_path: str = typer.Option(..., "--data_path"),
    labels_path: str = typer.Option(..., "--labels_path"),
    params_file: str = typer.Option(..., "--parameters_file"),
    out_path: str = typer.Option(..., "--output_path"),
    out_labels_path: str = typer.Option(..., "--output_labels_path"),
):
    """Entry point for `mlcube run --task=prepare`.

    Forwards the raw-data location and both output locations to
    `run_preparation`. `labels_path` and `params_file` are accepted as
    command-line options but are not passed on.

    Args:
        data_path (str): Location of the data to transform.
        labels_path (str): Location of the labels. Not used by this command.
        params_file (str): Location of the parameters.yaml file. Not used by this command.
        out_path (str): Location to store transformed data.
        out_labels_path (str): Location to store the labels.
    """
    preparation_kwargs = {
        "input_dir": data_path,
        "output_data_dir": out_path,
        "output_label_dir": out_labels_path,
    }
    run_preparation(**preparation_kwargs)
|
||
|
||
@app.command("sanity_check")
def sanity_check(
    data_path: str = typer.Option(..., "--data_path"),
    labels_path: str = typer.Option(..., "--labels_path"),
    params_file: str = typer.Option(..., "--parameters_file"),
):
    """Entry point for `mlcube run --task=sanity_check`.

    Hands the prepared data and label locations to `run_sanity_check`.
    `params_file` is accepted as a command-line option but is not passed on.

    Args:
        data_path (str): Location of the prepared data.
        labels_path (str): Location of the prepared labels.
        params_file (str): Location of the parameters.yaml file. Not used by this command.
    """
    check_kwargs = {
        "data_path": data_path,
        "labels_path": labels_path,
    }
    run_sanity_check(**check_kwargs)
|
||
|
||
@app.command("statistics")
def statistics(
    data_path: str = typer.Option(..., "--data_path"),
    labels_path: str = typer.Option(..., "--labels_path"),
    params_file: str = typer.Option(..., "--parameters_file"),
    output_path: str = typer.Option(..., "--output_path"),
):
    """Compute statistics about the data.

    These statistics are uploaded to the Medperf platform under the data
    owner's approval. Include every statistic you consider useful for
    determining the nature of the data, but keep in mind that we want to
    keep the data as private as possible.

    Args:
        data_path (str): Location of the prepared data.
        labels_path (str): Location of the prepared labels.
        params_file (str): Location of the parameters.yaml file. Not used by this command.
        output_path (str): File to store the statistics. Must be statistics.yaml.
    """
    statistics_kwargs = {
        "data_path": data_path,
        "labels_path": labels_path,
        "out_file": output_path,
    }
    run_statistics(**statistics_kwargs)
|
||
|
||
if __name__ == "__main__": | ||
app() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
from pathlib import Path | ||
import shutil | ||
from tqdm import tqdm | ||
|
||
|
||
def copy_subject(subject_dir: Path, output_dir_data: Path, output_dir_labels: Path):
    """Copy one subject's images and segmentation into the prepared layout.

    Minor naming differences are tolerated on the input side. For every
    modality the candidate file names are tried in order and the first one
    that exists is copied under the canonical (first-listed) name.

    Input format:
        <subject_id>[_brain]_t1.nii.gz etc
        <subject_id>[_brain]_final_seg.nii.gz
    Output format:
        <output_dir_data>/<subject_id>/<subject_id>_brain_t1.nii.gz etc
        <output_dir_labels>/<subject_id>_final_seg.nii.gz

    Args:
        subject_dir: Directory holding the subject's files; its name is used
            as the subject id.
        output_dir_data: Root directory for prepared images. A sub-directory
            named after the subject is created inside it.
        output_dir_labels: Directory receiving the segmentation file.

    A modality for which none of the candidate files exists is skipped and a
    warning is emitted; no exception is raised for it.
    """
    import warnings

    subj_id = subject_dir.name
    # Candidate file names per modality; the first entry is the output name.
    files_to_copy = {
        "t1": [f"{subj_id}_brain_t1.nii.gz", f"{subj_id}_t1.nii.gz"],
        "t1ce": [f"{subj_id}_brain_t1ce.nii.gz", f"{subj_id}_t1ce.nii.gz"],
        "t2": [f"{subj_id}_brain_t2.nii.gz", f"{subj_id}_t2.nii.gz"],
        "flair": [f"{subj_id}_brain_flair.nii.gz", f"{subj_id}_flair.nii.gz"],
        "seg": [
            f"{subj_id}_final_seg.nii.gz",
            f"{subj_id}_brain_final_seg.nii.gz",
            f"{subj_id}_seg.nii.gz",
            f"{subj_id}_brain_seg.nii.gz",
        ],
    }

    # Create both destinations once, up front. `parents=True` lets the
    # function work even when the output roots do not exist yet.
    subject_output_dir = output_dir_data / subj_id
    subject_output_dir.mkdir(parents=True, exist_ok=True)
    output_dir_labels.mkdir(parents=True, exist_ok=True)

    for key, fname_options in files_to_copy.items():
        # Segmentations go to the labels directory, images to the subject's.
        output_dir = output_dir_labels if key == "seg" else subject_output_dir
        source = next(
            (
                subject_dir / filename
                for filename in fname_options
                if (subject_dir / filename).exists()
            ),
            None,
        )
        if source is None:
            warnings.warn(
                f"Subject {subj_id}: no file found for '{key}' "
                f"(tried {', '.join(fname_options)})"
            )
            continue
        shutil.copy2(source, output_dir / fname_options[0])
|
||
|
||
def run_preparation(
    input_dir: str, output_data_dir: str, output_label_dir: str
) -> None:
    """Prepare every subject found directly under `input_dir`.

    Both output directories are created if missing. Each sub-directory of
    `input_dir` is treated as one subject and passed to `copy_subject`;
    plain files in `input_dir` are ignored.

    Args:
        input_dir: Directory containing one sub-directory per subject.
        output_data_dir: Directory that receives the prepared images.
        output_label_dir: Directory that receives the prepared labels.
    """
    data_root = Path(output_data_dir)
    labels_root = Path(output_label_dir)
    for folder in (data_root, labels_root):
        folder.mkdir(parents=True, exist_ok=True)

    subjects = []
    for entry in Path(input_dir).iterdir():
        if entry.is_dir():
            subjects.append(entry)

    print(f"Preparing {len(subjects)} subjects...")
    for subject in tqdm(subjects):
        copy_subject(subject, data_root, labels_root)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
pyYAML | ||
typer | ||
pandas | ||
SimpleITK>=2.1.0 | ||
numpy | ||
tqdm |
Oops, something went wrong.