Train loop stops #18

Merged: 3 commits, merged Jun 4, 2021
Task_1/FeTS_Challenge.ipynb (77 changes: 56 additions & 21 deletions)
@@ -17,7 +17,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -27,7 +27,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -59,7 +59,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -106,7 +106,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -256,7 +256,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -345,7 +345,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -489,7 +489,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -606,46 +606,81 @@
"- ```device``` : Which device to use for training and validation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setting up the experiment\n",
"Now that we've defined our custom functions, the last thing to do is to configure the experiment. The following cell shows the various settings you can change in your experiment.\n",
"\n",
"Note that ```rounds_to_train``` can be set as high as you want. However, the experiment will exit once the simulated time value exceeds 1 week of simulated time, or if the specified number of rounds has completed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# change any of these you wish to your custom functions. You may leave defaults if you wish.\n",
"aggregation_function = weighted_average_aggregation\n",
"choose_training_collaborators = all_collaborators_train\n",
"training_hyper_parameters_for_round = constant_hyper_parameters\n",
"validation_functions = [('sensitivity', sensitivity), ('specificity', specificity)]\n",
"\n",
"# Final scoring will be on partitioning_1, partitioning_2, and a hidden partitioning\n",
"# We encourage you to experiment with other partitionings\n",
"institution_split_csv_filename = 'partitioning_1.csv'\n",
"\n",
"# change this to point to the parent directory of the data\n",
"brats_training_data_parent_dir = '/raid/datasets/FeTS21/MICCAI_FeTS2021_TrainingData'\n",
"\n",
"# increase this if you need a longer history for your algorithms\n",
"# decrease this if you need to reduce system RAM consumption\n",
"db_store_rounds = 5\n",
"rounds_to_train = 5\n",
"\n",
"# this is passed to PyTorch, so set it accordingly for your system\n",
"device = 'cuda'\n",
"\n",
"run_challenge_experiment(aggregation_function=aggregation_function,\n",
" choose_training_collaborators=choose_training_collaborators,\n",
" training_hyper_parameters_for_round=training_hyper_parameters_for_round,\n",
" validation_functions=validation_functions,\n",
" institution_split_csv_filename=institution_split_csv_filename,\n",
" brats_training_data_parent_dir=brats_training_data_parent_dir,\n",
" db_store_rounds=db_store_rounds,\n",
" rounds_to_train=rounds_to_train,\n",
" device=device)"
"# you'll want to increase this most likely. You can set it as high as you like, \n",
"# however, the experiment will exit once the simulated time exceeds one week. \n",
"rounds_to_train = 5"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# the scores are returned in a Pandas dataframe\n",
"scores_dataframe = run_challenge_experiment(\n",
" aggregation_function=aggregation_function,\n",
" choose_training_collaborators=choose_training_collaborators,\n",
" training_hyper_parameters_for_round=training_hyper_parameters_for_round,\n",
" validation_functions=validation_functions,\n",
" institution_split_csv_filename=institution_split_csv_filename,\n",
" brats_training_data_parent_dir=brats_training_data_parent_dir,\n",
" db_store_rounds=db_store_rounds,\n",
" rounds_to_train=rounds_to_train,\n",
" device=device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"scores_dataframe"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "fets_challenge_test_2",
"display_name": "openfl",
"language": "python",
"name": "fets_challenge_test_2"
"name": "openfl"
},
"language_info": {
"codemirror_mode": {
@@ -657,7 +692,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.12"
"version": "3.6.13"
}
},
"nbformat": 4,
Task_1/README.md (5 changes: 3 additions & 2 deletions)
@@ -32,6 +32,9 @@ Along with the typical DICE and Hausdorff metrics, we include a "time to converg

The time to convergence metric will be computed as the area under the validation learning curve over 1 week of simulated time, where the horizontal axis measures simulated runtime and the vertical axis measures the current best score, computed as the average of enhancing tumor, tumor core, and whole tumor DICE scores over the validation split of the training data.

You can find the code for the "time to convergence metric" in the `experiment.py` file by searching for `## CONVERGENCE METRIC COMPUTATION`.
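
To make this concrete, here is a minimal sketch of the idea, not the repository's implementation: it treats the learning curve as a step function of the running best score and assumes, purely as an illustration, that the area is normalized by the one-week horizon.

```python
# Sketch of the time-to-convergence idea: area under the running-best
# validation score, as a function of simulated time, capped at one week.
# The step-function convention and the normalization are assumptions of
# this sketch; see experiment.py for the authoritative computation.
MAX_SIMULATION_TIME = 7 * 24 * 60 * 60  # one week, in seconds

def convergence_score(round_end_times, round_scores):
    """round_end_times: cumulative simulated seconds at the end of each round;
    round_scores: mean of ET, TC, and WT DICE at each round."""
    auc, best, prev_t = 0.0, 0.0, 0.0
    for t, score in zip(round_end_times, round_scores):
        t = min(t, MAX_SIMULATION_TIME)
        auc += best * (t - prev_t)  # the best-so-far holds until this round ends
        best = max(best, score)
        prev_t = t
        if prev_t >= MAX_SIMULATION_TIME:
            break
    auc += best * (MAX_SIMULATION_TIME - prev_t)  # project the final best to one week
    return auc / MAX_SIMULATION_TIME
```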

### How Simulated Time Is Computed
Simulated time is stochastic and computed per collaborator, per round; the round time equals the greatest round time over all collaborators in the round.

A given collaborator's round time is computed as the sum of:
@@ -57,8 +60,6 @@ We assign these network and compute distributions by drawing uniform-randomly fr

For a given collaborator, these normal distributions are constant throughout the experiment. Again, each possible timing distribution is based on actual timing information from a subset of the hospitals in the FeTS initiative. You can find these distributions in the `experiment.py` file (search for `## COLLABORATOR TIMING DISTRIBUTIONS`), as well as the random seed used to ensure reproducibility.
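
As a rough illustration of this timing model, the sketch below draws a fixed distribution per collaborator and takes the round time as the maximum over collaborators. All parameter values and helper names here are invented for the example, and only the compute portion is modeled; the real model also includes network (download and upload) times, and its actual distributions and seed live in `experiment.py`.

```python
# Illustrative sketch of the per-collaborator timing model (hypothetical
# values; the real distributions are in experiment.py under
# ## COLLABORATOR TIMING DISTRIBUTIONS).
from collections import namedtuple
import numpy as np

TimingStats = namedtuple('TimingStats', ['training_mean', 'training_std',
                                         'validation_mean', 'validation_std'])

# pool of possible timing distributions (placeholder seconds-per-sample values)
TIMING_POOL = [TimingStats(30.0, 5.0, 8.0, 1.5),
               TimingStats(45.0, 9.0, 12.0, 2.0)]

rng = np.random.default_rng(0)  # fixed seed, mirroring the reproducibility note

def assign_timing_stats(collaborator_names):
    # each collaborator draws one distribution uniformly at random
    # and keeps it for the entire experiment
    return {name: TIMING_POOL[rng.integers(len(TIMING_POOL))]
            for name in collaborator_names}

def simulated_round_time(stats_by_collab, train_sizes, valid_sizes):
    # per-round times are resampled from each collaborator's fixed
    # distribution; the round takes as long as its slowest collaborator
    times = []
    for name, stats in stats_by_collab.items():
        train_per = max(1.0, rng.normal(stats.training_mean, stats.training_std))
        valid_per = max(1.0, rng.normal(stats.validation_mean, stats.validation_std))
        times.append(train_sizes[name] * train_per + valid_sizes[name] * valid_per)
    return max(times)
```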

You can find the code for the "time to convergence metric" in the experiment.py file by searching for ## CONVERGENCE METRIC COMPUTATION.

## Data Partitioning and Sharding
The FeTS 2021 data release consists of a training set and two CSV files, each describing how to partition the training data into non-IID institutional subsets. The release contains subfolders for single-patient records whose names have the format `FeTS21_Training_###`, along with two CSV files:
- **partitioning_1.csv**
Task_1/fets_challenge/experiment.py (48 changes: 46 additions & 2 deletions)
@@ -11,6 +11,7 @@
from pathlib import Path

import numpy as np
import pandas as pd
from openfl.utilities import split_tensor_dict_for_holdouts, TensorKey
from openfl.protocols import utils
import openfl.native as fx
@@ -19,6 +20,10 @@
from .custom_aggregation_wrapper import CustomAggregationWrapper

# one week
# MINUTE = 60
# HOUR = 60 * MINUTE
# DAY = 24 * HOUR
# WEEK = 7 * DAY
MAX_SIMULATION_TIME = 7 * 24 * 60 * 60

## COLLABORATOR TIMING DISTRIBUTIONS
@@ -194,6 +199,13 @@ def compute_times_per_collaborator(collaborator_names,
data_size *= epochs_per_round
time += data_size * training_time_per

# if training, we also validate the locally updated model
data_size = data.get_valid_data_size()
validation_time_per = np.random.normal(loc=stats.validation_mean,
scale=stats.validation_std)
validation_time_per = max(1, validation_time_per)
time += data_size * validation_time_per

# upload time
upload_time = np.random.normal(loc=stats.upload_speed_mean,
scale=stats.upload_speed_std)
@@ -295,6 +307,19 @@ def run_challenge_experiment(aggregation_function,
best_dice = -1.0
best_dice_over_time_auc = 0

# results dataframe data
experiment_results = {
'round':[],
'time': [],
'convergence_score': [],
'binary_dice_wt': [],
'binary_dice_et': [],
'binary_dice_tc': [],
'hausdorff95_wt': [],
'hausdorff95_et': [],
'hausdorff95_tc': [],
}

for round_num in range(rounds_to_train):
# pick collaborators to train for the round
training_collaborators = choose_training_collaborators(collaborator_names,
@@ -416,13 +441,32 @@
# End of round summary
summary = '"**** END OF ROUND {} SUMMARY *****"'.format(round_num)
summary += "\n\tSimulation Time: {} minutes".format(round(total_simulated_time / 60, 2))
summary += "\n\tProjected Convergence Score: {}".format(projected_auc)
summary += "\n\t(Projected) Convergence Score: {}".format(projected_auc)
summary += "\n\tBinary DICE WT: {}".format(binary_dice_wt)
summary += "\n\tBinary DICE ET: {}".format(binary_dice_et)
summary += "\n\tBinary DICE TC: {}".format(binary_dice_tc)
summary += "\n\tHausdorff95 WT: {}".format(hausdorff95_wt)
summary += "\n\tHausdorff95 ET: {}".format(hausdorff95_et)
summary += "\n\tHausdorff95 TC: {}".format(hausdorff95_tc)

experiment_results['round'].append(round_num)
experiment_results['time'].append(total_simulated_time)
experiment_results['convergence_score'].append(projected_auc)
experiment_results['binary_dice_wt'].append(binary_dice_wt)
experiment_results['binary_dice_et'].append(binary_dice_et)
experiment_results['binary_dice_tc'].append(binary_dice_tc)
experiment_results['hausdorff95_wt'].append(hausdorff95_wt)
experiment_results['hausdorff95_et'].append(hausdorff95_et)
experiment_results['hausdorff95_tc'].append(hausdorff95_tc)

logger.info(summary)


# if the total_simulated_time has exceeded the maximum time, we break
# in practice, this means that the previous round's model is the last model scored,
# so a long final round should not actually benefit the competitor, since that final
# model is never globally validated
if total_simulated_time > MAX_SIMULATION_TIME:
logger.info("Simulation time exceeded. Ending Experiment")
break

return pd.DataFrame.from_dict(experiment_results)
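
For reference, the returned DataFrame has one row per completed round, with the columns built in the `experiment_results` dict above. A minimal sketch of inspecting it (the numbers below are placeholders, not real results):

```python
import pandas as pd

# placeholder rows shaped like the output of run_challenge_experiment
scores_dataframe = pd.DataFrame({
    'round': [0, 1],
    'time': [9000.0, 18500.0],  # cumulative simulated seconds
    'convergence_score': [0.01, 0.02],
    'binary_dice_wt': [0.55, 0.61],
    'binary_dice_et': [0.40, 0.47],
    'binary_dice_tc': [0.45, 0.52],
    'hausdorff95_wt': [30.0, 25.0],
    'hausdorff95_et': [28.0, 22.0],
    'hausdorff95_tc': [29.0, 24.0],
})

# e.g. track the convergence score across rounds
print(scores_dataframe[['round', 'time', 'convergence_score']])
```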