### How to use this notebook
The data used in this notebook is not public. Contact Tamas to get an API key. Then create a Python file named *girder_apikey_read.py* in the same folder as this notebook. That file should contain a single line like the one below, with your own key string in place of the example:
`girder_apikey_read="UjNzqutrfBwuk4t39VlJnJs4t3EZ6i7"`

In [10]:
# Notebook identity and local-environment configuration.

import datetime

this_notebook_name = "BreastTumorSegmentationStudy"

# Root folder for all local data. This should be the only setting that
# needs to change for your local environment.
local_data_folder = r"c:\Data\BreastTumorSegmentationStudy"

# When False, data files that already exist on disk are not downloaded again.
overwrite_existing_data_files = False

# Timestamp taken when this cell runs; intended to label all archived
# results and output of this run.
run_started_at = datetime.datetime.now()
save_timestamp = run_started_at.strftime('%Y-%m-%d_%H-%M-%S')
print("Save timestamp: {}".format(save_timestamp))

# For debugging only: a positive value caps the number of validation rounds
# (e.g. set it to 1 for a quick run); -1 leaves the number uncapped.
limit_validation_rounds = -1

Save timestamp: 2019-12-16_11-46-48


In [11]:
import os
from random import sample

from ipywidgets import IntProgress
from IPython.display import display, HTML

import girder_client
import matplotlib.pyplot as plt
import pandas as pd

'''
import keras
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

import ultrasound_batch_generator as generator
import sagittal_spine_segmentation_unet as unet
import evaluation_metrics
'''

from girder_apikey_read import girder_apikey_read

In [12]:
# Define what data to download

girder_api_url = "https://pocus.cs.queensu.ca/api/v1"

# One row per training case, keeping each case's entries side by side:
# (ultrasound file ID, ultrasound filename, segmentation file ID, segmentation filename)
_training_file_table = [
    ("5df79d42d9e6a3be02d0131d", "ultrasound-004.npy",
     "5df79d3dd9e6a3be02d01305", "segmentation-004.npy"),
    ("5df79d43d9e6a3be02d01320", "ultrasound-005.npy",
     "5df79d3ed9e6a3be02d01308", "segmentation-005.npy"),
]

# Parallel lists derived from the table above; entries at the same index
# belong to the same training case.
training_ultrasound_ids = [row[0] for row in _training_file_table]
training_ultrasound_filenames = [row[1] for row in _training_file_table]
training_segmentation_ids = [row[2] for row in _training_file_table]
training_segmentation_filenames = [row[3] for row in _training_file_table]

In [13]:
# These subfolders will be created/populated in the data folder

data_arrays_folder    = "DataArrays"
notebooks_save_folder = "SavedNotebooks"
results_save_folder   = "SavedResults"
models_save_folder    = "SavedModels"
val_data_folder       = "PredictionsValidation"

# Full paths of the subfolders inside the local data folder
data_arrays_fullpath = os.path.join(local_data_folder, data_arrays_folder)
notebooks_save_fullpath = os.path.join(local_data_folder, notebooks_save_folder)
results_save_fullpath = os.path.join(local_data_folder, results_save_folder)
models_save_fullpath = os.path.join(local_data_folder, models_save_folder)
val_data_fullpath = os.path.join(local_data_folder, val_data_folder)

# Create every folder that does not exist yet (including missing parents),
# reporting only the ones that were actually created. A single loop replaces
# five copies of the same check so the folders cannot drift apart in handling.
for folder_fullpath in (
    data_arrays_fullpath,
    notebooks_save_fullpath,
    results_save_fullpath,
    models_save_fullpath,
    val_data_fullpath,
):
    if not os.path.exists(folder_fullpath):
        os.makedirs(folder_fullpath)
        print("Created folder: {}".format(folder_fullpath))

Created folder: c:\Data\BreastTumorSegmentationStudy\DataArrays
Created folder: c:\Data\BreastTumorSegmentationStudy\SavedNotebooks
Created folder: c:\Data\BreastTumorSegmentationStudy\SavedResults
Created folder: c:\Data\BreastTumorSegmentationStudy\SavedModels
Created folder: c:\Data\BreastTumorSegmentationStudy\PredictionsValidation


In [14]:
# Download data from Girder

time_download_start = datetime.datetime.now()

print("Downloading training files ...")

# The four training lists are indexed in parallel below (same index = same
# case). Fail early with a clear message instead of an IndexError part-way
# through the downloads, or silently ignoring extra entries.

n_files = len(training_ultrasound_ids)
if not (len(training_ultrasound_filenames)
        == len(training_segmentation_ids)
        == len(training_segmentation_filenames)
        == n_files):
    raise ValueError(
        "Training file lists must all have the same length, got: "
        "{} ultrasound ids, {} ultrasound filenames, "
        "{} segmentation ids, {} segmentation filenames".format(
            len(training_ultrasound_ids),
            len(training_ultrasound_filenames),
            len(training_segmentation_ids),
            len(training_segmentation_filenames)))

# Setting up number of validation rounds: a positive limit_validation_rounds
# caps the count (never above the number of files); otherwise there is one
# round per file. The value is not used within this cell.

if limit_validation_rounds > 0:
    num_validation_rounds = min(n_files, limit_validation_rounds)
else:
    num_validation_rounds = n_files

# Preparing progress bar (two steps per case: ultrasound + segmentation)

f = IntProgress(min=0, max=n_files*2)
display(f)

# Downloading files

gclient = girder_client.GirderClient(apiUrl=girder_api_url)
gclient.authenticate(apiKey=girder_apikey_read)

for i in range(n_files):
    ultrasound_fullname = os.path.join(data_arrays_fullpath, training_ultrasound_filenames[i])
    segmentation_fullname = os.path.join(data_arrays_fullpath, training_segmentation_filenames[i])

    # Ultrasound first, then its segmentation; both follow the same rule.
    case_downloads = (
        (training_ultrasound_ids[i], ultrasound_fullname),
        (training_segmentation_ids[i], segmentation_fullname),
    )
    for step, (file_id, target_fullname) in enumerate(case_downloads, start=1):
        # Skip files already on disk unless overwriting was requested.
        if overwrite_existing_data_files or not os.path.exists(target_fullname):
            print("Downloading {}...".format(target_fullname))
            gclient.downloadFile(file_id, target_fullname)
        # Advance the progress bar whether the file was downloaded or skipped.
        f.value = i * 2 + step

time_download_stop = datetime.datetime.now()
print("\nTotal download time: {}".format(time_download_stop - time_download_start))

Downloading training files ...


IntProgress(value=0, max=4)

Downloading c:\Data\BreastTumorSegmentationStudy\DataArrays\ultrasound-004.npy...
Downloading c:\Data\BreastTumorSegmentationStudy\DataArrays\segmentation-004.npy...
Downloading c:\Data\BreastTumorSegmentationStudy\DataArrays\ultrasound-005.npy...
Downloading c:\Data\BreastTumorSegmentationStudy\DataArrays\segmentation-005.npy...

Total download time: 0:00:01.034053
