### How to use this notebook
The data used in this notebook is not public. Contact Tamas to get an API key. Then create a Python file in the same folder as this notebook and name it *girder_apikey_read.py*. That file should contain a single line like the one below, with your own key string in place of the example:
`girder_apikey_read="UjNzqutrfBwuk4t39VlJnJs4t3EZ6i7"`

In [12]:
import datetime

# Identifier of this notebook

this_notebook_name = "BreastTumorSegmentationStudy"

# Local environment settings.
# local_data_folder should be the only value that needs changing per machine;
# the data/result sub-folders of this notebook are created underneath it.

local_data_folder = r"c:\Data\BreastTumorSegmentationStudy"

# When True, data files are downloaded again even if they already exist locally

overwrite_existing_data_files = False

# Timestamp of this run; all results and output will be archived with it

save_timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
print("Save timestamp: {}".format(save_timestamp))

# Debugging aid: a positive value (e.g. 1) caps the number of validation rounds,
# any value <= 0 (the default, -1) means one round per data file

limit_validation_rounds = -1

Save timestamp: 2020-01-13_15-08-48


In [13]:
import os
from random import sample

from ipywidgets import IntProgress
from IPython.display import display, HTML

import girder_client
import matplotlib.pyplot as plt
import pandas as pd

'''
import keras
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

import ultrasound_batch_generator as generator
import sagittal_spine_segmentation_unet as unet
import evaluation_metrics
'''

from girder_apikey_read import girder_apikey_read

In [14]:
# Define what data to download
#
# Base URL of the Girder REST API; it is passed to girder_client.GirderClient
# when the training files are downloaded.

girder_api_url = "https://pocus.cs.queensu.ca/api/v1"

# Girder file IDs of the ultrasound arrays. The download loop indexes this list
# in parallel with training_ultrasound_filenames, training_segmentation_ids and
# training_segmentation_filenames, so entry i of all four lists must describe
# the same scan. The trailing comments label each ID; they appear to follow the
# numbering used in the filename at the same index (TODO confirm the "-v02"
# suffix for 003, which the filename list has but the label here does not).
training_ultrasound_ids = [
    "5e1bae9ed9e6a3be02d013bc", #003
    "5dfbc687d9e6a3be02d01343", #004-v02
    "5e1cb476d9e6a3be02d01425", #005-v02
    "5e1cb685d9e6a3be02d01434", #006-v02
    "5e1cb84ed9e6a3be02d0143a", #007-v02
    "5e14cef4d9e6a3be02d01377", #008-v02
    "5e14cef4d9e6a3be02d0137a", #009-v02
    "5e1cba04d9e6a3be02d01440", #010-v02
    "5e1cbd21d9e6a3be02d01446", #011
    "5e1cbf54d9e6a3be02d0144c", #012
    "5e1cc395d9e6a3be02d01452", #013
    "5e1cc4fcd9e6a3be02d01458", #014
    "5e14cef4d9e6a3be02d0137d", #015
    "5e1cc907d9e6a3be02d0145e", #016
    "5e1cca85d9e6a3be02d01464", #018
    "5e1ccb5dd9e6a3be02d0146a", #019
    "5e1cb5ced9e6a3be02d0142b", #020
    "5e16028cd9e6a3be02d01386", #021
    "5e163f18d9e6a3be02d01392", #022
    "5e163f0dd9e6a3be02d0138c", #023
    "5e163f22d9e6a3be02d01398", #024
    "5e163f2ad9e6a3be02d0139e", #025
    "5e1c99f1d9e6a3be02d01401", #026
    "5e1c99f1d9e6a3be02d01404", #027
    "5e1c99f2d9e6a3be02d01407", #028
    "5e1c99f2d9e6a3be02d0140a", #029
    "5e1c99f3d9e6a3be02d0140d", #030
    "5e1c99f3d9e6a3be02d01410", #031
    "5e1c99f4d9e6a3be02d01413", #032
    "5e1c99f5d9e6a3be02d01416", #035
    "5e1c99f5d9e6a3be02d01419", #037
    "5e1c99f6d9e6a3be02d0141c", #038
    "5e1c99f6d9e6a3be02d0141f", #039
    "5df79d49d9e6a3be02d01332", #test
]

# Local file names for the ultrasound arrays. Entry i is the name under which
# the file with ID training_ultrasound_ids[i] is stored in the data arrays
# folder, so the order here must match the order of the ID list.
training_ultrasound_filenames = [
    "ultrasound-003-v02.npy",
    "ultrasound-004-v02.npy",
    "ultrasound-005-v02.npy",
    "ultrasound-006-v02.npy",
    "ultrasound-007-v02.npy",
    "ultrasound-008-v02.npy",
    "ultrasound-009-v02.npy",
    "ultrasound-010-v02.npy",
    "ultrasound-011.npy",
    "ultrasound-012.npy",
    "ultrasound-013.npy",
    "ultrasound-014.npy",
    "ultrasound-015.npy",
    "ultrasound-016.npy",
    "ultrasound-018.npy",
    "ultrasound-019.npy",
    "ultrasound-020.npy",
    "ultrasound-021.npy",
    "ultrasound-022.npy",
    "ultrasound-023.npy",
    "ultrasound-024.npy",
    "ultrasound-025.npy",
    "ultrasound-026.npy",
    "ultrasound-027.npy",
    "ultrasound-028.npy",
    "ultrasound-029.npy",
    "ultrasound-030.npy",
    "ultrasound-031.npy",
    "ultrasound-032.npy",
    "ultrasound-035.npy",
    "ultrasound-037.npy",
    "ultrasound-038.npy",
    "ultrasound-039.npy",
    "ultrasound-test.npy"
]

# Girder file IDs of the segmentation arrays. Entry i is downloaded in the same
# loop iteration as training_ultrasound_ids[i] and stored under
# training_segmentation_filenames[i], so the order must match the other lists.
# The trailing comments label each ID; they appear to follow the numbering used
# in the filename at the same index (TODO confirm the "-v02" suffix for 003).
training_segmentation_ids = [
    "5e1bae9ed9e6a3be02d013b9", #003
    "5dfbc686d9e6a3be02d01340", #004-v02
    "5e1cb475d9e6a3be02d01422", #005-v02
    "5e1cb684d9e6a3be02d01431", #006-v02
    "5e1cb84dd9e6a3be02d01437", #007-v02
    "5e14cef3d9e6a3be02d0136e", #008-v02
    "5e14cef3d9e6a3be02d01371", #009-v02
    "5e1cba04d9e6a3be02d0143d", #010-v02
    "5e1cbd20d9e6a3be02d01443", #011
    "5e1cbf54d9e6a3be02d01449", #012
    "5e1cc395d9e6a3be02d0144f", #013
    "5e1cc4fcd9e6a3be02d01455", #014
    "5e14cef3d9e6a3be02d01374", #015
    "5e1cc906d9e6a3be02d0145b", #016
    "5e1cca84d9e6a3be02d01461", #018
    "5e1ccb5cd9e6a3be02d01467", #019
    "5e1cb5ced9e6a3be02d01428", #020
    "5e16028bd9e6a3be02d01383", #021
    "5e163f17d9e6a3be02d0138f", #022
    "5e163f0cd9e6a3be02d01389", #023
    "5e163f21d9e6a3be02d01395", #024
    "5e163f29d9e6a3be02d0139b", #025
    "5e1c99edd9e6a3be02d013e0", #026
    "5e1c99edd9e6a3be02d013e3", #027
    "5e1c99edd9e6a3be02d013e6", #028
    "5e1c99eed9e6a3be02d013e9", #029
    "5e1c99eed9e6a3be02d013ec", #030
    "5e1c99eed9e6a3be02d013ef", #031
    "5e1c99efd9e6a3be02d013f2", #032
    "5e1c99efd9e6a3be02d013f5", #035
    "5e1c99efd9e6a3be02d013f8", #037
    "5e1c99efd9e6a3be02d013fb", #038
    "5e1c99f0d9e6a3be02d013fe", #039
    "5df79d41d9e6a3be02d0131a", #test
]

# Local file names for the segmentation arrays. Entry i is the name under which
# the file with ID training_segmentation_ids[i] is stored in the data arrays
# folder, so the order here must match the order of the ID list.
training_segmentation_filenames = [
    "segmentation-003-v02.npy",
    "segmentation-004-v02.npy",
    "segmentation-005-v02.npy",
    "segmentation-006-v02.npy",
    "segmentation-007-v02.npy",
    "segmentation-008-v02.npy",
    "segmentation-009-v02.npy",
    "segmentation-010-v02.npy",
    "segmentation-011.npy",
    "segmentation-012.npy",
    "segmentation-013.npy",
    "segmentation-014.npy",
    "segmentation-015.npy",
    "segmentation-016.npy",
    "segmentation-018.npy",
    "segmentation-019.npy",
    "segmentation-020.npy",
    "segmentation-021.npy",
    "segmentation-022.npy",
    "segmentation-023.npy",
    "segmentation-024.npy",
    "segmentation-025.npy",
    "segmentation-026.npy",
    "segmentation-027.npy",
    "segmentation-028.npy",
    "segmentation-029.npy",
    "segmentation-030.npy",
    "segmentation-031.npy",
    "segmentation-032.npy",
    "segmentation-035.npy",
    "segmentation-037.npy",
    "segmentation-038.npy",
    "segmentation-039.npy",
    "segmentation-test.npy"
]

In [15]:
# Names of the subfolders that live inside local_data_folder

data_arrays_folder    = "DataArrays"
notebooks_save_folder = "SavedNotebooks"
results_save_folder   = "SavedResults"
models_save_folder    = "SavedModels"
val_data_folder       = "PredictionsValidation"

# Full path of each subfolder

data_arrays_fullpath    = os.path.join(local_data_folder, data_arrays_folder)
notebooks_save_fullpath = os.path.join(local_data_folder, notebooks_save_folder)
results_save_fullpath   = os.path.join(local_data_folder, results_save_folder)
models_save_fullpath    = os.path.join(local_data_folder, models_save_folder)
val_data_fullpath       = os.path.join(local_data_folder, val_data_folder)

# Create whichever subfolders are missing, reporting only the ones created

for _required_folder in (
    data_arrays_fullpath,
    notebooks_save_fullpath,
    results_save_fullpath,
    models_save_fullpath,
    val_data_fullpath,
):
    if os.path.exists(_required_folder):
        continue
    os.makedirs(_required_folder)
    print("Created folder: {}".format(_required_folder))

In [16]:
# Download data from Girder
#
# Fetches every ultrasound/segmentation pair listed in the training_* lists into
# data_arrays_fullpath. Files that already exist locally are skipped unless
# overwrite_existing_data_files is set.

time_download_start = datetime.datetime.now()

print("Downloading training files ...")

# The four training_* lists are indexed in parallel below. Check that they have
# the same length before downloading anything, so a list that was edited
# inconsistently is reported clearly instead of failing with an IndexError
# part-way through (or silently leaving files out).

n_files = len(training_ultrasound_ids)
_parallel_list_lengths = {
    "training_ultrasound_ids": len(training_ultrasound_ids),
    "training_ultrasound_filenames": len(training_ultrasound_filenames),
    "training_segmentation_ids": len(training_segmentation_ids),
    "training_segmentation_filenames": len(training_segmentation_filenames),
}
if len(set(_parallel_list_lengths.values())) != 1:
    raise ValueError(
        "Training file lists must all have the same length, got: {}".format(_parallel_list_lengths))

# Setting up number of validation rounds: one round per file, unless a positive
# limit_validation_rounds caps it

if limit_validation_rounds > 0:
    num_validation_rounds = min(n_files, limit_validation_rounds)
else:
    num_validation_rounds = n_files

# Preparing progress bar (two steps per file pair: ultrasound, then segmentation)

f = IntProgress(min=0, max=n_files*2)
display(f)

# Downloading files

gclient = girder_client.GirderClient(apiUrl=girder_api_url)
gclient.authenticate(apiKey=girder_apikey_read)


def _download_if_needed(file_id, fullname):
    """Download Girder file `file_id` to `fullname` unless it is already there.

    An existing file is kept as-is unless overwrite_existing_data_files is True.
    """
    if not os.path.exists(fullname) or overwrite_existing_data_files:
        print("Downloading {}...".format(fullname))
        gclient.downloadFile(file_id, fullname)


for i in range(n_files):
    ultrasound_fullname = os.path.join(data_arrays_fullpath, training_ultrasound_filenames[i])
    _download_if_needed(training_ultrasound_ids[i], ultrasound_fullname)
    f.value = i * 2 + 1

    segmentation_fullname = os.path.join(data_arrays_fullpath, training_segmentation_filenames[i])
    _download_if_needed(training_segmentation_ids[i], segmentation_fullname)
    f.value = i * 2 + 2

time_download_stop = datetime.datetime.now()
print("\nTotal download time: {}".format(time_download_stop - time_download_start))

Downloading training files ...


IntProgress(value=0, max=68)

Downloading C:\Users\jgerolami\Documents\GitHub\aigt\Notebooks\Segmentation\data\DataArrays\ultrasound-016.npy...
Downloading C:\Users\jgerolami\Documents\GitHub\aigt\Notebooks\Segmentation\data\DataArrays\segmentation-016.npy...
Downloading C:\Users\jgerolami\Documents\GitHub\aigt\Notebooks\Segmentation\data\DataArrays\ultrasound-018.npy...
Downloading C:\Users\jgerolami\Documents\GitHub\aigt\Notebooks\Segmentation\data\DataArrays\segmentation-018.npy...
Downloading C:\Users\jgerolami\Documents\GitHub\aigt\Notebooks\Segmentation\data\DataArrays\ultrasound-019.npy...
Downloading C:\Users\jgerolami\Documents\GitHub\aigt\Notebooks\Segmentation\data\DataArrays\segmentation-019.npy...

Total download time: 0:00:04.955693
