In [53]:

# Copyright (C) Microsoft Corporation. All rights reserved.

# Licensed under the MIT License. See LICENSE in the project root for

# information.


In [54]:
import glob
import os, sys, shutil, json

import cvtk
from cvtk import Dataset
from cvtk import Splitter
from cvtk import DNNModel, TransferLearningModel
from cvtk import (ClassificationDataset, CNTKTLModel, Context, Splitter, StorageContext)
from cvtk import Context
from cvtk.augmentation import augment_dataset
from cvtk.evaluation import ClassificationEvaluation

import cntk
from cntk import softmax

import numpy as np
import pandas as pd
from azureml.logging import get_azureml_logger
from imgaug import augmenters
from IPython.display import display

### Load training dataset

In [55]:
# Folder the training images are read from, and the name given to the dataset.
dataset_dir = 'C:\\OCT\\OCT2017\\OCT2017\\train'
dataset_name = 'CNV'

# Build the classification dataset from the contents of that folder.
dataset_train = ClassificationDataset.create_from_dir(dataset_name, dataset_dir)

F1 2018-05-16 18:42:29,526 INFO Dataset name CNV
F1 2018-05-16 18:42:29,527 INFO Dataset location C:\OCT\OCT2017\OCT2017\train
F1 2018-05-16 18:42:29,530 INFO azureml.vision:dataset creating dataset for scenario=classification 


In [56]:
# Summarise the loaded training data: number of images and number of labels.
print("Training Dataset consists of {} images with {} labels.".format(
    len(dataset_train.images), len(dataset_train.labels)))

# The value printed here is the dataset's list of labels, not information about
# a single image, so the heading names it accordingly.
print("Labels in the training dataset:\n {}".format(dataset_train.labels))

Training Dataset consists of 83484 images with 4 labels.
Information for training image 2:
 [<Label(name = CNV)>, <Label(name = DME)>, <Label(name = DRUSEN)>, <Label(name = NORMAL)>]


### Split training data

In [57]:
# Partition the loaded dataset into two subsets. With 0.80 the recorded run put
# 66787 of the 83484 images in the first subset and 16697 in the second.
splitter = cvtk.Splitter(dataset_train)
trainSet, evalSet = splitter.split(0.80)

# NOTE: the message calls the second subset "test images"; it is the evalSet
# that the evaluation cell further down scores.
print(
    "Number of training images = {}, test images = {}.".format(
        trainSet.size(), evalSet.size()
    )
)

F1 2018-05-16 18:43:14,669 INFO azureml.vision:splitter splitting a dataset 
F1 2018-05-16 18:43:44,314 INFO azureml.vision:dataset creating dataset for scenario=classification 
F1 2018-05-16 18:43:45,130 INFO azureml.vision:dataset creating dataset for scenario=classification 
F1 2018-05-16 18:43:45,339 INFO Train set size 66787
F1 2018-05-16 18:43:45,341 INFO Test set size 16697
Number of training images = 66787, test images = 16697.


### Model definition

In [58]:
# Directory passed to the model as its output path.
output_aml = 'C:\\CVTK\\OCT_output'

# Number of labels in the training split (not used again in this cell).
nrClasses = len(trainSet.labels)

# Index -> label-name lookup.
# NOTE(review): built from dataset_train.labels, while the model itself is given
# trainSet.labels - presumably the same labels in the same order; confirm.
class_map = dict(
    (index, label.name) for index, label in enumerate(dataset_train.labels)
)

# Transfer-learning model on top of the named pretrained base model.
base_model_name = 'ResNet50_ImageNet_CNTK'
mymodel = CNTKTLModel(
    trainSet.labels,
    class_map=class_map,
    base_model_name=base_model_name,
    output_path=output_aml,
    image_dims=(3, 224, 224),
    num_conv_layers_freeze=0,
)

# Report the freshly created model's state and base model name.
print(
    "Model state: {}".format(mymodel.model_state),
    "Model name: {}".format(mymodel.base_model_name),
    sep="\n",
)


F1 2018-05-16 18:43:45,792 INFO Base model name ResNet50_ImageNet_CNTK
Model state: untrained
Model name: ResNet50_ImageNet_CNTK


### Start training

In [59]:
print("Training locally...")

# Training hyperparameters; the logging cell further down reads these names.
mb_size = 16
num_epochs = 2
l2_reg_weight = 0.005
# NOTE(review): the schedule lists 15 learning-rate values while num_epochs is 2;
# how train() consumes the list is not visible here - confirm.
lr_per_mb = ([0.01] * 7) + ([0.001] * 7) + [0.0001]

# Train on the training split; the call's return value is kept in `t`.
t = mymodel.train(
    trainSet,
    lr_per_mb=lr_per_mb,
    num_epochs=num_epochs,
    l2_reg_weight=l2_reg_weight,
    mb_size=mb_size,
)

Training locally...
F1 2018-05-16 18:43:47,287 INFO Number of training epochs 2
F1 2018-05-16 18:43:47,288 INFO Minibatch size 16
F1 2018-05-16 18:43:47,291 INFO azureml.vision:Fit starting in experiment  284121010 
F1 2018-05-16 18:43:47,295 INFO azureml.vision:model starting training for scenario=classification 
<class 'int'>
1 worker
Training transfer learning model for 2 epochs (epoch_size = 66787).
non-distributed mode
Training 23516228 parameters in 161 parameter tensors.
Training 23516228 parameters in 161 parameter tensors.
Learning rate per minibatch: 0.01
Momentum per minibatch: 0.9
PROGRESS: 0.00%
PROGRESS: 0.00%
 Minibatch[   1- 100]: loss = 0.772148 * 1600, metric = 28.06% * 1600;
 Minibatch[ 101- 200]: loss = 0.424842 * 1600, metric = 13.50% * 1600;
PROGRESS: 0.00%
 Minibatch[ 201- 300]: loss = 0.329084 * 1600, metric = 10.69% * 1600;
PROGRESS: 0.00%
 Minibatch[ 301- 400]: loss = 0.260898 * 1600, metric = 8.62% * 1600;
 Minibatch[ 401- 500]: loss = 0.286111 * 1600, metric

### Log hyperparameters

In [60]:
# Record the training hyperparameters through the azureml logger.
# get_azureml_logger comes from the notebook's import cell, so this cell no
# longer carries its own import.
logger = get_azureml_logger()

logger.log("step", "training")
logger.log("number of epochs", num_epochs)
logger.log("L2 reg weight", l2_reg_weight)
logger.log("mini_batch size", mb_size)
logger.log("learning rate", lr_per_mb)
# NOTE(review): "learning rate" is logged a second time, with mb_size as an extra
# positional argument. Kept unchanged because the log() signature is not visible
# here - confirm whether this call is intentional.
logger.log("learning rate", lr_per_mb, mb_size)

<azureml.logging.script_run_request.ScriptRunRequest at 0x2695be4e2b0>

### Evaluate model on evaluation set

In [61]:
# Minibatch size used for scoring; the test-set evaluation cell reuses it.
testMinibatchSize = 64

# Evaluate the trained model on the held-out split (evalSet). Swap in another
# dataset here to evaluate on it instead.
# NOTE(review): add_softmax is not passed here, whereas the test-set evaluation
# passes add_softmax=True - confirm the two are meant to differ.
ce = ClassificationEvaluation(mymodel, evalSet, minibatch_size=testMinibatchSize)

# Accuracy.
acc = ce.compute_accuracy()
print("Accuracy = {}".format(acc))

# Confusion matrix.
cm = ce.compute_confusion_matrix()
print("Confusion matrix = {}".format(cm))

# Precision/recall curve, followed by a short summary of each returned sequence.
precisions, recalls, thresholds = ce.compute_precision_recall_curve()
print("len(precisions) = {}, min = {}, max = {}".format(
    len(precisions), min(precisions), max(precisions)))
print("len(recalls) = {}, min = {}, max = {}".format(
    len(recalls), min(recalls), max(recalls)))
print("len(thresholds) = " + str(len(thresholds)))

# Keep the scores produced for the evaluation set.
scores_val = ce.scores

# Log the evaluation metrics.
# NOTE(review): the step is logged as "training" although this cell evaluates on
# evalSet - confirm the label is intended.
logger.log("step", "training")
logger.log("accuracy", acc)
logger.log("max precisions", max(precisions))
logger.log("recall", recalls)
logger.log("thresholds", thresholds)

F1 2018-05-16 18:54:15,286 INFO azureml.vision:evaluation doing evaluation for scenario=classification 
F1 2018-05-16 18:54:15,288 INFO azureml.vision:model scoring dataset for scenario=classification 
F1 2018-05-16 18:54:46,470 INFO Accuracy 0.9811343355093729
Accuracy = 0.9811343355093729
Confusion matrix = [[7168   39  209   25]
 [  44 2136    9   81]
 [  83    6 1554   80]
 [   3   19   32 5209]]
F1 2018-05-16 18:54:46,539 INFO Precision curve [0.32346617 0.32345306 0.32345933 ... 1.         1.         1.        ]
F1 2018-05-16 18:54:48,029 INFO Recall curve [1.00000000e+00 9.99940109e-01 9.99940109e-01 ... 1.19781997e-04
 5.98909984e-05 0.00000000e+00]
len(precisions) = 50597, min = 0.3234530590104227, max = 1.0
len(recalls) = 50597, min = 0.0, max = 1.0
len(thresholds) = 50596


<azureml.logging.script_run_request.ScriptRunRequest at 0x26964e1a2e8>

### Evaluate model on test set

In [62]:
# Load the separate test dataset so its results can be compared with the
# evaluation-set results above. The dataset name from the training load is reused.
testset_dir = 'C:\\OCT\\OCT2017\\OCT2017\\test'
dataset_test = ClassificationDataset.create_from_dir(dataset_name, testset_dir)

print("Testing Dataset consists of {} images.".format(len(dataset_test.images)))
# The value printed here is the dataset's list of labels, not information about
# a single image, so the heading names it accordingly.
print("Labels in the test dataset:\n {}".format(dataset_test.labels))

F1 2018-05-16 18:54:52,651 INFO Dataset name CNV
F1 2018-05-16 18:54:52,653 INFO Dataset location C:\OCT\OCT2017\OCT2017\test
F1 2018-05-16 18:54:52,656 INFO azureml.vision:dataset creating dataset for scenario=classification 
Testing Dataset consists of 1000 images.
Information for test image 2:
 [<Label(name = CNV)>, <Label(name = DME)>, <Label(name = DRUSEN)>, <Label(name = NORMAL)>]


In [63]:
# Evaluate the trained model on the test dataset. This rebinds `ce`, and passes
# add_softmax=True (the evaluation-set run above does not).
ce = ClassificationEvaluation(
    mymodel,
    dataset_test,
    minibatch_size=testMinibatchSize,
    add_softmax=True,
)

# Accuracy.
acc = ce.compute_accuracy()
print("Accuracy = {}".format(acc))

# Confusion matrix.
cm = ce.compute_confusion_matrix()
print("Confusion matrix = {}".format(cm))

# Precision/recall curve; the values are logged by the result-logging cell below.
precisions, recalls, thresholds = ce.compute_precision_recall_curve()


F1 2018-05-16 18:54:54,092 INFO azureml.vision:evaluation doing evaluation for scenario=classification 
F1 2018-05-16 18:54:54,094 INFO azureml.vision:model scoring dataset for scenario=classification 
F1 2018-05-16 18:54:57,824 INFO Accuracy 0.9984999999999999
Accuracy = 0.9984999999999999
Confusion matrix = [[250   0   0   0]
 [  1 249   0   0]
 [  2   0 248   0]
 [  0   0   0 250]]
F1 2018-05-16 18:54:57,834 INFO Precision curve [0.98522167 0.9852071  0.98617966 ... 1.         1.         1.        ]
F1 2018-05-16 18:54:57,865 INFO Recall curve [1.    0.999 0.999 ... 0.002 0.001 0.   ]


In [64]:
# Display the `classes` attribute of the current evaluation object (`ce` was
# last bound to the test-set evaluation).
ce.classes

{'CNV', 'DME', 'DRUSEN', 'NORMAL'}

### Log test results

In [65]:
# Keep the scores of the current evaluation object.
# NOTE(review): `ce` is the test-set evaluation at this point, so `scores_val`
# now holds test-set scores despite its name - confirm that is intended.
scores_val = ce.scores

# Log the test-set metrics under the "testing" step.
logger.log("step", "testing")
logger.log("accuracy", acc)
logger.log("max precisions", max(precisions))
logger.log("recall", recalls)
logger.log("thresholds", thresholds)

<azureml.logging.script_run_request.ScriptRunRequest at 0x269647f5b70>