### Download the model weights from the original Darknet release

# TODO:
1. remove hardcoded filenames from `train_aml.py` and `train.py`
2. cleanup `model_data` folder to reduce size
3. upload `*.weights` to model registry to reduce size for aml/staging

In [None]:
# setting the model variables
import os
import urllib.request

# Darknet release artifacts: where the weights are fetched from, which network
# config describes them, and where the weights are stored locally.
model_url = 'https://pjreddie.com/media/files/yolov3-tiny.weights'
yolo3_config = 'keras-yolo3/yolov3-tiny.cfg'
# remove this from model_data after the keras conversion
yolo3_weights_filename = 'model_data/yolov3-tiny.weights'

# make sure the directory that will hold the weights file exists
os.makedirs(os.path.dirname(yolo3_weights_filename), exist_ok=True)

In [None]:
# fetch the Darknet weights from model_url and save them as yolo3_weights_filename
urllib.request.urlretrieve(url=model_url, filename=yolo3_weights_filename)

## Install dependent packages

In [None]:
!pip uninstall -y azureml-automl-runtime azureml-train-automl-runtime
!pip install -U keras==2.2.4 tensorflow==1.14.0 tensorflow-gpu==1.14.0 pillow matplotlib h5py==2.10.0 tensorboard azureml-sdk==1.13.0 onnxruntime==1.4.0 onnx==1.7.0 azureml-widgets azureml-tensorboard azureml-opendatasets azureml-mlflow azureml-defaults azureml-contrib-services azureml-contrib-interpret

!pip install -U git+git://github.com/microsoft/onnxconverter-common.git@3451bbffe61a2591a17f4d99a405b48e9ae8e395
!pip install -U git+git://github.com/onnx/keras-onnx.git@ff17787c393e2ce34d43185447d7354525f3ba87

### Convert the Darknet model to Keras

In [None]:
# Output path of the conversion below: the Keras (.h5) model file.
yolo3_keras_model = 'model_data/tiny_yolo_weights.h5' # Converted from the Darknet weights. Need to pass this as parameter to train.py

# execute the pre-built conversion script provided in the sample
# arguments, in order: Darknet config, downloaded Darknet weights, Keras output path
!python3 keras-yolo3/convert.py $yolo3_config $yolo3_weights_filename $yolo3_keras_model

### Training data
This sample uses the [VOC Pascal dataset](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/#devkit) referred to as _VOCDevkit_.

To generate your own annotation file and class names file, use the following format:

- One row for one image;  
- Row format: `image_file_path box1 box2 ... boxN`;  
- Box format: `x_min,y_min,x_max,y_max,class_id` (no space).  
- For the VOC dataset, try `python src/voc_annotation.py`  
Here is an example:

        ```
        path/to/img1.jpg 50,100,150,200,0 30,50,200,120,3
        path/to/img2.jpg 120,300,250,600,2
        ...
        ```

In [None]:
# setup the folder for the training dataset

import os

# local scratch directory that receives the downloaded VOC archives
dataset_folder = '/mnt/tmp/'
os.makedirs(dataset_folder, exist_ok=True)

# archive locations inside the scratch directory (train/val set and the
# optional test set)
dataset_filename = os.path.join(dataset_folder, 'VOCtrainval2007.tar')
optional_dataset_filename = os.path.join(dataset_folder, 'VOCtest2007.tar')

In [None]:
# VOC2007 train/val archive, saved to dataset_filename
dataset_url = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar'
urllib.request.urlretrieve(url=dataset_url, filename=dataset_filename)

In [None]:
# VOC2007 test archive (optional), saved to optional_dataset_filename
optional_dataset_url = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar'
urllib.request.urlretrieve(url=optional_dataset_url, filename=optional_dataset_filename)

Unpack the dataset and prepare to upload to Datastore in AML.

_This step will take a few minutes._

In [None]:
# extract both downloaded archives; -C makes tar unpack into dataset_folder
! tar xf $dataset_filename -C $dataset_folder
! tar xf $optional_dataset_filename -C $dataset_folder

Convert VOC-style dataset to YOLO-style dataset

In [None]:
# run the sample's annotation script, which converts the VOC-style dataset to a YOLO-style dataset
! python src/voc_annotation.py

Fill in the variables with your config values found on your Azure Portal Workspace.

In [None]:
%%writefile config.json
{
    "workspace_name": "",
    "subscription_id": "",
    "resource_group": "",
    "location": ""
}

In [None]:
# Initialize the Workspace handle
from azureml.core import Workspace

# load the existing AML Workspace described by config.json
ws = Workspace.from_config()

# echo the workspace identity, one value per line
for detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(detail)

# default datastore of the workspace
def_blob_store = ws.get_default_datastore()

#### OR create a new workspace with the following steps

```
from azureml.core import Workspace

### Change this cell from markdown to code and run this if you need to create a workspace 
### Update the values for your workspace below
ws=Workspace.create(subscription_id="<subscription-id goes here>",
                resource_group="<resource group goes here>",
                name="<name of the AML workspace>",
                location="<location>")
                
ws.write_config()
```

Upload the VOCdevkit to the workspace datastore

In [None]:
# Upload the extracted VOCdevkit directory to the workspace datastore.
# dataset_folder already ends with a path separator, so the parts are joined
# with os.path.join; concatenating "/VOCdevkit" produced "/mnt/tmp//VOCdevkit".
def_blob_store.upload(os.path.join(dataset_folder, "VOCdevkit"), target_path="/data/VOCdevkit", show_progress=False)

In [None]:
from azureml.data.data_reference import DataReference
from azureml.core import Dataset

# file dataset over the /data/VOCdevkit folder of the default datastore
voc_datastore_path = (def_blob_store, '/data/VOCdevkit')
training_dataset = Dataset.File.from_files(path=voc_datastore_path)

### Train the Keras model.

In [None]:
import os
import shutil
import glob

# set the project folder
PROJECT_FOLDER = "./aml/staging"

# start from an empty staging directory: drop any previous one, then recreate it
if os.path.exists(PROJECT_FOLDER):
    shutil.rmtree(PROJECT_FOLDER)
os.makedirs(PROJECT_FOLDER, exist_ok=True)

# Files staged for the training run, copied in this order:
#   1. python scripts from the keras-yolo3 repo
#   2. config files from the keras-yolo3 repo
#   3. text files from the keras-yolo3 repo
#   4. python scripts from src/
# src/ is copied last, so a script there replaces a same-named keras-yolo3 file.
STAGED_PATTERNS = (
    "keras-yolo3/*.py",
    "keras-yolo3/*.cfg",
    "keras-yolo3/*.txt",
    "src/*.py",
)
for pattern in STAGED_PATTERNS:
    files = glob.glob(pattern)
    for f in files:
        shutil.copy(f, PROJECT_FOLDER)

In [None]:
! cp -rf keras-yolo3/model_data $PROJECT_FOLDER # copy the keras-yolo3 model_data directory under the project folder

! cp -rf keras-yolo3/yolo3 $PROJECT_FOLDER # copy the model files under the project folder

In [None]:
# copy every file with an extension from the local model_data directory
# into the model_data directory of the PROJECT FOLDER
files = glob.glob("model_data/*.*")
staged_model_data = PROJECT_FOLDER + '/model_data'
for f in files:
    shutil.copy(f, staged_model_data)

Add the optional packages and setup the training environment

In [None]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_GPU_IMAGE

# pip packages installed into the training environment
pip_requirements = [
    'keras==2.1.5',
    'tensorflow==1.6.0',
    'pillow',
    'matplotlib',
    'h5py',
    'tensorboard',
]
cd = CondaDependencies.create(
    pip_packages=pip_requirements,
    conda_packages=['python=3.6.11'],
)

# environment named "yolov3", built from the dependencies above plus
# azureml-sdk and the conda-forge channel
myenv = Environment("yolov3")
myenv.python.conda_dependencies = cd
myenv.python.conda_dependencies.add_pip_package("azureml-sdk")
myenv.python.conda_dependencies.add_channel("conda-forge")

# run inside docker, using the default GPU base image
myenv.docker.enabled = True
myenv.docker.base_image = DEFAULT_GPU_IMAGE

Setup the training compute

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Choose a name for my cluster
CLUSTER_NAME = "gpu-training"

try:
    # reuse the cluster if the workspace already has one with this name
    aml_cluster = AmlCompute(workspace=ws, name=CLUSTER_NAME)
except ComputeTargetException:
    # the lookup failed, so provision a new cluster under that name
    print("provisioning new compute target")
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_NC6",
        max_nodes=8,
        vm_priority="lowpriority",
        idle_seconds_before_scaledown=1800,
    )
    aml_cluster = ComputeTarget.create(ws, CLUSTER_NAME, compute_config)
else:
    print("Found existing cluster, use it.")

# wait for the compute target in either case, printing progress
aml_cluster.wait_for_completion(show_output=True)


Define the run config for the experiment

In [None]:
from azureml.core import ScriptRunConfig

# the training dataset is mounted under the input name 'input' and handed to
# the script as its --data argument
script_arguments = ["--data", training_dataset.as_named_input('input').as_mount()]

# run train_aml.py from the staged project folder
src = ScriptRunConfig(
    source_directory=PROJECT_FOLDER,
    script='train_aml.py',
    arguments=script_arguments,
)

# plain python job, executed on the training cluster
src.run_config.framework = 'python'
src.run_config.target = aml_cluster.name

# Set environment
src.run_config.environment = myenv

Run the training experiment

In [None]:
from azureml.core import Experiment

# name of the experiment the training run is submitted under
EXPERIMENT_NAME = "keras-yolo3"

experiment = Experiment(workspace=ws, name=EXPERIMENT_NAME)

# submit the run configuration; `run` is the handle used to wait for the run
run = experiment.submit(config=src)

In [None]:
%%time

# block until the submitted run completes, printing its output (show_output=True)
run.wait_for_completion(show_output=True)

In [None]:
from azureml.core import Model

# name of the trained model in the workspace model registry
# NOTE(review): presumably registered by the training script - confirm.
my_trained_model = 'tiny_yolov3'

# look up the registered model by name; this cell does not register anything
model = Model(ws, name=my_trained_model)

# download the trained model into model_data (exist_ok=True allows existing files)
model.download(target_dir='model_data', exist_ok=True)

### Convert the model to ONNX and register in the model registry

In [None]:
# inputs and output of the ONNX conversion script invoked below
trained_model_path="model_data/trained_weights_final.h5" # make sure this name matches the file downloaded in the previous step
test_image="media/000004.jpg"
anchors_path="model_data/tiny_yolo_anchors.txt"
classes_path="model_data/voc_classes.txt"
onnx_model_file="model_data/new_yolo.onnx"

!python ./src/convert2onnx.py \
    --model_path $trained_model_path \
    --test_image $test_image \
    --anchors_path $anchors_path \
    --classes_path $classes_path \
    --model_file_name $onnx_model_file \
    --overwrite

Test this ONNX Model. Refresh the repo to recognize the test image. 

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from PIL import Image

# load the test image and its "_score" counterpart
# NOTE(review): the score image is presumably written by the ONNX conversion step - confirm.
image = Image.open("media/000004.jpg")
image_score = Image.open("media/000004_score.jpg")

# show both images side by side with the axes hidden
fig, ax = plt.subplots(1, 2)
for panel, picture in zip(ax, (image, image_score)):
    panel.imshow(picture)
    panel.axis('off')

In [None]:
# Register the ONNX model in the workspace registry
# The file at onnx_model_file is registered under the name "TinyYOLO".
from azureml.core.model import Model
Model.register(model_path = onnx_model_file, 
                model_name = "TinyYOLO", 
                workspace = ws,
                description="ONNX model converted from trained Keras/Tf")

# CLEANUP!!!

Optionally remove all the files downloaded for this exercise.

```
shutil.rmtree(PROJECT_FOLDER)
shutil.rmtree(os.path.join(dataset_folder, 'VOCdevkit'))

files = glob.glob("aml")
for f in files:
    shutil.rmtree(f)
```

## FINISHED. 
Transition to ADO to review the status of the pipeline.