In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = 'all'  # default is 'last_expr'

# Automatically reload imported modules before executing code, so edits to local modules are picked up.
%load_ext autoreload
%autoreload 2

In [None]:
import os
from shutil import copy

import azureml.core
from azureml.core import Workspace, Experiment, Datastore, Dataset, ContainerRegistry, ScriptRunConfig, RunConfiguration
from azureml.data.datapath import DataPath
from azureml.data.data_reference import DataReference

In [None]:
# Report which azureml-core SDK version this notebook is running against.
print(f'Version of AML: {azureml.core.__version__}')

# MegaDetector v4 training

https://docs.microsoft.com/en-us/azure/machine-learning/how-to-set-up-training-targets#amlcompute
    

### Provide credentials

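Provide the storage account name and key, and the password for the container registry that hosts the base image. They are read from the environment variables `STORAGE_ACCOUNT_NAME`, `STORAGE_ACCOUNT_KEY` and `REGISTRY_PASSWORD`.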

In [None]:
# Secrets come from the environment rather than being hard-coded in the notebook.
# Each value is None when the corresponding variable is not set.
storage_account_name = os.getenv('STORAGE_ACCOUNT_NAME')
storage_account_key = os.getenv('STORAGE_ACCOUNT_KEY')
registry_pw = os.getenv('REGISTRY_PASSWORD')

### Connect to the AML workspace

In [None]:
# Load the workspace from the saved configuration (Workspace.from_config).
ws = Workspace.from_config()
# Tab-separated summary of the workspace that was loaded; each field is printed once.
print(ws.name, ws.location, ws.resource_group, sep='\t')

In [None]:
# Look up the compute target named 'gpu-nc6-v3' in the workspace; raises KeyError if the
# lookup behaves like a dict and the name is absent — NOTE(review): confirm the target exists.
compute_target = ws.compute_targets['gpu-nc6-v3']

In [None]:
# Sanity check: show which compute target class was returned.
type(compute_target)

### Connect to datastore

In [None]:
# Names under which the blob containers are (or will be) registered in the workspace,
# and the blob containers backing them.
input_datastore_name = 'tfrecords'
input_container_name = 'megadetectorv4'

output_datastore_name = 'artifacts'
output_container_name = 'megadetectorv4-artifacts'

# ws.datastores maps datastore names to datastore objects; .get() yields None for a
# name that has not been registered in the workspace yet.
input_datastore = ws.datastores.get(input_datastore_name)
output_datastore = ws.datastores.get(output_datastore_name)

if input_datastore is None:
    # Explicit print() so the message is shown regardless of the notebook's
    # ast_node_interactivity setting (a bare string is only echoed in 'all' mode).
    print('Input datastore {} is not in the workspace; registering it...'.format(input_datastore_name))
    input_datastore = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name=input_datastore_name,
        container_name=input_container_name,
        account_name=storage_account_name,
        account_key=storage_account_key,
        create_if_not_exists=True,
    )

if output_datastore is None:
    print('Output datastore {} is not in the workspace; registering it...'.format(output_datastore_name))
    output_datastore = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name=output_datastore_name,
        container_name=output_container_name,
        account_name=storage_account_name,
        account_key=storage_account_key,
        create_if_not_exists=True,
    )

print(input_datastore)
print(output_datastore)

In [None]:
# One data reference per datastore, each named after its datastore and requested in 'mount' mode.
input_data_ref = DataReference(
    datastore=input_datastore,
    data_reference_name=input_datastore_name,
    mode='mount',
)
output_data_ref = DataReference(
    datastore=output_datastore,
    data_reference_name=output_datastore_name,
    mode='mount',
)

# Echo both references; every top-level expression is displayed because
# ast_node_interactivity is set to 'all' at the top of the notebook.
input_data_ref
output_data_ref

In [None]:
# Show the string form of the data reference, i.e. what gets passed when a reference is
# given as a script argument — presumably the `$AZUREML_DATAREFERENCE_artifacts` placeholder; confirm.
str(output_data_ref)

In [None]:
# Build a file dataset from the input datastore. No path within the datastore is given,
# so this presumably covers the whole container — TODO confirm.
input_dataset = Dataset.File.from_files(path=DataPath(datastore=input_datastore))

In [None]:
# Preview the dataset consumption config (named 'tfrecords', mount argument '/tmp').
# The result is only displayed here; it is not assigned or used by later cells.
input_dataset.as_named_input('tfrecords').as_mount('/tmp')

### Environment setup

In [None]:
# Private container registry that holds the base Docker image. The password is the
# `registry_pw` value read from the REGISTRY_PASSWORD environment variable.
container_registry = ContainerRegistry()
container_registry.address = 'yasiyu.azurecr.io'
container_registry.username = 'yasiyu'
container_registry.password = registry_pw

In [None]:
run_config = RunConfiguration()

# Run inside a custom Docker image pulled from the private container registry.
run_config.environment.docker.enabled = True
run_config.environment.docker.base_image = 'tfodapi112:190905'
run_config.environment.docker.base_image_registry = container_registry

# GPU support: Azure automatically detects and uses the NVIDIA Docker extension when it is available.

run_config.environment.python.user_managed_dependencies = True  # use your own installed packages instead of an AML created Conda env

run_config.target = compute_target  # specify the compute target; obscure error message: `docker image` cannot run

# Attach both data references to the run so the datastores are made available on the
# compute target and the $AZUREML_DATAREFERENCE_<name> placeholders (the string form of
# each DataReference) can be resolved when the script runs.
run_config.data_references = {
    ref.data_reference_name: ref.to_config()
    for ref in (input_data_ref, output_data_ref)
}

### Experiment

In [None]:
# Show the working directory; the relative paths used below are resolved against it.
os.getcwd()

In [None]:
# Experiment identity, the folder uploaded as the run's source directory, and the
# pipeline config file name passed to the training script.
experiment_name = 'mdv4_trial'
exp_folder = 'experiments/megadetector_v4/200203'
config_file_name = 'pipeline_baseline.config'

# Tags attached to each submitted run; all values are strings.
tags = dict(
    learning_rate=str(0.003),
    input_set='mdv4boxes',
    train_on='train',
    val_on='val',
)

In [None]:
# Handle to the experiment named `experiment_name` in the workspace; runs are submitted through it.
exp = Experiment(workspace=ws, name=experiment_name)

Copy the entry script of the TensorFlow Object Detection API (TFODAPI) to the `source_directory`, which also contains the pipeline config file (`pipeline_baseline.config` here).

In [None]:
# 'model_main.py' is a relative path, so it is resolved against the current working directory.
# exp_folder is expected to already exist as a directory — NOTE(review): confirm before running.
copy('model_main.py', exp_folder)

### Run configuration

In the pipeline config file, we need to replace the following environment variables:

$AZUREML_DATAREFERENCE_artifacts

$AZUREML_DATAREFERENCE_tfrecords

In [None]:
# Trial run: execute 'try.py' from the experiment folder, passing the string form of the
# input data reference as --input_dir.
# NOTE(review): str() only yields a placeholder string; confirm the data reference is also
# registered on run_config so the placeholder resolves on the compute target.
config = ScriptRunConfig(
    source_directory=exp_folder,
    script='try.py',
    arguments=[
        '--input_dir', str(input_data_ref)
    ],
    run_config=run_config
)

In [None]:
# Submit the trial configuration to the experiment, attaching `tags` to the run.
run = exp.submit(config, tags=tags)

In [None]:
# Check the current status of the trial run submitted above.
run.status

In [None]:
# Training run: execute the TFODAPI entry script from the experiment folder.
# Note that this rebinds `config`, replacing the trial configuration defined earlier.
config = ScriptRunConfig(
    source_directory=exp_folder,
    script='model_main.py',
    run_config=run_config,
    arguments=[
        '--model_dir', './outputs',
        '--pipeline_config_path', config_file_name,
        '--sample_1_of_n_eval_examples', 10,
    ],
)

In [None]:
# Submit the training configuration; `run` now refers to this run rather than the trial run.
run = exp.submit(config, tags=tags)

In [None]:
# Check the current status of the most recently submitted run.
run.status

In [None]:
# Show the details reported for the most recently submitted run.
run.get_details()