### 0 Workspace initialization

In [1]:
import os
import azureml.core
from azureml.core import Experiment
from azureml.core import Workspace, Datastore
from dotenv import load_dotenv, find_dotenv

In [2]:
# Move the working directory one level up so the relative paths used in the
# following cells ('.env', './train/...') resolve against the parent folder.
# NOTE: not idempotent - every re-run of this cell climbs one more level.
os.chdir(os.pardir)

In [3]:
# Read the '.env' file from the current working directory into os.environ.
# override=True: values from the file win over variables that are already set.
# (The python-dotenv IPython extension, `%load_ext dotenv` followed by
#  `%dotenv .env`, is another way to load the file.)
load_dotenv('.env', override=True)

True

##### 0.1 Workspace, experiment and datastore

In [4]:
from azureml.core.authentication import ServicePrincipalAuthentication

# Every setting below is read with os.environ[...], so a missing variable
# raises KeyError in this cell instead of failing later in the notebook.

# Local data directory (upload source and --data_dir of the first local run).
data_dir = os.environ["DATA_DIR"]

# Container registry: server address, user name and password.
AML_CONTAINER_REGISTRY_SERVER = os.environ["AML_CONTAINER_REGISTRY_SERVER"]
AML_CONTAINER_REGISTRY_USR = os.environ["AML_CONTAINER_REGISTRY_USR"]
AML_CONTAINER_REGISTRY_PWD = os.environ["AML_CONTAINER_REGISTRY_PWD"]

# Service principal: application id, directory id (passed as tenant_id when
# authenticating) and password.
AML_SERVICE_PRINCIPAL_APP_ID = os.environ["AML_SERVICE_PRINCIPAL_APP_ID"]
AML_SERVICE_PRINCIPAL_DIR_ID = os.environ["AML_SERVICE_PRINCIPAL_DIR_ID"]
AML_SERVICE_PRINCIPAL_PWD = os.environ["AML_SERVICE_PRINCIPAL_PWD"]

In [5]:
# Authenticate non-interactively with the service principal whose credentials
# were read from the environment.
svc_pr = ServicePrincipalAuthentication(
    tenant_id=AML_SERVICE_PRINCIPAL_DIR_ID,
    service_principal_id=AML_SERVICE_PRINCIPAL_APP_ID,
    service_principal_password=AML_SERVICE_PRINCIPAL_PWD)

# Workspace coordinates can be overridden through the environment / .env file
# (AML_SUBSCRIPTION_ID, AML_RESOURCE_GROUP, AML_WORKSPACE_NAME). The fallbacks
# are the values this notebook was originally written against, so behaviour is
# unchanged when the variables are not set.
# Alternative: ws = Workspace.from_config(path=os.path.join('aml_config.json'))
ws = Workspace(
    subscription_id=os.environ.get('AML_SUBSCRIPTION_ID',
                                   "96a9ec41-928f-4f69-9cb4-0a6c396f6ab2"),
    resource_group=os.environ.get('AML_RESOURCE_GROUP', "azure-ml"),
    workspace_name=os.environ.get('AML_WORKSPACE_NAME', "azure-ml"),
    auth=svc_pr
)

# Experiment that all runs in this notebook are submitted to.
exp = Experiment(workspace=ws, name='rossmann')

# Blob datastore of the workspace; used for the upload and the data references.
ds = Datastore.get(ws, datastore_name='workspaceblobstore')

In [9]:
# One tab-separated line identifying the workspace ...
print('\t'.join(map(str, (ws.name, ws.location, ws.resource_group, ws.subscription_id))))
# ... followed by the mapping of datastores attached to it.
print(f'\ndatastores\n {ws.datastores}')

azure-ml	westeurope	azure-ml	96a9ec41-928f-4f69-9cb4-0a6c396f6ab2

datastores
 {'workspaceblobstore': <azureml.data.azure_storage_datastore.AzureBlobDatastore object at 0x114af1990>, 'workspacefilestore': <azureml.data.azure_storage_datastore.AzureFileDatastore object at 0x114b252d0>}


##### 0.2 upload data

In [None]:
# Fetch the workspace blob datastore by name
# (ws.get_default_datastore() would return the workspace default instead).
ds = Datastore.get(ws, 'workspaceblobstore')

# Upload the local data directory; overwrite=True replaces files that already
# exist under the target path.
ds.upload(
    src_dir=data_dir,
    target_path='rossmann-store-sales/source',
    overwrite=True,
    show_progress=True,
)

##### 0.3 Look at the available CPU VM sizes and create a compute cluster if necessary

In [10]:
# Display the compute targets the workspace currently knows about.
ws.compute_targets

{}

In [12]:
from azureml.core.compute import ComputeTarget, AmlCompute

# VM sizes offered to this workspace, restricted to CPU-only machines
# (gpus == 0) and ordered by ascending vCPU count.
supported_vms = sorted(
    (vm for vm in AmlCompute.supported_vmsizes(ws) if vm['gpus'] == 0),
    key=lambda vm: vm['vCPUs'],
)

In [14]:
# Show the first three entries of the CPU-only VM size list.
supported_vms[:3]

[{'name': 'Standard_D1_v2',
  'vCPUs': 1,
  'gpus': 0,
  'memoryGB': 3.5,
  'maxResourceVolumeMB': 51200},
 {'name': 'Standard_DS1_v2',
  'vCPUs': 1,
  'gpus': 0,
  'memoryGB': 3.5,
  'maxResourceVolumeMB': 7168},
 {'name': 'Standard_D1',
  'vCPUs': 1,
  'gpus': 0,
  'memoryGB': 3.5,
  'maxResourceVolumeMB': 51200}]

In [21]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the CPU cluster to reuse or create
cpu_cluster_name = "mini-cpu-cluster"

try:
    # Look the cluster up by name; a ComputeTargetException is taken to mean
    # that it does not exist yet.
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    # Provision a small cluster that may shrink to zero nodes and scales down
    # after 240 idle seconds.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_D1',
        min_nodes=0,
        max_nodes=4,
        idle_seconds_before_scaledown=240,
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Called for both the reused and the freshly created cluster.
cpu_cluster.wait_for_completion(show_output=True)

Creating
Succeeded
AmlCompute wait for completion finished
Minimum number of nodes requested have been provisioned


### 1 Train locally

##### 1.1 local run; use locally stored data

- the argument `--n_stores` is used only if we want to apply the model to a subset of the data (only for testing purposes)   

In [32]:
from azureml.core.runconfig import RunConfiguration
from azureml.core import ScriptRunConfig

# Use an already existing Python interpreter (path taken from the environment)
# instead of letting Azure ML build an environment for the run.
run_config = RunConfiguration()
run_config.environment.python.user_managed_dependencies = True
run_config.environment.python.interpreter_path = os.environ['PYTHON_INTERPRETER_PATH']

# The training script reads its data from the local directory `data_dir`.
script_run_config = ScriptRunConfig(source_directory='./',
                                    script='./train/model_01.py',
                                    arguments=['--data_dir', data_dir,
                                               '--max_pdq', '6', '1', '4',
                                               '--n_stores', '4'],
                                    run_config=run_config)

run = exp.submit(script_run_config)
run.log('comment', 'test local run with locally stored data')

# Wait for the submitted script to finish before closing the run, as the
# remote-run cells do; calling complete() straight after submit() would act
# on a run whose script may still be executing.
run.wait_for_completion(show_output=True)
run.complete()

##### 1.2 local run; remotely stored data

In [33]:
from azureml.core.runconfig import RunConfiguration
from azureml.core import ScriptRunConfig
from azureml.core.runconfig import DataReferenceConfiguration

# Location of the data inside the datastore. Deliberately NOT stored in
# `data_dir`: that name holds the local directory read from DATA_DIR, and
# rebinding it would break a re-run of the upload cell or of section 1.1.
path_on_datastore = 'rossmann-store-sales/source'

# Use an already existing Python interpreter (path taken from the environment).
run_config = RunConfiguration()
run_config.environment.python.user_managed_dependencies = True
run_config.environment.python.interpreter_path = os.environ['PYTHON_INTERPRETER_PATH']

# Data reference: download the datastore folder for the run, replacing
# existing files.
dr = DataReferenceConfiguration(datastore_name=ds.name,
                                path_on_datastore=path_on_datastore,
                                mode='download',
                                overwrite=True)
run_config.data_references = {ds.name: dr}

# str(ds.as_download()) is passed as the --data_dir argument.
# NOTE(review): presumably a placeholder that Azure ML resolves through the
# data reference registered above under ds.name - confirm.
script_run_config = ScriptRunConfig(source_directory='./',
                                    script='./train/model_01.py',
                                    arguments=['--data_dir', str(ds.as_download()),
                                               '--max_pdq', '6', '1', '4',
                                               '--n_stores', '4'],
                                    run_config=run_config)

run = exp.submit(script_run_config)
run.log('comment', 'test local run with remotely stored data')
run.log('datastore_name', ds.name)
run.log('path_on_datastore', path_on_datastore)

# Wait for the submitted script to finish before closing the run, as the
# remote-run cells do.
run.wait_for_completion(show_output=True)
run.complete()

##### 1.5 local run; use conda environment (autoprepare)

In [None]:
from azureml.core.runconfig import RunConfiguration, DataReferenceConfiguration
from azureml.core import ScriptRunConfig
from azureml.core.conda_dependencies import CondaDependencies

# Location of the data inside the datastore. Deliberately NOT stored in
# `data_dir`: that name holds the local directory read from DATA_DIR, and
# rebinding it would break a re-run of the upload cell or of section 1.1.
path_on_datastore = 'rossmann-store-sales/source'

# Do not use a user-managed interpreter; ask for the environment to be
# prepared automatically from the conda specification below.
run_config = RunConfiguration()
run_config.environment.python.user_managed_dependencies = False
run_config.auto_prepare_environment = True

# Conda/pip dependencies come from the project's environment.yml.
# Inline alternative:
#   CondaDependencies.create(python_version='3.7.3',
#                            pip_packages=['pandas', 'numpy', 'scikit-learn', 'azureml-sdk'])
cd = CondaDependencies(conda_dependencies_file_path='environment.yml')
run_config.environment.python.conda_dependencies = cd

# Data reference: download the datastore folder for the run, replacing
# existing files.
dr = DataReferenceConfiguration(datastore_name=ds.name,
                                path_on_datastore=path_on_datastore,
                                mode='download',
                                overwrite=True)
run_config.data_references = {ds.name: dr}

script_run_config = ScriptRunConfig(source_directory='./',
                                    script='./train/model_01.py',
                                    arguments=['--data_dir', str(ds.as_download()),
                                               '--max_pdq', '6', '1', '4',
                                               '--n_stores', '4'],
                                    run_config=run_config)

run = exp.submit(script_run_config)
run.log('comment', 'test local run with remotely stored data; use conda environment (autoprepare)')

# Wait for the submitted script to finish before closing the run, as the
# remote-run cells do.
run.wait_for_completion(show_output=True)
run.complete()

##### 1.10 local run; Docker-based execution (image from Azure registry with user defined Conda env)

```
# Base image: Miniconda, pinned to 4.7.12
FROM continuumio/miniconda3:4.7.12

# Point /bin/sh at bash so that `source activate` works inside the RUN steps below
RUN ln -fs /bin/bash /bin/sh

# Copy the pip requirements file into the image
RUN mkdir -p opt/rossmann
COPY requirements.txt opt/rossmann/
# NOTE(review): PYTHONPATH points to /home/rossmann while the files above are placed under opt/rossmann - confirm this is intended
ENV PYTHONPATH=/home/rossmann

# Create a dedicated conda environment named 'rossmann' with Python 3.7
RUN conda create -n rossmann python=3.7 --yes

# Install the requirements with pip inside the 'rossmann' environment
RUN source activate rossmann && \
    pip install -r opt/rossmann/requirements.txt \
    && source deactivate

```

In [9]:
from azureml.core.runconfig import RunConfiguration, DataReferenceConfiguration
from azureml.core import ScriptRunConfig
from azureml.core.conda_dependencies import CondaDependencies

In [None]:
# Location of the data inside the datastore. Deliberately NOT stored in
# `data_dir`: that name holds the local directory read from DATA_DIR, and
# rebinding it would break a re-run of the upload cell or of section 1.1.
path_on_datastore = 'rossmann-store-sales/source'

run_config = RunConfiguration(framework="Python")

# Docker-based execution: dependencies are not user-managed, automatic
# environment preparation is switched off, Docker is enabled.
run_config.environment.python.user_managed_dependencies = False
run_config.auto_prepare_environment = False
run_config.environment.docker.enabled = True

# Conda/pip dependencies come from the project's environment.yml.
cd = CondaDependencies(conda_dependencies_file_path='environment.yml')
run_config.environment.python.conda_dependencies = cd

# Data reference: download the datastore folder for the run, replacing
# existing files.
dr = DataReferenceConfiguration(datastore_name=ds.name,
                                path_on_datastore=path_on_datastore,
                                mode='download',
                                overwrite=True)
run_config.data_references = {ds.name: dr}

script_run_config = ScriptRunConfig(source_directory='./',
                                    script='./train/model_01.py',
                                    arguments=['--data_dir', str(ds.as_download()),
                                               '--max_pdq', '6', '1', '4',
                                               '--n_stores', '4'],
                                    run_config=run_config)

run = exp.submit(script_run_config)
# The logged comment now describes this cell (Docker enabled, auto-prepare off)
# instead of repeating the text of the conda/autoprepare run.
run.log('comment', 'test local run with remotely stored data; Docker-based execution (conda env from environment.yml)')

# Wait for the submitted script to finish before closing the run, as the
# remote-run cells do.
run.wait_for_completion(show_output=True)
run.complete()

### 2 Train on a remote machine

In [83]:
# Print the public attribute names of the run configuration, i.e. everything
# reported by dir() that does not start with an underscore.
elements = dir(run_config)
for el in elements:
    if el.startswith('_'):
        continue
    print(el)

amlcompute
arguments
auto_prepare_environment
communicator
data
data_references
delete
environment
framework
hdi
history
load
max_run_duration_seconds
mpi
node_count
save
script
source_directory_data_store
spark
target
tensorflow


##### 2.1 Use the Azure standard docker image; auto prepare env

In [None]:
from azureml.core import Environment, ScriptRunConfig
from azureml.core.runconfig import RunConfiguration, DataReferenceConfiguration, DEFAULT_CPU_IMAGE
from azureml.core.conda_dependencies import CondaDependencies

# Location of the data inside the datastore. Deliberately NOT stored in
# `data_dir`: that name holds the local directory read from DATA_DIR, and
# rebinding it would break a re-run of the upload cell or of section 1.1.
path_on_datastore = 'rossmann-store-sales/source'

run_config = RunConfiguration("Python")  # default arg anyway

# target
run_config.target = cpu_cluster.name  # check if you have already created a cpu cluster and use its name

# environment
# Build the environment from requirements.txt FIRST: assigning
# run_config.environment replaces the whole environment object, so any
# attribute set on the previous object beforehand would be discarded.
run_config.environment = Environment.from_pip_requirements('rossmann', 'requirements.txt')
run_config.environment.python.conda_dependencies.set_python_version('3.7.5')

# False: a conda env is built on top of the image.
# Set to True only if we want to use an env already present on the image.
run_config.environment.python.user_managed_dependencies = False

run_config.environment.docker.enabled = True
run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE

# data_references: mount the datastore folder on the compute target
dr = DataReferenceConfiguration(datastore_name=ds.name,
                                path_on_datastore=path_on_datastore,
                                mode='mount',
                                overwrite=True)
run_config.data_references = {ds.name: dr}

# overall config
# NOTE(review): the --data_dir argument is built with ds.as_download() although
# the data reference above uses mode='mount'; presumably only the placeholder
# string matters and the mode comes from `dr` - confirm.
script_run_config = ScriptRunConfig(source_directory='./',
                                    script='./train/model_01.py',
                                    arguments=['--data_dir', str(ds.as_download()),
                                               '--max_pdq', '6', '1', '4',
                                               '--n_stores', '4'],
                                    run_config=run_config)

run = exp.submit(script_run_config)
run.log('comment', 'Run-based remote instance; Env from pip; data mounted from azure blob storage 2')
run.wait_for_completion(show_output=True)
run.complete()

##### 2.2 Use a user-defined Docker image; Azure CPU cluster

In [None]:
from azureml.core import Environment, ScriptRunConfig
from azureml.core.runconfig import RunConfiguration, DataReferenceConfiguration, DEFAULT_CPU_IMAGE
from azureml.core.conda_dependencies import CondaDependencies

# Location of the data inside the datastore. Deliberately NOT stored in
# `data_dir`: that name holds the local directory read from DATA_DIR, and
# rebinding it would break a re-run of the upload cell or of section 1.1.
path_on_datastore = 'rossmann-store-sales/source'

run_config = RunConfiguration("Python")

# target
run_config.target = cpu_cluster.name  # check if you have already created a cpu cluster and use its name

# environment
# user_managed_dependencies=True: use the interpreter that already exists in
# the image (the 'rossmann' conda env) instead of building a new environment.
run_config.environment.python.user_managed_dependencies = True
run_config.environment.python.interpreter_path = '/opt/conda/envs/rossmann/bin/python'

# User-defined image pulled from Docker Hub; the registry password is read
# from the environment rather than written into the notebook.
run_config.environment.docker.enabled = True
run_config.environment.docker.base_image = "imscientist/rossmann_img:0.2"
run_config.environment.docker.base_image_registry.address = "registry.hub.docker.com"
run_config.environment.docker.base_image_registry.username = "imscientist"
run_config.environment.docker.base_image_registry.password = os.environ["DOCKER_HUB_PWD"]

# data_references: mount the datastore folder on the compute target
dr = DataReferenceConfiguration(datastore_name=ds.name,
                                path_on_datastore=path_on_datastore,
                                mode='mount',
                                overwrite=True)
run_config.data_references = {ds.name: dr}

# NOTE(review): the --data_dir argument is built with ds.as_download() although
# the data reference above uses mode='mount'; presumably only the placeholder
# string matters and the mode comes from `dr` - confirm.
script_run_config = ScriptRunConfig(source_directory='./',
                                    script='./train/train_03.py',
                                    arguments=['--data_dir', str(ds.as_download()),
                                               '--num_boost_round', '3000',
                                               '--early_stopping_rounds', '200'],
                                    run_config=run_config)

run = exp.submit(script_run_config)
run.log('comment', 'model 3 (xgboost), eta=0.03 num_boost_round=3000 max_depth=10')
run.wait_for_completion(show_output=True)
run.complete()