# Run Bert2Punc in Azure
This notebook submits the training run for the Bert2Punc model to Azure ML.
The project layout that the code below expects is shown in the following section.

Project Organization

------------
    ├── Run Bert2Punc in Azure  <- The current file
    ├── src
    │   ├── data
    │   │   └── processed       <- Data generated from running make_dataset.py
    │   │
    │   └── models
    │       ├── azure_train_model_pl.py     <- The script to run
    │       └── model_pl.py                 <- Model to train
    │
    ├── conda_dependencies.yml  <- To create environment
    │
    └── variables.py    <- To navigate around the different folders

In [None]:
import azureml.core
from azureml.core import Dataset, Workspace, Experiment, ScriptRunConfig, Environment, Run
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core.conda_dependencies import CondaDependencies

from azureml.widgets import RunDetails

In [None]:
# Identifiers of the workspace to connect to; replace the placeholders with your own values.
subscription_id = 'your_subscription_id'
resource_group = 'your_resource_group'
workspace_name = 'your_workspace'

# Log in to the workspace described by the identifiers above.
ws = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)

# Alternatively, load the workspace from a saved config file instead:
#ws = Workspace.from_config() # This automatically looks for a directory .azureml
#print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

## Create a compute cluster
You can either reuse an existing compute target or create a new compute cluster. The cell below looks the cluster up by name and only creates it if it does not exist yet.

In [None]:
# Name of the compute cluster to reuse or, if it is missing, to create.
#cluster_name = "gpu-cluster"
cluster_name = 'Bert2Punc'

try:
    # Look the cluster up in the workspace; raises ComputeTargetException on failure.
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    print('Creating a new compute target...')
    # NOTE(review): min_nodes=1 presumably keeps one node allocated even when
    # idle -- confirm this is intended.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_NC6',
        min_nodes=1,
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(
        workspace=ws,
        name=cluster_name,
        provisioning_configuration=compute_config,
    )
    # Wait (up to 20 minutes) for provisioning to finish before continuing.
    compute_target.wait_for_completion(
        show_output=True,
        min_node_count=None,
        timeout_in_minutes=20,
    )
else:
    print('Found existing compute target')

#Alternative to use CPU

# Choose a name for your CPU cluster
#cpu_cluster_name = "Bert2PuncCPU"

# Verify that the cluster does not exist already
#try:
#    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
#    print('Found existing cluster, use it.')
#except ComputeTargetException:
#    compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_DS12_v2',
#                                                           max_nodes=4)
#    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

#cpu_cluster.wait_for_completion(show_output=True)


## Set up environment 
Next, set up the environment in which the training script will run. The required packages are listed in the ``conda_dependencies.yml`` file.

In [None]:
# Build the Python environment for the experiment from the conda specification file.
env = Environment.from_conda_specification(
    name='Bert2Punc',
    file_path='./conda_dependencies.yml',
)

# Define the experiment under which the run is recorded in the workspace.
experiment = Experiment(workspace=ws, name='Bert2Punc_pl')

# Describe the training job: which script to run, with which arguments,
# on which compute target and in which environment.
config = ScriptRunConfig(
    source_directory='./src',
    script='./models/azure_train_model_pl.py',
    arguments=[
        '--gpus', 1,
        '--max_epochs', 2,
        '--precision', 16,
    ],
    # Use the compute target that was looked up / created earlier in this
    # notebook rather than a hard-coded name: the previous literal
    # "Bert2PuncGPU" did not match the cluster name 'Bert2Punc' used above.
    compute_target=compute_target,
    # The environment is passed here, so it does not need to be assigned to
    # config.run_config.environment again afterwards.
    environment=env,
)

# Submit the job, show the live run widget, and block until the run finishes
# while streaming its output.
run = experiment.submit(config)
RunDetails(run).show()
run.wait_for_completion(show_output=True)