In [None]:
import os
import math
from azureml.widgets import RunDetails
from azureml.train.hyperdrive import *
from azureml.train.estimator import Estimator
from azureml.core import Workspace, Datastore, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Check the core SDK version number.
# The `from azureml.<pkg> import ...` statements above bind only the imported
# names, not the top-level `azureml` package, so import it explicitly here;
# otherwise `azureml.core.VERSION` raises NameError on a fresh kernel.
import azureml.core

print("Azure ML SDK Version: ", azureml.core.VERSION)

#  
#  
# Connect to environment

In [None]:
# Workspace handle, obtained through Workspace.from_config()
ws = Workspace.from_config()
print("Workspace: {} in region {}".format(ws.name, ws.location))

# Compute cluster the training runs are sent to
cluster = ComputeTarget(name="absa-cluster", workspace=ws)
print("Compute cluster: {}".format(cluster.name))

# Default datastore of the workspace
ds = ws.get_default_datastore()
print("Datastore: {}".format(ds.name))

# Experiment under which the HyperDrive runs are recorded
experiment = Experiment(name="absa_hyperdrive", workspace=ws)
print("Experiment: {}".format(experiment.name))

#   
#  
# Fine-tuning the model with AzureML HyperDrive

In [None]:
# Hyperparameters to tune. Each command-line flag is randomly sampled from the
# same set of candidate values, range(2, 5).
param_sampling = RandomParameterSampling({
    flag: choice(range(2, 5))
    for flag in ('--asp_thresh', '--op_thresh', '--max_iter')
})

In [None]:
# Termination policy (median stopping), evaluated at every interval with no delay.
# NOTE: the `policy=` argument of the HyperDriveConfig in this notebook is
# commented out, so this object is defined but not applied to the runs.
early_termination_policy = MedianStoppingPolicy(
    delay_evaluation=0,
    evaluation_interval=1,
)

In [None]:
# Fixed command-line arguments passed to the entry script on every run
script_params = {'--data_folder': ds, '--large': 'yes'}

# Estimator: runs train.py from ../scripts on the compute cluster
nlp_est = Estimator(
    source_directory='../scripts',
    entry_script='train.py',
    script_params=script_params,
    compute_target=cluster,
    environment_variables={'NLP_ARCHITECT_BE': 'CPU'},
    pip_packages=[
        'git+https://github.com/NervanaSystems/nlp-architect.git@absa',
        'spacy==2.1.8',
    ],
)

# HyperDriveConfig: combines the estimator with the sampled hyperparameters and
# maximizes the 'f1_weighted' metric over at most 16 runs, 8 at a time
hd_config = HyperDriveConfig(
    estimator=nlp_est,
    hyperparameter_sampling=param_sampling,
    # policy=early_termination_policy,  # early termination is currently switched off
    primary_metric_name='f1_weighted',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=16,
    max_concurrent_runs=8,
)

In [None]:
# Submit the HyperDriveConfig to the experiment and keep the returned run
# so it can be monitored and queried in the following cells
hyperdrive_run = experiment.submit(config=hd_config)

In [None]:
# Display the run-details widget for the HyperDrive run
run_details = RunDetails(hyperdrive_run)
run_details.show()

In [None]:
# Select a previous run and show the details.
# NOTE: on success this rebinds `hyperdrive_run`, so the following cells operate
# on the selected run instead of the run submitted above.
previous_run_id = 'absa_hyperdrive_1580325134380722'

# Stop at the first matching run instead of materializing every run of the
# experiment in a list just to take element [0].
selected_run = next(
    (run for run in experiment.get_runs() if run.id == previous_run_id),
    None,
)
if selected_run is None:
    # Same exception type as the original `[...][0]` lookup, but with a message
    # that says which run id is missing; `hyperdrive_run` is left untouched.
    raise IndexError(
        "Run '{}' was not found in experiment '{}'".format(previous_run_id, experiment.name)
    )

hyperdrive_run = selected_run
RunDetails(hyperdrive_run).show()

### Find the best model
Once all the runs complete, we can find the run that produced the model with the highest weighted F1 score (`f1_weighted`, the primary metric that the HyperDrive configuration maximizes).

In [None]:
# Retrieve the child run with the best value of the primary metric.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # get_best_run_by_primary_metric() returns None when no child run has
    # reported the primary metric; fail with a clear message instead of an
    # AttributeError on None.
    raise RuntimeError(
        "No child run has reported the primary metric 'f1_weighted'; "
        "wait for the HyperDrive run to complete before selecting the best run."
    )
best_run_metrics = best_run.get_metrics()

# Report the run number and its weighted F1 score (5 decimal places)
print('Best run is: {0}\n  F1: {1:.5f}'.format(best_run.number, best_run_metrics['f1_weighted']))