In [1]:
%matplotlib inline
import numpy as np
import os
import matplotlib.pyplot as plt


In [2]:
import azureml
import mlflow
from azureml.core import Workspace, Dataset, Environment

# check core SDK version number
print("Azure ML SDK Version: ", azureml.core.VERSION)
print("MLflow version:", mlflow.version.VERSION)


Azure ML SDK Version:  1.44.0
MLflow version: 1.26.1


In [3]:
ws = Workspace.from_config()
# mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())
print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep='\n')


Workspace name: scsccps-dsai-aide-dev-mlw
Azure region: canadacentral
Subscription id: 105efa68-0ff4-486f-ae3a-86e28a447237
Resource group: scsc-dsai-aide-dev-rg


In [4]:
from azureml.core import Experiment

script_folder = '../project'
# os.makedirs(script_folder, exist_ok=True)

exp = Experiment(workspace=ws, name='transformer_hp')
# mlflow.set_experiment('transformer_hp')

In [5]:
ds_train = Dataset.get_by_name(ws, name="owner_g_classfication_train", version=21)
ds_val = Dataset.get_by_name(ws, name="owner_g_classfication_val", version=21)
ds_test = Dataset.get_by_name(ws, name="owner_g_classfication_test", version=21)
ds_temporal_test = Dataset.get_by_name(ws, name="owner_g_classfication_temporal_test", version=16)


In [6]:

print(f'{ds_train.tags}: V{ds_train.version}')
print(f'{ds_val.tags}: V{ds_val.version}')
print(f'{ds_test.tags}: V{ds_test.version}')
print(f'{ds_temporal_test.tags}: V{ds_temporal_test.version}')


{'top': '120', 'total_records': '142820', 'period': 'REPORT_MONTH >= 201808', 'ratio': '80%', 'description': 'all records since [201808] but OTHER from [202107]', 'valid_period': '202105', 'logic': 'v4', 'owner groups': "['DC000203', 'DC000130', 'DC000216', 'NW000400', 'DC000132', 'DC000141', 'NW000414', 'NW000403', 'OTHER', 'ENT00005', 'NW000438', 'DC000200', 'EDC00054', 'ITS00313', 'SM000562', 'ITS00374', 'DC000107', 'NW000417', 'DC000111', 'DC000133', 'DC000222', 'ITS00330', 'DC000202', 'NW000429', 'EDC00089', 'ITS00335', 'ITS00380', 'DC000139', 'DC000178', 'SM000537', 'ITS00368', 'ITS00306', 'DC000152', 'NW000433', 'NW000459', 'ITS00343', 'DC000113', 'DC000115', 'NW000489', 'ITS00376', 'ITS00340', 'NW000420', 'ITS00341', 'DC000205', 'DC000117', 'DC000210', 'NW000404', 'ITS00326', 'DC000119', 'EDC00029', 'ITS00345', 'DC000140', 'NW000453', 'EDC00095', 'DC000127', 'DC000188', 'DC000209', 'NW000405', 'DC000179', 'DC000236', 'ITS00375', 'DC000155', 'DC000191', 'SM000557', 'SM000502', '

In [7]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# choose a name for your cluster
cluster_name = "NC6s-v3-5-nodes"
compute_target = ComputeTarget(workspace=ws, name=cluster_name)

In [8]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies


In [9]:
env = Environment.get(workspace=ws, name="Transformer-DeBerta")

In [10]:
from azureml.core import ScriptRunConfig

args = ['--target-name', 'target',
        '--text-field', 'TEXT_FINAL',
        '--is-test', 0,
        '--is-local', 0,
        '--evaluation-strategy', "epoch",
        ds_train.as_named_input("train"),
        ds_val.as_named_input("validation"),
        ds_test.as_named_input("test"),
        ds_temporal_test.as_named_input("temporal_test")]

src = ScriptRunConfig(source_directory=script_folder,
                      script='train_transformer.py',
                      arguments=args,
                      compute_target=compute_target,
                      environment=env)


In [17]:
from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal
from azureml.train.hyperdrive import choice, loguniform

ps = RandomParameterSampling(
    {
        '--base-checkpoint': choice("bert-base-cased"), #, "bert-base-cased"), # , "bert-large-cased", "microsoft/deberta-v3-small", "distilbert-base-uncased", "bert-base-uncased", "allenai/longformer-base-4096"),
        '--batch-size': choice(4),
        '--no-epochs': choice(4),
        '--learning-rate': choice(5e-5, 4.5e-5, 4e-5, 5.5e-5, 6e-5), #, 2.5e-5, 1e-5, 1.5e-5, 3e-5),
        '--warmup-steps': choice(0),
        '--weight-decay': choice(0.0),
        '--adam-beta1': choice(0.9),
        '--adam-beta2': choice(0.999),
        '--adam-epsilon': choice(1e-8)
    }
)


In [18]:
policy = BanditPolicy(evaluation_interval=5, slack_factor=0.1)


In [19]:
hyperdrive_config = HyperDriveConfig(run_config=src,
                                     hyperparameter_sampling=ps,
                                     policy=policy,
                                     primary_metric_name='eval_f1_weighted',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=20,
                                     max_concurrent_runs=3)


In [20]:
hyperdrive_run = exp.submit(config=hyperdrive_config, tags={"dataset_tags": ds_train.tags})


Converting non-string tag to string: (dataset_tags: {'top': '120', 'total_records': '142820', 'period': 'REPORT_MONTH >= 201808', 'ratio': '80%', 'description': 'all records since [201808] but OTHER from [202107]', 'valid_period': '202105', 'logic': 'v4', 'owner groups': "['DC000203', 'DC000130', 'DC000216', 'NW000400', 'DC000132', 'DC000141', 'NW000414', 'NW000403', 'OTHER', 'ENT00005', 'NW000438', 'DC000200', 'EDC00054', 'ITS00313', 'SM000562', 'ITS00374', 'DC000107', 'NW000417', 'DC000111', 'DC000133', 'DC000222', 'ITS00330', 'DC000202', 'NW000429', 'EDC00089', 'ITS00335', 'ITS00380', 'DC000139', 'DC000178', 'SM000537', 'ITS00368', 'ITS00306', 'DC000152', 'NW000433', 'NW000459', 'ITS00343', 'DC000113', 'DC000115', 'NW000489', 'ITS00376', 'ITS00340', 'NW000420', 'ITS00341', 'DC000205', 'DC000117', 'DC000210', 'NW000404', 'ITS00326', 'DC000119', 'EDC00029', 'ITS00345', 'DC000140', 'NW000453', 'EDC00095', 'DC000127', 'DC000188', 'DC000209', 'NW000405', 'DC000179', 'DC000236', 'ITS00375

In [21]:
hyperdrive_run


Experiment,Id,Type,Status,Details Page,Docs Page
transformer_hp,HD_167b99cc-3e5f-4444-8d1f-3771f91b3221,hyperdrive,Running,Link to Azure Machine Learning studio,Link to Documentation
