In [1]:
!pip install azureml azureml.core azureml.widgets azureml-dataset-runtime transformers azureml.train

Collecting azureml
  Downloading https://files.pythonhosted.org/packages/ab/e8/76cd2cb6784b9039affd2c659eed1b3f46baf2e6b87a10b072a20b5b0113/azureml-0.2.7-py2.py3-none-any.whl
Collecting azureml.core
[?25l  Downloading https://files.pythonhosted.org/packages/bf/56/0fe0576c3efe0cf19aeed7dd64ed412db49522c11134f7ccf50e33ac6281/azureml_core-1.25.0-py3-none-any.whl (2.2MB)
[K     |████████████████████████████████| 2.2MB 12.2MB/s 
[?25hCollecting azureml.widgets
[?25l  Downloading https://files.pythonhosted.org/packages/ff/3e/3187ecbefd9de606003a6a44b160b839a8913c40fb10f8ca44d0faacf86c/azureml_widgets-1.25.0-py3-none-any.whl (14.1MB)
[K     |████████████████████████████████| 14.1MB 281kB/s 
[?25hCollecting azureml-dataset-runtime
  Downloading https://files.pythonhosted.org/packages/7a/7a/deb88d9216c374364362e922a7b3aaa66bff96465f3f9e609137bcbd279b/azureml_dataset_runtime-1.25.0-py3-none-any.whl
Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/ed/d5/f4

In [2]:
# Azure connection details and local project paths
import os  # required by the os.path.join calls at the bottom of this cell

setup = False  # True: also run the one-off upload cells guarded by `if setup:`
azure = "edu"  # which Azure account to use: "edu" or "private"
modelName = "bert-base-uncased" # "openai-gpt" # 'bert-base-uncased'

if azure == "edu":
  subscription_id = "b0cfdf73-f3dd-4fd0-891a-c54130a3d181"
  resource_group = "master"
  workspace_name = "master1"
  computeSize = "small"  # "small" or "big"; selects the compute target below
  if computeSize == "small":
    computeResource = "master-gpu-12"
  elif computeSize == "big":
    computeResource = "master-gpu-32-1"
  else:
    # Fail here rather than with a NameError on computeResource in a later cell.
    raise ValueError(f"Unknown computeSize: {computeSize!r}")

elif azure == "private":
  subscription_id = "93986b83-8c58-4738-abfd-f7d1cbcce9f8"
  resource_group = "master-privat"
  workspace_name = "master-privat-useast"
  computeResource = "cluster-nd6"

else:
  # Fail here rather than with a NameError on subscription_id in a later cell.
  raise ValueError(f"Unknown azure account: {azure!r}")

# Local checkout of the project; the paths below are derived from it.
repoDir = "/Users/michael/workspaces/MasterThesis"

scriptsPath = os.path.join(repoDir,"scripts")
trainingDataPath = os.path.join(repoDir,"data","embeddings")
condaFile = os.path.join(repoDir,"environment.yaml")

In [3]:
# Establish connection to Google Drive (only needed on Google Colab; currently disabled)
#from google.colab import drive
#drive.mount('/content/drive')

Mounted at /content/drive


In [16]:
# Connect to the Azure ML workspace selected in the configuration cell
from azureml.core import Workspace, Dataset

ws = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)
ws

Workspace.create(name='master1', subscription_id='b0cfdf73-f3dd-4fd0-891a-c54130a3d181', resource_group='master')

In [17]:
# Fetch the workspace's default datastore and display it.
# NOTE(review): a later cell rebinds `ds` to a file dataset, so `ds` only
# refers to the datastore until that cell has run.
ds = ws.get_default_datastore()
ds

{
  "name": "workspaceblobstore",
  "container_name": "azureml-blobstore-9f12576a-c5bf-4eee-afa1-78d97de413d6",
  "account_name": "master15868529431",
  "protocol": "https",
  "endpoint": "core.windows.net"
}

In [5]:
# One-off setup: upload the local training data to "embeddings" on the default datastore.
if setup:
  # Look the datastore up from the workspace instead of relying on `ds`:
  # a later cell rebinds `ds` to a file dataset, which has no upload() method,
  # so this cell would fail when re-run after that cell.
  ws.get_default_datastore().upload(
      src_dir=trainingDataPath,
      target_path="embeddings",
      overwrite=False,
  )

In [6]:
# One-off setup: download the pretrained model, save it locally and upload it
# to the default datastore under a folder named after the model.
if setup:
  # Imported lazily so transformers is only loaded when setup is requested.
  from transformers import BertForSequenceClassification
  model = BertForSequenceClassification.from_pretrained(
                                      modelName,
                                      num_labels = 9,
                                      output_attentions = False,
                                      output_hidden_states = False)
  localPretrainedModelPath = "/content/model/" + modelName
  model.save_pretrained(localPretrainedModelPath)
  # Look the datastore up from the workspace instead of relying on `ds`:
  # a later cell rebinds `ds` to a file dataset, which has no upload() method,
  # so this cell would fail when re-run after that cell.
  ws.get_default_datastore().upload(
      src_dir=localPretrainedModelPath,
      target_path=modelName,
      overwrite=False,
  )

In [18]:
from azureml.core import (
    Experiment,
    Environment,
    ScriptRunConfig,
    Dataset,
)
from azureml.widgets import RunDetails

In [19]:
# File datasets pointing at the uploaded folders on the default datastore.
# NOTE(review): from here on `ds` is the embeddings dataset, no longer the datastore.
ds = Dataset.File.from_files(path=(ws.get_default_datastore(), "embeddings/"))
bertPretrained = Dataset.File.from_files(
    path=(ws.get_default_datastore(), f"{modelName}/")
)

In [20]:
# Build the run environment from the repository's conda specification file.
env = Environment.from_conda_specification("master-thesis-env", condaFile)

In [36]:
# Category passed to the training script; also used in the HyperDrive experiment name.
category = "climate"

# Run configuration for the training script; both datasets are passed as mounts.
script_run_config = ScriptRunConfig(
    source_directory=scriptsPath,
    script="08_cnn_train.py",
    arguments=[
        "--data-path", ds.as_mount(),
        "--pretrained-model", bertPretrained.as_mount(),
        "--epochs", 30,
        "--batch-size", 8,
        "--category", category,
    ],
    compute_target=computeResource,
    environment=env,
)

# Optional script arguments, currently disabled:
#   "--learning-rate", 0.003,
#   "--big"

In [None]:
# Submit one training run to the "Train1" experiment and show its progress widget.
runSingle = Experiment(ws, "Train1").submit(config=script_run_config)
RunDetails(runSingle).show()

In [37]:
from azureml.train.hyperdrive import (
    BanditPolicy,
    HyperDriveConfig,
    PrimaryMetricGoal,
    RandomParameterSampling,
    uniform,
)

# Search space: the learning rate is sampled uniformly from [0.0005, 0.005].
param_sampling = RandomParameterSampling({"learning-rate": uniform(0.0005, 0.005)})

# Early termination policy for the sweep.
early_termination_policy = BanditPolicy(
    evaluation_interval=1,
    slack_factor=0.15,
    delay_evaluation=10,
)

# NOTE(review): presumably the training script logs a metric named "accuracy" -- confirm.
hd_config = HyperDriveConfig(
    run_config=script_run_config,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    primary_metric_name="accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=50,
    max_concurrent_runs=1,
)

# Submit the sweep to a per-category experiment and show its progress widget.
run = Experiment(workspace=ws, name=f"HyperParameter_{category}").submit(hd_config)
RunDetails(run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…