In [None]:
import os
import azureml.core
import pandas as pd
from azureml.core.runconfig import JarLibrary
from azureml.core.compute import ComputeTarget, DatabricksCompute
from azureml.exceptions import ComputeTargetException
from azureml.core.datastore import Datastore
from azureml.data.data_reference import DataReference
from azureml.core.databricks import PyPiLibrary

from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal
from azureml.core import Workspace, Environment, Experiment, Datastore, Dataset, ScriptRunConfig
from azureml.pipeline.core import Pipeline, PipelineData, TrainingOutput
from azureml.pipeline.steps import DatabricksStep, PythonScriptStep 
from azureml.train.hyperdrive import choice, loguniform

from sklearn.model_selection import train_test_split
from azureml.train.automl import AutoMLConfig

# Report which azureml-core release this kernel is running against
print(f"SDK version: {azureml.core.VERSION}")


In [None]:
# Load the workspace described by the local config file and echo its coordinates, one per line
ws = Workspace.from_config()
for workspace_detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_detail)


In [None]:
from sklearn import preprocessing
from transformers import TrainingArguments, Trainer, AutoTokenizer


def get_encode_labels(pdf, text_field_name):
    """Fit a scikit-learn ``LabelEncoder`` on the distinct values of one column.

    Args:
        pdf: Frame-like object; ``pdf[text_field_name].unique()`` must be available.
        text_field_name: Name of the column whose distinct values are encoded.
            Despite the name, the notebook passes the *target* column here.

    Returns:
        The fitted ``LabelEncoder``.
    """
    encoder = preprocessing.LabelEncoder()
    distinct_values = list(pdf[text_field_name].unique())
    # fit() returns the encoder itself, so the fitted instance is handed back directly
    return encoder.fit(distinct_values)

def adjust_tokenizer(model, tokenizer, new_tokens):
    """Register extra tokens and resize the model's embeddings to match.

    Args:
        model: Object exposing ``resize_token_embeddings(size)``.
        tokenizer: Object exposing ``add_tokens(tokens)`` and ``len()``.
        new_tokens: Tokens handed to ``tokenizer.add_tokens``.

    Returns:
        The same ``(model, tokenizer)`` pair, both modified in place.
    """
    tokenizer.add_tokens(new_tokens)
    # The embedding table is resized to the tokenizer's length after the additions
    vocabulary_size = len(tokenizer)
    model.resize_token_embeddings(vocabulary_size)
    return model, tokenizer

def tokenize_function(example, text_field_name, tokenizer):
    """Tokenize the text held under ``text_field_name`` with truncation enabled.

    Args:
        example: Mapping indexed by ``text_field_name`` (a single example or a batch).
        text_field_name: Key of the text to tokenize.
        tokenizer: Callable invoked as ``tokenizer(texts, truncation=True)``.

    Returns:
        Whatever the tokenizer call returns.
    """
    texts = example[text_field_name]
    return tokenizer(texts, truncation=True)

def generate_tokenized_dataset(pdf, fields, le, target_name, text_field_name, tokenizer):
    """Build a Hugging Face dataset from ``pdf`` and a tokenized copy of it.

    Args:
        pdf: DataFrame holding the text and target columns.
        fields: Columns kept in the dataset (rows with missing values in them are dropped).
        le: Fitted encoder; ``le.transform`` maps the target column to the ``labels`` column.
        target_name: Name of the target column in ``pdf``.
        text_field_name: Name of the text column passed on to ``tokenize_function``.
        tokenizer: Tokenizer passed on to ``tokenize_function``.

    Returns:
        ``(ds, tokenized_dataset)``: the plain dataset and its tokenized mapping.

    Note:
        A ``labels`` column is written onto the caller's ``pdf`` as a side effect,
        and the encoding happens before rows with missing values are dropped.
    """
    from datasets import Dataset

    pdf['labels'] = le.transform(pdf[target_name])

    complete_rows = pdf[fields].dropna()
    ds = Dataset.from_pandas(complete_rows)

    tokenize_kwargs = {"text_field_name": text_field_name, "tokenizer": tokenizer}
    tokenized_dataset = ds.map(tokenize_function, batched=True, fn_kwargs=tokenize_kwargs)
    return ds, tokenized_dataset

In [None]:

from azureml.core import Workspace, Experiment, Environment, Model, Dataset, Run

from azureml.train.automl.run import AutoMLRun


# Resolve a step run by its id, then climb to the pipeline run that owns it.
run = ws.get_run(run_id="1047e487-166b-4d65-b0ca-9c03c57dad26")
exp = run.experiment

print('Run {} will be used'.format(run))
print('Experiment {} will be used'.format(exp))

parent_id = run.parent.id
pipeline_run = ws.get_run(parent_id)

print('pipeline_run -- {} will be used'.format(pipeline_run))

# Find the AutoML child step by name. A default is supplied to next() so a missing
# step produces a descriptive error instead of a bare StopIteration.
automl_step_name = 'AutoML_Classification'
automl_run_found = next(
    (r for r in pipeline_run.get_children(recursive=True) if r.name == automl_step_name),
    None,
)
if automl_run_found is None:
    raise LookupError(
        "No child run named '{}' found under pipeline run {}".format(automl_step_name, parent_id)
    )
print('AutoML-- {}'.format(automl_run_found))

automl_run = AutoMLRun(exp, run_id = automl_run_found.id)

# Pick the best child run / fitted model according to the lookup metric
lookup_metric = "AUC_weighted"
best_run, fitted_model = automl_run.get_output(metric=lookup_metric)
print(best_run)
print(fitted_model.tokenizer)

print('Tokenizing')
test_dataset = Dataset.get_by_name(workspace=exp.workspace, name='test_set')
pdf_test = test_dataset.to_pandas_dataframe()
print('Test Dataset {}'.format(pdf_test))

target_name="sentiment"
text_field_name="reviewText"

new_tokens = []
num_labels = len(pdf_test[target_name].unique())
print(f'num_labels: {num_labels}')

# The encoder is fitted on the target column of the test set itself
le = get_encode_labels(pdf_test, target_name)
fields = [text_field_name, target_name, 'labels']
print(f'le: {le}')
print(f'fields: {fields}')

test_ds, tokenized_test_ds = generate_tokenized_dataset(pdf_test, fields, le, target_name, text_field_name, fitted_model.tokenizer)

print('Tokenized data is generated')

print('Predictions started')
# NOTE(review): at this point pdf_test still contains the target column and the
# 'labels' column added by generate_tokenized_dataset -- confirm the fitted model
# is meant to receive them.
test_predictions = fitted_model.predict(pdf_test)
print(f'predictions: {test_predictions}')

In [None]:
from datasets import  load_metric
import numpy as np
import pandas as pd
# Note: this rebinds `Dataset` to the Hugging Face class, replacing the azureml.core one
from datasets import Dataset

target_name="sentiment"
# Retrieve reference labels from test set
test_references = test_ds[target_name]
# NOTE(review): test_ds was built after dropping incomplete rows while test_predictions
# covers the whole of pdf_test -- confirm both have the same length and order.

# Compute AUC_weighted
# The averaging mode is an argument of the metric's compute() call; passing it to
# load_metric() does not select weighted averaging, so it is supplied here instead.
metric = load_metric("roc_auc")
final_score = metric.compute(
    prediction_scores=test_predictions,
    references=test_references,
    average='weighted',
)

print('Metrics:', final_score["roc_auc"])
# Logged as text; the cell that later picks the best run converts it back with float()
run.log('test_AUC_weighted', f'{final_score["roc_auc"]}')

In [None]:
from azureml.core import Workspace, Experiment, Environment, Model, Dataset, Run

from azureml.train.automl.run import AutoMLRun


# Start from the known step run and climb to the pipeline run that owns it
run = ws.get_run(run_id="1047e487-166b-4d65-b0ca-9c03c57dad26")
exp = run.experiment

print(f'Run {run} will be used')
print(f'Experiment {exp} will be used')

parent_id = run.parent.id
pipeline_run = ws.get_run(parent_id)

print(f'pipeline_run -- {pipeline_run} will be used')

all_runs = pipeline_run.get_children(recursive=True)
print(f'all runs {all_runs} ')

# Keep only the descendant runs that logged the test metric, keyed by run id.
# `counter` ends up holding the number of runs visited.
dic_runs = {}
counter = 0
for i, runstep in enumerate(all_runs):
    metrics = runstep.get_metrics()
    if "test_AUC_weighted" in metrics:
        dic_runs[runstep.id] = {'run': runstep, 'metrics': metrics}
        print(f'Adding step {runstep} and metrics: {metrics}  ')
    counter = i + 1
print(f'len(dic_runs) = {len(dic_runs)}')


In [None]:
# Walk the collected runs and remember the entry with the highest test metric.
li_test_values = []
best_performing_run = None
metric_name = "test_AUC_weighted"

for run_id, run_entry in dic_runs.items():
    print('run_id', run_id)
    raw_value = run_entry['metrics'][metric_name]
    # When the stored value is a list, only its first element is used
    test_metric = float(raw_value[0] if type(raw_value) is list else raw_value)
    print(f'{metric_name} = {test_metric}')

    # Strictly-greater comparison: on a tie the earlier run stays selected
    if not li_test_values or test_metric > max(li_test_values):
        best_performing_run = run_entry

    li_test_values.append(test_metric)

In [None]:
from azureml.core import Workspace, Experiment, Environment, Model, Dataset, Run

from azureml.train.automl.run import AutoMLRun


# Look the step run up by id and report it
run = ws.get_run(run_id="1047e487-166b-4d65-b0ca-9c03c57dad26")
exp = run.experiment

print(f'Run {run} will be used')
print(f'Run name {run.name} will be used')

# Fetch the owning pipeline run and the iterable of all its descendants
parent_id = run.parent.id
pipeline_run = ws.get_run(parent_id)
all_runs = pipeline_run.get_children(recursive=True)

print(f'all runs {all_runs} ')



In [None]:
from azureml.core import Workspace, Experiment, Environment, Model, Dataset, Run

from azureml.train.automl.run import AutoMLRun
from azureml.core.resource_configuration import ResourceConfiguration
from shutil import copy2


ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

# Resolve the chosen run and the pipeline run it belongs to
best_run = ws.get_run(run_id="8123b373-38cb-4f14-b1ce-78e7d9aca346")
exp = best_run.experiment
pipeline_run = best_run.parent

print(f'Run {best_run} will be used')
print(f'Run name {best_run.name} will be used')
print(f'Pipeline Run {pipeline_run} will be used')

# Download the pipeline's "model_output" into a local folder ...
model_output = pipeline_run.get_pipeline_output("model_output")
print(f"Pipeline Data ==============: {model_output._path_on_datastore}")

model_output_dir = './model/'
os.makedirs(model_output_dir, exist_ok=True)
model_output.download(model_output_dir)

# ... and register that folder as a model in the workspace
model = Model.register(
    workspace=ws,
    model_name="test_model",
    model_path=model_output_dir,
)

# NOTE(review): the following cell reads ds_train / ds_val, but the only statement
# that would assign them is disabled here -- confirm where they are meant to come from.
#ds_train, ds_val, ds_test = find_run_datasets(best_run)

In [None]:
import argparse
import json
import os
import joblib
import shutil
import sys
import pickle
import pandas as pd
import numpy as np
from azureml.core.resource_configuration import ResourceConfiguration

# NOTE(review): ds_train, ds_val and best_run are not assigned in this cell; they must
# already exist in the kernel -- confirm the cell that provides them is run first.
pdf_train = ds_train.to_pandas_dataframe()
print(f"Train dataset name: {ds_train.name}, V:{ds_train.version}")

# Download Best Model
dir = 'output'

# Start from an empty download folder so files from an earlier run cannot linger
if os.path.isdir(dir):
    shutil.rmtree(dir)

model_directory = f'{dir}/outputs/model'
os.makedirs(model_directory,exist_ok=True)

# Files under the run's "outputs/model" prefix land in <dir>/outputs/model
best_run.download_files(prefix="outputs/model", output_directory=dir, timeout_seconds=6000)

print(f'the output path: [{model_directory}]')
# Ship the scoring script alongside the model files
shutil.copy('score.py', model_directory)

# The label list is built from the target column. The original read the text column
# ("reviewText"), which yields one "label" per distinct review instead of per class.
# NOTE(review): assumes the training set names its target "sentiment", as the test set does.
target_column = "sentiment"
li_target = list(pdf_train[target_column].unique())
num_labels = len(li_target)
print(f'Number of labels: [{num_labels}]')

# NOTE(review): the payload is a pickle even though the file is named .json; left
# unchanged because the consumer of this file is not visible here.
with open(f"{model_directory}/target_list.json", "wb") as outfile:
    pickle.dump(li_target, outfile)

# Register the Model
tags = {
    'run_id': best_run.id,
}

model = Model.register(workspace=ws,
                    datasets=[('train dataset', ds_train),
                                ('val dataset', ds_val)
                                ],
                    tags=tags,
                    model_name="test_model",
                    resource_configuration=ResourceConfiguration(cpu=2, memory_in_gb=1),
                    model_path=model_directory)

In [None]:
from azureml.core import Workspace, Experiment, Environment, Model, Dataset, Run

from azureml.train.automl.run import AutoMLRun
from azureml.core.resource_configuration import ResourceConfiguration
from shutil import copy2
from azureml.core.runconfig import RunConfiguration

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.pipeline.core import Pipeline, PipelineData, TrainingOutput, PipelineRun
from azureml.pipeline.steps import  PythonScriptStep
from azureml.core.conda_dependencies import CondaDependencies

ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')


# Locate the pipeline run that produced the chosen run and grab its "model_output"
best_run_id= "a08dc10b-abac-4b73-8dc3-c2393373250b"
best_run = ws.get_run(run_id=best_run_id)
best_exp = best_run.experiment
pipeline_run = PipelineRun(best_exp, best_run.parent.id)

saved_model = pipeline_run.get_pipeline_output("model_output")
print('Model output: [{}]'.format(saved_model))


source_directory = "./project"


# Existing compute target and a clone of a workspace environment shared by both steps
cpu_compute = ComputeTarget(workspace=ws, name="cpu-cluster")
base_env = Environment.get(workspace=ws, name="AzureML-AutoML-DNN" ) #name="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu")
#env = Environment.get(workspace=ws, name="nlp-accelerator")
env = base_env.clone("automml_dnn")

#env.python.conda_dependencies.add_pip_package("transformers[sentencepiece]==4.6.0")

rcfg = RunConfiguration()
rcfg.environment = env

# Step 1: register the model; the saved model is both a script argument and a step input
register_arguments = ['--best_run_id', best_run_id, '--saved_model', saved_model]
register_model_step = PythonScriptStep(
    script_name='test_runs.py',
    source_directory=source_directory,
    name="Register_Best_Model",
    compute_target=cpu_compute,
    arguments=register_arguments,
    inputs=[saved_model],
    allow_reuse=True,
    runconfig=rcfg,
)

# Step 2: deploy the registered model to the named endpoint
deploy_arguments = ['--endpoint-name', 'test-endpoint-en2', '--model-name', 'test_model']
deploy_model_step = PythonScriptStep(
    script_name='deploy_model.py',
    source_directory=source_directory,
    name="Deploy_Latest_Model",
    compute_target=cpu_compute,
    arguments=deploy_arguments,
    allow_reuse=True,
    runconfig=rcfg,
)

# No data dependency links the two steps, so the ordering is declared explicitly
deploy_model_step.run_after(register_model_step)

exp = Experiment(workspace=ws, name='test')
steps = [register_model_step, deploy_model_step]
pipeline = Pipeline(workspace=ws, steps=steps)
pipeline.submit(exp.name)

In [None]:
# Create a local online endpoint with the v2 SDK (azure.ai.ml)
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
)
from azure.identity import DefaultAzureCredential, ManagedIdentityCredential

# Workspace coordinates used to build the client
subscription_id = "f9b97038-ed78-4a26-a1a7-51e81e75d867"
resource_group = "openaml"
workspace = "nlp-workspace"

# Handle to the workspace, authenticated with a managed identity
credential = ManagedIdentityCredential()
ml_client = MLClient(credential, subscription_id, resource_group, workspace)

import datetime

# Timestamp suffix (month, day, hour, minute, microseconds) keeps endpoint names distinct
endpoint_suffix = datetime.datetime.now().strftime("%m%d%H%M%f")
local_endpoint_name = "localautoml-{}".format(endpoint_suffix)

# Describe the endpoint, then create it locally (local=True)
endpoint = ManagedOnlineEndpoint(
    name=local_endpoint_name,
    description="this is a sample local endpoint",
)
ml_client.online_endpoints.begin_create_or_update(endpoint, local=True)

In [40]:
# The model folder is expected to exist already; the download that would populate it is disabled:
#from azureml.core.model import Model
#model = Model(ws, 'test_model', version=29)
dir = 'outputs'
#model.download(target_dir=dir, exist_ok=True)

prefix_path = "model"
model_directory = f'{dir}/{prefix_path}'
print(f' Get Model object Dir {os.listdir(model_directory)} ')

# Local model entity pointing at the downloaded folder
local_model = Model(path=model_directory)

# Inference environment: base image plus the project's conda specification
env = Environment(
    conda_file="./project/conda_env_automl.yml",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    #image="mcr.microsoft.com/azureml/curated/azureml-automl-dnn-text-gpu:56"
)

# The scoring script is taken from the model folder itself
scoring_code = CodeConfiguration(code=model_directory, scoring_script="score_automl.py")

# Note: the variable is called blue_deployment but the deployment is named "green"
blue_deployment = ManagedOnlineDeployment(
    name="green",
    endpoint_name=local_endpoint_name,
    model=local_model,
    environment=env,
    code_configuration=scoring_code,
    instance_type="Standard_DS2_v2",
    instance_count=1,
)

ml_client.online_deployments.begin_create_or_update(deployment=blue_deployment, local=True)

 Get Model object Dir ['all_results.json', 'conda_env.yml', 'conda_env_v_1_0_0.yml', 'config.json', 'generated_code', 'label_list.npy', 'max_seq_length.npy', 'model.pkl', 'pytorch_model.bin', 'run_id.txt', 'score_automl.py', 'score_script.py', 'scoring_file_v_1_0_0.py', 'special_tokens_map.json', 'tokenizer.json', 'tokenizer_config.json', 'trainer_state.json', 'training_args.bin', 'train_results.json', 'vocab.txt', '__pycache__'] 


Updating local deployment (localautoml-01240249314656 / green) .
Building Docker image from Dockerfile
Step 1/6 : FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest
 ---> 4bd2dec82540
Step 2/6 : RUN mkdir -p /var/azureml-app/
 ---> Using cache
 ---> efbf049c8276
Step 3/6 : WORKDIR /var/azureml-app/
 ---> Using cache
 ---> 625b95615298
Step 4/6 : COPY conda.yml /var/azureml-app/
 ---> Using cache
 ---> 34db19d37c97
Step 5/6 : RUN conda env create -n inf-conda-env --file conda.yml
 ---> Using cache
 ---> 38995073c501
Step 6/6 : CMD ["conda", "run", "--no-capture-output", "-n", "inf-conda-env", "runsvdir", "/var/runit"]
 ---> Using cache
 ---> 845900be7fb6
Successfully built 845900be7fb6
Successfully tagged localautoml-01240249314656:green

Starting up endpoint.....Done (0m 30s)


ManagedOnlineDeployment({'private_network_connection': None, 'data_collector': None, 'provisioning_state': 'Succeeded', 'endpoint_name': 'localautoml-01240249314656', 'type': 'Managed', 'name': 'green', 'description': None, 'tags': {}, 'properties': {}, 'id': None, 'Resource__source_path': None, 'base_path': PosixPath('/mnt/batch/tasks/shared/LS_root/mounts/clusters/eneros1/code/Users/eneros/nlp-aml-private'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x7fb0d48194c0>, 'model': Model({'job_name': None, 'is_anonymous': False, 'auto_increment_version': False, 'name': '77dc4e7c57e60a23fd9c55c5ba09c713', 'description': None, 'tags': {}, 'properties': {}, 'id': None, 'Resource__source_path': None, 'base_path': PosixPath('/mnt/batch/tasks/shared/LS_root/mounts/clusters/eneros1/code/Users/eneros/nlp-aml-private'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x7fb0d4812e50>, 'version': '1', 'latest_version': None, 'pa

In [39]:
# Look the local endpoint up; its value is not shown because it is not the cell's last expression
ml_client.online_endpoints.get(name=local_endpoint_name, local=True)

# Last 50 log lines of the "green" deployment (displayed as the cell output)
log_request = dict(name="green", endpoint_name=local_endpoint_name, local=True, lines=50)
ml_client.online_deployments.get_logs(**log_request)

'Azure ML Inferencing HTTP server v0.7.7\r\n\r\n\r\nServer Settings\r\n---------------\r\nEntry Script Name: /var/azureml-app/model/score_automl.py\r\nModel Directory: /var/azureml-app/azureml-models//c0a037710bb50141f7ffce45d2e09660/1\r\nWorker Count: 1\r\nWorker Timeout (seconds): 300\r\nServer Port: 31311\r\nApplication Insights Enabled: false\r\nApplication Insights Key: None\r\nInferencing HTTP server version: azmlinfsrv/0.7.7\r\nCORS for the specified origins: None\r\n\r\n\r\nServer Routes\r\n---------------\r\nLiveness Probe: GET   127.0.0.1:31311/\r\nScore:          POST  127.0.0.1:31311/score\r\n\r\nStarting gunicorn 20.1.0\r\nListening at: http://0.0.0.0:31311 (27)\r\nUsing worker: sync\r\nBooting worker with pid: 87\r\nInitializing logger\r\n2023-01-24 04:27:38,602 | root | INFO | Starting up app insights client\r\nlogging socket was found. logging is available.\r\nlogging socket was found. logging is available.\r\n2023-01-24 04:27:38,606 | root | INFO | Starting up app insi

In [37]:
# Send the sample request file to the local endpoint; the response is the cell output
invoke_request = {
    "endpoint_name": local_endpoint_name,
    "request_file": "./project/sample_request_automl.json",
    "local": True,
}
ml_client.online_endpoints.invoke(**invoke_request)

'{"Results": ["1.0", "1.0", "0.0"]}'