In [49]:
%matplotlib inline
import numpy as np
import os
import matplotlib.pyplot as plt
import shutil


In [2]:
import azureml
import mlflow
from azureml.core import Workspace, Dataset, Environment

# Report the versions of the two tracking SDKs this notebook relies on.
for version_label, version_value in (
    ("Azure ML SDK Version: ", azureml.core.VERSION),
    ("MLflow version:", mlflow.version.VERSION),
):
    print(version_label, version_value)


Azure ML SDK Version:  1.44.0
MLflow version: 1.26.1


In [3]:
# Connect to the Azure ML workspace and echo its identifying properties.
# NOTE(review): the subscription id printed below is stored in the saved cell
# output; clear the output before sharing the notebook if that is a concern.
ws = Workspace.from_config()
# mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print(*workspace_summary, sep='\n')


Workspace name: scsccps-dsai-aide-dev-mlw
Azure region: canadacentral
Subscription id: 105efa68-0ff4-486f-ae3a-86e28a447237
Resource group: scsc-dsai-aide-dev-rg


In [4]:
from azureml.core import Experiment

# script_folder = './project'
# os.makedirs(script_folder, exist_ok=True)

# Handle on the 'transformer_hp' experiment in workspace `ws`; the cells below
# enumerate its runs through this object.
exp = Experiment(workspace=ws, name='transformer_hp')
# mlflow.set_experiment('transformer_hp')

In [5]:
# Walk every run of the experiment (child runs included) and keep the ones
# worth ranking in `dic_runs`, keyed by run id.
counter = 0
best_temporal_f1_weighted = 0.0
all_runs = exp.get_runs(include_children=True)
dic_runs = {}
test = False  # True = debug mode: fetch only the run matching `run_id`
run_id = 'HD_03ef2102-1cf6-49a7-84a7-140720e57834_2'

for i, run in enumerate(all_runs):
    if not test:
        # Full scan: keep only runs that logged the weighted temporal F1.
        metrics = run.get_metrics()
        if 'temporal_test_f1_weighted' in metrics:
            dic_runs[run.id] = {'run': run, 'metrics': metrics}
        # Counts every run visited, whether or not it was kept.
        counter += 1
        continue

    # Debug mode: skip ahead to the first run whose id contains `run_id`,
    # record it and stop scanning.
    if run_id not in run.id:
        continue
    metrics = run.get_metrics()
    dic_runs[run.id] = {'run': run, 'metrics': metrics}
    break

counter

384

In [12]:
# Inspect the weighted temporal F1 of one specific HyperDrive child run.
# The metric key is written out literally: `metric_name` is only assigned in a
# later cell, so referencing it here raises NameError on Restart & Run All.
temporal_test_f1_weighted = dic_runs['HD_03ef2102-1cf6-49a7-84a7-140720e57834_2']['metrics']['temporal_test_f1_weighted']

# The metric may come back as a list of values; use the first entry, and
# coerce scalars to float as well so the result type is always the same.
if isinstance(temporal_test_f1_weighted, list):
    temporal_test_f1_weighted = float(temporal_test_f1_weighted[0])
else:
    temporal_test_f1_weighted = float(temporal_test_f1_weighted)

temporal_test_f1_weighted

0.6946529747604724

In [26]:
# dic_runs['HD_bc6f1629-7b96-45fc-9858-b4c053fdfbe0_5']['run'].get_details()

In [32]:

# Rank the collected runs by `metric_name`, optionally restricted to runs whose
# first input dataset is tagged with `temporal_test_date`, then report the
# winner together with the dataset versions it used.
metric_name = "temporal_test_f1_weighted" # "temporal_test_f1_weighted"
second_metric = "temporal_test_f1"
temporal_test_date = '202207'  # None = rank every run regardless of its tag
li_test_values = []
best_performing_run = None
# Defined up front so later cells see a value (possibly None) even when no run
# qualifies.
train_dataset = None
temporal_dataset = None

for run_id in dic_runs:
    if 'HD_e8d5cb0f-6d51-4837-bef6-be3e8072eb38' in run_id: # or 'HD_432106ac-4483-44ef-94bc-ba7399357160' in run_id or 'transformer_hp_1659502191570' in run_id:
        continue

    run_metrics = dic_runs[run_id]['metrics']
    if metric_name not in run_metrics:
        # Nothing to rank this run by.
        continue

    temporal_test_f1_weighted = run_metrics[metric_name]
    if isinstance(temporal_test_f1_weighted, list):
        temporal_test_f1_weighted = float(temporal_test_f1_weighted[0])
    else:
        temporal_test_f1_weighted = float(temporal_test_f1_weighted)

    if temporal_test_date is not None:
        # get_details() is fetched once per run and reused for the tag check.
        input_datasets = dic_runs[run_id]['run'].get_details()['inputDatasets']
        if not input_datasets:
            continue
        dataset = input_datasets[0]
        dataset_tags = dataset['dataset'].tags
        if 'temporal_test_date' not in dataset_tags:
            continue
        if dataset_tags['temporal_test_date'] != temporal_test_date:
            continue

    # The first qualifying run must be eligible too: with an empty history
    # there is nothing to beat, so it becomes the provisional best.
    if not li_test_values or temporal_test_f1_weighted > max(li_test_values):
        best_performing_run = dic_runs[run_id]
    li_test_values.append(temporal_test_f1_weighted)

if not best_performing_run:
    print('No run is found')
else:
    run = best_performing_run['run']

    # Locate the train and temporal-test inputs of the winning run by name.
    for dataset in run.get_details()['inputDatasets']:
        if dataset['dataset'].name == 'owner_g_classfication_train':
            train_dataset = dataset
        elif dataset['dataset'].name == 'owner_g_classfication_temporal_test':
            temporal_dataset = dataset

    print(f'run id: {run.id}')
    print(f'Temporal test date: {temporal_test_date}')
    print(f'{metric_name}: {best_performing_run["metrics"][metric_name]} - {second_metric}: {best_performing_run["metrics"][second_metric]}')
    if train_dataset is not None:
        print(f'Train dataset name: {train_dataset["dataset"].name}, V:{train_dataset["dataset"].version}')
    else:
        print('Train dataset not found among the run inputs')
    if temporal_dataset is not None:
        # This line was previously labelled "Train dataset name" by mistake.
        print(f'Temporal test dataset name: {temporal_dataset["dataset"].name}, V:{temporal_dataset["dataset"].version}')
    else:
        print('Temporal test dataset not found among the run inputs')

run id: HD_03ef2102-1cf6-49a7-84a7-140720e57834_1
Temporal test date: 202207
temporal_test_f1_weighted: 0.7093031060452879 - temporal_test_f1: 0.552862822070211
Train dataset name: owner_g_classfication_train, V:55
Train dataset name: owner_g_classfication_temporal_test, V:50


In [33]:
# Fetch the registered datasets at the versions used by the selected run.
# The val and test splits are requested at the *train* dataset's version.
# NOTE(review): presumably train/val/test are registered in lockstep — confirm.
ds_train = Dataset.get_by_name(
    ws,
    name="owner_g_classfication_train",
    version=train_dataset["dataset"].version,
)
ds_val = Dataset.get_by_name(
    ws,
    name="owner_g_classfication_val",
    version=train_dataset["dataset"].version,
)
ds_test = Dataset.get_by_name(
    ws,
    name="owner_g_classfication_test",
    version=train_dataset["dataset"].version,
)
ds_temporal_test = Dataset.get_by_name(
    ws,
    name="owner_g_classfication_temporal_test",
    version=temporal_dataset["dataset"].version,
)


In [34]:
from azureml.train.hyperdrive import HyperDriveRun

# Run object of the best-performing entry chosen above; displayed as the cell
# output and used below to download the model files.
run_hp = best_performing_run['run']
run_hp


Experiment,Id,Type,Status,Details Page,Docs Page
transformer_hp,HD_03ef2102-1cf6-49a7-84a7-140720e57834_1,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [52]:
# Local staging area for the files downloaded from the selected run.
# Any previous copy is deleted so stale artefacts cannot be picked up.
# NOTE: `dir` shadows the builtin of the same name; it is kept because the
# download cell refers to it.
dir = 'output'
prefix_path = "outputs/model"
model_directory = '/'.join((dir, prefix_path))

isdir = os.path.isdir(dir)
if isdir is True:
    shutil.rmtree(dir)

In [53]:
# Download the run's files under `prefix_path` into the local `dir` folder.
# The 6000-second timeout allows for a large model download.
run_hp.download_files(prefix=prefix_path, output_directory=dir, timeout_seconds=6000)


In [None]:
# Materialise the training dataset as a DataFrame and list the distinct values
# of its 'target' column.
pdf_train = ds_train.to_pandas_dataframe()
target_column = pdf_train['target']
li_target = [*target_column.unique()]


In [56]:
import pickle

# Persist the target list next to the downloaded model files.
# NOTE(review): the file is named *.json but its content is a binary pickle,
# not JSON text. Any consumer must read it with pickle.load (as the next cell
# does); consider json.dump or a .pkl extension if nothing else depends on it.
with open(f"{model_directory}/target_list.json", "wb") as outfile:
    pickle.dump(li_target, outfile)
    # outfile.write("\n".join(li_target))


In [57]:
# Read the target list back to verify the round trip. Despite the .json name
# the file holds pickle bytes, hence binary mode and pickle.load.
# pickle.load can execute arbitrary code: only use it on files this notebook
# (or another trusted source) has written.
with open(f"{model_directory}/target_list.json", "rb") as outfile:
    li_target = pickle.load(outfile)


In [60]:
# Number of entries in the target list reloaded from disk.
len(li_target)

121

In [8]:
import numpy as np
import pandas as pd
import argparse
import os
import re
import time
import glob
import joblib

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn import preprocessing
import torch
from transformers import TrainingArguments, Trainer
from transformers import BertTokenizer, BertForSequenceClassification, AutoModelForSequenceClassification, AutoTokenizer
from transformers import EarlyStoppingCallback
from transformers.integrations import AzureMLCallback
from transformers import AutoTokenizer, DataCollatorWithPadding
# NOTE(review): this rebinds `Dataset`, which was imported from azureml.core at
# the top of the notebook. Once this cell has run, the earlier
# `Dataset.get_by_name(ws, ...)` cell resolves to the Hugging Face class if it
# is re-executed. Consider an alias for one of the two imports.
from datasets import Dataset, DatasetDict

In [9]:
# Load the fine-tuned classifier and its tokenizer from a local directory.
# NOTE(review): this reassigns `model_directory` to 'outputs/model', whereas
# the download cell stages the files under 'output/outputs/model' — confirm
# which location is intended when the notebook is run top to bottom.
# NOTE(review): num_labels=51 is hard-coded — verify it matches the number of
# classes in the saved model / label encoder.
model_directory = 'outputs/model'
model = AutoModelForSequenceClassification.from_pretrained(model_directory, num_labels=51)
tokenizer = AutoTokenizer.from_pretrained(model_directory)

In [10]:
# Load the label encoder stored alongside the model files; it is passed to the
# tokenisation helper further down.
# joblib.load unpickles its input, so only load artefacts from trusted runs.
le=joblib.load(model_directory + '/labelEncoder.joblib')
le

LabelEncoder()

In [14]:
# Use the first CUDA device when one is available, otherwise the CPU, and put
# the model into evaluation mode on that device.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda:0" if use_gpu else "cpu")
model.to(device)
model.eval()
model.zero_grad()
print(device)


cuda:0


In [15]:
# Materialise the temporal test dataset as a DataFrame and show its shape.
pdf_temporal_test = ds_temporal_test.to_pandas_dataframe()
pdf_temporal_test.shape


(8099, 2)

In [16]:
# Make the sibling `project` folder importable and pull in the training
# helpers reused for evaluation.
# `sys` is imported here because no other cell of the notebook imports it, so
# the original cell failed with NameError on a fresh kernel.
import sys

project_dir = os.path.join(os.getcwd(), "..", 'project')
# Guarded so that re-running the cell does not add the same entry again.
if project_dir not in sys.path:
    sys.path.append(project_dir)
from train_transformer import get_model, adjust_tokenizer, compute_metrics, get_encode_labels, tokenize_function, generate_tokenized_dataset, get_datasets, test_model


In [17]:
# Column names handed to the tokenisation helper: the text column, the target
# column, and the 'labels' field.
text_field_name = 'TEXT_FINAL'
target_name = 'target'
fields = [text_field_name, target_name, 'labels']


In [38]:
# Build the temporal test dataset and its tokenised counterpart with the
# project helper, using the loaded label encoder and tokenizer.
temporal_test_ds, tokenized_temporal_test_ds = generate_tokenized_dataset(pdf_temporal_test, fields, le, target_name, text_field_name, tokenizer)


  0%|          | 0/9 [00:00<?, ?ba/s]

In [39]:
# Trainer is used here only as a prediction harness (see trainer.predict
# below); no TrainingArguments are passed, so library defaults apply.
trainer = Trainer(model=model, compute_metrics=compute_metrics, tokenizer=tokenizer)


In [19]:
# Score the tokenised temporal test set and print each metric, with the
# "test_" part of its name replaced by the `prefix` label.
prefix = 'temporal_test'

test_result = trainer.predict(tokenized_temporal_test_ds)

metrics = test_result.metrics.keys()
# print(f'len(metrics): {metrics}')

for m, metric_value in test_result.metrics.items():
    display_name = prefix + '_' + m.replace("test_", "")
    print(display_name, f'{metric_value}')


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


temporal_test_loss 1.03770112991333
temporal_test_accuracy 0.6673252654976537
temporal_test_precision 0.6867294283374241
temporal_test_recall 0.5928634688970607
temporal_test_f1 0.6211564949726264
temporal_test_recall_weighted 0.6673252654976537
temporal_test_precision_weighted 0.6965405885330128
temporal_test_f1_weighted 0.665144170447547
temporal_test_runtime 25.7525
temporal_test_samples_per_second 314.455
temporal_test_init_mem_cpu_alloc_delta 8192
temporal_test_init_mem_gpu_alloc_delta 0
temporal_test_init_mem_cpu_peaked_delta 0
temporal_test_init_mem_gpu_peaked_delta 0
temporal_test_mem_cpu_alloc_delta 16289792
temporal_test_mem_gpu_alloc_delta 0
temporal_test_mem_cpu_peaked_delta 0
temporal_test_mem_gpu_peaked_delta 291664896


In [20]:
# Index of the largest score along axis 1 for each prediction row.
# assumes predictions is (n_samples, n_classes) — TODO confirm
pred = np.argmax(test_result.predictions, axis=1)


In [25]:
# Label ids returned by trainer.predict, used as y_true in the metric cell.
labels = test_result.label_ids


In [26]:
# Recompute the classification metrics directly with scikit-learn, first
# macro-averaged and then support-weighted.
accuracy = accuracy_score(y_true=labels, y_pred=pred)

recall, precision, f1 = [
    score_fn(y_true=labels, y_pred=pred, average='macro')
    for score_fn in (recall_score, precision_score, f1_score)
]

recall_weighted, precision_weighted, f1_weighted = [
    score_fn(y_true=labels, y_pred=pred, average='weighted')
    for score_fn in (recall_score, precision_score, f1_score)
]


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


In [27]:
# Display the weighted F1 computed in the previous cell.
f1_weighted


0.665144170447547