# Model Deployment using Azure Machine Learning

In [1]:
!pip install transformers==4.5.1 --quiet
!pip install pytorch-lightning==1.2.8 --quiet
!pip install tqdm
!pip install wandb



In [88]:
import os
import azureml
import shutil
import urllib.request
from pathlib import Path
import urllib3
import zipfile
import tqdm.auto as tqdm
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from azureml.core.model import Model, InferenceConfig
from azureml.core import Workspace, Datastore, Experiment, Run, Environment, ScriptRunConfig

from azureml.core.compute import ComputeTarget, AmlCompute, AksCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.train.dnn import PyTorch
from azureml.widgets import RunDetails

from azureml.core.webservice import Webservice, AksWebservice, AciWebservice
from azureml.core.dataset import Dataset
from azureml.core.resource_configuration import ResourceConfiguration
from azureml.core.conda_dependencies import CondaDependencies

from transformers import BertTokenizer

# check core SDK version number
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.27.0


## 1 - Connect to the AML Workspace Environment

In [3]:
# Resolve the Azure ML workspace from the local configuration.
ws = Workspace.from_config()
print(f"Workspace: {ws.name}")

# # Connect to compute for training
# gpu_instance_name = "weak-supervisor-gpu"

# gpu_instance = ComputeTarget(workspace=ws, name=gpu_instance_name)
# gpu_instance.wait_for_completion(show_output=True)
# print("Compute Target:",gpu_instance.name)

# Default datastore of the workspace.
ds = Datastore.get_default(ws)
print(f"Datastore: {ds.name}")

# Experiment under which runs of this project are tracked.
exp = Experiment(name='Weak-Supervision-fsdl', workspace=ws)
print(f"Experiment: {exp.name}")

Workspace: weak-supervision-project
Datastore: workspaceblobstore
Experiment: Weak-Supervision-fsdl


## 2 - Data

In [4]:
# Load the toxic-comments CSV from the notebook's working directory and preview the first rows.
df = pd.read_csv('toxic_comments.csv')
df.head()

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0


In [5]:
# Label columns are selected positionally: every column after the first two.
# NOTE(review): assumes the first two columns are `id` and `comment_text` — confirm against the CSV header.
LABEL_COLUMNS = df.columns.tolist()[2:]

In [6]:
# Number of positive labels per comment; > 0 means at least one toxicity flag is set.
label_hits = df[LABEL_COLUMNS].sum(axis=1)
df_toxic = df[label_hits > 0]
df_clean = df[label_hits == 0]

# Balanced sample: 2000 toxic + 2000 clean comments (fixed seeds).
balanced_df = pd.concat([
  df_toxic.sample(2000, random_state=42),
  df_clean.sample(2000, random_state=42)
])

# Collapse the individual flags into one binary `label` column and expose the
# comment body under the name `text`.
train_df = (
  balanced_df
  .assign(label=np.where(balanced_df[LABEL_COLUMNS].sum(axis=1) == 0, 0, 1))
  .drop(columns=LABEL_COLUMNS)
  .rename(columns={"comment_text": "text"})
)

In [7]:
# Hold out a class-balanced fine-tuning set: 500 examples per label.
fine_tune_df = (
  train_df
  .groupby('label')
  .apply(lambda s: s.sample(500, random_state=123))
  .reset_index(level=0, drop=True)
)
# Remove the held-out rows; reassign instead of mutating in place.
train_df = train_df.drop(fine_tune_df.index)

# Seed both random splits so Restart & Run All reproduces the same partitions.
train_df, test_df = train_test_split(train_df, test_size=0.25, random_state=42)
val_df = test_df.sample(frac=0.1, random_state=42)
test_df = test_df.drop(val_df.index)

print('\t Fine-Tune Set:', len(fine_tune_df), 'Valid:', len(val_df), '\t', 'Train:', len(train_df), '\t Test:', len(test_df),)

	 Fine-Tune Set: 1000 Valid: 75 	 Train: 2250 	 Test: 675


## 3 - Train the model

### 3.1 Create model script

In [9]:
%%writefile 'toxic_bert.py'

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm.auto import tqdm
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from transformers import BertTokenizerFast, BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup

import pytorch_lightning as pl
from pytorch_lightning.metrics.functional import accuracy, f1
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, BackboneFinetuning, QuantizationAwareTraining, ModelPruning
RANDOM_SEED = 42
pl.seed_everything(RANDOM_SEED)
# lightning plus wandb
import wandb
from pytorch_lightning.loggers import WandbLogger
import gc

# Module-level defaults.
# NOTE(review): the classes defined below take their own arguments/defaults and do not
# read these names, so they look informational only — confirm before relying on or removing them.
MAX_TOKEN_COUNT = 256
BATCH_SIZE = 4
BERT_MODEL_NAME = 'bert-base-cased'


class ToxicCommentsDataset(Dataset):
  """Map-style dataset that tokenizes one comment per item.

  Each row of `data` must provide a `text` attribute and a `label` entry.
  Items are dicts with the raw comment, the flattened `input_ids` and
  `attention_mask` tensors produced by the tokenizer, and the row's label.
  """

  def __init__(
    self, 
    data: pd.DataFrame,
    tokenizer: BertTokenizer,
    max_token_len: int = 128
  ):
    self.data = data
    self.tokenizer = tokenizer
    self.max_token_len = max_token_len

  def __len__(self):
    # One item per DataFrame row.
    return len(self.data)

  def __getitem__(self, index: int):
    row = self.data.iloc[index]
    text = row.text
    label = row['label']

    # Pad/truncate every comment to exactly `max_token_len` tokens.
    tokens = self.tokenizer.encode_plus(
      text,
      add_special_tokens=True,
      max_length=self.max_token_len,
      return_token_type_ids=False,
      padding="max_length",
      truncation=True,
      return_attention_mask=True,
      return_tensors='pt',
    )

    # The tokenizer's tensors are flattened to 1-D before being returned.
    return {
      "comment_text": text,
      "input_ids": tokens["input_ids"].flatten(),
      "attention_mask": tokens["attention_mask"].flatten(),
      "labels": label,
    }

class ToxicCommentDataModule(pl.LightningDataModule):
  """Lightning data module wrapping a training frame and an evaluation frame.

  The evaluation frame (`test_df`) backs both the validation and the test
  dataloaders.
  """

  def __init__(self, train_df, test_df, tokenizer, batch_size=4, max_token_len=128):
    super().__init__()
    self.train_df, self.test_df = train_df, test_df
    self.tokenizer = tokenizer
    self.batch_size = batch_size
    self.max_token_len = max_token_len

  def _make_dataset(self, frame):
    # Same tokenizer and sequence length for every split.
    return ToxicCommentsDataset(frame, self.tokenizer, self.max_token_len)

  def _eval_loader(self):
    # Unshuffled loader over the evaluation dataset.
    return DataLoader(self.test_dataset, batch_size=self.batch_size, num_workers=2)

  def setup(self, stage=None):
    self.train_dataset = self._make_dataset(self.train_df)
    self.test_dataset = self._make_dataset(self.test_df)

  def train_dataloader(self):
    return DataLoader(
      self.train_dataset,
      shuffle=True,
      batch_size=self.batch_size,
      num_workers=2,
    )

  def val_dataloader(self):
    return self._eval_loader()

  def test_dataloader(self):
    return self._eval_loader()


class ToxicCommentTagger(pl.LightningModule):
  """BERT encoder with a linear classification head for toxic-comment detection.

  Args:
    num_classes: number of outputs of the linear head.
    input_dims: not read by the module; kept for interface compatibility.
    attn_mask: not read by the module; kept for interface compatibility.
    n_training_steps: total number of optimizer steps for the LR schedule.
    n_warmup_steps: number of warm-up steps for the LR schedule.
    BERT_MODEL_NAME: name or path of the pretrained BERT weights used as backbone.
  """

  def __init__(self, num_classes: int, input_dims=(8, 256), attn_mask=(8, 256), n_training_steps=None, n_warmup_steps=None, BERT_MODEL_NAME = 'bert-base-cased'):
    super().__init__()
    # Honour the caller-supplied model name (it used to be overwritten by a hard-coded literal).
    self.backbone = BertModel.from_pretrained(BERT_MODEL_NAME, return_dict=True)
    # self.backbone = DistilBertForSequenceClassification.from_pretrained(BERT_MODEL_NAME, return_dict=True)
    self.classifier = nn.Linear(self.backbone.config.hidden_size, num_classes)
    self.n_training_steps = n_training_steps
    self.n_warmup_steps = n_warmup_steps
    self.criterion = nn.CrossEntropyLoss()

    # log hyperparameters
    self.save_hyperparameters()

    # compute the accuracy -- no need to roll your own!
    self.train_acc = pl.metrics.Accuracy()
    self.val_acc = pl.metrics.Accuracy()
    self.test_acc = pl.metrics.Accuracy()
    self.train_f1 = pl.metrics.F1(num_classes=num_classes)
    self.val_f1 = pl.metrics.F1(num_classes=num_classes)
    self.test_f1 = pl.metrics.F1(num_classes=num_classes)

  def forward(self, input_ids, attention_mask, labels=None):
    """Return `(loss, output)` for a batch.

    `output` is the element-wise sigmoid of the classifier logits (unchanged
    from the previous behaviour, so existing inference code keeps working).
    `loss` is 0 when `labels` is None; otherwise it is the cross-entropy loss.
    """
    encoded = self.backbone(input_ids, attention_mask=attention_mask)
    logits = self.classifier(encoded.pooler_output)
    output = torch.sigmoid(logits)
    loss = 0
    if labels is not None:
      # nn.CrossEntropyLoss applies log-softmax itself and expects raw logits;
      # feeding it the sigmoid outputs squashed the loss signal.
      loss = self.criterion(logits, labels)
    return loss, output

  def _shared_step(self, batch):
    """Run the forward pass on a batch and return `(loss, predicted class ids, labels)`."""
    labels = batch["labels"]
    loss, outputs = self(batch["input_ids"], batch["attention_mask"], labels) # calls self.forward
    preds = torch.argmax(outputs, 1) # index of the highest score
    return loss, preds, labels

  def training_step(self, batch, batch_idx):
    loss, preds, labels = self._shared_step(batch)

    self.train_acc(preds, labels)
    self.train_f1(preds, labels)
    self.log('train/loss_epoch', loss, on_step=False, on_epoch=True)
    self.log('train/acc_epoch', self.train_acc, on_step=False, on_epoch=True)
    self.log('train/f1_epoch', self.train_f1, on_step=False, on_epoch=True)

    self.log("train_loss", loss)
    self.log("train_f1", self.train_f1)
    return {"loss": loss, "predictions": preds, "labels": labels}

  def validation_step(self, batch, batch_idx):
    loss, preds, labels = self._shared_step(batch)

    self.val_acc(preds, labels)
    self.val_f1(preds, labels)
    self.log('val/loss_epoch', loss, on_step=False, on_epoch=True)
    self.log('val/acc_epoch', self.val_acc, on_step=False, on_epoch=True)
    self.log('val/f1_epoch', self.val_f1, on_step=False, on_epoch=True)

    self.log("val_loss", loss, prog_bar=True, logger=True)
    self.log("val_f1", self.val_f1, prog_bar=True, logger=True)

    # Release cached GPU memory after each validation batch.
    torch.cuda.empty_cache()
    return loss

  # def validation_epoch_end(self, validation_step_outputs):

  # #   # Saving our model weights and biases every epoch.
  # #   # This way, if we overfit, we can just roll back our weights to the saved weights at the best epoch
  # #   dummy_input_dims = torch.zeros(self.hparams['input_dims'], dtype=torch.long, device=self.device)
  # #   dummy_attn_mask = torch.zeros(self.hparams['attention_mask'], dtype=torch.long, device=self.device)
  # #   input_names = [ "input_dims", "attention_mask" ]
  # #   output_names = [ "input_dims", "attention_mask" ]
  # #   dummy_input = [dummy_input_dims, dummy_attn_mask]
  # #   model_filename = f'model_{str(self.global_step).zfill(5)}.onnx'
  # #   torch.onnx.export(self, dummy_input, model_filename,
  # #                     input_names=input_names,
  # #                     output_names=output_names)
  # #   wandb.save(model_filename)

  #   # flatten validation step outputs as a pytorch tensor and turn into a histogram
  #   flattened_logits = torch.flatten(torch.cat(validation_step_outputs))
  #   self.logger.experiment.log(
  #       {'valid/logits': wandb.Histogram(flattened_logits.to('cpu')),
  #         'global_step': self.global_step}
  #   )

  def test_step(self, batch, batch_idx):
    loss, preds, labels = self._shared_step(batch)

    self.test_acc(preds, labels)
    self.test_f1(preds, labels)
    self.log('test/loss_epoch', loss, on_step=False, on_epoch=True)
    self.log('test/acc_epoch', self.test_acc, on_step=False, on_epoch=True)
    self.log('test/f1_epoch', self.test_f1, on_step=False, on_epoch=True)

    self.log("test_loss", loss, prog_bar=True, logger=True)
    self.log("test_f1", self.test_f1, prog_bar=True, logger=True)
    return loss

  def configure_optimizers(self):
    """Return AdamW over trainable parameters, plus a per-step linear warm-up schedule.

    When the module was built without step counts (e.g. for inference only),
    only the optimizer is returned because the schedule cannot be constructed.
    """
    optimizer = AdamW(filter(lambda p: p.requires_grad, self.parameters()), lr=2e-5)

    if self.n_training_steps is None or self.n_warmup_steps is None:
      return optimizer

    scheduler = get_linear_schedule_with_warmup(
      optimizer,
      num_warmup_steps=self.n_warmup_steps,
      num_training_steps=self.n_training_steps
    )

    return dict(
      optimizer=optimizer,
      lr_scheduler=dict(
        scheduler=scheduler,
        interval='step'
      )
    )


Overwriting toxic_bert.py


### 3.2 Create a training script

In [11]:
%%writefile 'train.py'

from __future__ import print_function, division

import pandas as pd
import numpy as np
import os
import requests
import json
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from transformers import BertTokenizerFast, BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup

import pytorch_lightning as pl
from pytorch_lightning.metrics.functional import accuracy, f1
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, BackboneFinetuning, QuantizationAwareTraining, ModelPruning
RANDOM_SEED = 42
pl.seed_everything(RANDOM_SEED)
# lightning plus wandb
import wandb
from pytorch_lightning.loggers import WandbLogger
import gc

### Add References
import argparse
from azureml.core import Run
# Aliased because `Dataset` already names torch.utils.data.Dataset in this script.
from azureml.core import Dataset as AmlDataset
from azureml.core.model import Model

# importing models
from toxic_bert import ToxicCommentsDataset, ToxicCommentDataModule, ToxicCommentTagger

### Add run context for AML
run = Run.get_context()

### Parse incoming parameters
parser = argparse.ArgumentParser()
parser.add_argument("--data-folder", type=str, dest="data_folder", help="data folder mounting point", default="")
parser.add_argument("--num-epochs", type=int, dest="num_epochs", help="Number of epochs", default=10)
parser.add_argument("--max_token_count", type=int, dest="max_token_count", help="max_token_count", default=256)
parser.add_argument("--batch_size", type=int, dest="batch_size", help="batch_size", default=4)
# These two are names, not integers: `type=int` rejected every value, including the defaults.
parser.add_argument("--dataset", type=str, dest="dataset", help="name of the registered Azure ML dataset", default="toxic_comments")
parser.add_argument("--model_name", type=str, dest="model_name", help="pretrained BERT model name", default="bert-base-cased")

args = parser.parse_args()
DATA_PATH = args.data_folder
N_EPOCHS = args.num_epochs
MAX_TOKEN_COUNT = args.max_token_count
BATCH_SIZE = args.batch_size
BERT_MODEL_NAME = args.model_name

tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
# tokenizer = DistilBertTokenizerFast.from_pretrained(BERT_MODEL_NAME)
os.environ["TOKENIZERS_PARALLELISM"] = "true"

# Resolve the workspace from the submitted run (`ws` was never defined in this script).
try:
    ws = run.experiment.workspace
except AttributeError:
    # NOTE(review): assumes a local/offline run has no `experiment`; fall back to config.json.
    from azureml.core import Workspace
    ws = Workspace.from_config()

# Toxicity flag columns of the dataset (they were only defined in the notebook, not here).
LABEL_COLUMNS = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

# Get data (upload dataset to Azure first)
toxic_comments_ds = AmlDataset.get_by_name(ws, name=args.dataset)
df = toxic_comments_ds.to_pandas_dataframe()

df_toxic = df[df[LABEL_COLUMNS].sum(axis=1) > 0]
df_clean = df[df[LABEL_COLUMNS].sum(axis=1) == 0]

# Balanced sample: 2000 toxic + 2000 clean comments.
train_df = pd.concat([
  df_toxic.sample(2000, random_state=42),
  df_clean.sample(2000, random_state=42)
])

train_df['label'] = np.where(train_df[LABEL_COLUMNS].sum(axis=1) == 0, 0, 1)
train_df = train_df.drop(LABEL_COLUMNS, axis=1)
train_df = train_df.rename(columns={"comment_text": "text"})

# Hold out 500 examples per label for fine-tuning, then split the remainder.
fine_tune_df = train_df.groupby('label').apply(lambda s: s.sample(500, random_state=123)).reset_index(level=0, drop=True)
train_df = train_df.drop(fine_tune_df.index)

train_df, test_df = train_test_split(train_df, test_size=0.25, random_state=RANDOM_SEED)
val_df = test_df.sample(frac=0.1, random_state=RANDOM_SEED)
test_df = test_df.drop(val_df.index)

# Train on the fine-tuning set, validate on the validation split.
data_module = ToxicCommentDataModule(
  fine_tune_df,
  val_df,
  tokenizer=tokenizer,
  batch_size=BATCH_SIZE,
  max_token_len=MAX_TOKEN_COUNT
)

steps_per_epoch=len(fine_tune_df) // BATCH_SIZE
total_training_steps = steps_per_epoch * N_EPOCHS

# Warm up the learning rate over the first fifth of training.
warmup_steps = total_training_steps // 5
print("warmup steps:", warmup_steps, "total training steps:", total_training_steps)

# NOTE(review): the original call had a stray checkpoint filename
# ('finetuned_retrained_bert_on_snorkel_dataset_14863ex_256tokens_8bs_pruning.ckpt')
# as first argument, which was a syntax error. If the intent was to resume from
# that checkpoint, use ToxicCommentTagger.load_from_checkpoint(...) instead.
model = ToxicCommentTagger(
    num_classes=2,
    n_warmup_steps=warmup_steps,
    n_training_steps=total_training_steps,
    BERT_MODEL_NAME=BERT_MODEL_NAME,
    )

checkpoint_callback = ModelCheckpoint(
    dirpath="checkpoints",
    filename="best-checkpoint",
    save_top_k=1,
    verbose=True,
    monitor="val_loss",
    mode="min"
    )

multiplicative=lambda epoch: 1.1 # modifies learning rate after unfreeze
backbone_finetuning = BackboneFinetuning(5, multiplicative) # freezes pretrained model until 5 epochs

model_pruning = ModelPruning(
    pruning_fn='l1_unstructured',
    amount=0.1,
    use_global_unstructured=True,
    )

wandb.login()
wandb_logger = WandbLogger(project="weak-supervision-fsdl-project", entity="weak-supervision-classifier-team")
wandb.watch(model)

trainer = pl.Trainer(
    logger=[wandb_logger],
    log_every_n_steps=50,
    # ModelCheckpoint instances belong in `callbacks`; passing one through
    # `checkpoint_callback=` is deprecated.
    callbacks=[checkpoint_callback, backbone_finetuning, model_pruning], # , early_stopping_callback, quantization_aware_training]
    max_epochs=N_EPOCHS,
    gpus=1,
    deterministic=True,
    progress_bar_refresh_rate=30,
    precision=16
    )

trainer.fit(model, data_module)

# Make sure the output folder exists before writing the final checkpoint into it.
os.makedirs("./outputs", exist_ok=True)
model_ckpt = "./outputs/pl_bert_toxic_best_model.ckpt"
trainer.save_checkpoint(model_ckpt)
wandb.save('*ckpt*')
wandb.finish()

Overwriting train.py


### 3.3 Create and run a PyTorch ScriptRunConfig

Don't run this section unless you have a GPU compute target available for training. Since we had already trained the models in Colab, there was no need to train them in Azure as well, but the code is kept here for future use.

In [12]:
# %%writefile conda_dependencies.yml

# channels:
#   - conda-forge
# dependencies:
#   - python=3.6
#   - pip:
#     - azureml-defaults
#     - torch
#     - pytorch-lightning==1.2.8
#     - transformers==4.5.1
#     - wandb
#     - tqdm
#     - scikit-learn
#     - numpy

In [13]:
# pl_env = Environment.from_conda_specification(name='pytorch-lightning-1.2.8-gpu', file_path='./conda_dependencies.yml')

# # specify a GPU base image
# env.docker.enabled = True
# env.docker.base_image = (
#     "mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04"
# )

In [14]:
# args = [
#     '--data-folder', toxic_comments_ds.as_named_input('toxic_comments').as_mount(),
#     '--num-epochs', 10,
#     '--max_token_count', 256,
#     '--batch_size', 4,
#     '--model_name', 'bert-base-cased',
# ]

# project_folder = "./trainingscripts"

# config = ScriptRunConfig(
#     source_directory = project_folder, 
#     script = 'train.py', 
#     compute_target=gpu_instance,
#     environment = pl_env,
#     arguments=args,
# )

In [15]:
# run = exp.submit(config)

In [16]:
# # Show the PyTorch estimator
# RunDetails(run).show()

In [17]:
# # Load a historic run
# previousRunId = ''
# run = [r for r in exp.get_runs() if r.id == previousRunId][0]
# RunDetails(run).show()

### 3.4 Register the model in Azure ML

In [67]:
from azureml.core.model import Model

# Register the Lightning checkpoint file in the workspace's model registry.
model = Model.register(
    workspace=ws,
    model_name='Toxic-PyTorch-Lightning',
    model_path='finetuned_retrained_bert_on_snorkel_dataset_14863ex_256tokens_8bs_pruning.ckpt',
    description="Toxic Comments Classifier",
    tags={'Weak Supervision Team':'Test 1'},
    model_framework='PyTorch Lightning',
    model_framework_version='1.2.8',
    resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=2),
)

print(f"Model '{model.name}' version {model.version} registered ")

DEBUG:azureml.core.model:Uploading model Toxic-PyTorch-Lightning with 1 files of total size 1299959732 bytes
DEBUG:azureml.ArtifactsClient.upload_files:Overriding default timeout to 857973.42312
DEBUG:azureml.ArtifactsClient.upload_files:[Start]
DEBUG:azureml.ArtifactsClient.batch_create_empty_artifacts-async:False:[START]
DEBUG:azureml.ArtifactsClient:ClientBase: Calling batch_create_empty_artifacts with url /artifact/v2.0/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.MachineLearningServices/workspaces/{workspaceName}/artifacts/batch/metadata/{origin}/{container}
DEBUG:msrest.service_client:Accept header absent and forced to application/json
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): 127.0.1.1:46808
DEBUG:urllib3.connectionpool:http://127.0.1.1:46808 "GET /MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01 HTTP/1.1" 200 None
DEBUG:msrestazure.azure_active_directory:MSI: Retrieving a token from http

Registering model Toxic-PyTorch-Lightning
Model 'Toxic-PyTorch-Lightning' version 2 registered 


### 3.5 Download & Test the model

In [74]:
# Fetch the registered model's file locally; the cell output shows the checkpoint filename it returns.
model.download(exist_ok=True)

DEBUG:azureml.AssetsClient.query_by_id-async:False:[START]
DEBUG:azureml.AssetsClient:ClientBase: Calling query_by_id with url /modelmanagement/v1.0/subscriptions/{subscriptionId}/resourceGroups/{resourceGroup}/providers/Microsoft.MachineLearningServices/workspaces/{workspace}/assets/{id}
DEBUG:msrest.service_client:Accept header absent and forced to application/json
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): 127.0.1.1:46808
DEBUG:urllib3.connectionpool:http://127.0.1.1:46808 "GET /MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01 HTTP/1.1" 200 None
DEBUG:msrestazure.azure_active_directory:MSI: Retrieving a token from http://127.0.1.1:46808/MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01
DEBUG:msrestazure.azure_active_directory:MSI: token retrieved
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): 127.0.1.1:46808
DEBUG:urllib3.connectionpool:http://127.0.1.1:46808 "GET /MSI/auth/?resource=https

'finetuned_retrained_bert_on_snorkel_dataset_14863ex_256tokens_8bs_pruning.ckpt'

In [18]:
import os
import torch
import torch.nn as nn
import json
import urllib
from toxic_bert import ToxicCommentsDataset, ToxicCommentDataModule, ToxicCommentTagger

BERT_MODEL_NAME = 'bert-base-cased'
MAX_TOKEN_COUNT = 256
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
os.environ["TOKENIZERS_PARALLELISM"] = "true"

model_path = 'finetuned_retrained_bert_on_snorkel_dataset_14863ex_256tokens_8bs_pruning.ckpt' # checkpoint created in Colab

# load_from_checkpoint builds the module itself. The previous code first built a
# throwaway ToxicCommentTagger (loading BERT a second time) and passed it as
# `model=`, which is not an __init__ argument of the class. Extra keyword
# arguments here override saved hyperparameters, so pass num_classes directly.
trained_model = ToxicCommentTagger.load_from_checkpoint(
            checkpoint_path=model_path,
            num_classes=2
            )
trained_model.eval()
trained_model.freeze()

test_comment = "you are straight garbage"

# Same tokenization settings as ToxicCommentsDataset uses.
encoding = tokenizer.encode_plus(
  test_comment,
  add_special_tokens=True,
  max_length=MAX_TOKEN_COUNT,
  return_token_type_ids=False,
  padding="max_length",
  truncation=True,
  return_attention_mask=True,
  return_tensors='pt',
)

# forward returns (loss, scores); no labels are passed, so only the scores matter here.
_, test_prediction = trained_model(encoding["input_ids"], encoding["attention_mask"])
test_prediction = test_prediction.flatten().numpy()

for label, prediction in zip(['non-toxic', 'toxic'], test_prediction):
  print(f"{label}: {prediction}")

Global seed set to 42


non-toxic: 0.01291364524513483
toxic: 0.988312304019928


In [19]:
# Serialize the whole module object (not just a state_dict) to best_model.pt.
# NOTE(review): loading a fully pickled module presumably requires the
# ToxicCommentTagger class to be importable at load time — confirm for the scoring container.
torch.save(trained_model, 'best_model.pt')
# The map_location callable returns the storage unchanged; sanity-check that the file loads back.
loaded_model = torch.load('best_model.pt', map_location=lambda storage, loc: storage)
loaded_model.eval()

## Registering the model with the .pt file

We need to register the .pt file so that we can use it later for inference.

In [24]:
from azureml.core.model import Model

# Register the serialized best_model.pt; the tag records which checkpoint it came from.
model = Model.register(
    workspace=ws,
    model_name='Best-Toxic-PyTorch-Lightning',
    model_path='best_model.pt',
    description="Toxic Comments Classifier",
    tags={'Weak Supervision Team':'finetuned_retrained_bert_on_snorkel_dataset_14863ex_256tokens_8bs_pruning'},
    model_framework='PyTorch Lightning',
    model_framework_version='1.2.8',
    resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=2),
)

Registering model Best-Toxic-PyTorch-Lightning


In [25]:
# Reload best_model.pt (the map_location callable returns the storage unchanged) and
# switch to eval mode; the cell output is the module's printed architecture.
loaded_model = torch.load('best_model.pt', map_location=lambda storage, loc: storage)
loaded_model.eval()

ToxicCommentTagger(
  (backbone): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_af

In [505]:
from tqdm.auto import tqdm

# Device handle (not used by the load below).
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the serialized module, leaving storages where the map_location callable returns them.
model = torch.load(
    'best_model.pt',
    map_location=lambda storage, location: storage,
)
model.eval()
model.freeze()

In [509]:
MAX_TOKEN_COUNT = 256

# Two comments separated by '~', wrapped in the JSON payload shape the service receives.
input_text = """you are straight garbage
~
but I still love you very much"""
x_new = {"text": input_text}
input_data = json.dumps(x_new).encode('utf-8')

# Decode the payload and split it. The previous code split `test_comment`, a
# leftover variable from an earlier cell, so `input_text` was never scored.
input_texts = json.loads(input_data)["text"].split('~')
response = {"prediction": []}

for item in input_texts:
    encoding = tokenizer.encode_plus(
        item,
        add_special_tokens=True,
        max_length=MAX_TOKEN_COUNT,
        return_token_type_ids=False,
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt')

    # forward returns (loss, scores); index 0 is 'non-toxic', index 1 is 'toxic'.
    _, probability = model(
        encoding["input_ids"],
        encoding["attention_mask"]
    )
    probability = probability.flatten().detach().cpu().numpy()
    if probability[1] > probability[0]:
        label = 'toxic'
        probability = probability[1].astype('float64')  # float64 is JSON-serializable
    else:
        label = 'non-toxic'
        probability = probability[0].astype('float64')

    response["prediction"].append({"label": label, "probability": probability, "text": item})

json.dumps(response)

'{"prediction": [{"label": "toxic", "probability": 0.988312304019928, "text": "you are straight garbage\\n"}, {"label": "non-toxic", "probability": 0.9873024225234985, "text": "\\nbut I still love you very much"}]}'

## 4 Deploy the model

### 4.1 Create a scoring script

In [None]:
# Create the scoring source directory; exist_ok makes the cell safe to re-run
# (a bare `mkdir` fails once the directory exists).
os.makedirs('deployment-scripts', exist_ok=True)
# score.py does `import toxic_bert`, and the InferenceConfig below uses
# ./deployment-scripts as its source_directory, so the module must live there too.
shutil.copy('toxic_bert.py', 'deployment-scripts/toxic_bert.py')

In [510]:
%%writefile 'deployment-scripts/score.py'
import os
import torch
import torch.nn as nn
from transformers import BertTokenizer

import json
import urllib
import toxic_bert
from toxic_bert import ToxicCommentsDataset, ToxicCommentDataModule, ToxicCommentTagger

from azureml.core.model import Model

def init():
    """Load the tokenizer and the serialized model into module globals.

    Sets the globals `tokenizer` and `model`. The model file `best_model.pt`
    is read from the directory named by the AZUREML_MODEL_DIR environment variable.

    Raises:
        RuntimeError: if AZUREML_MODEL_DIR is not set.
    """
    global model
    global tokenizer
    BERT_MODEL_NAME = 'bert-base-cased'
    tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
    os.environ["TOKENIZERS_PARALLELISM"] = "true"

    print('Loading model...', end='')
    model_dir = os.getenv('AZUREML_MODEL_DIR')
    if model_dir is None:
        # Fail with a clear message instead of the TypeError os.path.join(None, ...) would raise.
        raise RuntimeError('AZUREML_MODEL_DIR is not set; cannot locate best_model.pt')
    model_path = os.path.join(model_dir, 'best_model.pt')
    # NOTE: torch.load unpickles the file; only load model files from a trusted source.
    model = torch.load(model_path, map_location=lambda storage, loc: storage)
    model.eval()
    model.freeze()
    print(' done.')
    
def run(input_data):
    """Score each paragraph of the request text.

    `input_data` is a JSON document with a `text` field. The text is split on
    blank lines ('\n\n') and every piece is scored separately. Returns a JSON
    string of the form {"prediction": [{"label", "probability", "text"}, ...]}.
    """
    MAX_TOKEN_COUNT = 256
    payload = json.loads(input_data)
    paragraphs = list(payload['text'].split('\n\n'))  # one entry per paragraph
    predictions = []

    for paragraph in paragraphs:
        tokens = tokenizer.encode_plus(
            paragraph,
            add_special_tokens=True,
            max_length=MAX_TOKEN_COUNT,
            return_token_type_ids=False,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        # The model returns (loss, scores); only the scores are used here.
        _, scores = model(tokens["input_ids"], tokens["attention_mask"])
        scores = scores.flatten().detach().cpu().numpy()

        # Index 1 is the 'toxic' score, index 0 the 'non-toxic' score.
        is_toxic = scores[1] > scores[0]
        label = 'toxic' if is_toxic else 'non-toxic'
        winning_score = scores[1] if is_toxic else scores[0]

        predictions.append({
            "label": label,
            "probability": winning_score.astype('float64'),
            "text": paragraph,
        })

    return json.dumps({"prediction": predictions})

Overwriting deployment-scripts/score.py


### 4.2 Create an environment file

In [511]:
# Environment used to build the scoring container.
myenv = Environment(name="weak-supervision-inference")

conda_dep = CondaDependencies()

# You must list azureml-defaults as a pip dependency
conda_dep.add_pip_package("azureml-defaults")
conda_dep.add_pip_package("torch")
conda_dep.add_pip_package("transformers==4.5.1")
# The distribution is published as `scikit-learn`; the `sklearn` name on PyPI is a deprecated alias.
conda_dep.add_pip_package("scikit-learn")
conda_dep.add_pip_package("pandas")
conda_dep.add_pip_package("numpy")
conda_dep.add_pip_package("pytorch-lightning==1.2.8")
conda_dep.add_pip_package("wandb")

# Adds dependencies to PythonSection of myenv
myenv.python.conda_dependencies=conda_dep


### 4.3 Create an Inference config

In [512]:
# Bind the scoring entry point (resolved inside source_directory) to the inference environment.
inference_config = InferenceConfig(
    source_directory='./deployment-scripts',
    entry_script="score.py",
    environment=myenv,
)

### 4.4 Deploy to ACI (Docker)

In [513]:
# Look up the registered model by name in the workspace.
model = Model(ws, name='Best-Toxic-PyTorch-Lightning')
print(f"Loaded model version: {model.version}")

DEBUG:azureml.ModelsClient.list_query-async:True:[START]
DEBUG:azureml._restclient.clientbase.WorkerPool:submitting future: _execute_with_base_arguments
DEBUG:azureml.ModelsClient.list_query:Using basic handler - no exception handling
DEBUG:azureml.ModelsClient:ClientBase: Calling list_query with url /modelmanagement/v1.0/subscriptions/{subscriptionId}/resourceGroups/{resourceGroup}/providers/Microsoft.MachineLearningServices/workspaces/{workspace}/models
DEBUG:azureml.ModelsClient.list_query-async:True:[STOP]
DEBUG:msrest.service_client:Accept header absent and forced to application/json
DEBUG:azureml.ModelsClient.list_query.WaitingTask:[START]
DEBUG:msrest.universal_http.requests:Configuring retry: max_retries=3, backoff_factor=0.8, max_backoff=90
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): 127.0.1.1:46808
DEBUG:azureml.ModelsClient.list_query.WaitingTask:Awaiter is ApiPagination
DEBUG:urllib3.connectionpool:http://127.0.1.1:46808 "GET /MSI/auth/?resource=https://m

Loaded model version: 1


In [514]:
# Resource settings for the ACI web service.
# NOTE(review): memory_gb=1 is lower than the 2 GB declared when the model was registered — confirm it is sufficient.
deploy_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    description='Toxic Comment Classifier',
    enable_app_insights=True,
)

In [515]:
# Deploy the model to an ACI
# Azure Container Instances has no associated ComputeTarget, so `deployment_target`
# is left at its default (None). The previous value 'inference-clustr' was a plain
# string, not a ComputeTarget object.
aci_service = Model.deploy(ws,
                name="toxic-comments-prediction",
                models = [model],
                inference_config = inference_config,
                deployment_config = deploy_config,
                overwrite = True)

# Block until the service is up, streaming progress to the cell output.
aci_service.wait_for_deployment(show_output=True)

DEBUG:azureml.ArtifactsClient.batch_create_empty_artifacts-async:False:[START]
DEBUG:azureml.ArtifactsClient:ClientBase: Calling batch_create_empty_artifacts with url /artifact/v2.0/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.MachineLearningServices/workspaces/{workspaceName}/artifacts/batch/metadata/{origin}/{container}
DEBUG:msrest.service_client:Accept header absent and forced to application/json
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): 127.0.1.1:46808
DEBUG:urllib3.connectionpool:http://127.0.1.1:46808 "GET /MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01 HTTP/1.1" 200 None
DEBUG:msrestazure.azure_active_directory:MSI: Retrieving a token from http://127.0.1.1:46808/MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01
DEBUG:msrestazure.azure_active_directory:MSI: token retrieved
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): 127.0.1.1:46808
DEBUG:u

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-05-15 18:36:53+00:00 Creating Container Registry if not exists.
2021-05-15 18:36:53+00:00 Registering the environment.
2021-05-15 18:36:55+00:00 Use the existing image.
2021-05-15 18:36:55+00:00 Generating deployment configuration.
2021-05-15 18:36:56+00:00 Submitting deployment to compute.
2021-05-15 18:36:59+00:00 Checking the status of deployment toxic-comments-prediction..
2021-05-15 18:39:45+00:00 Checking the status of inference endpoint toxic-comments-prediction.
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [516]:
# Fetch the logs of the deployed service, then print them
deployment_logs = aci_service.get_logs()
print(deployment_logs)

DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): 127.0.1.1:46808
DEBUG:urllib3.connectionpool:http://127.0.1.1:46808 "GET /MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01 HTTP/1.1" 200 None
DEBUG:msrestazure.azure_active_directory:MSI: Retrieving a token from http://127.0.1.1:46808/MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01
DEBUG:msrestazure.azure_active_directory:MSI: token retrieved
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): 127.0.1.1:46808
DEBUG:urllib3.connectionpool:http://127.0.1.1:46808 "GET /MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01 HTTP/1.1" 200 None
DEBUG:msrestazure.azure_active_directory:MSI: Retrieving a token from http://127.0.1.1:46808/MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01
DEBUG:msrestazure.azure_active_directory:MSI: token retrieved
DEBUG:azureml._restclient.clientbase:ClientBase: Calling get w

2021-05-15T18:39:40,766800400+00:00 - rsyslog/run 
2021-05-15T18:39:40,783217300+00:00 - gunicorn/run 
2021-05-15T18:39:40,776900000+00:00 - iot-server/run 
2021-05-15T18:39:40,820243200+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_c3366d8531518cc77327e9208a3a8fb1/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_c3366d8531518cc77327e9208a3a8fb1/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_c3366d8531518cc77327e9208a3a8fb1/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_c3366d8531518cc77327e9208a3a8fb1/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_c3366d8531518cc77327e9208a3a8fb1/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
EdgeHubC

In [517]:
# Re-attach to the service deployed earlier, looked up by its name in the workspace
aci_service = AciWebservice(workspace=ws, name="toxic-comments-prediction")

DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): 127.0.1.1:46808
DEBUG:urllib3.connectionpool:http://127.0.1.1:46808 "GET /MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01 HTTP/1.1" 200 None
DEBUG:msrestazure.azure_active_directory:MSI: Retrieving a token from http://127.0.1.1:46808/MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01
DEBUG:msrestazure.azure_active_directory:MSI: token retrieved
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): 127.0.1.1:46808
DEBUG:urllib3.connectionpool:http://127.0.1.1:46808 "GET /MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01 HTTP/1.1" 200 None
DEBUG:msrestazure.azure_active_directory:MSI: Retrieving a token from http://127.0.1.1:46808/MSI/auth/?resource=https://management.core.windows.net/&api-version=2017-09-01
DEBUG:msrestazure.azure_active_directory:MSI: token retrieved
DEBUG:azureml._restclient.clientbase:ClientBase: Calling get w

In [518]:
# Show the URL that accepts scoring requests
scoring_uri = aci_service.scoring_uri
print("Scoring endpoint:", scoring_uri)

Scoring endpoint: http://64c13356-b19d-4b04-9202-4891a3060bc3.eastus2.azurecontainer.io/score


### 4.4 Deploy to Azure Kubernetes Service

In [519]:
# aks_target = AksCompute(ws,"Exodus")

# deployment_config = AksWebservice.deploy_configuration(
#     cpu_cores = 1, 
#     memory_gb = 2)

# aks_service = Model.deploy(workspace=ws, 
#                        name="toxic-pl-test-1", 
#                        models=[model], 
#                        inference_config=inference_config, 
#                        deployment_config=deployment_config, 
#                        deployment_target=aks_target,
#                        overwrite=True)

# aks_service.wait_for_deployment(show_output = True)

In [520]:
### Connect to a previously deployed service
# aks_service = [r for r in AksWebservice.list(ws) if r.name == 'Toxic-PyTorch-Lightning'][0]

In [521]:
# print(aks_service.scoring_uri)
# aks_service.get_keys()

### 4.5 Test the API

In [522]:
# URL used by the request cell further down
endpoint = aci_service.scoring_uri

# Labelled summary of the deployed service
summary_lines = (
    f'\nservice state: {aci_service.state}\n',
    f'scoring URI: \n{endpoint}\n',
    f'swagger URI: \n{aci_service.swagger_uri}\n',
)
for summary_line in summary_lines:
    print(summary_line)

# The same two URLs again, one per line without labels
for uri in (endpoint, aci_service.swagger_uri):
    print(uri)


service state: Healthy

scoring URI: 
http://64c13356-b19d-4b04-9202-4891a3060bc3.eastus2.azurecontainer.io/score

swagger URI: 
http://64c13356-b19d-4b04-9202-4891a3060bc3.eastus2.azurecontainer.io/swagger.json

http://64c13356-b19d-4b04-9202-4891a3060bc3.eastus2.azurecontainer.io/score
http://64c13356-b19d-4b04-9202-4891a3060bc3.eastus2.azurecontainer.io/swagger.json


In [529]:
import requests
import json
import pandas as pd

# Two comments separated by a blank line; the recorded response below shows
# one prediction for each of them.
input_text = """You look so beautiful today

i hate you so much"""
x_new = {"text": input_text}

# Serialize the request payload (a dict) to a JSON document
input_data = json.dumps(x_new)

# Set the content type in the request headers
request_headers = { "Content-Type":"application/json"}

# Call the service. The timeout keeps the cell from hanging indefinitely
# when the endpoint is unreachable or still starting up.
response = requests.post(url = endpoint,
                         data = input_data,
                         headers = request_headers,
                         timeout = 60)

print(response.status_code)
# Fail with a clear HTTP error instead of trying to decode an error page as JSON
response.raise_for_status()
print("Prediction Results:", response.json())

DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): 64c13356-b19d-4b04-9202-4891a3060bc3.eastus2.azurecontainer.io:80
DEBUG:urllib3.connectionpool:http://64c13356-b19d-4b04-9202-4891a3060bc3.eastus2.azurecontainer.io:80 "POST /score HTTP/1.1" 200 223


200
Prediction Results: {"prediction": [{"label": "non-toxic", "probability": 0.7762245535850525, "text": "You look so beautiful today"}, {"label": "toxic", "probability": 0.9929181933403015, "text": "i hate you so much"}]}


In [499]:
# Display the JSON request body built with json.dumps in the request cell.
# NOTE(review): the recorded output below does not match the text sent in the
# request cell above (this cell has an older execution count) -- re-run it.
input_data

'{"text": "You are so cool my man wwooooow"}'

## 5. Retrain Our Model on New Data (Incomplete: Future Work)

In [None]:
%%writefile 'trainingscripts/retrain.py'

from __future__ import print_function, division

import pandas as pd
import numpy as np
import os
import requests
import json
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from transformers import BertTokenizerFast, BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup

import pytorch_lightning as pl
from pytorch_lightning.metrics.functional import accuracy, f1
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, BackboneFinetuning, QuantizationAwareTraining, ModelPruning
RANDOM_SEED = 42
pl.seed_everything(RANDOM_SEED)
# lightning plus wandb
import wandb
from pytorch_lightning.loggers import WandbLogger
import gc

### Add References
import argparse
from azureml.core import Run
from azureml.core.model import Model

# importing models
from models.toxic_bert import *

### Add run context for AML
run = Run.get_context()

### Parse incoming parameters
# Numeric options use real int defaults. --dataset and --model_name are names,
# so they are parsed as strings: with type=int and default="" argparse fails
# as soon as either option is omitted, and rejects any non-numeric name.
parser = argparse.ArgumentParser()
parser.add_argument("--data-folder", type=str, dest="data_folder", help="data folder mounting point", default="")
parser.add_argument("--num-epochs", type=int, dest="num_epochs", help="Number of epochs", default=10)
parser.add_argument("--max_token_count", type=int, dest="max_token_count", help="max_token_count", default=256)
parser.add_argument("--batch_size", type=int, dest="batch_size", help="batch_size", default=8)
# Defaults to the dataset name this script previously hard-coded
parser.add_argument("--dataset", type=str, dest="dataset", help="dataset", default="toxic_comments")
# No usable default exists for the tokenizer name, so it must be supplied
parser.add_argument("--model_name", type=str, dest="model_name", help="model_name", required=True)

args = parser.parse_args()
DATA_PATH = args.data_folder
N_EPOCHS = args.num_epochs
MAX_TOKEN_COUNT = args.max_token_count
BATCH_SIZE = args.batch_size
BERT_MODEL_NAME = args.model_name

tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
# tokenizer = DistilBertTokenizerFast.from_pretrained(BERT_MODEL_NAME)
os.environ["TOKENIZERS_PARALLELISM"] = "true"

# Get data (upload dataset to Azure first)
# `Dataset` in this script is torch.utils.data.Dataset, so the Azure ML class
# is imported under its own alias. The workspace is taken from the run
# context because no `ws` variable exists inside the submitted script.
from azureml.core import Dataset as AmlDataset
ws = run.experiment.workspace
toxic_comments_ds = AmlDataset.get_by_name(ws, name=args.dataset)
df = toxic_comments_ds.to_pandas_dataframe()

# NOTE(review): LABEL_COLUMNS is not defined in this script; presumably it is
# provided by `from models.toxic_bert import *` -- confirm.
df_toxic = df[df[LABEL_COLUMNS].sum(axis=1) > 0]
df_clean = df[df[LABEL_COLUMNS].sum(axis=1) == 0]

# Balanced sample: 2000 rows with at least one label set, 2000 with none
train_df = pd.concat([
  df_toxic.sample(2000, random_state=42),
  df_clean.sample(2000, random_state=42)
])

# Collapse the label columns into one binary label (0 = no label set, 1 = any label set)
train_df['label'] = np.where(train_df[LABEL_COLUMNS].sum(axis=1) == 0, 0, 1)
train_df = train_df.drop(LABEL_COLUMNS, axis=1)
train_df = train_df.rename(columns={"comment_text": "text"})

# Hold out 500 rows per class for fine-tuning and remove them from train_df
fine_tune_df = train_df.groupby('label').apply(lambda s: s.sample(500, random_state=123)).reset_index(level=0, drop=True)
train_df = train_df.drop(fine_tune_df.index)

train_df, test_df = train_test_split(train_df, test_size=0.25)
val_df = test_df.sample(frac=0.1)
# Reassign instead of dropping in place on a frame returned by the split
test_df = test_df.drop(val_df.index)

# Distinct class labels, written to labels.txt at the end of the script.
# Previously this was the whole label column, i.e. one entry per row.
class_names = sorted(fine_tune_df['label'].unique())


# Dataset over the fine-tuning rows; it is not referenced again in the
# visible part of this script.
train_dataset = ToxicCommentsDataset(
  fine_tune_df,
  tokenizer=tokenizer,
  max_token_len=MAX_TOKEN_COUNT
)

# The fine-tuning rows are used for training, val_df for validation
data_module = ToxicCommentDataModule(
  fine_tune_df,
  val_df,
  tokenizer=tokenizer,
  batch_size=BATCH_SIZE,
  max_token_len=MAX_TOKEN_COUNT
)

steps_per_epoch = len(fine_tune_df) // BATCH_SIZE
total_training_steps = steps_per_epoch * N_EPOCHS

# One fifth of all training steps is used for warm-up
warmup_steps = total_training_steps // 5
# A bare `warmup_steps, total_training_steps` expression only displays a
# value in a notebook cell; in a script the values have to be printed.
print(f"warmup steps: {warmup_steps}, total training steps: {total_training_steps}")

# Build the classifier to fine-tune.
# NOTE(review): a checkpoint file name is passed as the first positional
# argument -- confirm that the ToxicCommentTagger constructor accepts it, or
# whether loading weights from this checkpoint was intended instead.
model = ToxicCommentTagger(
    'finetuned_multilingual_bert_1000ex_512tokens_8bs_pruning.ckpt',  # comma was missing here (SyntaxError)
    num_classes=2,
    n_warmup_steps=warmup_steps,
    n_training_steps=total_training_steps
    )

# Keep only the single best checkpoint, chosen by the lowest "val_loss"
checkpoint_callback = ModelCheckpoint(
    dirpath="checkpoints",
    filename="best-checkpoint",
    save_top_k=1,
    verbose=True,
    monitor="val_loss",
    mode="min",
)


def multiplicative(epoch):
    """Return the constant factor 1.1 for every epoch (modifies learning rate after unfreeze)."""
    return 1.1


# Freezes the pretrained model until 5 epochs
backbone_finetuning = BackboneFinetuning(
    unfreeze_backbone_at_epoch=5,
    lambda_func=multiplicative,
)

# Global unstructured L1 pruning with amount=0.1
model_pruning = ModelPruning(
    pruning_fn='l1_unstructured',
    amount=0.1,
    use_global_unstructured=True,
)

# Log the run to Weights & Biases
wandb.login()
wandb_logger = WandbLogger(project="weak-supervision-fsdl-project", entity="weak-supervision-classifier-team")

# The ModelCheckpoint instance is registered through `callbacks`: passing it
# via `checkpoint_callback=` is deprecated in the Lightning version pinned by
# the notebook. It is listed last to keep the callback order unchanged.
trainer = pl.Trainer(
    logger=[wandb_logger],
    log_every_n_steps=50,
    callbacks=[backbone_finetuning, model_pruning, checkpoint_callback], # , early_stopping_callback, quantization_aware_training]
    max_epochs=N_EPOCHS,
    gpus=1,
    deterministic=True,
    progress_bar_refresh_rate=30,
    precision=16
    )

trainer.fit(model, data_module)

model_ckpt = "./outputs/pl_bert_toxic_best_model.ckpt"
# Make sure the target folder exists before anything is written into it
os.makedirs(os.path.dirname(model_ckpt), exist_ok=True)
trainer.save_checkpoint(model_ckpt)
wandb.save('*ckpt*')
wandb.finish()

# Write every entry of class_names to the labels file, one per line
labels_path = './outputs/labels.txt'
with open(labels_path, 'w') as labels_file:
    for label in class_names:
        labels_file.write("%s\n" % label)