In [None]:
%pip install datasets transformers onnx onnxruntime 

We use Microsoft's small distilled BERT model as the pre-trained checkpoint and fine-tune it on the emotion classification task. 
See https://huggingface.co/microsoft/xtremedistil-l6-h256-uncased for details. 

In [1]:
# Hugging Face Hub id of the pre-trained checkpoint used throughout the notebook.
model_name = "microsoft/xtremedistil-l6-h256-uncased"

In [2]:
from datasets import load_dataset
from transformers import AutoTokenizer

# Fetch the "emotion" dataset and the tokenizer that belongs to the checkpoint.
dataset = load_dataset("emotion")
tokenizer = AutoTokenizer.from_pretrained(model_name)


def tokenize_function(examples):
    """Tokenize the "text" field of a batch, padding/truncating to 128 tokens."""
    return tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=128,
    )


# batched=True passes many examples to tokenize_function per call.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Fine-tune on the "train" split; the "test" split is used as the evaluation set.
full_train_dataset, full_eval_dataset = (
    tokenized_datasets["train"],
    tokenized_datasets["test"],
)

In [5]:
import torch

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
print(device)

cpu


In [6]:
from transformers import AutoModelForSequenceClassification

# Load the checkpoint with a 6-label classification head (presumably one label
# per emotion class — confirm against the dataset) and place it on `device`.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name, num_labels=6
).to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/xtremedistil-l6-h256-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
import numpy as np
import evaluate


metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Compute accuracy for one evaluation pass.

    `eval_pred` unpacks into ``(logits, labels)``. The predicted class is the
    index of the largest logit along the last axis, and the result is whatever
    ``metric.compute`` returns for those predictions and labels.
    """
    scores, references = eval_pred
    predicted = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted, references=references)

In [15]:
from transformers import Trainer, TrainingArguments

# Hyper-parameters for fine-tuning; "test_trainer" is the run's output directory.
training_args = TrainingArguments(
    output_dir="test_trainer",
    per_device_train_batch_size=128,
    num_train_epochs=24,
    learning_rate=3e-05,
)

# Wire together the model, the data splits and the accuracy callback.
trainer = Trainer(
    args=training_args,
    model=model,
    train_dataset=full_train_dataset,
    eval_dataset=full_eval_dataset,
    compute_metrics=compute_metrics,
)

ImportError: Using the `Trainer` with `PyTorch` requires `accelerate>=0.26.0`: Please run `pip install transformers[torch]` or `pip install 'accelerate>=0.26.0'`

In [14]:
# Fine-tune the model using the configuration held by `trainer`.
trainer.train()

NameError: name 'trainer' is not defined

In [23]:
# Score the fine-tuned model on the evaluation split; the returned metrics are
# shown as this cell's output.
trainer.evaluate()

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8


{'epoch': 24.0,
 'eval_accuracy': 0.9265,
 'eval_loss': 0.22581592202186584,
 'eval_runtime': 5.0402,
 'eval_samples_per_second': 396.809,
 'eval_steps_per_second': 49.601}

Export the PyTorch model to ONNX format for serving with ONNX Runtime Web 

In [24]:
import transformers
import transformers.convert_graph_to_onnx as onnx_convert
from pathlib import Path

In [25]:
# Bundle the fine-tuned model and its tokenizer into a text-classification
# pipeline, which is the object the ONNX exporter below takes as input.
pipeline = transformers.pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
)

In [26]:
# Move the model to the CPU before exporting.
# NOTE(review): presumably the exporter builds its sample inputs on the CPU — confirm.
model = model.to("cpu")

In [None]:
# Export the pipeline's model to a single "classifier.onnx" file (opset 11).
onnx_convert.convert_pytorch(
    pipeline,
    opset=11,
    output=Path("classifier.onnx"),
    use_external_format=False,
)

In [71]:
# These packages are also installed by the first cell of the notebook.
%pip install onnx onnxruntime 



In [72]:
from onnxruntime.quantization import QuantType, quantize_dynamic

# Dynamically quantize the exported model, storing weights as unsigned 8-bit
# integers, and write the result next to the original file.
quantize_dynamic(
    model_input="classifier.onnx",
    model_output="classifier_int8.onnx",
    weight_type=QuantType.QUInt8,
)

Evaluate accuracy with ONNX Runtime inference: validate that the exported model matches PyTorch inference, and measure how much accuracy the int8-quantized model loses. 

In [61]:
import onnxruntime as ort

In [73]:
# One inference session per exported file: the original export and its
# int8-quantized counterpart.
session, session_int8 = (
    ort.InferenceSession(onnx_path)
    for onnx_path in ("classifier.onnx", "classifier_int8.onnx")
)

In [74]:
import numpy as np

In [75]:
# Feed the whole evaluation split as a single batch. The dtype is pinned to
# int64 because NumPy's default integer type is platform-dependent (32-bit on
# Windows with NumPy < 2), and ONNX Runtime rejects inputs whose dtype differs
# from the one declared in the graph (presumably int64 for a model exported
# from PyTorch — confirm against session.get_inputs()).
input_feed = {
    name: np.asarray(full_eval_dataset[name], dtype=np.int64)
    for name in ("input_ids", "attention_mask", "token_type_ids")
}

In [76]:
# Request only "output_0" from each session; [0] selects that single returned array.
out = session.run(["output_0"], input_feed)[0]
out_int8 = session_int8.run(["output_0"], input_feed)[0]

In [77]:
# Predicted class = index of the largest value along the last axis.
predictions = out.argmax(axis=-1)
predictions_int8 = out_int8.argmax(axis=-1)

In [78]:
# Accuracy of the non-quantized ONNX model on the evaluation split.
metric.compute(
    predictions=predictions,
    references=full_eval_dataset["label"],
)

{'accuracy': 0.9265}

In [79]:
# Accuracy of the int8-quantized ONNX model on the same evaluation split.
metric.compute(
    predictions=predictions_int8,
    references=full_eval_dataset["label"],
)

{'accuracy': 0.8195}

In [31]:
from google.colab import files

In [80]:
# Download the quantized model via google.colab's `files` helper (Colab-only).
files.download('classifier_int8.onnx') 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [81]:
# Download the non-quantized model via google.colab's `files` helper (Colab-only).
files.download('classifier.onnx')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>