In [1]:
!pip install optimum[onnxruntime-gpu] 

Collecting optimum[onnxruntime-gpu]
  Downloading optimum-1.13.2.tar.gz (300 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m301.0/301.0 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting coloredlogs (from optimum[onnxruntime-gpu])
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Collecting onnxruntime-gpu>=1.11.0 (from optimum[onnxruntime-gpu])
  Downloading onnxruntime_gpu-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (153.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m153.4/153.4 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting evaluate (from optimum[onnxruntime-gpu])
  Downloading

In [2]:
pip install transformers --upgrade

Collecting transformers
  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m61.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m88.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m
Installing collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.13.3
    Uninstalling tokenizers-0.13.3:
      Successfully uninstalled tokenizers-0.13.3
  Attempting uninstall: transformers
    Found existing installation: transformers 4.33.0
    Uninstalling transformers-4.33.0:
      Successfully uninstalled transformers-4.33.0
Successfully installed tokenizers-0.14.1 transformers-4.34.1
Note: you may need to restart

In [3]:
import torch
import os
import numpy as np
from torch import nn
import torch.nn.utils.prune as prune
import pandas as pd
import transformers
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
from evaluate import evaluator
import evaluate

from torch.utils.data import Dataset, DataLoader
from optimum.pipelines import pipeline
from optimum.onnxruntime import ORTOptimizer, ORTModelForSequenceClassification
from optimum.onnxruntime.configuration import OptimizationConfig

from tqdm import tqdm

import time



In [4]:
class Config:
    device = "cuda"
    model_checkpoint = "unitary/toxic-bert"
    save_folder = 'onnx_checkpoint'
    b_s = 16

In [5]:
model = AutoModelForSequenceClassification.from_pretrained(Config.model_checkpoint).to(Config.device)
tokenizer = transformers.BertTokenizer.from_pretrained('bert-base-cased')

Downloading (…)lve/main/config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [6]:
data = pd.read_csv("/kaggle/input/jigsaw-toxic-comment-classification-challenge/train.csv.zip")

In [7]:
data_1 = data[data.toxic == 0].sample(200)
data_2 = data[data.severe_toxic == 1].sample(200)
data_3 = data[data.identity_hate == 1].sample(200)
data_4 = data[data.obscene == 1].sample(200)
data_5 = data[data.threat == 1].sample(200)
data_6 = data[data.insult == 1].sample(200)
data_7 = data[data.identity_hate == 1].sample(200)
data_8 = data.sample(1000)
new_data = pd.concat([data_1,
                      data_2, 
                      data_3,
                      data_4,
                      data_5,
                      data_6,
                      data_7,
                      data_8,
                     ]).reset_index(drop=True)

In [8]:
class ToxicDs(Dataset):
    def __init__(self, data, tokenizer, length):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, ind):
        row = self.data.iloc[ind]
        sentence = row['comment_text']
        bert_sentence = self.tokenizer(sentence,
                        max_length=self.max_length,
                        pad_to_max_length=True,
                        add_special_tokens=True)

        return {
            "id": torch.LongTensor(bert_sentence['input_ids']),
            "mask":  torch.LongTensor(bert_sentence['attention_mask']),                      
        }
    
ds = ToxicDs(new_data, length = 256, tokenizer=tokenizer)
toxic_dataloader = DataLoader(ds, batch_size = Config.b_s, shuffle= False, drop_last=True)

In [9]:
model.eval()

start_time = time.time()
lst_times = []
with torch.no_grad():
    for batch in tqdm(toxic_dataloader):
        st_b_time = time.time()
        ids = batch["id"].to(Config.device)
        masks = batch["mask"].to(Config.device)
        y_true = model(ids, masks)['logits']
        lst_times.append(time.time() - st_b_time)
finish_time = time.time() - start_time

  0%|          | 0/150 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
100%|██████████| 150/150 [00:19<00:00,  7.69it/s]


In [10]:
print(f"Средняя время работы на батче: {np.round(np.mean(lst_times), 3)} сек")
print(f"Общее время работы: {np.round(np.mean(finish_time), 3)} сек")

Средняя время работы на батче: 0.09 сек
Общее время работы: 19.518 сек


In [11]:
ort_model = ORTModelForSequenceClassification.from_pretrained(Config.model_checkpoint, export=True,)
tokenizer_ort = AutoTokenizer.from_pretrained(Config.model_checkpoint)
# ort_model.save_pretrained(Config.save_folder)
# tokenizer.save_pretrained(Config.save_folder)

onnx_classifier = pipeline("text-classification",model=ort_model,tokenizer=tokenizer_ort, device = Config.device)

Framework not specified. Using pt to export to ONNX.


Downloading (…)okenizer_config.json:   0%|          | 0.00/174 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Using the export variant default. Available variants are:
	- default: The default ONNX variant.
Using framework PyTorch: 2.0.0
Overriding 1 configuration item(s)
	- use_cache -> False


verbose: False, log level: Level.ERROR



use_io_binding was set to False, setting it to True because it can provide a huge speedup on GPUs. It is possible to disable this feature manually by setting the use_io_binding attribute back to False.
2023-10-27 17:02:33.044138920 [W:onnxruntime:, session_state.cc:1162 VerifyEachNodeIsAssignedToAnEp] Some nodes were not assigned to the preferred execution providers which may or may not have an negative impact on performance. e.g. ORT explicitly assigns shape related ops to CPU to improve perf.
2023-10-27 17:02:33.044184284 [W:onnxruntime:, session_state.cc:1164 VerifyEachNodeIsAssignedToAnEp] Rerunning with verbose output on a non-minimal build will show node assignments.


In [18]:
ds_onnx = ToxicDs(new_data, length = 128, tokenizer=tokenizer_ort)
toxic_dataloader_onnx = DataLoader(ds_onnx, batch_size = Config.b_s, shuffle= False, drop_last=True)

In [19]:
start_time = time.time()
lst_times = []

for batch in tqdm(toxic_dataloader_onnx):
    st_b_time = time.time()
    ids = batch["id"].to(Config.device)
    masks = batch["mask"].to(Config.device)

    y_true = onnx_classifier.forward({
        "input_ids":ids,
        "attention_mask":masks,
        'token_type_ids': torch.ones_like(masks)
    })
    lst_times.append(time.time() - st_b_time)
finish_time = time.time() - start_time

100%|██████████| 150/150 [00:10<00:00, 14.28it/s]


In [20]:
print(f"Средняя время работы на батче: {np.round(np.mean(lst_times), 3)} сек")
print(f"Общее время работы: {np.round(np.mean(finish_time), 3)} сек")


Средняя время работы на батче: 0.058 сек
Общее время работы: 10.508 сек


In [22]:
print(f"Ускорение общего времени работы: {((19.518 - 10.508 ) / 19.518 * 100):.2f}%")
print(f"Ускорение работы на батче : {((0.09 - 0.058 ) / 0.09 * 100):.2f}%")

Ускорение общего времени работы: 46.16%
Ускорение работы на батче : 35.56%


In [16]:
# Базовая модель
model_size_bytes = sum(p.numel() for p in model.parameters() if p.requires_grad) * 4

model_size_mb = model_size_bytes / (1024 * 1024)
print(f"Размер модели: {model_size_mb:.2f} MB")

Размер модели: 417.66 MB


In [17]:

# ONNX модель
temp_model_path = "temp_ort_model"

ort_model.save_pretrained(temp_model_path)
model_size = os.path.getsize(temp_model_path)
model_size_mb = model_size_bytes / (1024 * 1024)
print(f"Размер модели: {(model_size / 10):.2f} MB")

Размер модели: 409.60 MB
