In [51]:
import torch
import numpy as np
import pandas as pd

from tqdm.auto import tqdm
from datasets import load_dataset
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score
from transformers import DataCollatorWithPadding
from transformers import BertForSequenceClassification, AutoTokenizer
from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments, BertConfig
import time
import warnings
warnings.filterwarnings('ignore')

### Загружаем выбранную нами модель

In [52]:


cedr = load_dataset('cedr')



No config specified, defaulting to: cedr/main
Reusing dataset cedr (/home/user/.cache/huggingface/datasets/cedr/main/0.1.1/117570489cbabbdf8de619bd31918a1cd680a7f286b89d04af340d0691dc2d66)


  0%|          | 0/2 [00:00<?, ?it/s]

In [53]:
def binarize_labels(labels):
    return [int(len(labels)==0) ] + [int(i in labels) for i in range(5)]

In [151]:
MODEL_NAME = 'cointegrated/rubert-tiny2-cedr-emotion-detection'

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

model = BertForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=6,
    problem_type='multi_label_classification'
    )

In [55]:
cedr_mapped = cedr.map(
    lambda x: tokenizer(
        x['text'],
        truncation=True),
    batched=True
    ).map(
        lambda x: {'label': [float(y) for y in binarize_labels(x['labels'])]},
        batched=False,
        remove_columns=['text', 'labels', 'source']
        )

Loading cached processed dataset at /home/user/.cache/huggingface/datasets/cedr/main/0.1.1/117570489cbabbdf8de619bd31918a1cd680a7f286b89d04af340d0691dc2d66/cache-1c1c431253689271.arrow
Loading cached processed dataset at /home/user/.cache/huggingface/datasets/cedr/main/0.1.1/117570489cbabbdf8de619bd31918a1cd680a7f286b89d04af340d0691dc2d66/cache-56ddecb77220a0e4.arrow
Loading cached processed dataset at /home/user/.cache/huggingface/datasets/cedr/main/0.1.1/117570489cbabbdf8de619bd31918a1cd680a7f286b89d04af340d0691dc2d66/cache-671f02d6ea45e193.arrow
Loading cached processed dataset at /home/user/.cache/huggingface/datasets/cedr/main/0.1.1/117570489cbabbdf8de619bd31918a1cd680a7f286b89d04af340d0691dc2d66/cache-c0883c99c2d9b127.arrow


In [56]:
data_collator = DataCollatorWithPadding(tokenizer)

### Функция оценки качества

In [165]:
def predict_with_model(model, dataloader):
    preds = []
    facts = []

    for batch in tqdm(dataloader):
        facts.append(batch.labels.cpu().numpy())
        batch = batch.to(model.device)

        with torch.no_grad():
            pr = model(
                input_ids=batch.input_ids,
                attention_mask=batch.attention_mask,
                token_type_ids=batch.token_type_ids
                )
        preds.append(torch.softmax(pr.logits, -1).cpu().numpy())

    facts = np.concatenate(facts)
    preds = np.concatenate(preds)

    return facts, preds


def get_classification_report(facts, preds, model):
    aucs = {label:roc_auc_score(facts[:, i], preds[:, i]) for i, label in model.config.id2label.items()}
    return aucs


def evaluate_model(model, dev_dataloader):
    eval_start_time = time.time()
    facts, preds = predict_with_model(model, dev_dataloader)
    eval_end_time = time.time()
    eval_duration_time = eval_end_time - eval_start_time
    timer = 'Eval time:  ' +str(eval_duration_time)
    aucs = get_classification_report(facts, preds, model)
    aucs['overall'] = np.mean(list(aucs.values()))
    return aucs, timer

In [166]:


batch_size = 64



In [167]:
test_dataloader = DataLoader(
    cedr_mapped['test'],
    batch_size=batch_size,
    drop_last=False,
    shuffle=True,
    num_workers=0,
    collate_fn=data_collator
)

In [168]:
test_results = evaluate_model(model, test_dataloader)

  0%|          | 0/30 [00:00<?, ?it/s]

In [169]:


test_results



({'no_emotion': 0.9285738020867945,
  'joy': 0.9511928957992504,
  'sadness': 0.956353783198774,
  'surprise': 0.8908363111599781,
  'fear': 0.8954949670239243,
  'anger': 0.7511348890153671,
  'overall': 0.8955977747140146},
 'Eval time:  2.5283966064453125')

### Применим встроенную функцию квантизации. В данном случае для берта нормально работает пока только динамическая (и то в режиме бэты) 

In [170]:
quantized_model_dino = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)
print(quantized_model)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(83828, 312, padding_idx=0)
      (position_embeddings): Embedding(2048, 312)
      (token_type_embeddings): Embedding(2, 312)
      (LayerNorm): LayerNorm((312,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): DynamicQuantizedLinear(in_features=312, out_features=312, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
              (key): DynamicQuantizedLinear(in_features=312, out_features=312, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
              (value): DynamicQuantizedLinear(in_features=312, out_features=312, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
              (dropout): Dropout(p=0.1, inplace=False)
            )
     

### Тут можно глянуть размер оригинальной и ужатой модели.

In [171]:
import os

In [173]:
def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p")/1e6)
    os.remove('temp.p')

print_size_of_model(model)
print_size_of_model(quantized_model_dino)

Size (MB): 116.819895
Size (MB): 109.663935


### Модель изначально небольшая, поэтому особого выигрыша в сжатии нет. Но размер уменьшен

In [174]:

test_results

({'no_emotion': 0.9285738020867945,
  'joy': 0.9511928957992504,
  'sadness': 0.956353783198774,
  'surprise': 0.8908363111599781,
  'fear': 0.8954949670239243,
  'anger': 0.7511348890153671,
  'overall': 0.8955977747140146},
 'Eval time:  2.5283966064453125')

In [175]:
test_results_quant = evaluate_model(quantized_model, test_dataloader)

  0%|          | 0/30 [00:00<?, ?it/s]

In [176]:
test_results_quant

({'no_emotion': 0.9280011915047137,
  'joy': 0.952709375121587,
  'sadness': 0.9570129749296481,
  'surprise': 0.8933479934029687,
  'fear': 0.8979921052953181,
  'anger': 0.753019920318725,
  'overall': 0.8970139267621602},
 'Eval time:  2.2547237873077393')

### Видим, что метрики примерно теже - разница только в 3 знаке после запятой, и то часто она выше

для сравнения влупим берта обыкновенного

In [48]:
model_big = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_big )

model = BertForSequenceClassification.from_pretrained(
    model_big ,
    num_labels=6,
    problem_type='multi_label_classification'
    )

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [50]:
quantized_model = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)
print(quantized_model)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
              (key): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
              (value): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
              (dropout): Dropout(p=0.1, inplace=False)
            

In [51]:
def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p")/1e6)
    os.remove('temp.p')

print_size_of_model(model)
print_size_of_model(quantized_model)

Size (MB): 438.012366
Size (MB): 181.48241
