In [1]:
import torch
import numpy as np
import os
import pandas as pd

In [2]:
# Проверяем доступна ли GPU и задаем вычислительное устройство
if torch.cuda.is_available():    
    device = torch.device("cuda")
    print('Available GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU')
    device = torch.device("cpu")

No GPU available, using the CPU


In [3]:
from transformers import BertTokenizer

# Используем BERT tokenizer, приводим к нижнему регистру
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

In [4]:
from transformers import BertForSequenceClassification, AdamW, BertConfig

# Загружаем предобученный BertForSequenceClassification
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased", # название предобученной модели
    num_labels = 2, # 2 класса
    output_attentions = False, # attention веса не будут возвращаться моделью
    output_hidden_states = False, # hidden-states не будут возвращаться моделью
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Обученную модель можно сохранить для дальнейшего использования
output_dir = './model2_save/'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
# сохранять нужно как модель, так и tokenizer
model_to_save = model.module if hasattr(model, 'module') else model 
model_to_save.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

('./model2_save/tokenizer_config.json',
 './model2_save/special_tokens_map.json',
 './model2_save/vocab.txt',
 './model2_save/added_tokens.json')

In [6]:
# BertForSequenceClassification - наш вариант
model = BertForSequenceClassification.from_pretrained('./model_save/')
tokenizer = BertTokenizer.from_pretrained('./model_save/')

In [7]:
# Далее загруженная модель может использоваться для дообучения или для работы
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [8]:
# Проверим как оно работает
sentence = 'They caused him to become president by making him.'
sentence = 'Clearly, John probably will immediately learn French perfectly.'

enc_s = tokenizer.encode(sentence,                      
                        add_special_tokens = True, # Указываем, что нам нужно добавить служебные токены
                        padding = 'max_length',  # дополнение до макс.длины
                        max_length = 64,         # максимальная длина предложений
                   )
# Формируем список id токенов
input_ids = np.array(enc_s)

# Создаем attention mask для виртуальных токенов
attention_mask = [int(id_ > 0) for id_ in input_ids]

model.eval()
batch = tuple(t.to(device) for t in torch.Tensor([input_ids, attention_mask]))
b_input_ids, b_input_mask = batch
with torch.no_grad():
    outputs = model( b_input_ids.unsqueeze(0).to(torch.long), token_type_ids=None, attention_mask=b_input_mask.unsqueeze(0))
    
logits = outputs.logits
logits = logits.detach().cpu().numpy()
predicted_label = np.argmax(logits, axis=1).flatten()
print(predicted_label)

  batch = tuple(t.to(device) for t in torch.Tensor([input_ids, attention_mask]))


[1]
