In [None]:
pip install transformers datasets evaluate accelerate

In [20]:
import pandas as pd
import numpy as np
import torch
import evaluate
from tqdm import tqdm
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification, TrainingArguments, Trainer

In [9]:
data = load_dataset("ai-forever/kinopoisk-sentiment-classification")

In [10]:
model_name = "cointegrated/rubert-tiny-sentiment-balanced"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name, ignore_mismatched_sizes=True
)

In [11]:
if torch.cuda.is_available():
    model.cuda()

In [12]:
def preprocess_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)

In [13]:
train = data['train']
val = data['validation']
test = data['test']
del data

In [14]:
train = train.map(preprocess_function,remove_columns = ['text','id','label_text'], batched=True).with_format('torch') 

In [15]:
val = val.map(preprocess_function,remove_columns = ['text','id','label_text'], batched=True).with_format('torch') 

In [16]:
accuracy = evaluate.load("accuracy")

In [17]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

In [18]:
training_args = TrainingArguments(
    output_dir="my_awesome_model",
    learning_rate=3e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# Создание тренера
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train,
    eval_dataset=val,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

  trainer = Trainer(


In [19]:
# Обучение модели
trainer.train()

  0%|          | 0/657 [00:00<?, ?it/s]

  attn_output = torch.nn.functional.scaled_dot_product_attention(


{'loss': 0.9286, 'grad_norm': 13.467000007629395, 'learning_rate': 7.168949771689497e-05, 'epoch': 0.76}


  0%|          | 0/94 [00:00<?, ?it/s]

{'eval_loss': 0.7944239377975464, 'eval_accuracy': 0.6313333333333333, 'eval_runtime': 2.5759, 'eval_samples_per_second': 582.327, 'eval_steps_per_second': 36.492, 'epoch': 1.0}
{'train_runtime': 58.5914, 'train_samples_per_second': 179.207, 'train_steps_per_second': 11.213, 'train_loss': 0.8940519916412493, 'epoch': 1.0}


TrainOutput(global_step=657, training_loss=0.8940519916412493, metrics={'train_runtime': 58.5914, 'train_samples_per_second': 179.207, 'train_steps_per_second': 11.213, 'total_flos': 77439269376000.0, 'train_loss': 0.8940519916412493, 'epoch': 1.0})

In [21]:
torch.cuda.empty_cache()

In [22]:
model.eval() 

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(29564, 312, padding_idx=0)
      (position_embeddings): Embedding(512, 312)
      (token_type_embeddings): Embedding(2, 312)
      (LayerNorm): LayerNorm((312,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-2): 3 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=312, out_features=312, bias=True)
              (key): Linear(in_features=312, out_features=312, bias=True)
              (value): Linear(in_features=312, out_features=312, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=312, out_features=312, bias=True)
              (LayerNorm): LayerNorm((312,), eps=1e-1

In [23]:
del train
del val

In [24]:
label = test['label']
test = test.remove_columns(['label'])

In [25]:
lst=[]
with torch.no_grad():
    for input in tqdm(test['text']):
        t = tokenizer(input, padding=True, truncation=True, return_tensors='pt')
        outputs = model(**{k: v.to(model.device) for k, v in t.items()}).logits.argmax(-1)
        lst.append(int(outputs.cpu()))

100%|██████████| 1500/1500 [00:04<00:00, 303.23it/s]


In [26]:
print(accuracy.compute(predictions=lst, references=label))

{'accuracy': 0.614}
