In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import numpy as np
import pandas as pd

import copy
import traceback
import datetime
import joblib
import re
import os
import random
import string
import time

from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import matplotlib.pyplot as plt
%matplotlib inline

import torch
from torch.utils.data import TensorDataset, RandomSampler
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from torchtext import data
from torchtext import datasets
from torchtext import vocab
from torchtext.vocab import Vectors, GloVe
from transformers import AutoTokenizer, AutoModelWithLMHead, AdamW, BertForSequenceClassification
from transformers import BertTokenizer, BertModel

from tqdm.notebook import tqdm

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
import pymorphy2
from sklearn.base import BaseEstimator, TransformerMixin

import gensim

In [3]:
class ConfigExperiment:
    seed = 42
    device = "cuda" if torch.cuda.is_available() else "cpu"
    batch_size = 32
    num_epochs = 3
    num_workers = 0
    patience = 5
    early_stopping_delta = 1e-4
    save_dirname = "models"
    
config = ConfigExperiment()

In [4]:
def init_random_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic=True
    
init_random_seed(config.seed)

### Подготовка данных

In [5]:
train = pd.read_csv("../data/train_processed_data.csv", index_col=False)
validate = pd.read_csv("../data/validate_processed_data.csv", index_col=False)
test = pd.read_csv("../data/test_processed_data.csv", index_col=False)

In [6]:
train.head()

Unnamed: 0,text,target
0,оставаться самый нужный и самый близкие ) весь...,1
1,"такой приятный чувство , когда ты знаешь , что...",1
2,день начинаться с лень вообще ничто делать не ...,0
3,at_user at_user ксюша поход вплотную там суп з...,1
4,"at_user с днём рождение at_user , творческий у...",1


In [7]:
tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/rubert-base-cased")

In [8]:
init_token = tokenizer.cls_token
eos_token = tokenizer.sep_token
pad_token = tokenizer.pad_token
unk_token = tokenizer.unk_token

print(init_token, eos_token, pad_token, unk_token)

[CLS] [SEP] [PAD] [UNK]


In [9]:
init_token_idx = tokenizer.cls_token_id
eos_token_idx = tokenizer.sep_token_id
pad_token_idx = tokenizer.pad_token_id
unk_token_idx = tokenizer.unk_token_id

print(init_token_idx, eos_token_idx, pad_token_idx, unk_token_idx)

101 102 0 100


In [10]:
max_input_length = tokenizer.max_model_input_sizes['bert-base-uncased']

print(max_input_length)

512


In [11]:
def tokenize_and_cut(sentence):
    tokens = tokenizer.tokenize(sentence) 
    tokens = tokens[:max_input_length-2]
    return tokens

In [12]:
TEXT = data.Field(batch_first=True,
                  use_vocab=False,
                  tokenize=tokenize_and_cut,
                  preprocessing=tokenizer.convert_tokens_to_ids,
                  init_token=init_token_idx,
                  eos_token=eos_token_idx,
                  pad_token=pad_token_idx,
                  unk_token=unk_token_idx)

LABEL = data.LabelField(sequential=False, use_vocab=False, dtype=torch.float)

fields = [('text',TEXT), ('label', LABEL)]

train_data, valid_data, test_data = data.TabularDataset.splits(
    path="../data/",
    train="train_processed_data.csv",
    validation="validate_processed_data.csv",
    test="test_processed_data.csv",
    format="csv",
    fields=fields,
    skip_header=True)

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    sort_key = lambda x: x.text,
    batch_size=config.batch_size,
    device=config.device)

In [13]:
print(f"Number of training examples: {len(train_data)}")
print(f"Number of validation examples: {len(valid_data)}")
print(f"Number of testing examples: {len(test_data)}")

Number of training examples: 136100
Number of validation examples: 45367
Number of testing examples: 45367


In [14]:
print(vars(train_data.examples[0]))

{'text': [26676, 3248, 880, 852, 36700, 852, 851, 3248, 880, 852, 35637, 122, 11417, 3760, 6260, 852, 48752, 122, 851, 877, 4564, 29309, 122, 13496, 1997, 877, 14455, 3248, 1699, 24287, 36700, 852, 877, 3451, 3474, 8953, 122], 'label': '1'}


In [15]:
tokens = tokenizer.convert_ids_to_tokens(vars(train_data.examples[0])['text'])

print(tokens)

['оставаться', 'сам', '##ы', '##и', 'нужны', '##и', 'и', 'сам', '##ы', '##и', 'близкие', ')', 'весь', 'ост', '##ально', '##и', 'уходить', ')', 'и', 'я', 'только', 'рада', ')', 'потому', 'что', 'я', 'никогда', 'сам', 'не', 'понять', 'нужны', '##и', 'я', 'человек', 'или', 'нет', ')']


### Создание модели

In [16]:
class BERTGRUSentiment(nn.Module):
    def __init__(self, bert, hidden_dim, output_dim, n_layers, bidirectional, dropout):
        super().__init__()
        self.bert = bert
        embedding_dim = bert.config.to_dict()['hidden_size']
        
        self.rnn = nn.GRU(embedding_dim,
                          hidden_dim,
                          num_layers = n_layers,
                          bidirectional = bidirectional,
                          batch_first = True,
                          dropout = 0 if n_layers < 2 else dropout)
        
        self.out = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text):
        #text = [batch size, sent len] 

        embedded = self.bert(text)[0]    
        #embedded = [batch size, sent len, emb dim]
        
        _, hidden = self.rnn(embedded)
        #hidden = [n layers * n directions, batch size, emb dim]
        
        if self.rnn.bidirectional:
            hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1))
        else:
            hidden = self.dropout(hidden[-1,:,:])  
        #hidden = [batch size, hid dim]
        
        output = self.out(hidden)
        #output = [batch size, out dim]
        
        return output

In [17]:
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.25

In [17]:
bert = BertModel.from_pretrained("DeepPavlov/rubert-base-cased")

model = BERTGRUSentiment(bert, HIDDEN_DIM, OUTPUT_DIM, N_LAYERS, BIDIRECTIONAL, DROPOUT)

In [21]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [19]:
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 180,612,609 trainable parameters


In [20]:
for name, param in model.named_parameters():                
    if name.startswith('bert'):
        param.requires_grad = False
        
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 2,759,169 trainable parameters


In [21]:
for name, param in model.named_parameters():                
    if param.requires_grad:
        print(name)

rnn.weight_ih_l0
rnn.weight_hh_l0
rnn.bias_ih_l0
rnn.bias_hh_l0
rnn.weight_ih_l0_reverse
rnn.weight_hh_l0_reverse
rnn.bias_ih_l0_reverse
rnn.bias_hh_l0_reverse
rnn.weight_ih_l1
rnn.weight_hh_l1
rnn.bias_ih_l1
rnn.bias_hh_l1
rnn.weight_ih_l1_reverse
rnn.weight_hh_l1_reverse
rnn.bias_ih_l1_reverse
rnn.bias_hh_l1_reverse
out.weight
out.bias


In [22]:
param_optimizer = list(model.named_parameters())
to_decay = ["bias", "gamma", "beta"]
optimizer_grouped_parameters = [
    {
        "params": [p for n, p in param_optimizer if not any(nd in n for nd in to_decay)],
        "weight_decay_rate": 0.01
    },
    {
        "params": [p for n, p in param_optimizer if any(nd in n for nd in to_decay)],
        "weight_decay_rate": 0.00
    }
]

optimizer = AdamW(optimizer_grouped_parameters, lr=2e-5)

In [23]:
criterion = nn.BCEWithLogitsLoss()

model = model.to(config.device)
criterion = criterion.to(config.device)

In [18]:
class Trainer:
    def __init__(self, model, train_dataloader: DataLoader, valid_dataloader: DataLoader, 
                 criterion, optimizer, scheduler, config: ConfigExperiment, model_name: str):
        self.model = model
        self.train_dataloader = train_dataloader
        self.valid_dataloader = valid_dataloader
        self.criterion = criterion
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.device = config.device
        self.config = config
        self.threshold = 0.5
        self.model_name = model_name
        self.train_metrics = {
            'avg_loss': [],
            'accuracy': [],
            'f1': [],
        }
        self.valid_metrics = {
            'avg_loss': [],
            'accuracy': [],
            'f1': [],
        }
        self.counter = 0
        self.delta = config.early_stopping_delta
      
    def run(self):
        self.model.to(self.device)
        best_valid_loss = float('inf')
        best_target_metric = 0

        try:
            for i_epoch in tqdm(range(self.config.num_epochs), desc='Epochs', total=config.num_epochs, position=1, leave=True):
                start_time = time.time()

                train_loss, train_outputs, train_targets = self._train()
                valid_loss, valid_outputs, valid_targets = self._evaluate()
                    
                self.train_metrics["avg_loss"].append(train_loss)
                self.train_metrics["accuracy"].append(accuracy_score(train_targets, train_outputs.round() > self.threshold))
                self.train_metrics["f1"].append(f1_score(train_targets, train_outputs.round() > self.threshold, average="macro"))
                
                self.valid_metrics["avg_loss"].append(valid_loss)
                self.valid_metrics["accuracy"].append(accuracy_score(valid_targets, valid_outputs.round() > self.threshold))
                self.valid_metrics["f1"].append(f1_score(valid_targets, valid_outputs.round() > self.threshold, average="macro"))
                
                end_time = time.time()
                epoch_mins, epoch_secs = self._epoch_time(start_time, end_time)
                self.print_progress(i_epoch, epoch_mins, epoch_secs)
                
                if self.scheduler:
                    self.scheduler.step(self.valid_metrics["f1"][-1])
                
                if valid_loss < best_valid_loss:
                    best_valid_loss = valid_loss
                    torch.save(model.state_dict(), f"{config.save_dirname}/{self.model_name}.pth")
                    
                if self.valid_metrics["f1"][-1] > best_target_metric:
                    self.counter = 0
                    best_target_metric = self.valid_metrics["f1"][-1]
                    torch.save(model.state_dict(), f"{config.save_dirname}/{self.model_name}.pth")
                else:
                    self.counter += 1
                    
                if self.counter > self.config.patience:
                    print("EarlyStopping")
                    break
        except KeyboardInterrupt:
            pass
        
        return self.train_metrics, self.valid_metrics
        
    def _train(self):
        model.train()
        epoch_loss = 0
        epoch_output = None
        epoch_target = None
        for i, batch in tqdm(enumerate(self.train_dataloader), desc='Train', total=len(self.train_dataloader), position=2, leave=True):
            loss_iten, outputs = self._train_process(batch)
            epoch_loss += loss_iten 

            if epoch_output is None:
                epoch_output = outputs.cpu().data
            else:
                epoch_output = torch.cat((epoch_output, outputs.cpu().data))

            if epoch_target is None:
                epoch_target = batch.label.cpu().data
            else:
                epoch_target = torch.cat((epoch_target, batch.label.cpu().data))
            
        return epoch_loss / len(self.train_dataloader), epoch_output, epoch_target
    
    def _train_process(self, batch):      
        self.optimizer.zero_grad()
        outputs = self.model(batch.text).squeeze(1)
        loss = self.criterion(outputs, batch.label)
        loss.backward()
        self.optimizer.step()
        return loss.item(), outputs
            
    def _evaluate(self):
        self.model.eval()
        epoch_loss = 0
        epoch_output = None
        epoch_target = None
        with torch.no_grad():
            for i, batch in tqdm(enumerate(self.valid_dataloader), desc='Valid', total=len(self.valid_dataloader), position=3, leave=True):
                outputs = self.model(batch.text).squeeze(1)
                loss = criterion(outputs, batch.label)
                epoch_loss += loss.item()
                
                if epoch_output is None:
                    epoch_output = outputs.cpu().data
                else:
                    epoch_output = torch.cat((epoch_output, outputs.cpu().data))

                if epoch_target is None:
                    epoch_target = batch.label.cpu().data
                else:
                    epoch_target = torch.cat((epoch_target, batch.label.cpu().data))

        return epoch_loss / len(self.valid_dataloader), epoch_output, epoch_target
 
    def _epoch_time(self, start_time, end_time):
        elapsed_time = end_time - start_time
        elapsed_mins = int(elapsed_time / 60)
        elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
        return elapsed_mins, elapsed_secs

    def print_progress(self, i_epoch, epoch_mins, epoch_secs):
        if type(i_epoch) != str:
            i_epoch = i_epoch + 1
            print(f"Epoch: {i_epoch:02} | Time: {epoch_mins}m {epoch_secs}s")
            print("Training Results - Average Loss: {:.4f} | accuracy: {:.4f} | f1: {:.4f}"
                .format(
                    self.train_metrics['avg_loss'][-1], 
                    self.train_metrics['accuracy'][-1],
                    self.train_metrics['f1'][-1],
                ))      
        else:
            print(f"Epoch: {i_epoch} | Time: {epoch_mins}m {epoch_secs}s")
        print("Evaluating Results - Average Loss: {:.4f} | accuracy: {:.4f} | f1: {:.4f}"
            .format( 
                self.valid_metrics['avg_loss'][-1],
                self.valid_metrics['accuracy'][-1],
                self.valid_metrics['f1'][-1],
            ))
        print()

    def set_model(self, model: nn.Module):
        self.model = model
        
    def evaluate(self, dataloader: DataLoader):
        self.valid_dataloader = dataloader
        self.model.to(self.device)
        start_time = time.time()

        valid_loss, valid_outputs, valid_targets = self._evaluate()

        self.valid_metrics["avg_loss"].append(valid_loss)
        self.valid_metrics["accuracy"].append(accuracy_score(valid_targets, valid_outputs.round() > self.threshold))
        self.valid_metrics["f1"].append(f1_score(valid_targets, valid_outputs.round() > self.threshold, average="macro"))

        end_time = time.time()
        epoch_mins, epoch_secs = self._epoch_time(start_time, end_time)
        self.print_progress("evaluate", epoch_mins, epoch_secs)


In [25]:
trainer = Trainer(model, train_iterator, valid_iterator, criterion, optimizer, None, config, "10_bert_classification_with_gru")
trainer.run();

HBox(children=(FloatProgress(value=0.0, description='Epochs', max=3.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Train', max=4254.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='Valid', max=1418.0, style=ProgressStyle(description_width…


Epoch: 01 | Time: 7m 25s
Training Results - Average Loss: 0.1454 | accuracy: 0.9311 | f1: 0.9311
Evaluating Results - Average Loss: 0.0603 | accuracy: 0.9726 | f1: 0.9726



HBox(children=(FloatProgress(value=0.0, description='Train', max=4254.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='Valid', max=1418.0, style=ProgressStyle(description_width…


Epoch: 02 | Time: 7m 29s
Training Results - Average Loss: 0.0601 | accuracy: 0.9740 | f1: 0.9740
Evaluating Results - Average Loss: 0.0417 | accuracy: 0.9813 | f1: 0.9813



HBox(children=(FloatProgress(value=0.0, description='Train', max=4254.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='Valid', max=1418.0, style=ProgressStyle(description_width…


Epoch: 03 | Time: 7m 33s
Training Results - Average Loss: 0.0449 | accuracy: 0.9810 | f1: 0.9810
Evaluating Results - Average Loss: 0.0304 | accuracy: 0.9873 | f1: 0.9873




In [29]:
trainer = Trainer(model, train_iterator, valid_iterator, criterion, optimizer, None, config, "10_bert_classification_with_gru")
model.load_state_dict(torch.load(f'{config.save_dirname}/10_bert_classification_with_gru.pth'))
trainer.set_model(model)
trainer.evaluate(test_iterator)

HBox(children=(FloatProgress(value=0.0, description='Valid', max=1418.0, style=ProgressStyle(description_width…


Epoch: evaluate | Time: 1m 36s
Evaluating Results - Average Loss: 0.0302 | accuracy: 0.9874 | f1: 0.9874



In [26]:
bert = BertModel.from_pretrained("DeepPavlov/rubert-base-cased")

model = BERTGRUSentiment(bert, HIDDEN_DIM, OUTPUT_DIM, N_LAYERS, BIDIRECTIONAL, DROPOUT)

model.load_state_dict(torch.load(f'{config.save_dirname}/10_bert_classification_with_gru.pth'))

for name, param in list(model.named_parameters())[150:]:                
    if name.startswith('bert'):
        param.requires_grad = True
        
print(f'The model has {count_parameters(model):,} trainable parameters')

RuntimeError: CUDA out of memory. Tried to allocate 352.00 MiB (GPU 0; 7.93 GiB total capacity; 2.69 GiB already allocated; 353.88 MiB free; 2.82 GiB reserved in total by PyTorch)

In [None]:
param_optimizer = list(model.named_parameters())
to_decay = ["bias", "gamma", "beta"]
optimizer_grouped_parameters = [
    {
        "params": [p for n, p in param_optimizer if not any(nd in n for nd in to_decay)],
        "weight_decay_rate": 0.01
    },
    {
        "params": [p for n, p in param_optimizer if any(nd in n for nd in to_decay)],
        "weight_decay_rate": 0.00
    }
]

optimizer = AdamW(optimizer_grouped_parameters, lr=2e-5)


criterion = nn.BCEWithLogitsLoss()

model = model.to(config.device)
criterion = criterion.to(config.device)

In [None]:
config.batch_size = 4

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    sort_key=lambda x: x.text,
    batch_size=config.batch_size,
    device=config.device)

In [25]:
trainer = Trainer(model, train_iterator, valid_iterator, criterion, optimizer, None, config, "10_bert_classification_with_gru")
trainer.run();

HBox(children=(FloatProgress(value=0.0, description='Epochs', max=3.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Train', max=34025.0, style=ProgressStyle(description_widt…





RuntimeError: CUDA out of memory. Tried to allocate 352.00 MiB (GPU 0; 7.93 GiB total capacity; 2.69 GiB already allocated; 353.88 MiB free; 2.82 GiB reserved in total by PyTorch) (malloc at /pytorch/c10/cuda/CUDACachingAllocator.cpp:289)
frame #0: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x46 (0x7f4ebb66f536 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libc10.so)
frame #1: <unknown function> + 0x1cf1e (0x7f4ebb8b8f1e in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libc10_cuda.so)
frame #2: <unknown function> + 0x1df9e (0x7f4ebb8b9f9e in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libc10_cuda.so)
frame #3: at::native::empty_cuda(c10::ArrayRef<long>, c10::TensorOptions const&, c10::optional<c10::MemoryFormat>) + 0x135 (0x7f4ebe4479e5 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cuda.so)
frame #4: <unknown function> + 0xf688bb (0x7f4ebca338bb in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cuda.so)
frame #5: <unknown function> + 0xfb21a7 (0x7f4ebca7d1a7 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cuda.so)
frame #6: <unknown function> + 0x1073c49 (0x7f4ef9350c49 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #7: <unknown function> + 0x1073f87 (0x7f4ef9350f87 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #8: <unknown function> + 0xe1ff1e (0x7f4ef90fcf1e in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #9: at::native::zeros(c10::ArrayRef<long>, c10::TensorOptions const&) + 0x2d (0x7f4ef91017dd in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #10: <unknown function> + 0x113f0f3 (0x7f4ef941c0f3 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #11: <unknown function> + 0x1074339 (0x7f4ef9351339 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #12: <unknown function> + 0x1071922 (0x7f4ef934e922 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #13: <unknown function> + 0x1074339 (0x7f4ef9351339 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #14: at::native::embedding_dense_backward_cuda(at::Tensor const&, at::Tensor const&, long, long, bool) + 0xafa (0x7f4ebdef1e7a in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cuda.so)
frame #15: <unknown function> + 0xf6ff2c (0x7f4ebca3af2c in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cuda.so)
frame #16: <unknown function> + 0x10c42fc (0x7f4ef93a12fc in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #17: <unknown function> + 0x2aa51d1 (0x7f4efad821d1 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #18: <unknown function> + 0x10c42fc (0x7f4ef93a12fc in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #19: at::native::embedding_backward(at::Tensor const&, at::Tensor const&, long, long, bool, bool) + 0x124 (0x7f4ef8eed954 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #20: <unknown function> + 0x114f290 (0x7f4ef942c290 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #21: <unknown function> + 0x2c4e76a (0x7f4efaf2b76a in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #22: <unknown function> + 0x1187b89 (0x7f4ef9464b89 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #23: torch::autograd::generated::EmbeddingBackward::apply(std::vector<at::Tensor, std::allocator<at::Tensor> >&&) + 0x1cd (0x7f4efab8497d in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #24: <unknown function> + 0x2d89705 (0x7f4efb066705 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #25: torch::autograd::Engine::evaluate_function(std::shared_ptr<torch::autograd::GraphTask>&, torch::autograd::Node*, torch::autograd::InputBuffer&) + 0x16f3 (0x7f4efb063a03 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #26: torch::autograd::Engine::thread_main(std::shared_ptr<torch::autograd::GraphTask> const&, bool) + 0x3d2 (0x7f4efb0647e2 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #27: torch::autograd::Engine::thread_init(int) + 0x39 (0x7f4efb05ce59 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #28: torch::autograd::python::PythonEngine::thread_init(int) + 0x38 (0x7f4f079a05f8 in /home/science/projects/nlp_university/venv/lib/python3.8/site-packages/torch/lib/libtorch_python.so)
frame #29: <unknown function> + 0xbd6df (0x7f4f7a2506df in /usr/lib/x86_64-linux-gnu/libstdc++.so.6)
frame #30: <unknown function> + 0x76db (0x7f4f824d46db in /lib/x86_64-linux-gnu/libpthread.so.0)
frame #31: clone + 0x3f (0x7f4f81a58a3f in /lib/x86_64-linux-gnu/libc.so.6)


In [40]:
trainer = Trainer(model, train_iterator, valid_iterator, criterion, optimizer, None, config, "10_bert_classification_with_gru")
model.load_state_dict(torch.load(f'{config.save_dirname}/10_bert_classification_with_gru.pth'))
trainer.set_model(model)
trainer.evaluate(test_iterator)

HBox(children=(FloatProgress(value=0.0, description='Valid', max=2836.0, style=ProgressStyle(description_width…


Epoch: evaluate | Time: 1m 42s
Evaluating Results - Average Loss: 0.0204 | accuracy: 0.9912 | f1: 0.9912

