# Обучение нейронной сети для решения задачи NER

In [3]:
import pandas as pd
from tqdm import tqdm
import json
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel, AutoModelForTokenClassification
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ExponentialLR
from sklearn.metrics import accuracy_score, f1_score

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
with open('/content/drive/MyDrive/data4_allnames.json', 'r') as json_file:
    data = json.load(json_file)['data']

In [6]:
tokenizer = AutoTokenizer.from_pretrained('DeepPavlov/xlm-roberta-large-en-ru')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/582 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/944k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.51M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/238 [00:00<?, ?B/s]

In [7]:
class dataset(Dataset):
    def __init__(self, data: dict, tokenizer, max_len: int):
        self.len = len(data)
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len
        print('Label prepare')
        self.__prepare_labels__()

    def __prepare_labels__(self) -> list:
        labels = []
        tokens_l = []
        for i in tqdm(range(0, len(self.data))):
            text = self.data[i]['text']
            name = self.data[i]['name']
            name.sort()
            startidx = 0
            IO = []
            tokens_t = []
            for j in range(len(name)):
                tokens = self.tokenizer.tokenize(text[startidx:name[j][0]])
                IO.extend([0]*len(tokens))
                tokens_t.extend(tokens)

                tokens = self.tokenizer.tokenize(text[name[j][0]:name[j][1]])
                IO.extend([1]*len(tokens))
                tokens_t.extend(tokens)
                startidx = name[j][1]
            tokens = self.tokenizer.tokenize(text[startidx:])
            IO.extend([0]*len(tokens))
            tokens_t.extend(tokens)

            labels.append(IO)
            tokens_l.append(tokens_t)
        self.labels = labels
        self.tokens = tokens_l

    def __getitem__(self, idx):
        tokenized_text = self.tokens[idx]
        labels = self.labels[idx]
        maxlen = self.max_len

        if (len(tokenized_text) > maxlen):
          tokenized_text = tokenized_text[:maxlen]
          labels = labels[:maxlen]
        else:
          tokenized_text = tokenized_text + ['[PAD]' for _ in range(maxlen - len(tokenized_text))]
          labels = labels + [0 for _ in range(maxlen - len(labels))]

        attn_mask = [1 if tok != '[PAD]' else 0 for tok in tokenized_text]

        ids = self.tokenizer.convert_tokens_to_ids(tokenized_text)

        return {
              'ids': torch.tensor(ids, dtype=torch.long),
              'mask': torch.tensor(attn_mask, dtype=torch.long),
              'targets': torch.tensor(labels, dtype=torch.long)
        }

    def __len__(self):
        return self.len

In [8]:
train_data, val_data = train_test_split(data,
                                        random_state = 77,
                                        test_size = 0.3)

In [9]:
MAX_LEN = 512
train_dataset = dataset(train_data, tokenizer, MAX_LEN)
val_dataset = dataset(val_data, tokenizer, MAX_LEN)

Label prepare


100%|██████████| 11091/11091 [00:16<00:00, 666.62it/s]


Label prepare


100%|██████████| 4754/4754 [00:08<00:00, 589.25it/s]


In [12]:
train_params = {'batch_size': 3,
                }

test_params = {'batch_size': 3,
                }

train_loader = DataLoader(train_dataset, shuffle=True, **train_params)
val_loader = DataLoader(val_dataset, **test_params)

In [13]:
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'

In [14]:
model = AutoModelForTokenClassification.from_pretrained('DeepPavlov/xlm-roberta-large-en-ru',
                                                   num_labels=2)
model.to(device)

config.json:   0%|          | 0.00/722 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.36G [00:00<?, ?B/s]

Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at DeepPavlov/xlm-roberta-large-en-ru and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


XLMRobertaForTokenClassification(
  (roberta): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(35054, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): XLMRobertaSelfOutput(
              (dense): Linear(in_features=1024, out_featur

In [15]:
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.00001)
scheduler = ExponentialLR(optimizer, gamma=0.9)

In [16]:
def train():
    tr_accuracy = []
    nb_tr_examples, nb_tr_steps = 0, 0
    tr_loss = []
    model.train()
    acc = []
    f1 = []

    for idx, batch in tqdm(enumerate(train_loader)):

        ids = batch['ids'].to(device, dtype = torch.long)
        mask = batch['mask'].to(device, dtype = torch.long)
        targets = batch['targets'].to(device, dtype = torch.long)

        outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
        loss, tr_logits = outputs.loss, outputs.logits
        tr_loss.append(loss.item())

        nb_tr_steps += 1

        # compute training accuracy
        flattened_targets = targets.view(-1) # shape (batch_size * seq_len,)
        active_logits = tr_logits.view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
        flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
        # now, use mask to determine where we should compare predictions with targets
        active_accuracy = mask.view(-1) == 1 # active accuracy is also of shape (batch_size * seq_len,)
        targets = torch.masked_select(flattened_targets, active_accuracy)
        predictions = torch.masked_select(flattened_predictions, active_accuracy)

        acc.append(accuracy_score(targets.to('cpu').numpy(), predictions.to('cpu').numpy()))
        f1.append(f1_score(targets.to('cpu').numpy(), predictions.to('cpu').numpy()))

        if len(tr_loss) % 50 == 0:
            print("Training loss: ", sum(tr_loss[-50:]) / len(tr_loss[-50:]))
            print("Training accuracy: ", sum(acc[-50]) / len(acc[-50:]))
            print("Training F1: ", sum(f1[-50:]) / len(f1[-50:]))

        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    scheduler.step()

    epoch_loss = sum(tr_loss[-32:]) / len(tr_loss[-32:])
    print(f"Training loss epoch: {epoch_loss}")
    print(f"Training Accuracy epoch: {acc_sum/acc_amount}")

In [17]:
train()

50it [00:48,  1.04it/s]

Training loss:  0.08912918768823147
Training accuracy:  0.9091251702725777
Training F1:  0.008


99it [01:36,  1.01it/s]

Training loss:  0.03790824256837368
Training accuracy:  0.9611403279706139
Training F1:  0.6606414434830158


149it [02:27,  1.01s/it]

Training loss:  0.022870921343564988
Training accuracy:  0.9731617881456378
Training F1:  0.8196245526602866


199it [03:17,  1.01s/it]

Training loss:  0.01890181325376034
Training accuracy:  0.979934439149168
Training F1:  0.8573548303670862


249it [04:08,  1.01s/it]

Training loss:  0.016670558694750072
Training accuracy:  0.9814269542126732
Training F1:  0.8803834338780052


299it [04:58,  1.01s/it]

Training loss:  0.010105416625738144
Training accuracy:  0.986950239184661
Training F1:  0.9276454804307783


349it [05:49,  1.01s/it]

Training loss:  0.012206819241400808
Training accuracy:  0.9902562921942755
Training F1:  0.9383540504704034


399it [06:40,  1.01s/it]

Training loss:  0.01004419445991516
Training accuracy:  0.9938658651035187
Training F1:  0.9436125190296059


449it [07:30,  1.01s/it]

Training loss:  0.025220740241929888
Training accuracy:  0.9833011388385963
Training F1:  0.8986641276717622


499it [08:21,  1.01s/it]

Training loss:  0.018069524466991425
Training accuracy:  0.9767311781472445
Training F1:  0.8696189597306385


549it [09:11,  1.01s/it]

Training loss:  0.01079185972455889
Training accuracy:  0.9918513118849205
Training F1:  0.9412606698991814


599it [10:02,  1.01s/it]

Training loss:  0.0060440999222919345
Training accuracy:  0.9942982484211703
Training F1:  0.9640277586768906


649it [10:52,  1.01s/it]

Training loss:  0.007102389249484986
Training accuracy:  0.9923865133470055
Training F1:  0.9461052785615788


699it [11:43,  1.01s/it]

Training loss:  0.006832580544287339
Training accuracy:  0.9943309503499781
Training F1:  0.9684571712619885


749it [12:33,  1.01s/it]

Training loss:  0.005269959655124694
Training accuracy:  0.9953987579331447
Training F1:  0.970221066866614


799it [13:24,  1.01s/it]

Training loss:  0.006650964936707169
Training accuracy:  0.9948771641559029
Training F1:  0.9621655357569164


849it [14:14,  1.01s/it]

Training loss:  0.006747743673622608
Training accuracy:  0.9906420198232165
Training F1:  0.9441289667102424


899it [15:05,  1.02s/it]

Training loss:  0.009390078706201167
Training accuracy:  0.9910544982847082
Training F1:  0.9490192539884535


949it [15:56,  1.01s/it]

Training loss:  0.006082221149699763
Training accuracy:  0.9955734554499028
Training F1:  0.9756183653691308


999it [16:46,  1.01s/it]

Training loss:  0.004842377952300012
Training accuracy:  0.9967332775324845
Training F1:  0.9764862624335536


1049it [17:37,  1.01s/it]

Training loss:  0.005891124026384205
Training accuracy:  0.9955567103903743
Training F1:  0.9675984892989499


1099it [18:27,  1.01s/it]

Training loss:  0.0054508237028494475
Training accuracy:  0.9958365999268162
Training F1:  0.9573632274591762


1149it [19:18,  1.01s/it]

Training loss:  0.004034022653941065
Training accuracy:  0.9954863196941628
Training F1:  0.9733361999675965


1199it [20:09,  1.01s/it]

Training loss:  0.00549261984298937
Training accuracy:  0.9970704591276014
Training F1:  0.9823484491578376


1249it [20:59,  1.01s/it]

Training loss:  0.004848419707268476
Training accuracy:  0.9957534541781322
Training F1:  0.9701348391056025


1299it [21:50,  1.08s/it]

Training loss:  0.006641539545962587
Training accuracy:  0.9921863195846385
Training F1:  0.9681614951168538


1349it [22:40,  1.02s/it]

Training loss:  0.0048892174707725645
Training accuracy:  0.9963322845028059
Training F1:  0.9789490993282333


1399it [23:31,  1.01s/it]

Training loss:  0.004479388244799338
Training accuracy:  0.9970716731918162
Training F1:  0.9756688166474576


1449it [24:21,  1.01s/it]

Training loss:  0.0038099805638194084
Training accuracy:  0.9966577232829293
Training F1:  0.9746849639872905


1499it [25:12,  1.01s/it]

Training loss:  0.0041367380711017174
Training accuracy:  0.9970273143189928
Training F1:  0.9796708880181189


1549it [26:03,  1.01s/it]

Training loss:  0.004999417343642562
Training accuracy:  0.9954027560516705
Training F1:  0.9706078685643366


1599it [26:53,  1.01s/it]

Training loss:  0.004255875666858628
Training accuracy:  0.9973833128409986
Training F1:  0.9802280156795605


1649it [27:44,  1.01s/it]

Training loss:  0.0027266331808641553
Training accuracy:  0.9960671234853329
Training F1:  0.9807570987064266


1699it [28:34,  1.01s/it]

Training loss:  0.0067132398847024885
Training accuracy:  0.9945401941795836
Training F1:  0.9667015872633519


1749it [29:25,  1.01s/it]

Training loss:  0.0030337096948642285
Training accuracy:  0.9978118282703514
Training F1:  0.9848693486491932


1799it [30:16,  1.02s/it]

Training loss:  0.0022248192246479446
Training accuracy:  0.9986041683750517
Training F1:  0.9900449063136689


1849it [31:06,  1.01s/it]

Training loss:  0.003805793414940126
Training accuracy:  0.996709329321578
Training F1:  0.9822602053508714


1899it [31:57,  1.01s/it]

Training loss:  0.004421068640658632
Training accuracy:  0.9965462662815625
Training F1:  0.9760330998513059


1949it [32:47,  1.02s/it]

Training loss:  0.0025012290294398553
Training accuracy:  0.9962270454354732
Training F1:  0.9830437430965131


1999it [33:38,  1.01s/it]

Training loss:  0.0017431196017423645
Training accuracy:  0.9979862588832724
Training F1:  0.9909860179095868


2049it [34:28,  1.01s/it]

Training loss:  0.003150752488290891
Training accuracy:  0.9958720621187662
Training F1:  0.9755401495915267


2099it [35:19,  1.01s/it]

Training loss:  0.0019689688517246394
Training accuracy:  0.9982528338327953
Training F1:  0.9888926605977757


2149it [36:10,  1.01s/it]

Training loss:  0.0017723110454971902
Training accuracy:  0.9977873840523073
Training F1:  0.9858653187389114


2199it [37:00,  1.01s/it]

Training loss:  0.0030578362883534283
Training accuracy:  0.9983250051858417
Training F1:  0.9853920774562593


2249it [37:51,  1.02s/it]

Training loss:  0.0030681589522282594
Training accuracy:  0.9964443513982877
Training F1:  0.9826371964221171


2299it [38:42,  1.02s/it]

Training loss:  0.001798958815925289
Training accuracy:  0.9982580877691017
Training F1:  0.9922808732214251


2349it [39:32,  1.01s/it]

Training loss:  0.002216384870989714
Training accuracy:  0.998551370599744
Training F1:  0.9906848310344859


2399it [40:23,  1.01s/it]

Training loss:  0.0027629442285979165
Training accuracy:  0.9960685711611105
Training F1:  0.9808816316557166


2449it [41:13,  1.02s/it]

Training loss:  0.0016847152667469345
Training accuracy:  0.9985943121895585
Training F1:  0.990758756081357


2499it [42:04,  1.01s/it]

Training loss:  0.002098030738416128
Training accuracy:  0.9981275982421097
Training F1:  0.9902054985445483


2549it [42:55,  1.01s/it]

Training loss:  0.0017871150749124353
Training accuracy:  0.997690441253514
Training F1:  0.9876398916241206


2599it [43:45,  1.01s/it]

Training loss:  0.0021024866958032364
Training accuracy:  0.9984543189571415
Training F1:  0.9872072246880848


2649it [44:36,  1.01s/it]

Training loss:  0.002223971162457019
Training accuracy:  0.9988683553880136
Training F1:  0.9888996005049137


2699it [45:26,  1.01s/it]

Training loss:  0.0020005394850159063
Training accuracy:  0.9972287035599926
Training F1:  0.9818212633349032


2749it [46:17,  1.02s/it]

Training loss:  0.002996393121429719
Training accuracy:  0.9978406085932514
Training F1:  0.9840363093395845


2799it [47:07,  1.01s/it]

Training loss:  0.0030017132597276943
Training accuracy:  0.9965442181223664
Training F1:  0.9757955821781377


2849it [47:58,  1.02s/it]

Training loss:  0.0010727626021252946
Training accuracy:  0.999307308269629
Training F1:  0.9908634270029714


2899it [48:49,  1.01s/it]

Training loss:  0.0012406721607840154
Training accuracy:  0.9988703699201659
Training F1:  0.9936040360925427


2949it [49:39,  1.01s/it]

Training loss:  0.0017173331754747779
Training accuracy:  0.9979485719783437
Training F1:  0.9837852662242906


2999it [50:30,  1.01s/it]

Training loss:  0.0025017880933592097
Training accuracy:  0.9988749687090775
Training F1:  0.9931453280729383


3049it [51:20,  1.01s/it]

Training loss:  0.005198499243124388
Training accuracy:  0.9973662135471912
Training F1:  0.9881920291511903


3099it [52:11,  1.01s/it]

Training loss:  0.0036646491230931133
Training accuracy:  0.9968157335436767
Training F1:  0.9834236579107888


3149it [53:02,  1.01s/it]

Training loss:  0.002245723991654813
Training accuracy:  0.9975161378770313
Training F1:  0.977703474085828


3199it [53:52,  1.01s/it]

Training loss:  0.0013973499440180603
Training accuracy:  0.998604758470147
Training F1:  0.9923316422672542


3249it [54:43,  1.02s/it]

Training loss:  0.0016466768804821186
Training accuracy:  0.9992807018519371
Training F1:  0.99517261749145


3299it [55:34,  1.01s/it]

Training loss:  0.0018404535559238865
Training accuracy:  0.998106801100315
Training F1:  0.9801988773163478


3349it [56:24,  1.02s/it]

Training loss:  0.0009067030888400041
Training accuracy:  0.9995317446972994
Training F1:  0.9961770100935752


3399it [57:15,  1.02s/it]

Training loss:  0.009782740254886449
Training accuracy:  0.9920833397028974
Training F1:  0.9463299938009606


3449it [58:06,  1.01s/it]

Training loss:  0.003278917629504576
Training accuracy:  0.9966255358236521
Training F1:  0.9802674009385594


3499it [58:56,  1.01s/it]

Training loss:  0.00261096729489509
Training accuracy:  0.9973925637521448
Training F1:  0.9863725428462051


3549it [59:47,  1.01s/it]

Training loss:  0.0022369440164766276
Training accuracy:  0.9978938532930828
Training F1:  0.9868224034923633


3599it [1:00:38,  1.01s/it]

Training loss:  0.0017181350031751207
Training accuracy:  0.9988047683479493
Training F1:  0.988881725215127


3649it [1:01:28,  1.01s/it]

Training loss:  0.0027727286165463738
Training accuracy:  0.9977795339511398
Training F1:  0.9863891905571383


3697it [1:02:17,  1.01s/it]

Training loss epoch: 0.002479028777770509





NameError: name 'acc_sum' is not defined

In [18]:
torch.save(model.state_dict(), '/content/drive/MyDrive/model_xlm_9864.pickle')

In [19]:
def val():
    tr_accuracy = []
    nb_tr_examples, nb_tr_steps = 0, 0
    tr_loss = []
    model.eval()
    acc = []
    f1 = []

    for idx, batch in tqdm(enumerate(val_loader)):

        ids = batch['ids'].to(device, dtype = torch.long)
        mask = batch['mask'].to(device, dtype = torch.long)
        targets = batch['targets'].to(device, dtype = torch.long)


        outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
        loss, tr_logits = outputs.loss, outputs.logits
        tr_loss.append(loss.item())

        nb_tr_steps += 1

        flattened_targets = targets.view(-1) # shape (batch_size * seq_len,)
        active_logits = tr_logits.view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
        flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
        # now, use mask to determine where we should compare predictions with targets
        active_accuracy = mask.view(-1) == 1 # active accuracy is also of shape (batch_size * seq_len,)
        targets = torch.masked_select(flattened_targets, active_accuracy)
        predictions = torch.masked_select(flattened_predictions, active_accuracy)

        acc.append(accuracy_score(targets.to('cpu').numpy(), predictions.to('cpu').numpy()))
        f1.append(f1_score(targets.to('cpu').numpy(), predictions.to('cpu').numpy(), average="macro"))

        if len(tr_loss) % 25 == 0:
            print("Val loss: ", sum(tr_loss[-25:]) / len(tr_loss[-25:]))
            print("Val accuracy: ", sum(acc[-25:]) / len(acc[-25:]))
            print("Val F1: ", sum(f1[-25:]) / len(f1[-25:]))

    epoch_loss = sum(tr_loss[-32:]) / len(tr_loss[-32:])
    print(f"Val loss epoch: {epoch_loss}")

In [21]:
val()

25it [00:07,  3.17it/s]

Val loss:  0.001369015403688536
Val accuracy:  0.9988904325579459
Val F1:  0.9962766109320836


50it [00:15,  3.21it/s]

Val loss:  0.0012710119283292443
Val accuracy:  0.9992749782370385
Val F1:  0.9978500800021165


75it [00:23,  3.19it/s]

Val loss:  0.0005652729573193938
Val accuracy:  0.9996174571377712
Val F1:  0.9986413727966658


100it [00:31,  3.18it/s]

Val loss:  0.0011441543184628243
Val accuracy:  0.9987759909603185
Val F1:  0.9958631084504891


125it [00:39,  3.17it/s]

Val loss:  0.001637979587976588
Val accuracy:  0.9990430764225359
Val F1:  0.9969553613061157


150it [00:47,  3.12it/s]

Val loss:  0.0018254056145815413
Val accuracy:  0.9986640725368516
Val F1:  0.9941291545274648


175it [00:55,  3.08it/s]

Val loss:  0.00098677971662255
Val accuracy:  0.9986666637860435
Val F1:  0.9958753517426331


200it [01:03,  3.08it/s]

Val loss:  0.0015896757980226538
Val accuracy:  0.9985274189885124
Val F1:  0.9960471471099326


225it [01:11,  3.05it/s]

Val loss:  0.00054762714804383
Val accuracy:  0.9994038987599447
Val F1:  0.997908527918548


250it [01:19,  3.06it/s]

Val loss:  0.002790802308009006
Val accuracy:  0.997781610290835
Val F1:  0.9926864577540107


275it [01:27,  3.01it/s]

Val loss:  0.0019167024281341583
Val accuracy:  0.9975221432271328
Val F1:  0.9934096346877473


300it [01:36,  2.96it/s]

Val loss:  0.001383317117288243
Val accuracy:  0.9985761086968706
Val F1:  0.9942394032410098


325it [01:44,  3.03it/s]

Val loss:  0.0015269340229860972
Val accuracy:  0.9986110968901872
Val F1:  0.9963258330655593


350it [01:52,  3.02it/s]

Val loss:  0.001337735232518753
Val accuracy:  0.9987595540286068
Val F1:  0.9920049129739357


375it [02:01,  3.03it/s]

Val loss:  0.0019095307230600155
Val accuracy:  0.9984062259370537
Val F1:  0.9942292059629401


400it [02:09,  3.05it/s]

Val loss:  0.0007396990159759298
Val accuracy:  0.9995153511046886
Val F1:  0.9973754699435611


425it [02:17,  3.05it/s]

Val loss:  0.0010617494060716126
Val accuracy:  0.9992949376352753
Val F1:  0.9968898665770928


450it [02:25,  3.04it/s]

Val loss:  0.0007523769281397108
Val accuracy:  0.9995466934809408
Val F1:  0.9979601422787241


475it [02:34,  3.01it/s]

Val loss:  0.0007915968450834043
Val accuracy:  0.9993191634989198
Val F1:  0.996541141465107


500it [02:42,  3.03it/s]

Val loss:  0.0023340504862426315
Val accuracy:  0.9985375669039565
Val F1:  0.9962440220291249


525it [02:50,  3.04it/s]

Val loss:  0.0019616673050040843
Val accuracy:  0.998047794183875
Val F1:  0.9914699344362041


550it [02:58,  3.03it/s]

Val loss:  0.002388266886409838
Val accuracy:  0.9989518337118269
Val F1:  0.9959869481094246


575it [03:07,  3.04it/s]

Val loss:  0.0012527088323258794
Val accuracy:  0.9981894152006467
Val F1:  0.9938583918657663


600it [03:15,  3.01it/s]

Val loss:  0.0011953983770217746
Val accuracy:  0.9989473108889254
Val F1:  0.9972867042434561


625it [03:23,  3.02it/s]

Val loss:  0.001847273627645336
Val accuracy:  0.9986666944490977
Val F1:  0.9943946998112938


650it [03:31,  3.02it/s]

Val loss:  0.0028519210674858185
Val accuracy:  0.997648057750547
Val F1:  0.9918353351422892


675it [03:40,  3.05it/s]

Val loss:  0.0004089089248736855
Val accuracy:  0.9995961614143433
Val F1:  0.999134532911448


700it [03:48,  3.04it/s]

Val loss:  0.0007352592104871292
Val accuracy:  0.999687866241372
Val F1:  0.9991777923782547


725it [03:56,  3.00it/s]

Val loss:  0.0009346883127000183
Val accuracy:  0.9988789155081971
Val F1:  0.9964430615324376


750it [04:05,  3.03it/s]

Val loss:  0.0016973443247843534
Val accuracy:  0.9986420188055543
Val F1:  0.9932238685749817


775it [04:13,  3.03it/s]

Val loss:  0.0017497982460190542
Val accuracy:  0.9981343263772038
Val F1:  0.9954288066684822


800it [04:21,  3.04it/s]

Val loss:  0.0020526319784403314
Val accuracy:  0.9974815626674892
Val F1:  0.9919660739832458


825it [04:29,  3.01it/s]

Val loss:  0.0013204361716634594
Val accuracy:  0.9989807292129308
Val F1:  0.9952313825271965


850it [04:38,  3.02it/s]

Val loss:  0.0014990641677286476
Val accuracy:  0.9986609164109018
Val F1:  0.9943350968324325


875it [04:46,  3.04it/s]

Val loss:  0.0011767973421956412
Val accuracy:  0.9989495057064747
Val F1:  0.9940608888407167


900it [04:54,  3.00it/s]

Val loss:  0.0018258472440356853
Val accuracy:  0.9989773648557967
Val F1:  0.9949873070886849


925it [05:02,  3.03it/s]

Val loss:  0.0014831781637622042
Val accuracy:  0.999237294572785
Val F1:  0.9973428580176267


950it [05:11,  3.02it/s]

Val loss:  0.0011672288803674746
Val accuracy:  0.9988817454989973
Val F1:  0.995676968397969


975it [05:19,  3.02it/s]

Val loss:  0.0016462452488485723
Val accuracy:  0.9968210781625654
Val F1:  0.9926683283619369


1000it [05:27,  3.04it/s]

Val loss:  0.001365056159265805
Val accuracy:  0.9981464158707554
Val F1:  0.9915308401714937


1025it [05:36,  3.00it/s]

Val loss:  0.0017031845623569097
Val accuracy:  0.9993378804551838
Val F1:  0.9975793314292645


1050it [05:44,  3.04it/s]

Val loss:  0.0007519905114895664
Val accuracy:  0.9993619996674343
Val F1:  0.997399504024759


1075it [05:52,  3.02it/s]

Val loss:  0.002200929993268801
Val accuracy:  0.9983048883025636
Val F1:  0.9941054858498312


1100it [06:00,  2.99it/s]

Val loss:  0.0005615275561285671
Val accuracy:  0.9996001501678182
Val F1:  0.9972561153235171


1125it [06:09,  3.02it/s]

Val loss:  0.002284587851027027
Val accuracy:  0.9985926551965689
Val F1:  0.9943722500744019


1150it [06:17,  3.01it/s]

Val loss:  0.001281121832143981
Val accuracy:  0.9992476966139754
Val F1:  0.9970373863871622


1175it [06:25,  3.01it/s]

Val loss:  0.0024910696054575967
Val accuracy:  0.9965255249651526
Val F1:  0.9867794134135832


1200it [06:34,  3.03it/s]

Val loss:  0.002764019733440364
Val accuracy:  0.9975719952187471
Val F1:  0.9900791735328227


1225it [06:42,  3.00it/s]

Val loss:  0.002939423503703438
Val accuracy:  0.9980379749185434
Val F1:  0.9934042881355919


1250it [06:50,  3.03it/s]

Val loss:  0.0014169453442445957
Val accuracy:  0.9992728407820205
Val F1:  0.997661054089926


1275it [06:58,  3.03it/s]

Val loss:  0.001532842439773958
Val accuracy:  0.9985712899774997
Val F1:  0.9954696555635971


1300it [07:07,  3.01it/s]

Val loss:  0.002365685153490631
Val accuracy:  0.9982337838240274
Val F1:  0.9930963248989382


1325it [07:15,  3.02it/s]

Val loss:  0.0016335834647179582
Val accuracy:  0.999244163350721
Val F1:  0.9962093356468589


1350it [07:23,  3.01it/s]

Val loss:  0.0017256932771124412
Val accuracy:  0.9983912257864415
Val F1:  0.9940221060941116


1375it [07:31,  3.02it/s]

Val loss:  0.0022727877844590694
Val accuracy:  0.9983735302099828
Val F1:  0.9949972042971492


1400it [07:40,  3.04it/s]

Val loss:  0.0015288333575153956
Val accuracy:  0.998926892416176
Val F1:  0.9963544302164603


1425it [07:48,  3.02it/s]

Val loss:  0.0013824776969704545
Val accuracy:  0.9979005520939103
Val F1:  0.9940701137527991


1450it [07:56,  3.03it/s]

Val loss:  0.002207404871151084
Val accuracy:  0.9986108569582199
Val F1:  0.9962702853835862


1475it [08:04,  3.02it/s]

Val loss:  0.0011341351504961495
Val accuracy:  0.999227539634858
Val F1:  0.9979445246583364


1500it [08:13,  3.01it/s]

Val loss:  0.001961194614414126
Val accuracy:  0.9982650960975179
Val F1:  0.9932800366933715


1525it [08:21,  3.01it/s]

Val loss:  0.0015902023165835998
Val accuracy:  0.9973410689172947
Val F1:  0.9918713839134585


1550it [08:29,  3.04it/s]

Val loss:  0.0015839362939004786
Val accuracy:  0.9993699651087361
Val F1:  0.9971142282432217


1575it [08:37,  3.02it/s]

Val loss:  0.001382182610250311
Val accuracy:  0.9983589040811826
Val F1:  0.9940853994693647


1585it [08:41,  3.04it/s]

Val loss epoch: 0.0011662682424002924





In [22]:
for idx, batch in tqdm(enumerate(train_loader)):
    if idx==0:

        ids = batch['ids'].to(device, dtype = torch.long)
        mask = batch['mask'].to(device, dtype = torch.long)
        targets = batch['targets'].to(device, dtype = torch.long)

        outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
        loss, tr_logits = outputs.loss, outputs.logits

        flattened_targets = targets.view(-1) # shape (batch_size * seq_len,)
        active_logits = tr_logits.view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
        flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
    else: break

1it [00:00, 23.26it/s]


In [23]:
tr_logits[0][:100]

tensor([[ 3.4924, -4.1411],
        [-3.5146,  4.0253],
        [-3.6593,  4.0609],
        [ 3.6932, -5.0133],
        [ 4.3382, -5.2632],
        [ 4.7030, -5.1718],
        [ 3.6055, -4.3241],
        [ 4.3836, -4.9923],
        [ 1.4666, -2.1907],
        [-3.6613,  4.5043],
        [-2.8929,  4.0115],
        [ 3.8535, -4.8592],
        [ 4.5035, -5.1777],
        [ 4.3001, -5.1229],
        [ 4.6853, -5.3138],
        [ 3.9623, -4.3422],
        [ 3.9753, -4.3517],
        [ 3.9952, -4.7968],
        [-3.5200,  4.3695],
        [-3.1462,  4.0493],
        [ 3.7984, -5.2290],
        [ 4.3840, -5.1624],
        [ 5.1483, -5.8744],
        [ 4.8928, -5.6284],
        [ 5.1260, -5.6830],
        [ 4.8706, -5.8235],
        [ 5.0296, -5.7687],
        [ 4.9136, -5.5723],
        [ 4.9862, -5.9334],
        [ 3.9249, -4.6148],
        [ 1.4660, -2.1913],
        [ 4.8604, -5.7044],
        [ 4.5439, -5.3385],
        [ 5.3385, -5.9935],
        [ 4.8612, -5.7313],
        [ 5.0252, -6

In [24]:
flattened_targets[:100]

tensor([0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0], device='cuda:0')

In [25]:
targets[0][:100]

tensor([0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0], device='cuda:0')

In [27]:
for i in range(0, 512):
  if targets[0][i] == 1:
      m = tokenizer.decode(ids[0][i])
      print(m)

MT
SS
М
ТС
М
ТС
М
ТС


In [28]:
for i in range(0, 512):
  if flattened_predictions[i] == 1:
      m = tokenizer.decode(ids[0][i])
      print(m)

MT
SS
М
ТС
М
ТС
М
ТС
