In [1]:
import argparse
import glob
import logging
import os
import random

import numpy as np
import torch
from seqeval.metrics import precision_score, recall_score, f1_score
from tensorboardX import SummaryWriter
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange

from transformers import AdamW, WarmupLinearSchedule
from transformers import WEIGHTS_NAME, BertConfig, BertForTokenClassification, BertTokenizer
import pandas as pd

import logging
import os
from io import open

logger = logging.getLogger(__name__)

I1104 15:55:43.196853 140440240211712 file_utils.py:39] PyTorch version 1.2.0 available.
I1104 15:55:43.426519 140440240211712 modeling_xlnet.py:194] Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .


In [2]:
MODEL_CLASSES = {
    "bert": (BertConfig, BertForTokenClassification, BertTokenizer),
}

class InputExample(object):
    """A single training/test example for token classification."""

    def __init__(self, guid, words, labels):
        """Constructs a InputExample.
        Args:
            guid: Unique id for the example.
            words: list. The words of the sequence.
            labels: (Optional) list. The labels for each word of the sequence. This should be
            specified for train and dev examples, but not for test examples.
        """
        self.guid = guid
        self.words = words
        self.labels = labels


class InputFeatures(object):
    """A single set of features of data."""

    def __init__(self, input_ids, input_mask, segment_ids, label_ids):
        self.input_ids = input_ids
        self.input_mask = input_mask
        self.segment_ids = segment_ids
        self.label_ids = label_ids

class SentenceGetter(object):
    
    def __init__(self, data):
        self.n_sent = 1
        self.data = data
        self.empty = False
        agg_func = lambda s: [(w, t) for w, t in zip(s["text"].values.tolist(),
                                                           s["tag"].values.tolist())]
        self.grouped = self.data.groupby("sentence").apply(agg_func)
        self.sentences = [s for s in self.grouped]
    
    def get_next(self):
        try:
            s = self.grouped["Sentence: {}".format(self.n_sent)]
            self.n_sent += 1
            return s
        except:
            return None

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

In [3]:

device = "cuda"
n_gpu = 0

# Set seed
set_seed(42)

data = pd.read_csv("../train.txt", sep='\t', encoding="latin1").fillna(method="ffill")


In [4]:
getter = SentenceGetter(data)

getter.sentences[:2]
#Sacarle las oraciones que empiezan con "<", el autor, abstract, etc

[[('<', 'O'),
  ('?', 'O'),
  ('xml', 'O'),
  ('version=', 'O'),
  ('\t_\t_\tO\t0\n5\t1.0\t_\t_\tO\t0\n6\t', 'O'),
  ('encoding=', 'O'),
  ('\t_\t_\tO\t0\n9\tUTF-8\t_\t_\tO\t0\n10\t', 'O'),
  ('standalone=', 'O'),
  ('\t_\t_\tO\t0\n13\tno\t_\t_\tO\t0\n14\t', 'O'),
  ('?', 'O'),
  ('>', 'O')],
 [('<', 'O'),
  ('Document', 'O'),
  ('xmlns', 'O'),
  (':', 'O'),
  ('gate=', 'O'),
  ('\t_\t_\tO\t1\n23\thttp\t_\t_\tO\t1\n24\t:\t_\t_\tO\t1\n25\t//www.gate.ac.uk\t_\t_\tO\t1\n26\t',
   'O'),
  ('name=', 'O'),
  ('\t_\t_\tO\t1\n29\tA15_M06_Interactive_Motion_Generation_from_Examples_CITATION_PURPOSE_M_v1.xml\t_\t_\tO\t1\n30\t',
   'O'),
  ('>', 'O')]]

In [5]:
sentences = [[s[0] for s in sent] for sent in getter.sentences]
labels = [[s[1] for s in sent] for sent in getter.sentences]

num_labels = len(labels)

In [6]:
examples = [InputExample(guid, words, labels) for guid, (words, labels) in enumerate(zip(sentences, labels))]
print(examples[15].words)
print(examples[15].labels)

['They', 'are', 'generated', 'in', 'real', 'time', 'so', 'that', 'we', 'can', 'author', 'complex', 'motions', 'interactively', '.']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']


In [7]:
with torch.cuda.device(n_gpu):
    print(torch.cuda.current_device())

0


In [8]:
with torch.cuda.device(n_gpu):
    # Use cross entropy ignore index as padding label id so that only real label ids contribute to the loss later
    pad_token_label_id = CrossEntropyLoss().ignore_index

    model_type = "bert"
    config_class, model_class, tokenizer_class = MODEL_CLASSES[model_type]

    bert_type = "bert-large-uncased"
    config = config_class.from_pretrained(bert_type,
                                              num_labels=num_labels)
    tokenizer = tokenizer_class.from_pretrained(bert_type,
                                                    do_lower_case=True)
    model = model_class.from_pretrained(bert_type, from_tf=False,
                                            config=config)
    model.to(device)

    'done'

I1104 15:55:56.999062 140440240211712 configuration_utils.py:151] loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-config.json from cache at /home/dfurman/.cache/torch/transformers/6dfaed860471b03ab5b9acb6153bea82b6632fb9bbe514d3fff050fe1319ee6d.4c88e2dec8f8b017f319f6db2b157fee632c0860d9422e4851bd0d6999f9ce38
I1104 15:55:57.001684 140440240211712 configuration_utils.py:168] Model config {
  "attention_probs_dropout_prob": 0.1,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_labels": 16611,
  "output_attentions": false,
  "output_hidden_states": false,
  "output_past": true,
  "pruned_heads": {},
  "torchscript": false,
  "type_vocab_size": 2,
  "use_bfloat16": false,
  "vocab_size": 30522
}

I1104 15:55:

In [9]:
def train(train_dataset, model, tokenizer, labels, pad_token_label_id):
    """ Train the model """
    
    train_batch_size = 4
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=train_batch_size)

    num_train_epochs = 5.0
    t_total = len(train_dataloader) // num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {"params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
         "weight_decay": 0.1},
        {"params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": 0.0}
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=5e-5, eps=1e-8)
    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=0, t_total=t_total)

    # Train!
#     print("***** Running training *****")
#     print("  Num examples = %d", len(train_dataset))
#     print("  Num Epochs = %d", num_train_epochs)
#     print("  Total optimization steps = %d", t_total)

    global_step = 0
    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    train_iterator = trange(int(num_train_epochs), desc="Epoch", disable=False)
    set_seed(42)  # Added here for reproductibility (even between python 2 and 3)
    for _ in train_iterator:
        epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=False)
        for step, batch in enumerate(epoch_iterator):
            model.train()
            batch = tuple(t.to(device) for t in batch)
            inputs = {"input_ids": batch[0],
                        "attention_mask": batch[1],
                        "token_type_ids": batch[2],
                        # XLM and RoBERTa don"t use segment_ids
                        "labels": batch[3]
            }
            outputs = model(**inputs)
            loss = outputs[0]  # model outputs are always tuple in pytorch-transformers (see doc)
            loss.backwards()
            scheduler.step()  # Update learning rate schedule
            optimizer.step()
            model.zero_grad()
            global_step += 1

    return global_step, tr_loss / global_step

In [7]:
# def load_and_cache_examples(tokenizer, labels, pad_token_label_id, mode):

#     # Load data features from cache or dataset file
#     cached_features_file = "cached_{}_{}_{}".format(mode, "bert-large-cased", 300)

#     logger.info("Creating features from dataset file")
#     features = convert_examples_to_features(examples, labels, 300, tokenizer,
#                                                 cls_token_at_end=False,
#                                                 # xlnet has a cls token at the end
#                                                 cls_token=tokenizer.cls_token,
#                                                 cls_token_segment_id=0,
#                                                 sep_token=tokenizer.sep_token,
#                                                 sep_token_extra=False,
#                                                 # roberta uses an extra separator b/w pairs of sentences, cf. github.com/pytorch/fairseq/commit/1684e166e3da03f5b600dbb7855cb98ddfcd0805
#                                                 pad_on_left=False,
#                                                 # pad on the left for xlnet
#                                                 pad_token=tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0],
#                                                 pad_token_segment_id=0,
#                                                 pad_token_label_id=pad_token_label_id
#                                                 )
#     torch.save(features, cached_features_file)


#     # Convert to Tensors and build dataset
#     all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
#     all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
#     all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
#     all_label_ids = torch.tensor([f.label_ids for f in features], dtype=torch.long)

#     dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
#     return dataset

In [8]:
# def read_examples_from_file(data_dir, mode):
#     file_path = "../{}.txt".format(mode)
#     guid_index = 1
#     examples = []
#     with open(file_path, encoding="utf-8") as f:
#         words = []
#         labels = []
#         for line in f:
#             if line.startswith("-DOCSTART-") or line == "" or line == "\n":
#                 if words:
#                     examples.append(InputExample(guid="{}-{}".format(mode, guid_index),
#                                                  words=words,
#                                                  labels=labels))
#                     guid_index += 1
#                     words = []
#                     labels = []
#             else:
#                 splits = line.split(" ")
#                 words.append(splits[0])
#                 if len(splits) > 1:
#                     labels.append(splits[-1].replace("\n", ""))
#                 else:
#                     # Examples could have no label for mode = "test"
#                     labels.append("O")
#         if words:
#             examples.append(InputExample(guid="%s-%d".format(mode, guid_index),
#                                          words=words,
#                                          labels=labels))
#     return examples

In [10]:
def convert_examples_to_features(examples,
                                 label_list,
                                 max_seq_length,
                                 tokenizer,
                                 cls_token_at_end=False,
                                 cls_token="[CLS]",
                                 cls_token_segment_id=1,
                                 sep_token="[SEP]",
                                 sep_token_extra=False,
                                 pad_on_left=False,
                                 pad_token=0,
                                 pad_token_segment_id=0,
                                 pad_token_label_id=-1,
                                 sequence_a_segment_id=0,
                                 mask_padding_with_zero=True):
    """ Loads a data file into a list of `InputBatch`s
        `cls_token_at_end` define the location of the CLS token:
            - False (Default, BERT/XLM pattern): [CLS] + A + [SEP] + B + [SEP]
            - True (XLNet/GPT pattern): A + [SEP] + B + [SEP] + [CLS]
        `cls_token_segment_id` define the segment id associated to the CLS token (0 for BERT, 2 for XLNet)
    """

    label_map = {label: i for i, label in enumerate(label_list)}

    features = []
    for (ex_index, example) in enumerate(examples):
#         if ex_index % 10000 == 0:
#             print("Writing example {} of {}".format(ex_index, len(examples)))
#             print("E.g: {}".format(example.words))

        tokens = []
        label_ids = []
        for word, label in zip(example.words, example.labels):
            word_tokens = tokenizer.tokenize(word)
            tokens.extend(word_tokens)
            # Use the real label id for the first token of the word, and padding ids for the remaining tokens
            label_ids.extend([label_map[label]] + [pad_token_label_id] * (len(word_tokens) - 1))

        # Account for [CLS] and [SEP] with "- 2" and with "- 3" for RoBERTa.
        special_tokens_count = 3 if sep_token_extra else 2
        if len(tokens) > max_seq_length - special_tokens_count:
            tokens = tokens[:(max_seq_length - special_tokens_count)]
            label_ids = label_ids[:(max_seq_length - special_tokens_count)]

        # The convention in BERT is:
        # (a) For sequence pairs:
        #  tokens:   [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
        #  type_ids:   0   0  0    0    0     0       0   0   1  1  1  1   1   1
        # (b) For single sequences:
        #  tokens:   [CLS] the dog is hairy . [SEP]
        #  type_ids:   0   0   0   0  0     0   0
        #
        # Where "type_ids" are used to indicate whether this is the first
        # sequence or the second sequence. The embedding vectors for `type=0` and
        # `type=1` were learned during pre-training and are added to the wordpiece
        # embedding vector (and position vector). This is not *strictly* necessary
        # since the [SEP] token unambiguously separates the sequences, but it makes
        # it easier for the model to learn the concept of sequences.
        #
        # For classification tasks, the first vector (corresponding to [CLS]) is
        # used as as the "sentence vector". Note that this only makes sense because
        # the entire model is fine-tuned.
        tokens += [sep_token]
        label_ids += [pad_token_label_id]
        if sep_token_extra:
            # roberta uses an extra separator b/w pairs of sentences
            tokens += [sep_token]
            label_ids += [pad_token_label_id]
        segment_ids = [sequence_a_segment_id] * len(tokens)

        if cls_token_at_end:
            tokens += [cls_token]
            label_ids += [pad_token_label_id]
            segment_ids += [cls_token_segment_id]
        else:
            tokens = [cls_token] + tokens
            label_ids = [pad_token_label_id] + label_ids
            segment_ids = [cls_token_segment_id] + segment_ids

        input_ids = tokenizer.convert_tokens_to_ids(tokens)

        # The mask has 1 for real tokens and 0 for padding tokens. Only real
        # tokens are attended to.
        input_mask = [1 if mask_padding_with_zero else 0] * len(input_ids)

        # Zero-pad up to the sequence length.
        padding_length = max_seq_length - len(input_ids)
        if pad_on_left:
            input_ids = ([pad_token] * padding_length) + input_ids
            input_mask = ([0 if mask_padding_with_zero else 1] * padding_length) + input_mask
            segment_ids = ([pad_token_segment_id] * padding_length) + segment_ids
            label_ids = ([pad_token_label_id] * padding_length) + label_ids
        else:
            input_ids += ([pad_token] * padding_length)
            input_mask += ([0 if mask_padding_with_zero else 1] * padding_length)
            segment_ids += ([pad_token_segment_id] * padding_length)
            label_ids += ([pad_token_label_id] * padding_length)

        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length
        assert len(label_ids) == max_seq_length

#         if ex_index > 10 and ex_index < 12:
#             print("*** Example ***")
#             print("guid: %s", example.guid)
#             print("tokens: %s", " ".join([str(x) for x in tokens]))
#             print("input_ids: %s", " ".join([str(x) for x in input_ids]))
#             print("input_mask: %s", " ".join([str(x) for x in input_mask]))
#             print("segment_ids: %s", " ".join([str(x) for x in segment_ids]))
#             print("label_ids: %s", " ".join([str(x) for x in label_ids]))

        features.append(
                InputFeatures(input_ids=input_ids,
                              input_mask=input_mask,
                              segment_ids=segment_ids,
                              label_ids=label_ids))
    return features

In [11]:
with torch.cuda.device(n_gpu):
    possible_labels = ["O", "B-claim", "I-claim"]
    features = convert_examples_to_features(examples, possible_labels, 50, tokenizer,
                                                    cls_token_at_end=False,
                                                    # xlnet has a cls token at the end
                                                    cls_token=tokenizer.cls_token,
                                                    cls_token_segment_id=0,
                                                    sep_token=tokenizer.sep_token,
                                                    sep_token_extra=False,
                                                    # roberta uses an extra separator b/w pairs of sentences, cf. github.com/pytorch/fairseq/commit/1684e166e3da03f5b600dbb7855cb98ddfcd0805
                                                    pad_on_left=False,
                                                    # pad on the left for xlnet
                                                    pad_token=tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0],
                                                    pad_token_segment_id=0,
                                                    pad_token_label_id=pad_token_label_id
                                                    )


        # Convert to Tensors and build dataset
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
    all_label_ids = torch.tensor([f.label_ids for f in features], dtype=torch.long)



    train_dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)

In [12]:
#train_dataset = load_and_cache_examples(tokenizer, labels, pad_token_label_id, mode="train")
with torch.cuda.device(n_gpu):
    global_step, tr_loss = train(train_dataset, model, tokenizer, labels, pad_token_label_id)
    print(" global_step = %s, average loss = %s", global_step, tr_loss)

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]
Iteration:   0%|          | 0/4153 [00:00<?, ?it/s][A

AttributeError: 'Tensor' object has no attribute 'backwards'

In [14]:
tokenizer

<transformers.tokenization_bert.BertTokenizer at 0x7f30ae2615f8>

In [76]:
# checkpoints = ["test_BERT_eval_during_train"]

In [32]:
# args = {}
# eval_dataset = load_and_cache_examples(args, tokenizer, labels, pad_token_label_id, mode="dev")

In [13]:
eval_sampler = SequentialSampler(eval_dataset)

In [14]:
#eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=1)

In [65]:
testdata = pd.read_csv("../test_m_short.txt", sep='\t', encoding="latin1").fillna(method="ffill")

getter = SentenceGetter(testdata)

len(getter.sentences)

36

In [66]:
sentences_test = [[s[0] for s in sent] for sent in getter.sentences]
labels_test = [[s[1] for s in sent] for sent in getter.sentences]

num_labels = len(labels_test)

examples_test = [InputExample(guid, words, labels) for guid, (words, labels) in enumerate(zip(sentences_test, labels_test))]

len(examples_test)

36

In [79]:
with torch.cuda.device(1):
    device = "cuda"
    possible_labels = ["O", "B-claim", "I-claim"]
    features_test = convert_examples_to_features(examples_test, possible_labels, 50, tokenizer,
                                                    cls_token_at_end=False,
                                                    # xlnet has a cls token at the end
                                                    cls_token=tokenizer.cls_token,
                                                    cls_token_segment_id=0,
                                                    sep_token=tokenizer.sep_token,
                                                    sep_token_extra=False,
                                                    # roberta uses an extra separator b/w pairs of sentences, cf. github.com/pytorch/fairseq/commit/1684e166e3da03f5b600dbb7855cb98ddfcd0805
                                                    pad_on_left=False,
                                                    # pad on the left for xlnet
                                                    pad_token=tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0],
                                                    pad_token_segment_id=0,
                                                    pad_token_label_id=pad_token_label_id
                                                    )

    print(len(features_test))
        # Convert to Tensors and build dataset
    all_input_ids = torch.tensor([f.input_ids for f in features_test], dtype=torch.long).to(device)
    all_input_mask = torch.tensor([f.input_mask for f in features_test], dtype=torch.long).to(device)
    all_segment_ids = torch.tensor([f.segment_ids for f in features_test], dtype=torch.long).to(device)
    all_label_ids = torch.tensor([f.label_ids for f in features_test], dtype=torch.long).to(device)
    print(all_label_ids)

    test_dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
    test_sampler = SequentialSampler(test_dataset)
    test_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=8)

*** Example ***
guid: %s 11
tokens: %s [CLS] < abstract > we introduce a new method for efficiently sim ##ulating liquid with extreme amounts of spatial adapt ##ivity . [SEP]
input_ids: %s 101 1026 10061 1028 2057 8970 1037 2047 4118 2005 18228 21934 10924 6381 2007 6034 8310 1997 13589 15581 7730 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
input_mask: %s 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
segment_ids: %s 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
label_ids: %s -100 0 0 0 0 0 0 0 0 0 0 0 -100 0 0 0 0 0 0 0 -100 0 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100
36
tensor([[-100,    0,    0,  ..., -100,    0, -100],
        [-100,    0,    0,  ..., -100, -100, -100],
        [-100,    0, -100,  ..., -100, -100, -100],
        ...,
        [-100,    1,    2,  

In [90]:
import sys

eval_loss = 0.0
nb_eval_steps = 0
preds = None
out_label_ids = None
model.eval()
device = "cuda"
with torch.cuda.device(1):
    for batch in tqdm(test_dataloader, desc="Evaluating"):
        batch = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            b_input_ids, b_input_mask, b_token_type_ids, b_labels = batch
#             inputs = {"input_ids": batch[0],
#                       "attention_mask": batch[1],
#                       "token_type_ids": batch[2],
#                       # XLM and RoBERTa don"t use segment_ids
#                       "labels": batch[3]}
            loss = model(b_input_ids, attention_mask=b_input_mask, token_type_ids=b_token_type_ids, labels=b_labels)
            logits = model(b_input_ids, attention_mask=b_input_mask, token_type_ids=b_token_type_ids)
            print(logits)
#         tmp_eval_loss, logits = outputs[:2]
            
            
            eval_loss += tmp_eval_loss.item()
        nb_eval_steps += 1
        if preds is None:
            preds = logits[0].detach().cpu().numpy()
            out_label_ids = b_labels.detach().cpu().numpy()
            print("PREDS")
            print(np.argmax(preds, axis=2))
            print("VERDADERAS")
            print(inputs['labels'])
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)
#         logits = logits.detach().cpu().numpy()
#         #Capaz esto es voraz
#         preds.extend([list(p) for p in np.argmax(logits, axis=2)])
#         out_label_ids.append(inputs["labels"])










Evaluating:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A






Evaluating:  20%|██        | 1/5 [00:00<00:00,  6.59it/s][A[A[A[A[A[A[A

(tensor([[[-0.0312,  0.3613, -0.4594,  ...,  0.4336,  0.4693,  0.0137],
         [ 0.0873, -0.0297,  0.3326,  ...,  0.1856, -0.3763,  0.2610],
         [ 0.1595, -0.1624,  0.1918,  ...,  0.1285,  0.2487, -0.0732],
         ...,
         [ 0.1833, -0.0594, -0.0156,  ...,  0.0214, -0.2107, -0.0089],
         [ 0.5880, -0.1975,  0.0153,  ..., -0.3094,  0.3273, -0.0809],
         [-0.0802,  0.1822, -0.0763,  ..., -0.2808,  0.5590, -0.1955]],

        [[ 0.1007,  0.4859, -0.4418,  ...,  0.2053,  0.3504,  0.1614],
         [ 0.0248,  0.1275,  0.3337,  ...,  0.1054, -0.2797,  0.3138],
         [ 0.2574,  0.0589,  0.0108,  ..., -0.0288, -0.2933,  0.0314],
         ...,
         [ 0.3397, -0.0125, -0.0254,  ...,  0.0068, -0.0972, -0.2804],
         [ 0.6195, -0.3266, -0.0448,  ..., -0.4077, -0.3333,  0.3048],
         [-0.0184,  0.5473, -0.3238,  ...,  0.0255, -0.0113, -0.2949]],

        [[ 0.1245,  0.0950, -0.3904,  ...,  0.0606, -0.0280,  0.1485],
         [-0.0889, -0.0579,  0.1897,  ...,  

AttributeError: 'tuple' object has no attribute 'detach'

In [78]:
out_label_ids.shape[1]

50

In [75]:
with torch.cuda.device(1):
    eval_loss = eval_loss / nb_eval_steps
    preds = np.argmax(preds, axis=2)

    label_map = {i: label for i, label in enumerate(labels_test)}

    out_label_list = [[] for _ in range(out_label_ids.shape[0])]
    preds_list = [[] for _ in range(out_label_ids.shape[0])]

    for i in range(out_label_ids.shape[0]):
        for j in range(out_label_ids.shape[1]):
            if out_label_ids[i, j] != pad_token_label_id:
                out_label_list[i].append(label_map[out_label_ids[i][j]])
                preds_list[i].append(label_map[preds[i][j]])

    results = {
        "loss": eval_loss,
        "precision": precision_score(out_label_list, preds_list),
        "recall": recall_score(out_label_list, preds_list),
        "f1": f1_score(out_label_list, preds_list)
    }

KeyError: 15440

In [47]:
label_map[valid_tags[0]]


KeyError: tensor(-100, device='cuda:1')