In [1]:
import copy
import csv
import json
import logging
import os
import torch
from torch.utils.data import TensorDataset
logger = logging.getLogger(__name__)

In [2]:
class InputExample(object):
    """One raw example: a unique id, the entity-marked sentence and its label."""

    def __init__(self, guid, text_a, label):
        self.guid, self.text_a, self.label = guid, text_a, label

    def to_dict(self):
        """Return a deep copy of the instance attributes as a plain dict."""
        return copy.deepcopy(vars(self))

    def to_json_string(self):
        """Serialize the example as indented, key-sorted JSON ending in a newline."""
        serialized = json.dumps(self.to_dict(), indent=2, sort_keys=True)
        return serialized + "\n"

    def __repr__(self):
        # The JSON dump doubles as the human-readable representation.
        return str(self.to_json_string())

In [3]:
class InputFeatures(object):
    """Model-ready features for one example: token ids, masks, label and entity masks."""

    def __init__(self, input_ids, attention_mask, token_type_ids, label_id, e1_mask, e2_mask):
        # Standard BERT inputs.
        self.input_ids, self.attention_mask, self.token_type_ids = (
            input_ids,
            attention_mask,
            token_type_ids,
        )
        self.label_id = label_id
        # Per-token masks marking the two entity spans.
        self.e1_mask, self.e2_mask = e1_mask, e2_mask

    def to_dict(self):
        """Return a deep copy of the instance attributes as a plain dict."""
        return copy.deepcopy(vars(self))

    def to_json_string(self):
        """Serialize the features as indented, key-sorted JSON ending in a newline."""
        serialized = json.dumps(self.to_dict(), indent=2, sort_keys=True)
        return serialized + "\n"

    def __repr__(self):
        # The JSON dump doubles as the human-readable representation.
        return str(self.to_json_string())

In [4]:
class SemEvalProcessor(object):
    """Reads relation-classification TSV files and turns them into ``InputExample`` objects.

    Each TSV row is indexed as ``row[0]`` = relation label and ``row[1]`` = sentence
    (see ``_create_examples``). Labels are mapped to integer ids by their position in
    the list returned by ``get_label(args)``.
    """

    # Maps a split name to the ``args`` attribute holding that split's file name.
    _MODE_TO_FILE_ATTR = {"train": "train_file", "dev": "dev_file", "test": "test_file"}

    def __init__(self, args):
        self.args = args
        self.relation_labels = get_label(args)

    @classmethod
    def _read_tsv(cls, input_file, quotechar=None):
        """Read a tab-separated file and return its rows as a list of lists of strings.

        Declared as a classmethod (the first parameter was already named ``cls``), so it
        can be called on the class as well as on an instance.
        """
        with open(input_file, "r", encoding="utf-8") as f:
            return list(csv.reader(f, delimiter="\t", quotechar=quotechar))

    def _create_examples(self, lines, set_type):
        """Build one ``InputExample`` per TSV row.

        :param lines: rows as returned by ``_read_tsv``
        :param set_type: split name, used as the guid prefix (``"<set_type>-<row index>"``)
        :raises ValueError: if a row's label is not in ``self.relation_labels``
        """
        examples = []
        for (i, line) in enumerate(lines):
            guid = "%s-%s" % (set_type, i)
            text_a = line[1]
            label = self.relation_labels.index(line[0])
            if i % 1000 == 0:
                # Log a sample row every 1000 rows as a sanity check.
                logger.info(line)
            examples.append(InputExample(guid=guid, text_a=text_a, label=label))
        return examples

    def get_examples(self, mode):
        """Load the examples of one split.

        :param mode: one of ``"train"``, ``"dev"``, ``"test"``
        :raises ValueError: if ``mode`` is not a known split (previously this surfaced as
            a ``TypeError`` from ``os.path.join`` receiving ``None``)
        """
        if mode not in self._MODE_TO_FILE_ATTR:
            raise ValueError(
                "Unknown mode {!r}; expected one of {}".format(mode, sorted(self._MODE_TO_FILE_ATTR))
            )
        file_to_read = getattr(self.args, self._MODE_TO_FILE_ATTR[mode])
        input_path = os.path.join(self.args.data_dir, file_to_read)

        logger.info("LOOKING AT {}".format(input_path))
        return self._create_examples(self._read_tsv(input_path), mode)

In [5]:
# Registry mapping a task name (looked up via ``args.task``) to the processor class for that task.
processors = {"semeval": SemEvalProcessor}
def convert_examples_to_features(
    examples,
    max_seq_len,
    tokenizer,
    cls_token="[CLS]",
    cls_token_segment_id=0,
    sep_token="[SEP]",
    pad_token=0,
    pad_token_segment_id=0,
    sequence_a_segment_id=0,
    add_sep_token=False,
    mask_padding_with_zero=True,
):
    """Convert examples into fixed-length ``InputFeatures`` with entity masks.

    For each example the sentence is tokenized, the ``<e1>``/``</e1>`` markers are
    replaced by ``$`` and the ``<e2>``/``</e2>`` markers by ``#``, the sequence is
    truncated, wrapped in ``cls_token`` (and ``sep_token`` when ``add_sep_token``),
    converted to ids and padded to ``max_seq_len``. ``e1_mask``/``e2_mask`` are 1 on
    the positions from the opening to the closing marker (inclusive) of each entity.

    :param examples: sequence of objects with ``guid``, ``text_a`` and ``label``
    :param max_seq_len: length of every produced sequence
    :param tokenizer: object providing ``tokenize`` and ``convert_tokens_to_ids``
    :return: list of ``InputFeatures``
    :raises ValueError: if a sentence lacks one of the four entity markers, or if an
        entity marker would be cut off by truncation. The latter used to surface as a
        bare ``IndexError`` or, with ``add_sep_token``, could silently mark the
        ``[SEP]`` position as part of entity 2.
    """
    # [CLS] always takes one slot; [SEP] takes a second one only when requested.
    special_tokens_count = 2 if add_sep_token else 1
    max_text_tokens = max_seq_len - special_tokens_count

    features = []
    for (ex_index, example) in enumerate(examples):
        if ex_index % 5000 == 0:
            logger.info("Writing example %d of %d" % (ex_index, len(examples)))
        tokens_a = tokenizer.tokenize(example.text_a)

        try:
            e11_p = tokens_a.index("<e1>")  # the start position of entity1
            e12_p = tokens_a.index("</e1>")  # the end position of entity1
            e21_p = tokens_a.index("<e2>")  # the start position of entity2
            e22_p = tokens_a.index("</e2>")  # the end position of entity2
        except ValueError as err:
            raise ValueError(
                "Example {} is missing an entity marker: {}".format(example.guid, err)
            ) from err

        # Every marker must survive truncation, otherwise the entity masks would point
        # past the sentence (or at the [SEP] token).
        if max(e11_p, e12_p, e21_p, e22_p) >= max_text_tokens:
            raise ValueError(
                "Example {}: entity markers do not fit in max_seq_len={} "
                "(last marker at token {}, room for {} text tokens)".format(
                    example.guid, max_seq_len, max(e11_p, e12_p, e21_p, e22_p), max_text_tokens
                )
            )

        # Replace the token
        tokens_a[e11_p] = "$"
        tokens_a[e12_p] = "$"
        tokens_a[e21_p] = "#"
        tokens_a[e22_p] = "#"

        # Add 1 because of the [CLS] token
        e11_p += 1
        e12_p += 1
        e21_p += 1
        e22_p += 1

        # Build a fresh list instead of aliasing and mutating ``tokens_a``.
        tokens = [cls_token] + tokens_a[:max_text_tokens]
        if add_sep_token:
            tokens.append(sep_token)

        token_type_ids = [cls_token_segment_id] + [sequence_a_segment_id] * (len(tokens) - 1)

        input_ids = tokenizer.convert_tokens_to_ids(tokens)

        # The mask has 1 for real tokens and 0 for padding tokens. Only real tokens are attended to.
        attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids)

        # Zero-pad up to the sequence length.
        padding_length = max_seq_len - len(input_ids)
        input_ids = input_ids + ([pad_token] * padding_length)
        attention_mask = attention_mask + ([0 if mask_padding_with_zero else 1] * padding_length)
        token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length)

        # e1 mask, e2 mask: 1 from the opening to the closing marker, inclusive.
        e1_mask = [0] * len(attention_mask)
        e2_mask = [0] * len(attention_mask)

        for i in range(e11_p, e12_p + 1):
            e1_mask[i] = 1
        for i in range(e21_p, e22_p + 1):
            e2_mask[i] = 1

        assert len(input_ids) == max_seq_len, "Error with input length {} vs {}".format(len(input_ids), max_seq_len)
        assert len(attention_mask) == max_seq_len, "Error with attention mask length {} vs {}".format(
            len(attention_mask), max_seq_len
        )
        assert len(token_type_ids) == max_seq_len, "Error with token type length {} vs {}".format(
            len(token_type_ids), max_seq_len
        )

        label_id = int(example.label)

        # Log the first few examples in full for manual inspection.
        if ex_index < 5:
            logger.info("*** Example ***")
            logger.info("guid: %s" % example.guid)
            logger.info("tokens: %s" % " ".join([str(x) for x in tokens]))
            logger.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
            logger.info("attention_mask: %s" % " ".join([str(x) for x in attention_mask]))
            logger.info("token_type_ids: %s" % " ".join([str(x) for x in token_type_ids]))
            logger.info("label: %s (id = %d)" % (example.label, label_id))
            logger.info("e1_mask: %s" % " ".join([str(x) for x in e1_mask]))
            logger.info("e2_mask: %s" % " ".join([str(x) for x in e2_mask]))

        features.append(
            InputFeatures(
                input_ids=input_ids,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
                label_id=label_id,
                e1_mask=e1_mask,
                e2_mask=e2_mask,
            )
        )

    return features

In [6]:
def load_and_cache_examples(args, tokenizer, mode):
    """Return a ``TensorDataset`` for one split, using an on-disk feature cache.

    The cache file lives in ``args.data_dir`` and is named after the split, the task,
    the last path component of ``args.model_name_or_path`` and ``args.max_seq_len``.
    When it exists the features are loaded from it; otherwise they are built from
    the split's data file and saved there.

    Tensors in the returned dataset are ordered: input ids, attention mask,
    token type ids, label ids, e1 mask, e2 mask.

    :raises Exception: if the cache is missing and ``mode`` is not train/dev/test
    """
    processor = processors[args.task](args)

    # Load data features from cache or dataset file
    model_tag = list(filter(None, args.model_name_or_path.split("/"))).pop()
    cache_name = "cached_{}_{}_{}_{}".format(mode, args.task, model_tag, args.max_seq_len)
    cached_features_file = os.path.join(args.data_dir, cache_name)

    if not os.path.exists(cached_features_file):
        logger.info("Creating features from dataset file at %s", args.data_dir)
        if mode not in ("train", "dev", "test"):
            raise Exception("For mode, Only train, dev, test is available")
        examples = processor.get_examples(mode)

        features = convert_examples_to_features(
            examples, args.max_seq_len, tokenizer, add_sep_token=args.add_sep_token
        )
        logger.info("Saving features into cached file %s", cached_features_file)
        torch.save(features, cached_features_file)
    else:
        logger.info("Loading features from cached file %s", cached_features_file)
        # NOTE(review): torch.load unpickles arbitrary Python objects; only point
        # data_dir at cache files you created yourself.
        features = torch.load(cached_features_file)

    # Convert to Tensors and build dataset (field order defines the batch layout).
    field_order = ("input_ids", "attention_mask", "token_type_ids", "label_id", "e1_mask", "e2_mask")
    tensors = [
        torch.tensor([getattr(feature, field) for feature in features], dtype=torch.long)
        for field in field_order
    ]
    return TensorDataset(*tensors)

# Model

In [7]:
import torch
import torch.nn as nn
from transformers import BertModel, BertPreTrainedModel


class FCLayer(nn.Module):
    """Fully-connected block: dropout, then an optional tanh, then a linear projection."""

    def __init__(self, input_dim, output_dim, dropout_rate=0.0, use_activation=True):
        super().__init__()
        self.use_activation = use_activation
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(input_dim, output_dim)
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Apply dropout -> (tanh if ``use_activation``) -> linear to ``x``."""
        hidden = self.dropout(x)
        hidden = self.tanh(hidden) if self.use_activation else hidden
        return self.linear(hidden)

2022-04-23 21:40:29.069027: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-04-23 21:40:29.069059: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [8]:
class RBERT(BertPreTrainedModel):
    """BERT-based relation classifier.

    The classifier input is the concatenation of three vectors, each passed through
    an ``FCLayer``: the pooled ``[CLS]`` output and the averaged hidden states of the
    two entity spans (the two entities share one ``FCLayer``).
    """

    def __init__(self, config, args):
        super(RBERT, self).__init__(config)
        self.bert = BertModel(config=config)  # Load pretrained bert

        self.num_labels = config.num_labels

        self.cls_fc_layer = FCLayer(config.hidden_size, config.hidden_size, args.dropout_rate)
        self.entity_fc_layer = FCLayer(config.hidden_size, config.hidden_size, args.dropout_rate)
        self.label_classifier = FCLayer(
            config.hidden_size * 3,
            config.num_labels,
            args.dropout_rate,
            use_activation=False,
        )

    # Must be a staticmethod: it takes no ``self`` but is called as
    # ``self.entity_average(...)`` in ``forward``; without the decorator that call
    # passes three positional arguments and raises TypeError.
    @staticmethod
    def entity_average(hidden_output, e_mask):
        """
        Average the entity hidden state vectors (H_i ~ H_j)
        :param hidden_output: [batch_size, max_seq_len, dim]
        :param e_mask: [batch_size, max_seq_len]
                e.g. e_mask[0] == [0, 0, 0, 1, 1, 1, 0, 0, ... 0]
        :return: [batch_size, dim]

        NOTE(review): a mask row with no non-zero entry divides by zero here; callers
        are assumed to always mark at least one position per entity.
        """
        e_mask_unsqueeze = e_mask.unsqueeze(1)  # [b, 1, max_seq_len]
        length_tensor = (e_mask != 0).sum(dim=1).unsqueeze(1)  # [batch_size, 1]

        # [b, 1, max_seq_len] * [b, max_seq_len, dim] = [b, 1, dim] -> [b, dim]
        sum_vector = torch.bmm(e_mask_unsqueeze.float(), hidden_output).squeeze(1)
        avg_vector = sum_vector.float() / length_tensor.float()  # broadcasting
        return avg_vector

    def forward(self, input_ids, attention_mask, token_type_ids, labels, e1_mask, e2_mask):
        """Run the classifier.

        :param labels: target ids, or ``None`` to skip the loss computation
        :param e1_mask: mask selecting entity-1 positions, passed to ``entity_average``
        :param e2_mask: mask selecting entity-2 positions, passed to ``entity_average``
        :return: tuple ``(loss), logits, (hidden_states), (attentions)``; ``loss`` is
            present only when ``labels`` is not ``None``
        """
        outputs = self.bert(
            input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids
        )  # sequence_output, pooled_output, (hidden_states), (attentions)
        sequence_output = outputs[0]
        pooled_output = outputs[1]  # [CLS]

        # Average
        e1_h = self.entity_average(sequence_output, e1_mask)
        e2_h = self.entity_average(sequence_output, e2_mask)

        # Dropout -> tanh -> fc_layer (Share FC layer for e1 and e2)
        pooled_output = self.cls_fc_layer(pooled_output)
        e1_h = self.entity_fc_layer(e1_h)
        e2_h = self.entity_fc_layer(e2_h)

        # Concat -> fc_layer
        concat_h = torch.cat([pooled_output, e1_h, e2_h], dim=-1)
        logits = self.label_classifier(concat_h)

        outputs = (logits,) + outputs[2:]  # add hidden states and attention if they are here

        # Loss: MSE when there is a single output (regression), cross-entropy otherwise.
        if labels is not None:
            if self.num_labels == 1:
                loss_fct = nn.MSELoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                loss_fct = nn.CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

            outputs = (loss,) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions)


# Official Evaluation

In [9]:
import os

EVAL_DIR = "eval"


def official_f1(eval_dir=EVAL_DIR):
    """Run the official SemEval-2010 Task 8 perl scorer and return the macro F1 in [0, 1].

    The scorer is run on ``proposed_answers.txt`` against ``answer_keys.txt`` (both
    inside ``eval_dir``), its standard output is saved to ``result.txt`` in the same
    directory, and the score is parsed from the last non-blank line of that file.

    :param eval_dir: directory holding the scorer script and the answer files;
        defaults to ``EVAL_DIR``
    :raises Exception: if perl cannot be started, the scorer exits with an error, or
        the scorer produced no output
    """
    import subprocess  # local import: keeps this cell self-contained

    scorer_path = os.path.join(eval_dir, "semeval2010_task8_scorer-v1.2.pl")
    proposed_path = os.path.join(eval_dir, "proposed_answers.txt")
    answer_keys_path = os.path.join(eval_dir, "answer_keys.txt")
    result_path = os.path.join(eval_dir, "result.txt")

    # Run the perl script. ``os.system`` never raises, so failures used to go
    # unnoticed until the parsing below crashed; ``check=True`` reports them here.
    # Passing an argument list also avoids shell interpretation of the paths.
    try:
        with open(result_path, "wb") as result_file:
            subprocess.run(
                ["perl", scorer_path, proposed_path, answer_keys_path],
                stdout=result_file,
                check=True,
            )
    except (OSError, subprocess.CalledProcessError) as err:
        raise Exception(
            "perl is not installed or proposed_answers.txt is missing ({})".format(err)
        ) from err

    with open(result_path, "r", encoding="utf-8") as f:
        report_lines = [line for line in f if line.strip()]
    if not report_lines:
        raise Exception("The scorer produced no output in {}".format(result_path))

    # The last line is expected to look like "<<< ...: macro-averaged F1 = 82.00% >>>".
    macro_result = report_lines[-1]
    macro_result = macro_result.split(":")[1].replace(">>>", "").strip()
    macro_result = macro_result.split("=")[1].strip().replace("%", "")
    macro_result = float(macro_result) / 100

    return macro_result


In [None]:
#print("macro-averaged F1 = {}%".format(official_f1() * 100))

In [11]:
import logging
import os
import random

import numpy as np
import torch
from transformers import BertTokenizer

# Entity boundary markers; load_tokenizer registers them as additional special tokens.
ADDITIONAL_SPECIAL_TOKENS = ["<e1>", "</e1>", "<e2>", "</e2>"]


def get_label(args):
    """Return the labels listed one per line in ``args.data_dir/args.label_file``.

    Each line is stripped of surrounding whitespace; list order follows file order.
    The file is opened in a ``with`` block so the handle is closed deterministically
    (it was previously left open until garbage collection).
    """
    label_path = os.path.join(args.data_dir, args.label_file)
    with open(label_path, "r", encoding="utf-8") as label_file:
        return [label.strip() for label in label_file]


def load_tokenizer(args):
    """Load the BERT tokenizer for ``args.model_name_or_path`` and register the entity markers."""
    extra_tokens = {"additional_special_tokens": ADDITIONAL_SPECIAL_TOKENS}
    bert_tokenizer = BertTokenizer.from_pretrained(args.model_name_or_path)
    bert_tokenizer.add_special_tokens(extra_tokens)
    return bert_tokenizer


def write_prediction(args, output_file, preds):
    """
    Write predictions in the format read by the official evaluation script.

    Each output line is ``<id>\\t<label name>``, where ids start at 8001 and increase
    by one per prediction.

    :param output_file: prediction_file_path (e.g. eval/proposed_answers.txt)
    :param preds: predicted label ids, e.g. [0,1,0,2,18,...]
    """
    relation_labels = get_label(args)
    with open(output_file, "w", encoding="utf-8") as out_file:
        out_file.writelines(
            "{}\t{}\n".format(8001 + offset, relation_labels[label_id])
            for offset, label_id in enumerate(preds)
        )


def init_logger():
    """Configure root logging at INFO level with a timestamped message format."""
    message_format = "%(asctime)s - %(levelname)s - %(name)s -   %(message)s"
    timestamp_format = "%m/%d/%Y %H:%M:%S"
    logging.basicConfig(level=logging.INFO, datefmt=timestamp_format, format=message_format)


def set_seed(args):
    """Seed Python's ``random``, NumPy and PyTorch (plus all CUDA devices when in use) with ``args.seed``."""
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(args.seed)
    cuda_requested = not args.no_cuda
    if cuda_requested and torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)


def compute_metrics(preds, labels):
    """Check that predictions and labels have equal length, then return ``acc_and_f1`` for them."""
    n_preds, n_labels = len(preds), len(labels)
    assert n_preds == n_labels
    return acc_and_f1(preds, labels)


def simple_accuracy(preds, labels):
    """Return the mean of the element-wise comparison ``preds == labels``."""
    matches = preds == labels
    return matches.mean()


def acc_and_f1(preds, labels, average="macro"):
    """Return ``{"acc": ..., "f1": ...}`` for the given predictions.

    Accuracy is computed from ``preds`` and ``labels``; the F1 value comes from
    ``official_f1()``, which takes no arguments here. ``average`` is accepted but
    not used.
    """
    metrics = {"acc": simple_accuracy(preds, labels)}
    metrics["f1"] = official_f1()
    return metrics


# Model for training

In [12]:
import logging
import os

import numpy as np
import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from tqdm import tqdm, trange
from transformers import AdamW, BertConfig, get_linear_schedule_with_warmup

from model import RBERT
from utils import compute_metrics, get_label, write_prediction

logger = logging.getLogger(__name__)


class Trainer(object):
    """Fine-tunes, evaluates, saves and reloads an ``RBERT`` model.

    The label list is read with ``get_label(args)``; its length sets the number of
    output classes. The model is moved to CUDA when available unless ``args.no_cuda``.
    """

    def __init__(self, args, train_dataset=None, dev_dataset=None, test_dataset=None):
        self.args = args
        self.train_dataset = train_dataset
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset

        self.label_lst = get_label(args)
        self.num_labels = len(self.label_lst)

        self.config = BertConfig.from_pretrained(
            args.model_name_or_path,
            num_labels=self.num_labels,
            finetuning_task=args.task,
            id2label={str(i): label for i, label in enumerate(self.label_lst)},
            label2id={label: i for i, label in enumerate(self.label_lst)},
        )
        self.model = RBERT.from_pretrained(args.model_name_or_path, config=self.config, args=args)

        # GPU or CPU
        self.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
        self.model.to(self.device)

    def train(self):
        """Train on ``self.train_dataset``.

        Runs ``args.num_train_epochs`` epochs, or stops after ``args.max_steps``
        optimizer updates when that is positive. Evaluates on the test set every
        ``args.logging_steps`` updates and saves every ``args.save_steps`` updates.

        :return: ``(global_step, average training loss per optimizer update)``; the
            average is ``0.0`` when no update was performed
        """
        train_sampler = RandomSampler(self.train_dataset)
        train_dataloader = DataLoader(
            self.train_dataset,
            sampler=train_sampler,
            batch_size=self.args.train_batch_size,
        )

        if self.args.max_steps > 0:
            t_total = self.args.max_steps
            # At least 1 so a dataloader shorter than the accumulation window
            # cannot cause a division by zero.
            updates_per_epoch = max(1, len(train_dataloader) // self.args.gradient_accumulation_steps)
            self.args.num_train_epochs = self.args.max_steps // updates_per_epoch + 1
        else:
            t_total = len(train_dataloader) // self.args.gradient_accumulation_steps * self.args.num_train_epochs

        # Prepare optimizer and schedule (linear warmup and decay)
        # Biases and LayerNorm weights are excluded from weight decay.
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": self.args.weight_decay,
            },
            {
                "params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": 0.0,
            },
        ]
        optimizer = AdamW(
            optimizer_grouped_parameters,
            lr=self.args.learning_rate,
            eps=self.args.adam_epsilon,
        )
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=self.args.warmup_steps,
            num_training_steps=t_total,
        )

        # Train!
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(self.train_dataset))
        logger.info("  Num Epochs = %d", self.args.num_train_epochs)
        logger.info("  Total train batch size = %d", self.args.train_batch_size)
        logger.info("  Gradient Accumulation steps = %d", self.args.gradient_accumulation_steps)
        logger.info("  Total optimization steps = %d", t_total)
        logger.info("  Logging steps = %d", self.args.logging_steps)
        logger.info("  Save steps = %d", self.args.save_steps)

        global_step = 0
        tr_loss = 0.0
        self.model.zero_grad()

        train_iterator = trange(int(self.args.num_train_epochs), desc="Epoch")

        for _ in train_iterator:
            epoch_iterator = tqdm(train_dataloader, desc="Iteration")
            for step, batch in enumerate(epoch_iterator):
                # Re-enter train mode every step: evaluate() switches the model to eval mode.
                self.model.train()
                batch = tuple(t.to(self.device) for t in batch)  # GPU or CPU
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "token_type_ids": batch[2],
                    "labels": batch[3],
                    "e1_mask": batch[4],
                    "e2_mask": batch[5],
                }
                outputs = self.model(**inputs)
                loss = outputs[0]

                if self.args.gradient_accumulation_steps > 1:
                    loss = loss / self.args.gradient_accumulation_steps

                loss.backward()

                tr_loss += loss.item()
                if (step + 1) % self.args.gradient_accumulation_steps == 0:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args.max_grad_norm)

                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    self.model.zero_grad()
                    global_step += 1

                    if self.args.logging_steps > 0 and global_step % self.args.logging_steps == 0:
                        self.evaluate("test")  # There is no dev set for semeval task

                    if self.args.save_steps > 0 and global_step % self.args.save_steps == 0:
                        self.save_model()

                # Stop once exactly max_steps updates are done (the previous strict
                # comparison ran one update past the end of the LR schedule).
                if 0 < self.args.max_steps <= global_step:
                    epoch_iterator.close()
                    break

            if 0 < self.args.max_steps <= global_step:
                train_iterator.close()
                break

        # Guard against ZeroDivisionError when no optimizer update happened.
        average_loss = tr_loss / global_step if global_step > 0 else 0.0
        return global_step, average_loss

    def evaluate(self, mode):
        """Evaluate on the ``"test"`` or ``"dev"`` dataset.

        Writes the predictions to ``proposed_answers.txt`` in ``args.eval_dir`` and
        returns a dict with the mean batch loss plus the values from ``compute_metrics``.

        :raises Exception: if ``mode`` is neither ``"test"`` nor ``"dev"``, or the
            requested dataset was not given to the constructor
        """
        # We use test dataset because semeval doesn't have dev dataset
        if mode == "test":
            dataset = self.test_dataset
        elif mode == "dev":
            dataset = self.dev_dataset
        else:
            raise Exception("Only dev and test dataset available")

        if dataset is None:
            raise Exception("No {} dataset was provided to the Trainer".format(mode))

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.eval_batch_size)

        # Eval!
        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None

        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "token_type_ids": batch[2],
                    "labels": batch[3],
                    "e1_mask": batch[4],
                    "e2_mask": batch[5],
                }
                outputs = self.model(**inputs)
                tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            # Accumulate logits and gold labels across batches as numpy arrays.
            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs["labels"].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)

        eval_loss = eval_loss / nb_eval_steps
        results = {"loss": eval_loss}
        preds = np.argmax(preds, axis=1)
        write_prediction(self.args, os.path.join(self.args.eval_dir, "proposed_answers.txt"), preds)

        result = compute_metrics(preds, out_label_ids)
        results.update(result)

        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  {} = {:.4f}".format(key, results[key]))

        return results

    def save_model(self):
        """Save the model and the training arguments into ``args.model_dir`` (overwriting)."""
        # exist_ok avoids the check-then-create race of a separate os.path.exists test.
        os.makedirs(self.args.model_dir, exist_ok=True)
        model_to_save = self.model.module if hasattr(self.model, "module") else self.model
        model_to_save.save_pretrained(self.args.model_dir)

        # Save training arguments together with the trained model
        torch.save(self.args, os.path.join(self.args.model_dir, "training_args.bin"))
        logger.info("Saving model checkpoint to %s", self.args.model_dir)

    def load_model(self):
        """Replace ``self.args`` and ``self.model`` with the ones saved in ``args.model_dir``.

        :raises Exception: if ``args.model_dir`` does not exist
        """
        # Check whether model exists
        if not os.path.exists(self.args.model_dir):
            raise Exception("Model doesn't exists! Train first!")

        # NOTE(review): torch.load unpickles arbitrary Python objects; only load
        # training_args.bin from a model_dir you trust.
        self.args = torch.load(os.path.join(self.args.model_dir, "training_args.bin"))
        self.model = RBERT.from_pretrained(self.args.model_dir, args=self.args)
        self.model.to(self.device)
        logger.info("***** Model Loaded *****")


# Main Function

In [13]:
import argparse
import itertools
import pandas as pd
import os

# from data_loader import load_and_cache_examples
#from trainer import Trainer
#from utils import init_logger, load_tokenizer, set_seed


def main(args):
    """Entry point: set up logging and seeds, build the datasets, then train and/or evaluate.

    Training runs when ``args.do_train`` is set. When ``args.do_eval`` is set, the
    saved model is reloaded and evaluated on the test split.
    """
    init_logger()
    set_seed(args)
    tokenizer = load_tokenizer(args)

    # Build the train split first, then the test split.
    datasets = {
        split: load_and_cache_examples(args, tokenizer, mode=split)
        for split in ("train", "test")
    }
    trainer = Trainer(args, train_dataset=datasets["train"], test_dataset=datasets["test"])

    if args.do_train:
        trainer.train()

    if not args.do_eval:
        return
    trainer.load_model()
    trainer.evaluate("test")

In [14]:
# Command-line interface for the run. Argument order is kept stable because it
# determines the order of the generated --help output.
parser = argparse.ArgumentParser()

# --- Task and file locations ---
parser.add_argument(
    "--task",
    type=str,
    default="semeval",
    help="The name of the task to train",
)
parser.add_argument("--data_dir", type=str, default="./data", help="The input data dir. Should contain the .tsv files (or other data files) for the task.")
parser.add_argument(
    "--model_dir",
    type=str,
    default="./model",
    help="Path to model",
)
parser.add_argument("--eval_dir", type=str, default="./eval", help="Evaluation script, result directory")
parser.add_argument(
    "--train_file",
    type=str,
    default="train.tsv",
    help="Train file",
)
parser.add_argument(
    "--test_file",
    type=str,
    default="test.tsv",
    help="Test file",
)
parser.add_argument(
    "--label_file",
    type=str,
    default="label.txt",
    help="Label file",
)

# --- Pretrained model ---
parser.add_argument("--model_name_or_path", default="bert-base-uncased", type=str, help="Model Name or Path")

# --- Reproducibility, batching and sequence length ---
parser.add_argument(
    "--seed",
    default=77,
    type=int,
    help="random seed for initialization",
)
parser.add_argument(
    "--train_batch_size",
    type=int,
    default=16,
    help="Batch size for training.",
)
parser.add_argument(
    "--eval_batch_size",
    type=int,
    default=32,
    help="Batch size for evaluation.",
)
parser.add_argument("--max_seq_len", type=int, default=384, help="The maximum total input sequence length after tokenization.")

# --- Optimization ---
parser.add_argument("--learning_rate", type=float, default=2e-5, help="The initial learning rate for Adam.")
parser.add_argument("--num_train_epochs", type=float, default=10.0, help="Total number of training epochs to perform.")
parser.add_argument(
    "--weight_decay",
    type=float,
    default=0.0,
    help="Weight decay if we apply some.",
)
parser.add_argument("--gradient_accumulation_steps", default=1, type=int, help="Number of updates steps to accumulate before performing a backward/update pass.")
parser.add_argument(
    "--adam_epsilon",
    type=float,
    default=1e-8,
    help="Epsilon for Adam optimizer.",
)
parser.add_argument(
    "--max_grad_norm",
    type=float,
    default=1.0,
    help="Max gradient norm.",
)
parser.add_argument("--max_steps", type=int, default=-1, help="If > 0: set total number of training steps to perform. Override num_train_epochs.")
parser.add_argument(
    "--warmup_steps",
    type=int,
    default=0,
    help="Linear warmup over warmup_steps.",
)
parser.add_argument("--dropout_rate", type=float, default=0.1, help="Dropout for fully-connected layers")

# --- Logging and checkpointing cadence ---
parser.add_argument(
    "--logging_steps",
    default=250,
    type=int,
    help="Log every X updates steps.",
)
parser.add_argument("--save_steps", default=250, type=int, help="Save checkpoint every X updates steps.")

# --- Boolean switches ---
parser.add_argument(
    "--do_train",
    action="store_true",
    help="Whether to run training.",
)
parser.add_argument(
    "--do_eval",
    action="store_true",
    help="Whether to run eval on the test set.",
)
parser.add_argument(
    "--no_cuda",
    action="store_true",
    help="Avoid using CUDA when available",
)
parser.add_argument("--add_sep_token", action="store_true", help="Add [SEP] token at the end of the sentence")


_StoreTrueAction(option_strings=['--add_sep_token'], dest='add_sep_token', nargs=0, const=True, default=False, type=None, choices=None, help='Add [SEP] token at the end of the sentence', metavar=None)

In [16]:
# Remove artifacts left by a previous run. On a fresh checkout these files do not
# exist yet, so a missing file is fine; this keeps the cell re-runnable after
# "Restart Kernel -> Run All".
for stale_path in ('data/cached_test_semeval_bert-base-uncased_384', 'eval/proposed_answers.txt'):
    try:
        os.remove(stale_path)
    except FileNotFoundError:
        pass

# Restore the original answer keys by copying answer_keys_1.txt over answer_keys.txt.
with open('eval/answer_keys_1.txt', 'r') as firstfile, open('eval/answer_keys.txt', 'w') as secondfile:
    secondfile.writelines(firstfile)

# Restore the original test set: read test_1.tsv as two unnamed columns and write it
# back out as test.tsv without header or index.
test = pd.read_csv('data/test_1.tsv', sep='\t', names=[0, 1])
test.to_csv("data/test.tsv", sep="\t", index=False, header=False)


# Train, then evaluate the saved model on the test set.
args = parser.parse_args("--do_train --do_eval".split())
main(args)

04/23/2022 21:40:57 - INFO - __main__ -   Loading features from cached file ./data/cached_train_semeval_bert-base-uncased_384
04/23/2022 21:40:58 - INFO - __main__ -   Creating features from dataset file at ./data
04/23/2022 21:40:58 - INFO - __main__ -   LOOKING AT ./data/test.tsv
04/23/2022 21:40:58 - INFO - __main__ -   ['PLACE_OF_RESIDENCE(e1,e2)', '<e1> Vic Roschkov Sr. </e1> is a Canadian editorial cartoonist and illustrator, now living in London, Ontario, <e2> Canada </e2>.']
04/23/2022 21:40:58 - INFO - __main__ -   Writing example 0 of 915
04/23/2022 21:40:58 - INFO - __main__ -   *** Example ***
04/23/2022 21:40:58 - INFO - __main__ -   guid: test-0
04/23/2022 21:40:58 - INFO - __main__ -   tokens: [CLS] $ vic ro ##sch ##kov sr . $ is a canadian editorial cartoonist and illustrator , now living in london , ontario , # canada # .
04/23/2022 21:40:58 - INFO - __main__ -   input_ids: 101 1002 10967 20996 11624 7724 5034 1012 1002 2003 1037 3010 8368 19659 1998 13825 1010 2085 25

04/23/2022 21:40:58 - INFO - __main__ -   e2_mask: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
04/23/2022 21:40:58 - INFO - __main__ -   *** Example ***
04/23/2022 21:40:58 - INFO - __main__ -   guid: test-2
04/23/2022 21:40:58 - INFO - __main__ -   tokens: [CLS] $ philippe c

04/23/2022 21:40:58 - INFO - __main__ -   label: 3 (id = 3)
04/23/2022 21:40:58 - INFO - __main__ -   e1_mask: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
04/23/2022 21:40:58 - INFO - __main__ -   e2_mask: 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

Epoch:   0%|                                                                                                                                          | 0/10 [00:00<?, ?it/s]
Iteration:   0%|                                                                                                                                     | 0/229 [00:00<?, ?it/s][A
Iteration:   0%|▌                                                                                                                            | 1/229 [00:00<02:20,  1.62it/s][A
Iteration:   1%|█                                                                                                                            | 2/229 [00:01<02:15,  1.67it/s][A
Iteration:   1%|█▋                                                                                                                           | 3/229 [00:01<02:14,  1.68it/s][A
Iteration:   2%|██▏                                                                                                   

Iteration:  40%|█████████████████████████████████████████████████▎                                                                          | 91/229 [00:54<01:23,  1.66it/s][A
Iteration:  40%|█████████████████████████████████████████████████▊                                                                          | 92/229 [00:54<01:22,  1.66it/s][A
Iteration:  41%|██████████████████████████████████████████████████▎                                                                         | 93/229 [00:55<01:21,  1.66it/s][A
Iteration:  41%|██████████████████████████████████████████████████▉                                                                         | 94/229 [00:56<01:21,  1.66it/s][A
Iteration:  41%|███████████████████████████████████████████████████▍                                                                        | 95/229 [00:56<01:20,  1.66it/s][A
Iteration:  42%|███████████████████████████████████████████████████▉                                               

Iteration:  80%|██████████████████████████████████████████████████████████████████████████████████████████████████▎                        | 183/229 [01:50<00:27,  1.64it/s][A
Iteration:  80%|██████████████████████████████████████████████████████████████████████████████████████████████████▊                        | 184/229 [01:50<00:27,  1.65it/s][A
Iteration:  81%|███████████████████████████████████████████████████████████████████████████████████████████████████▎                       | 185/229 [01:51<00:26,  1.64it/s][A
Iteration:  81%|███████████████████████████████████████████████████████████████████████████████████████████████████▉                       | 186/229 [01:51<00:26,  1.65it/s][A
Iteration:  82%|████████████████████████████████████████████████████████████████████████████████████████████████████▍                      | 187/229 [01:52<00:25,  1.65it/s][A
Iteration:  82%|███████████████████████████████████████████████████████████████████████████████████████████████████

Evaluating:  72%|█████████████████████████████████████████████████████████████████████████████████████████▊                                  | 21/29 [00:08<00:03,  2.44it/s][A[A

Evaluating:  76%|██████████████████████████████████████████████████████████████████████████████████████████████                              | 22/29 [00:09<00:02,  2.45it/s][A[A

Evaluating:  79%|██████████████████████████████████████████████████████████████████████████████████████████████████▎                         | 23/29 [00:09<00:02,  2.47it/s][A[A

Evaluating:  83%|██████████████████████████████████████████████████████████████████████████████████████████████████████▌                     | 24/29 [00:09<00:02,  2.46it/s][A[A

Evaluating:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▉                 | 25/29 [00:10<00:01,  2.47it/s][A[A

Evaluating:  90%|██████████████████████████████████████████████████████████████████████████████

Iteration:  45%|██████████████████████████████████████████████████████▊                                                                    | 102/229 [01:20<01:20,  1.58it/s][A
Iteration:  45%|███████████████████████████████████████████████████████▎                                                                   | 103/229 [01:21<01:19,  1.58it/s][A
Iteration:  45%|███████████████████████████████████████████████████████▊                                                                   | 104/229 [01:21<01:19,  1.58it/s][A
Iteration:  46%|████████████████████████████████████████████████████████▍                                                                  | 105/229 [01:22<01:18,  1.59it/s][A
Iteration:  46%|████████████████████████████████████████████████████████▉                                                                  | 106/229 [01:22<01:17,  1.59it/s][A
Iteration:  47%|█████████████████████████████████████████████████████████▍                                         

Iteration:  85%|████████████████████████████████████████████████████████████████████████████████████████████████████████▏                  | 194/229 [02:18<00:22,  1.59it/s][A
Iteration:  85%|████████████████████████████████████████████████████████████████████████████████████████████████████████▋                  | 195/229 [02:19<00:21,  1.59it/s][A
Iteration:  86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▎                 | 196/229 [02:20<00:20,  1.59it/s][A
Iteration:  86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▊                 | 197/229 [02:20<00:20,  1.59it/s][A
Iteration:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▎                | 198/229 [02:21<00:19,  1.56it/s][A
Iteration:  87%|███████████████████████████████████████████████████████████████████████████████████████████████████

Evaluating:  38%|███████████████████████████████████████████████                                                                             | 11/29 [00:04<00:07,  2.46it/s][A[A

Evaluating:  41%|███████████████████████████████████████████████████▎                                                                        | 12/29 [00:04<00:06,  2.45it/s][A[A

Evaluating:  45%|███████████████████████████████████████████████████████▌                                                                    | 13/29 [00:05<00:06,  2.43it/s][A[A

Evaluating:  48%|███████████████████████████████████████████████████████████▊                                                                | 14/29 [00:05<00:06,  2.40it/s][A[A

Evaluating:  52%|████████████████████████████████████████████████████████████████▏                                                           | 15/29 [00:06<00:05,  2.38it/s][A[A

Evaluating:  55%|████████████████████████████████████████████████████████████████████▍         

Iteration:  49%|████████████████████████████████████████████████████████████▏                                                              | 112/229 [01:27<01:13,  1.59it/s][A
Iteration:  49%|████████████████████████████████████████████████████████████▋                                                              | 113/229 [01:28<01:13,  1.59it/s][A
Iteration:  50%|█████████████████████████████████████████████████████████████▏                                                             | 114/229 [01:28<01:12,  1.59it/s][A
Iteration:  50%|█████████████████████████████████████████████████████████████▊                                                             | 115/229 [01:29<01:11,  1.59it/s][A
Iteration:  51%|██████████████████████████████████████████████████████████████▎                                                            | 116/229 [01:30<01:12,  1.56it/s][A
Iteration:  51%|██████████████████████████████████████████████████████████████▊                                    

Iteration:  89%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▌             | 204/229 [02:26<00:15,  1.59it/s][A
Iteration:  90%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████             | 205/229 [02:27<00:15,  1.60it/s][A
Iteration:  90%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▋            | 206/229 [02:27<00:14,  1.60it/s][A
Iteration:  90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▏           | 207/229 [02:28<00:13,  1.59it/s][A
Iteration:  91%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▋           | 208/229 [02:28<00:13,  1.59it/s][A
Iteration:  91%|███████████████████████████████████████████████████████████████████████████████████████████████████

Evaluating:   3%|████▎                                                                                                                        | 1/29 [00:00<00:11,  2.48it/s][A[A

Evaluating:   7%|████████▌                                                                                                                    | 2/29 [00:00<00:10,  2.48it/s][A[A

Evaluating:  10%|████████████▉                                                                                                                | 3/29 [00:01<00:10,  2.48it/s][A[A

Evaluating:  14%|█████████████████▏                                                                                                           | 4/29 [00:01<00:10,  2.48it/s][A[A

Evaluating:  17%|█████████████████████▌                                                                                                       | 5/29 [00:02<00:09,  2.48it/s][A[A

Evaluating:  21%|█████████████████████████▊                                                    

Iteration:  53%|█████████████████████████████████████████████████████████████████▌                                                         | 122/229 [01:33<01:09,  1.55it/s][A
Iteration:  54%|██████████████████████████████████████████████████████████████████                                                         | 123/229 [01:34<01:07,  1.56it/s][A
Iteration:  54%|██████████████████████████████████████████████████████████████████▌                                                        | 124/229 [01:34<01:06,  1.57it/s][A
Iteration:  55%|███████████████████████████████████████████████████████████████████▏                                                       | 125/229 [01:35<01:05,  1.58it/s][A
Iteration:  55%|███████████████████████████████████████████████████████████████████▋                                                       | 126/229 [01:35<01:05,  1.58it/s][A
Iteration:  55%|████████████████████████████████████████████████████████████████████▏                              

Iteration:  93%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉        | 214/229 [02:32<00:09,  1.57it/s][A
Iteration:  94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍       | 215/229 [02:32<00:08,  1.58it/s][A
Iteration:  94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████       | 216/229 [02:33<00:08,  1.59it/s][A
Iteration:  95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌      | 217/229 [02:33<00:07,  1.59it/s][A
Iteration:  95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████      | 218/229 [02:34<00:06,  1.59it/s][A
Iteration:  96%|███████████████████████████████████████████████████████████████████████████████████████████████████

Iteration:  33%|████████████████████████████████████████▌                                                                                   | 75/229 [00:47<01:37,  1.57it/s][A
Iteration:  33%|█████████████████████████████████████████▏                                                                                  | 76/229 [00:48<01:36,  1.58it/s][A
Iteration:  34%|█████████████████████████████████████████▋                                                                                  | 77/229 [00:49<01:36,  1.58it/s][A
Iteration:  34%|██████████████████████████████████████████▏                                                                                 | 78/229 [00:49<01:34,  1.59it/s][A
Iteration:  34%|██████████████████████████████████████████▊                                                                                 | 79/229 [00:50<01:34,  1.59it/s][A
Iteration:  35%|███████████████████████████████████████████▎                                                       

Iteration:  58%|███████████████████████████████████████████████████████████████████████▍                                                   | 133/229 [01:41<01:01,  1.57it/s][A
Iteration:  59%|███████████████████████████████████████████████████████████████████████▉                                                   | 134/229 [01:41<01:00,  1.58it/s][A
Iteration:  59%|████████████████████████████████████████████████████████████████████████▌                                                  | 135/229 [01:42<00:59,  1.59it/s][A
Iteration:  59%|█████████████████████████████████████████████████████████████████████████                                                  | 136/229 [01:43<00:58,  1.60it/s][A
Iteration:  60%|█████████████████████████████████████████████████████████████████████████▌                                                 | 137/229 [01:43<00:57,  1.60it/s][A
Iteration:  60%|██████████████████████████████████████████████████████████████████████████                         

Iteration:  98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊  | 225/229 [02:39<00:02,  1.56it/s][A
Iteration:  99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 226/229 [02:40<00:01,  1.53it/s][A
Iteration:  99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 227/229 [02:41<00:01,  1.54it/s][A
Iteration: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍| 228/229 [02:41<00:00,  1.55it/s][A
Iteration: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 229/229 [02:42<00:00,  1.41it/s][A
Epoch:  50%|████████████████████████████████████████████████████████████████▌                                      

Iteration:  38%|██████████████████████████████████████████████▌                                                                             | 86/229 [00:54<01:29,  1.60it/s][A
Iteration:  38%|███████████████████████████████████████████████                                                                             | 87/229 [00:55<01:28,  1.60it/s][A
Iteration:  38%|███████████████████████████████████████████████▋                                                                            | 88/229 [00:55<01:27,  1.60it/s][A
Iteration:  39%|████████████████████████████████████████████████▏                                                                           | 89/229 [00:56<01:27,  1.61it/s][A
Iteration:  39%|████████████████████████████████████████████████▋                                                                           | 90/229 [00:57<01:26,  1.61it/s][A
Iteration:  40%|█████████████████████████████████████████████████▎                                                 

Iteration:  62%|████████████████████████████████████████████████████████████████████████████▊                                              | 143/229 [01:47<00:53,  1.60it/s][A
Iteration:  63%|█████████████████████████████████████████████████████████████████████████████▎                                             | 144/229 [01:48<00:53,  1.60it/s][A
Iteration:  63%|█████████████████████████████████████████████████████████████████████████████▉                                             | 145/229 [01:48<00:52,  1.60it/s][A
Iteration:  64%|██████████████████████████████████████████████████████████████████████████████▍                                            | 146/229 [01:49<00:51,  1.60it/s][A
Iteration:  64%|██████████████████████████████████████████████████████████████████████████████▉                                            | 147/229 [01:49<00:52,  1.57it/s][A
Iteration:  65%|███████████████████████████████████████████████████████████████████████████████▍                   

Iteration:   2%|██▏                                                                                                                          | 4/229 [00:02<02:20,  1.60it/s][A
Iteration:   2%|██▋                                                                                                                          | 5/229 [00:03<02:20,  1.60it/s][A
Iteration:   3%|███▎                                                                                                                         | 6/229 [00:03<02:19,  1.60it/s][A
Iteration:   3%|███▊                                                                                                                         | 7/229 [00:04<02:21,  1.57it/s][A
Iteration:   3%|████▎                                                                                                                        | 8/229 [00:05<02:24,  1.53it/s][A
Iteration:   4%|████▉                                                                                              

Iteration:  42%|███████████████████████████████████████████████████▉                                                                        | 96/229 [01:01<01:23,  1.59it/s][A
Iteration:  42%|████████████████████████████████████████████████████▌                                                                       | 97/229 [01:01<01:22,  1.60it/s][A
Iteration:  43%|█████████████████████████████████████████████████████                                                                       | 98/229 [01:02<01:22,  1.60it/s][A
Iteration:  43%|█████████████████████████████████████████████████████▌                                                                      | 99/229 [01:02<01:21,  1.60it/s][A
Iteration:  44%|█████████████████████████████████████████████████████▋                                                                     | 100/229 [01:03<01:20,  1.60it/s][A
Iteration:  44%|██████████████████████████████████████████████████████▏                                            

Iteration:  67%|██████████████████████████████████████████████████████████████████████████████████▏                                        | 153/229 [01:53<00:48,  1.57it/s][A
Iteration:  67%|██████████████████████████████████████████████████████████████████████████████████▋                                        | 154/229 [01:54<00:47,  1.59it/s][A
Iteration:  68%|███████████████████████████████████████████████████████████████████████████████████▎                                       | 155/229 [01:54<00:46,  1.59it/s][A
Iteration:  68%|███████████████████████████████████████████████████████████████████████████████████▊                                       | 156/229 [01:55<00:45,  1.60it/s][A
Iteration:  69%|████████████████████████████████████████████████████████████████████████████████████▎                                      | 157/229 [01:56<00:45,  1.60it/s][A
Iteration:  69%|████████████████████████████████████████████████████████████████████████████████████▊              

Iteration:   6%|███████▌                                                                                                                    | 14/229 [00:08<02:19,  1.54it/s][A
Iteration:   7%|████████                                                                                                                    | 15/229 [00:09<02:17,  1.56it/s][A
Iteration:   7%|████████▋                                                                                                                   | 16/229 [00:10<02:15,  1.57it/s][A
Iteration:   7%|█████████▏                                                                                                                  | 17/229 [00:10<02:13,  1.58it/s][A
Iteration:   8%|█████████▋                                                                                                                  | 18/229 [00:11<02:13,  1.58it/s][A
Iteration:   8%|██████████▎                                                                                        

Iteration:  46%|████████████████████████████████████████████████████████▉                                                                  | 106/229 [01:07<01:19,  1.55it/s][A
Iteration:  47%|█████████████████████████████████████████████████████████▍                                                                 | 107/229 [01:07<01:17,  1.57it/s][A
Iteration:  47%|██████████████████████████████████████████████████████████                                                                 | 108/229 [01:08<01:16,  1.57it/s][A
Iteration:  48%|██████████████████████████████████████████████████████████▌                                                                | 109/229 [01:09<01:15,  1.58it/s][A
Iteration:  48%|███████████████████████████████████████████████████████████                                                                | 110/229 [01:09<01:14,  1.59it/s][A
Iteration:  48%|███████████████████████████████████████████████████████████▌                                       

Evaluating:  14%|█████████████████▏                                                                                                           | 4/29 [00:01<00:10,  2.47it/s][A[A

Evaluating:  17%|█████████████████████▌                                                                                                       | 5/29 [00:02<00:09,  2.46it/s][A[A

Evaluating:  21%|█████████████████████████▊                                                                                                   | 6/29 [00:02<00:09,  2.46it/s][A[A

Evaluating:  24%|██████████████████████████████▏                                                                                              | 7/29 [00:02<00:08,  2.46it/s][A[A

Evaluating:  28%|██████████████████████████████████▍                                                                                          | 8/29 [00:03<00:08,  2.45it/s][A[A

Evaluating:  31%|██████████████████████████████████████▊                                       

Iteration:  91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎          | 209/229 [02:29<00:12,  1.57it/s][A
Iteration:  92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊          | 210/229 [02:29<00:12,  1.58it/s][A
Iteration:  92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎         | 211/229 [02:30<00:11,  1.59it/s][A
Iteration:  93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊         | 212/229 [02:30<00:10,  1.59it/s][A
Iteration:  93%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍        | 213/229 [02:31<00:10,  1.59it/s][A
Iteration:  93%|███████████████████████████████████████████████████████████████████████████████████████████████████

Iteration:  31%|█████████████████████████████████████▉                                                                                      | 70/229 [00:44<01:40,  1.59it/s][A
Iteration:  31%|██████████████████████████████████████▍                                                                                     | 71/229 [00:45<01:39,  1.59it/s][A
Iteration:  31%|██████████████████████████████████████▉                                                                                     | 72/229 [00:45<01:38,  1.59it/s][A
Iteration:  32%|███████████████████████████████████████▌                                                                                    | 73/229 [00:46<01:37,  1.59it/s][A
Iteration:  32%|████████████████████████████████████████                                                                                    | 74/229 [00:47<01:38,  1.57it/s][A
Iteration:  33%|████████████████████████████████████████▌                                                          

Iteration:  71%|███████████████████████████████████████████████████████████████████████████████████████                                    | 162/229 [01:42<00:42,  1.59it/s][A
Iteration:  71%|███████████████████████████████████████████████████████████████████████████████████████▌                                   | 163/229 [01:43<00:41,  1.60it/s][A
Iteration:  72%|████████████████████████████████████████████████████████████████████████████████████████                                   | 164/229 [01:44<00:40,  1.60it/s][A
Iteration:  72%|████████████████████████████████████████████████████████████████████████████████████████▌                                  | 165/229 [01:44<00:39,  1.60it/s][A
Iteration:  72%|█████████████████████████████████████████████████████████████████████████████████████████▏                                 | 166/229 [01:45<00:39,  1.60it/s][A
Iteration:  73%|█████████████████████████████████████████████████████████████████████████████████████████▋         

Iteration:  96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋     | 219/229 [02:34<00:06,  1.62it/s][A
Iteration:  96%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏    | 220/229 [02:35<00:05,  1.63it/s][A
Iteration:  97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋    | 221/229 [02:36<00:04,  1.63it/s][A
Iteration:  97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏   | 222/229 [02:36<00:04,  1.63it/s][A
Iteration:  97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊   | 223/229 [02:37<00:03,  1.64it/s][A
Iteration:  98%|███████████████████████████████████████████████████████████████████████████████████████████████████

Iteration:  35%|███████████████████████████████████████████▎                                                                                | 80/229 [00:49<01:31,  1.62it/s][A
Iteration:  35%|███████████████████████████████████████████▊                                                                                | 81/229 [00:49<01:30,  1.63it/s][A
Iteration:  36%|████████████████████████████████████████████▍                                                                               | 82/229 [00:50<01:30,  1.63it/s][A
Iteration:  36%|████████████████████████████████████████████▉                                                                               | 83/229 [00:51<01:29,  1.63it/s][A
Iteration:  37%|█████████████████████████████████████████████▍                                                                              | 84/229 [00:51<01:29,  1.63it/s][A
Iteration:  37%|██████████████████████████████████████████████                                                     

Iteration:  75%|████████████████████████████████████████████████████████████████████████████████████████████▍                              | 172/229 [01:45<00:34,  1.63it/s][A
Iteration:  76%|████████████████████████████████████████████████████████████████████████████████████████████▉                              | 173/229 [01:46<00:34,  1.63it/s][A
Iteration:  76%|█████████████████████████████████████████████████████████████████████████████████████████████▍                             | 174/229 [01:47<00:33,  1.63it/s][A
Iteration:  76%|█████████████████████████████████████████████████████████████████████████████████████████████▉                             | 175/229 [01:47<00:33,  1.63it/s][A
Iteration:  77%|██████████████████████████████████████████████████████████████████████████████████████████████▌                            | 176/229 [01:48<00:32,  1.63it/s][A
Iteration:  77%|███████████████████████████████████████████████████████████████████████████████████████████████    

Iteration: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 229/229 [02:37<00:00,  1.46it/s][A
Epoch: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [26:26<00:00, 158.64s/it]
04/23/2022 22:07:34 - INFO - __main__ -   ***** Model Loaded *****
04/23/2022 22:07:34 - INFO - __main__ -   ***** Running evaluation on test dataset *****
04/23/2022 22:07:34 - INFO - __main__ -     Num examples = 915
04/23/2022 22:07:34 - INFO - __main__ -     Batch size = 32
Evaluating: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:11<00:00,  2.51it/s]
Use of uninitialized value in subtraction (-) at eval/semeval2010_task8_scorer-v1.2.pl line 286.
Use of uninitialized value in printf at eval/semeval2010_task8_scorer-v1.2.pl line 286.
04/23/2022 

## Evaluation on the Test Set

In [17]:
# Evaluation-only run: pass just the `--do_eval` flag (as an explicit argv list)
# and hand the parsed namespace to main().
args_test = parser.parse_args(["--do_eval"])
main(args_test)

04/23/2022 22:08:15 - INFO - __main__ -   Loading features from cached file ./data/cached_train_semeval_bert-base-uncased_384
04/23/2022 22:08:16 - INFO - __main__ -   Loading features from cached file ./data/cached_test_semeval_bert-base-uncased_384
Some weights of the model checkpoint at bert-base-uncased were not used when initializing RBERT: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing RBERT from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RBERT from the checkpoint of a model that you expect to be exactly identical (initializing

## Predict the relations in a sentence

In [18]:
# Coreference resolution helper
def coref_resolution(text):
    """Replace coreferring mentions in ``text`` with their cluster's main mention.

    The text is parsed with the module-level ``nlp`` pipeline, whose documents
    are expected to expose ``doc._.coref_clusters``. Within every cluster, each
    mention other than the representative (``cluster.main``) is overwritten by
    the representative's text, unless the two texts are equal or share at
    least one space-separated word (the latter skips nested coreferences).

    Args:
        text: Raw input string to resolve.

    Returns:
        The string obtained by joining the document's tokens (with their
        trailing whitespace) after the replacements. When the document has no
        coreference clusters, the tokens are joined without any replacement.
    """
    doc = nlp(text)
    # One entry per token, each keeping its trailing whitespace, so that the
    # list can simply be concatenated at the end.
    pieces = [token.text_with_ws for token in doc]

    # Guard against a missing cluster list: the attribute may be None rather
    # than an empty list when nothing was resolved (believed to be
    # neuralcoref's extension default -- TODO confirm). Both mean "no clusters".
    for cluster in doc._.coref_clusters or []:
        main = cluster.main
        # Words of the representative mention, used to detect nested mentions.
        main_words = set(main.text.split(' '))
        for mention in cluster:
            if (
                mention != main
                and mention.text != main.text
                and main_words.isdisjoint(mention.text.split(' '))
            ):
                # Put the representative text on the mention's first token and
                # keep the whitespace that followed the mention's last token.
                pieces[mention.start] = main.text + doc[mention.end - 1].whitespace_
                # Blank out the remaining tokens of a multi-token mention.
                for idx in range(mention.start + 1, mention.end):
                    pieces[idx] = ""

    return "".join(pieces)

In [19]:
import spacy
import neuralcoref
# Load spaCy's "en_core_web_sm" model. The resulting `nlp` object is the
# module-level pipeline that coref_resolution() and predict_relation() call.
nlp = spacy.load("en_core_web_sm")
# Add neuralcoref to that spaCy pipeline. Presumably this is what provides the
# `doc._.coref_clusters` attribute read by coref_resolution() -- confirm
# against the neuralcoref documentation.
neuralcoref.add_to_pipe(nlp)

def _remove_if_exists(path):
    """Delete `path`, doing nothing when the file is already absent."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _tag_entity_pair(sentence, sub, obj):
    """Wrap the first occurrence of `sub` in <e1> tags and of `obj` in <e2> tags.

    Both positions are looked up in the untagged sentence. Inserting the <e1>
    markup only moves text located at or after `sub`, so the <e2> position is
    shifted only in that case; an `obj` found before `sub` keeps its offset.
    NOTE(review): occurrences of `sub` and `obj` that overlap each other cannot
    be tagged cleanly with this scheme.
    """
    sub_pos = sentence.find(sub)
    obj_pos = sentence.find(obj)
    tagged = sentence[:sub_pos] + '<e1> ' + sub + ' </e1>' + sentence[sub_pos + len(sub):]
    shift = len(tagged) - len(sentence) if obj_pos >= sub_pos else 0
    e2_start = obj_pos + shift
    return tagged[:e2_start] + '<e2> ' + obj + ' </e2>' + tagged[e2_start + len(obj):]


def predict_relation(sentence):
    """Predict a relation for every candidate entity pair found in `sentence`.

    Pipeline:
      1. Resolve coreferences with `coref_resolution` and print the sentence
         before and after.
      2. Collect the spaCy entities labelled ORG, DATE, PERSON, LOC, GPE or
         NORP and build every 2-combination of them (in order of first
         appearance), skipping pairs whose first entity is a DATE.
      3. Write one <e1>/<e2>-tagged copy of the sentence per pair to
         data/test.tsv and the matching number of answer keys to
         eval/answer_keys.txt.
      4. Run `main` with `--do_eval` and read the predicted labels back from
         eval/proposed_answers.txt.

    Side effects: deletes data/cached_test_semeval_bert-base-uncased_384 and
    eval/proposed_answers.txt when they exist, and overwrites data/test.tsv
    and eval/answer_keys.txt.

    Args:
        sentence: raw input text.

    Returns:
        A pandas DataFrame with the columns 'entity_1', 'realation' and
        'entity_2', one row per tagged pair. The frame is empty (and no file
        is touched) when the sentence yields no candidate pair.

    Raises:
        ValueError: if data/test.tsv contains no row to borrow a label from.
    """
    # coreference resolution
    print("Given sentance:\n",sentence)
    sentence = coref_resolution(sentence)
    print("\nAfter Co-referance resoluation sentance is:\n",sentence)

    # Entity text -> entity label, restricted to the labels of interest.
    wanted_labels = ('ORG', 'DATE', 'PERSON', 'LOC', 'GPE', 'NORP')
    doc = nlp(sentence)
    ent_dict = {ent.text: ent.label_ for ent in doc.ents if ent.label_ in wanted_labels}

    # One tagged sentence per entity pair; a DATE is never used as the subject.
    tagged_sent = [
        _tag_entity_pair(sentence, sub, obj)
        for sub, obj in itertools.combinations(ent_dict, 2)
        if ent_dict[sub] != 'DATE'
    ]
    n_pairs = len(tagged_sent)

    result_columns = ['entity_1', 'realation', 'entity_2']
    if n_pairs == 0:
        # Nothing to classify: with zero pairs `-n_pairs` would slice the whole
        # frame, so bail out before any file is modified.
        return pd.DataFrame(columns=result_columns)

    # Borrow an existing label from test.tsv as a placeholder for the new rows.
    # When the file holds at least `n_pairs` rows this is the label of the
    # n_pairs-th row from the end; otherwise the first row's label is used.
    test = pd.read_csv('data/test.tsv', sep='\t', names=[0, 1])
    if test.empty:
        raise ValueError("data/test.tsv has no rows to take a placeholder label from")
    placeholder_label = test.iloc[-min(n_pairs, len(test)), 0]

    # remove prev data (presumably so the evaluation re-reads the rewritten
    # test.tsv instead of stale cached features and answers)
    _remove_if_exists('data/cached_test_semeval_bert-base-uncased_384')
    _remove_if_exists('eval/proposed_answers.txt')

    # put examples in test file: built as a fresh frame instead of assigning
    # through chained indexing, which only modifies a temporary copy.
    new_test = pd.DataFrame({0: [placeholder_label] * n_pairs, 1: tagged_sent})
    new_test.to_csv("data/test.tsv", sep="\t", index=False, header=False)

    # answer keys updated to partial: keep one key per tagged sentence, taken
    # from the first lines of the master key file.
    # NOTE(review): 915 and the '8' prefix are kept from the original code;
    # presumably the size of the full test set and its id prefix -- confirm.
    ans_txt = []
    with open('eval/answer_keys_1.txt', 'r') as master_keys:
        for cnt, line in enumerate(itertools.islice(master_keys, n_pairs), start=1):
            if cnt > 915 - n_pairs:
                ans_txt.append('8' + str(cnt) + '\tDATE_OF_BIRTH(e1,e2)\n')
            else:
                ans_txt.append(line)
    with open("eval/answer_keys.txt", 'w') as fp:
        fp.writelines(ans_txt)

    # Lets predict the relation
    args_test = parser.parse_args("--do_eval".split())
    main(args_test)

    # Second tab-separated field of every proposed answer is the predicted label.
    with open("eval/proposed_answers.txt") as fp:
        pred = [line.strip().split('\t')[1] for line in fp]

    test_pred = pd.read_csv('data/test.tsv', sep='\t', names=[0, 1])

    # Recover both entities from the markup of each tagged sentence
    # ('<e1>' and '<e2>' are four characters long).
    subjects = []
    objects = []
    for tagged in test_pred[1]:
        subjects.append(tagged[tagged.find('<e1>') + 4:tagged.find('</e1>')])
        objects.append(tagged[tagged.find('<e2>') + 4:tagged.find('</e2>')])

    test_pred['entity_1'] = subjects
    test_pred['realation'] = pred
    test_pred['entity_2'] = objects

    return test_pred.iloc[-n_pairs:, -3:].reset_index(drop=True)

04/23/2022 22:08:37 - INFO - neuralcoref -   Loading model from /home/mane1/.neuralcoref_cache/neuralcoref


In [20]:
# Example sentence used to demonstrate predict_relation in the cells below.
sentence = 'The youngest of 13 surviving children, Tagore was born on 7 May 1861 in the Jorasanko mansion in Calcutta, the son of Debendranath Tagore and Sarada Devi.'

In [21]:
# Show the raw input sentence (predict_relation prints the same line again when called).
print("Given sentance:\n",sentence)

Given sentance:
 The youngest of 13 surviving children, Tagore was born on 7 May 1861 in the Jorasanko mansion in Calcutta, the son of Debendranath Tagore and Sarada Devi.


In [22]:
# Run the full prediction on the example sentence; the returned DataFrame
# (entity_1, realation, entity_2) is rendered as the cell output.
predict_relation(sentence)

Given sentance:
 The youngest of 13 surviving children, Tagore was born on 7 May 1861 in the Jorasanko mansion in Calcutta, the son of Debendranath Tagore and Sarada Devi.

After Co-referance resoluation sentance is:
 The youngest of 13 surviving children, Tagore was born on 7 May 1861 in the Jorasanko mansion in Calcutta, the son of Debendranath Tagore and Sarada Devi.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
04/23/2022 22:08:41 - INFO - __main__ -   Loading features from cached file ./data/cached_train_semeval_bert-base-uncased_384
04/23/2022 22:08:42 - INFO - __main__ -   Creating features from dataset file at ./data
04/23/2022 22:08:42 - INFO - __main__ -   LOOKING AT ./data/test.tsv
04/23/2022 22:08:42 - INFO - __main__ -   ['EMPLOYEE_OR_MEMBER_OF(e1,e2)', 'The youngest of 13 surviving children, <e1> Tagore </e1> was born on <e2> 7 May 1861 </e2> in the Jorasanko mansion

04/23/2022 22:08:42 - INFO - __main__ -   label: 0 (id = 0)
04/23/2022 22:08:42 - INFO - __main__ -   e1_mask: 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
04/23/2022 22:08:42 - INFO - __main__ -   e2_mask: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 

04/23/2022 22:08:42 - INFO - __main__ -   token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
04/23/2022 22:08:42 - INFO - __main__ -   label: 0 (id = 0)
04/23/2022 22:08:42 - INFO - __main__ -   e1_mask: 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

04/23/2022 22:08:47 - INFO - __main__ -   ***** Model Loaded *****
04/23/2022 22:08:47 - INFO - __main__ -   ***** Running evaluation on test dataset *****
04/23/2022 22:08:47 - INFO - __main__ -     Num examples = 7
04/23/2022 22:08:47 - INFO - __main__ -     Batch size = 32
Evaluating: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 10.68it/s]
Use of uninitialized value in subtraction (-) at eval/semeval2010_task8_scorer-v1.2.pl line 286.
Use of uninitialized value in printf at eval/semeval2010_task8_scorer-v1.2.pl line 286.
04/23/2022 22:08:47 - INFO - __main__ -   ***** Eval results *****
04/23/2022 22:08:47 - INFO - __main__ -     acc = 0.0000
04/23/2022 22:08:47 - INFO - __main__ -     f1 = 0.0571
04/23/2022 22:08:47 - INFO - __main__ -     loss = 9.6017


Unnamed: 0,entity_1,realation,entity_2
0,Tagore,"DATE_OF_BIRTH(e1,e2)",7 May 1861
1,Tagore,"PLACE_OF_BIRTH(e1,e2)",Calcutta
2,Tagore,"PLACE_OF_BIRTH(e1,e2)",Debendranath Tagore
3,Tagore,"PLACE_OF_BIRTH(e1,e2)",Sarada Devi
4,Calcutta,"PLACE_OF_BIRTH(e1,e2)",Debendranath Tagore
5,Calcutta,"PLACE_OF_BIRTH(e1,e2)",Sarada Devi
6,Debendranath Tagore,"PLACE_OF_BIRTH(e1,e2)",Sarada Devi
