In [31]:
! pip install nltk



In [37]:
import nltk

# word_tokenize needs the Punkt sentence-tokenizer data. Older NLTK releases
# load the 'punkt' package while recent releases load 'punkt_tab', so fetch
# both to keep the notebook runnable with an unpinned NLTK install.
for _nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_resource)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\odaim\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [38]:
import json
import os
from unidecode import unidecode

from nltk.tokenize import word_tokenize

In [78]:
def _normalize_abbreviations(text):
    """Strip the trailing period from the abbreviations handled by this notebook."""
    for abbreviation, replacement in (('Jr.', 'Jr'), ('U.S.A.', 'U.S.A'), ('P.M.', 'P.M')):
        text = text.replace(abbreviation, replacement)
    return text


def _find_span(tokens, mention_tokens):
    """Return the inclusive (start, end) indices of a mention inside `tokens`.

    The mention is matched as a contiguous run of tokens (first occurrence).
    Raises ValueError when the mention cannot be located.
    """
    if not mention_tokens:
        raise ValueError('mention has no tokens')
    width = len(mention_tokens)
    for start in range(len(tokens) - width + 1):
        if tokens[start:start + width] == mention_tokens:
            return start, start + width - 1
    # The mention tokenized on its own can differ from its tokenization inside
    # the sentence. Fall back to locating its first and last token, but only
    # accept an end position at or after the start.
    start = tokens.index(mention_tokens[0])
    end = tokens.index(mention_tokens[-1], start)
    return start, end


def normalize_nyt_sample(sample):
    """Convert one NYT JSON line into the span-based document format.

    Args:
        sample: JSON string with the keys 'sentText', 'articleId',
            'entityMentions' (items with 'text' and 'label') and
            'relationMentions' (items with 'em1Text', 'em2Text' and 'label').

    Returns:
        dict with 'doc_key', 'sentences' (one token list), 'ner'
        (one list of [start, end, label]), 'relations' (one list of
        [src_start, src_end, tgt_start, tgt_end, label]) and an empty
        'clusters' list. All indices are inclusive token positions.

    Raises:
        ValueError: if a mention cannot be found among the sentence tokens.
        KeyError: if an expected key is missing from the sample.

    Note:
        The input carries no character offsets, so a mention text that occurs
        several times in the sentence always maps to its first occurrence.
    """
    data = json.loads(sample)
    tokens = word_tokenize(_normalize_abbreviations(data['sentText']))

    entities = []
    for entity in data['entityMentions']:
        mention = word_tokenize(_normalize_abbreviations(entity['text']))
        start, end = _find_span(tokens, mention)
        entities.append([start, end, entity['label'].title()])

    rels = []
    for relation in data['relationMentions']:
        # Keep only the last path component of the label, e.g. '/a/b/place_lived' -> 'PLACE-LIVED'.
        label = relation['label'].split('/')[-1].replace('_', '-').upper()
        # Relation arguments get the same abbreviation handling as the sentence,
        # otherwise their tokens cannot match the sentence tokens.
        source = word_tokenize(_normalize_abbreviations(unidecode(relation['em1Text'])))
        target = word_tokenize(_normalize_abbreviations(unidecode(relation['em2Text'])))
        source_start, source_end = _find_span(tokens, source)
        target_start, target_end = _find_span(tokens, target)
        rels.append([source_start, source_end, target_start, target_end, label])

    return {
        'doc_key': data['articleId'],
        'sentences': [tokens],
        'ner': [entities],
        'relations': [rels],
        'clusters': [],
    }

In [40]:
def write_normal_data(in_dir, out_dir, append=True):
    """Normalize every JSON line of a raw NYT file and write the results as JSON lines.

    Args:
        in_dir: path of the raw file, one JSON sample per line.
        out_dir: path of the normalized output file.
        append: when True (the default, matching the previous behaviour) new
            records are appended to an existing output file; pass False to
            overwrite it, which makes re-runs idempotent.

    Samples that cannot be normalized are printed and skipped instead of
    aborting the conversion, so one bad record no longer truncates the rest of
    the dataset. Blank lines are ignored.
    """
    skipped = 0
    mode = 'a' if append else 'w'
    # Open both files once, with an explicit encoding so the result does not
    # depend on the platform default.
    with open(in_dir, encoding='utf-8') as raw, open(out_dir, mode, encoding='utf-8') as normalized:
        for line in raw:
            if not line.strip():
                continue
            try:
                mapped_sample = normalize_nyt_sample(line)
            except Exception:
                # Catch Exception rather than everything so Ctrl-C still interrupts.
                print(line)
                skipped += 1
                continue
            normalized.write(json.dumps(mapped_sample) + "\n")
    if skipped:
        print('Skipped %d sample(s) that could not be normalized' % skipped)

In [41]:
# Directory holding the raw and normalized NYT splits, resolved against the
# current working directory. The trailing slash matters: later cells build
# file paths by plain string concatenation.
nyt_data_dir = os.getcwd() + '/other_data/nyt_er_dataset/'

In [42]:
nyt_train_data_path = nyt_data_dir + 'train.json'
nyt_train_norm_data_path = nyt_data_dir + 'norm_train.json'

# write_normal_data appends to its output file, so re-running this cell would
# duplicate every record; remove a stale output first to keep the cell idempotent.
if os.path.exists(nyt_train_norm_data_path):
    os.remove(nyt_train_norm_data_path)
write_normal_data(nyt_train_data_path, nyt_train_norm_data_path)

In [43]:
nyt_valid_data_path = nyt_data_dir + 'valid.json'
nyt_valid_norm_data_path = nyt_data_dir + 'norm_valid.json'

# write_normal_data appends to its output file, so re-running this cell would
# duplicate every record; remove a stale output first to keep the cell idempotent.
if os.path.exists(nyt_valid_norm_data_path):
    os.remove(nyt_valid_norm_data_path)
write_normal_data(nyt_valid_data_path, nyt_valid_norm_data_path)

In [44]:
nyt_test_data_path = nyt_data_dir + 'test.json'
nyt_test_norm_data_path = nyt_data_dir + 'norm_test.json'

# write_normal_data appends to its output file, so re-running this cell would
# duplicate every record; remove a stale output first to keep the cell idempotent.
if os.path.exists(nyt_test_norm_data_path):
    os.remove(nyt_test_norm_data_path)
write_normal_data(nyt_test_data_path, nyt_test_norm_data_path)

In [45]:
%run entity_model/entity_setup.ipynb

In [46]:
# Entity label inventory per task. The 'nyt' labels are the title-cased form
# of the labels produced by normalize_nyt_sample (label.title()).
task_ner_labels = {
    'ace04': ['FAC', 'WEA', 'LOC', 'VEH', 'GPE', 'ORG', 'PER'],
    'ace05': ['FAC', 'WEA', 'LOC', 'VEH', 'GPE', 'ORG', 'PER'],
    'scierc': ['Method', 'OtherScientificTerm', 'Task', 'Generic', 'Material', 'Metric'],
    'nyt': ['Location', 'Person', 'Organization']
}

In [47]:
# Settings for the entity model cells below.
data_dir = nyt_data_dir  # directory containing the norm_*.json splits
output_dir = os.getcwd() + '/nyt_models/ent-scib-ctx0/'  # where entity model artifacts are written
task = 'nyt'  # key into task_ner_labels
max_span_length = 8  # passed to convert_dataset_to_samples and EntityModel
context_window = 0  # passed to convert_dataset_to_samples
eval_batch_size = 32  # batch size handed to batchify for dev/test data
test_pred_filename = 'ent_pred_test.json'  # joined with output_dir when writing test predictions
dev_pred_filename = 'ent_pred_dev.json'  # joined with output_dir when writing dev predictions

In [48]:
# Paths of the normalized splits written by write_normal_data.
# NOTE(review): later cells rebind train_data / dev_data to Dataset objects,
# so these names only hold paths until those cells have run.
train_data = os.path.join(data_dir, 'norm_train.json')
dev_data = os.path.join(data_dir, 'norm_valid.json')
test_data = os.path.join(data_dir, 'norm_test.json')

In [49]:
# Create the output directory if needed; exist_ok avoids the check-then-create race.
os.makedirs(output_dir, exist_ok=True)

In [50]:
# Build the label<->id lookup tables for the selected task's entity labels.
# get_labelmap comes from entity_model/entity_setup.ipynb (loaded via %run).
ner_label2id, ner_id2label = get_labelmap(task_ner_labels[task])

In [51]:
# Build the Dataset from the explicit path instead of `Dataset(dev_data)`:
# rebinding dev_data from a path to a Dataset made the cell fail on a second
# run, because the Dataset object was then passed where a path is expected.
dev_data = Dataset(os.path.join(data_dir, 'norm_valid.json'))
dev_samples, dev_ner = convert_dataset_to_samples(dev_data, max_span_length, ner_label2id=ner_label2id, context_window=context_window)
dev_batches = batchify(dev_samples, eval_batch_size)

01/27/2024 15:36:50 - INFO - root - # Overlap: 0
01/27/2024 15:36:50 - INFO - root - Extracted 5000 samples from 5000 documents, with 15923 NER labels, 37.763 avg input length, 100 max length
01/27/2024 15:36:50 - INFO - root - Max Length: 100, max NER: 13


In [60]:
# Settings for training the entity model from scratch. This cell rebinds
# several names set by the earlier entity config cell (output_dir,
# context_window, ...), so the values in effect depend on cell execution order.
data_dir = nyt_data_dir
output_dir = os.getcwd() + '/nyt_models/from-scratch/ent-scib-ctx0/'
task = 'nyt'
# One more class than there are entity labels.
# NOTE(review): presumably the extra class is "not an entity" - confirm in entity_setup.
num_ner_labels = len(task_ner_labels[task]) + 1
max_span_length = 8
# NOTE(review): 300 here, while the output directory is named 'ctx0' and the
# earlier config cell uses context_window = 0 - confirm which value is intended.
context_window = 300
eval_batch_size = 32
train_batch_size = 2
learning_rate = 1e-5  # default lr given to AdamW in the training cell
task_learning_rate = 5e-4  # lr of the parameter group whose names do not contain 'bert'
bertadam = True # If bertadam, then set correct_bias = False
num_epoch = 10 # number of the training epochs
warmup_proportion = 0.1 # the ratio of the warmup steps to the total steps
eval_per_epoch = 1 # how often evaluating the trained model on dev set during training
train_shuffle = True # whether to train with randomly shuffled data
print_loss_step = 100 # how often logging the loss value during training

In [53]:
# Create the output directory if needed; exist_ok avoids the check-then-create race.
os.makedirs(output_dir, exist_ok=True)

In [140]:
# Instantiate the entity model on top of the SciBERT (uncased) checkpoint.
# EntityModel comes from entity_model/entity_setup.ipynb (loaded via %run).
model = EntityModel(model='allenai/scibert_scivocab_uncased', use_albert=False, max_span_length=max_span_length, num_ner_labels=num_ner_labels)

01/25/2024 23:22:25 - INFO - transformers.tokenization_utils_base - Model name 'allenai/scibert_scivocab_uncased' not found in model shortcut name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese, bert-base-german-cased, bert-large-uncased-whole-word-masking, bert-large-cased-whole-word-masking, bert-large-uncased-whole-word-masking-finetuned-squad, bert-large-cased-whole-word-masking-finetuned-squad, bert-base-cased-finetuned-mrpc, bert-base-german-dbmdz-cased, bert-base-german-dbmdz-uncased, TurkuNLP/bert-base-finnish-cased-v1, TurkuNLP/bert-base-finnish-uncased-v1, wietsedv/bert-base-dutch-cased). Assuming 'allenai/scibert_scivocab_uncased' is a path, a model identifier, or url to a directory containing tokenizer files.
01/25/2024 23:22:29 - INFO - transformers.tokenization_utils_base - loading file https://s3.amazonaws.com/models.huggingface.co/bert/allenai/scibert_scivoca

In [54]:
# Build the Dataset from the explicit path instead of `Dataset(train_data)`:
# rebinding train_data from a path to a Dataset made the cell fail on a second
# run, because the Dataset object was then passed where a path is expected.
train_data = Dataset(os.path.join(data_dir, 'norm_train.json'))

In [142]:
# Train the entity model and keep the checkpoint with the best dev F1.
train_samples, train_ner = convert_dataset_to_samples(train_data, max_span_length, ner_label2id=ner_label2id, context_window=context_window)
train_batches = batchify(train_samples, train_batch_size)
best_result = 0.0

# Two parameter groups: parameters whose name contains 'bert' use the default
# learning rate given to AdamW, the remaining ones use task_learning_rate.
param_optimizer = list(model.bert_model.named_parameters())
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer
        if 'bert' in n]},
    {'params': [p for n, p in param_optimizer
        if 'bert' not in n], 'lr': task_learning_rate}]
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate, correct_bias=not(bertadam))
t_total = len(train_batches) * num_epoch
scheduler = get_linear_schedule_with_warmup(optimizer, int(t_total*warmup_proportion), t_total)

tr_loss = 0
tr_examples = 0
global_step = 0
# Clamp to 1 so a train set with fewer batches than eval_per_epoch does not
# cause a modulo-by-zero below (same guard as in the relation training cell).
eval_step = max(1, len(train_batches) // eval_per_epoch)
# Use a named epoch counter: `_` is IPython's last-output variable and should
# not be rebound by a loop.
for epoch in tqdm(range(num_epoch), position=0, leave=True):
    if train_shuffle:
        random.shuffle(train_batches)
    for i in tqdm(range(len(train_batches)), position=0, leave=True):
        output_dict = model.run_batch(train_batches[i], training=True)
        loss = output_dict['ner_loss']
        loss.backward()

        tr_loss += loss.item()
        tr_examples += len(train_batches[i])
        global_step += 1

        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

        if global_step % print_loss_step == 0:
            # Report the average loss per example since the last report.
            logger.info('Epoch=%d, iter=%d, loss=%.5f'%(epoch, i, tr_loss / tr_examples))
            tr_loss = 0
            tr_examples = 0

        if global_step % eval_step == 0:
            f1 = evaluate(model, dev_batches, dev_ner)
            if f1 > best_result:
                best_result = f1
                logger.info('!!! Best valid (epoch=%d): %.2f' % (epoch, f1*100))
                save_model(model, output_dir)

01/25/2024 23:24:22 - INFO - root - # Overlap: 0
01/25/2024 23:24:22 - INFO - root - Extracted 56196 samples from 56196 documents, with 177461 NER labels, 37.817 avg input length, 100 max length
01/25/2024 23:24:22 - INFO - root - Max Length: 100, max NER: 20
  0%|          | 99/28098 [00:19<1:19:37,  5.86it/s]01/25/2024 23:24:42 - INFO - root - Epoch=0, iter=99, loss=526.39888
  1%|          | 199/28098 [00:36<1:17:48,  5.98it/s]01/25/2024 23:24:59 - INFO - root - Epoch=0, iter=199, loss=489.87780
  1%|          | 299/28098 [00:53<1:13:44,  6.28it/s]01/25/2024 23:25:16 - INFO - root - Epoch=0, iter=299, loss=419.22339
  1%|▏         | 399/28098 [01:11<1:16:48,  6.01it/s]01/25/2024 23:25:34 - INFO - root - Epoch=0, iter=399, loss=133.45635
  2%|▏         | 499/28098 [01:28<1:21:15,  5.66it/s]01/25/2024 23:25:51 - INFO - root - Epoch=0, iter=499, loss=28.05477
  2%|▏         | 599/28098 [01:46<1:21:01,  5.66it/s]01/25/2024 23:26:09 - INFO - root - Epoch=0, iter=599, loss=22.22239
  2%|▏

KeyboardInterrupt: 

In [20]:
# Reload the entity model from the directory the training cell saved it to.
# NOTE(review): output_dir is rebound by the relation config cell further
# down; this cell must run while output_dir still points at the entity model
# directory - confirm the intended execution order.
bert_model_dir = output_dir
num_ner_labels = len(task_ner_labels[task]) + 1
model = EntityModel(model='allenai/scibert_scivocab_uncased', bert_model_dir=bert_model_dir, use_albert=False, max_span_length=max_span_length, num_ner_labels=num_ner_labels)

01/27/2024 15:18:36 - INFO - root - Loading BERT model from C:\Users\odaim\Documents\PURE reproduction/nyt_models/from-scratch/ent-scib-ctx0//
01/27/2024 15:18:36 - INFO - transformers.tokenization_utils_base - Model name 'C:\Users\odaim\Documents\PURE reproduction/nyt_models/from-scratch/ent-scib-ctx0//' not found in model shortcut name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese, bert-base-german-cased, bert-large-uncased-whole-word-masking, bert-large-cased-whole-word-masking, bert-large-uncased-whole-word-masking-finetuned-squad, bert-large-cased-whole-word-masking-finetuned-squad, bert-base-cased-finetuned-mrpc, bert-base-german-dbmdz-cased, bert-base-german-dbmdz-uncased, TurkuNLP/bert-base-finnish-cased-v1, TurkuNLP/bert-base-finnish-uncased-v1, wietsedv/bert-base-dutch-cased). Assuming 'C:\Users\odaim\Documents\PURE reproduction/nyt_models/from-scratch/ent-scib-ct

In [24]:
# Evaluate the entity model on the dev split and write its predictions to
# output_dir/dev_pred_filename (later read by the relation model cells).
dev_data = Dataset(os.path.join(data_dir, 'norm_valid.json'))
prediction_file = os.path.join(output_dir, dev_pred_filename)
    
dev_samples, dev_ner = convert_dataset_to_samples(dev_data, max_span_length, ner_label2id=ner_label2id, context_window=context_window)
dev_batches = batchify(dev_samples, eval_batch_size)
evaluate(model, dev_batches, dev_ner)
output_ner_predictions(model, dev_batches, dev_data, output_file=prediction_file)

01/27/2024 15:25:23 - INFO - root - # Overlap: 0
01/27/2024 15:25:23 - INFO - root - Extracted 5000 samples from 5000 documents, with 15923 NER labels, 37.763 avg input length, 100 max length
01/27/2024 15:25:23 - INFO - root - Max Length: 100, max NER: 13
01/27/2024 15:25:23 - INFO - root - Evaluating...
01/27/2024 15:28:32 - INFO - root - Accuracy: 0.998733
01/27/2024 15:28:32 - INFO - root - Cor: 14324, Pred TOT: 15405, Gold TOT: 15923
01/27/2024 15:28:32 - INFO - root - P: 0.92983, R: 0.89958, F1: 0.91445
01/27/2024 15:28:32 - INFO - root - Used time: 189.390934
01/27/2024 15:31:46 - INFO - root - Total pred entities: 15405
01/27/2024 15:31:46 - INFO - root - Output predictions to C:\Users\odaim\Documents\PURE reproduction/nyt_models/from-scratch/ent-scib-ctx0/ent_pred_dev.json..


In [23]:
# Evaluate the entity model on the test split and write its predictions to
# output_dir/test_pred_filename (later read by the relation model cells).
test_data = Dataset(os.path.join(data_dir, 'norm_test.json'))
prediction_file = os.path.join(output_dir, test_pred_filename)
    
test_samples, test_ner = convert_dataset_to_samples(test_data, max_span_length, ner_label2id=ner_label2id, context_window=context_window)
test_batches = batchify(test_samples, eval_batch_size)
evaluate(model, test_batches, test_ner)
output_ner_predictions(model, test_batches, test_data, output_file=prediction_file)

01/27/2024 15:18:58 - INFO - root - # Overlap: 0
01/27/2024 15:18:58 - INFO - root - Extracted 5000 samples from 5000 documents, with 15861 NER labels, 37.855 avg input length, 100 max length
01/27/2024 15:18:58 - INFO - root - Max Length: 100, max NER: 18
01/27/2024 15:18:58 - INFO - root - Evaluating...
01/27/2024 15:22:09 - INFO - root - Accuracy: 0.998716
01/27/2024 15:22:09 - INFO - root - Cor: 14246, Pred TOT: 15341, Gold TOT: 15861
01/27/2024 15:22:09 - INFO - root - P: 0.92862, R: 0.89818, F1: 0.91315
01/27/2024 15:22:09 - INFO - root - Used time: 191.220874
01/27/2024 15:25:21 - INFO - root - Total pred entities: 15341
01/27/2024 15:25:21 - INFO - root - Output predictions to C:\Users\odaim\Documents\PURE reproduction/nyt_models/from-scratch/ent-scib-ctx0/ent_pred_test.json..


In [55]:
%run relation_model/relation_setup.ipynb

In [79]:
# ---- Relation model configuration -------------------------------------------

# Run switches
do_train = True
do_eval = True
eval_test = True
eval_with_gold = True
no_cuda = False
seed = 0

# Pretrained encoder / tokenizer
model_name = 'allenai/scibert_scivocab_uncased'
do_lower_case = True
add_new_tokens = False
max_seq_length = 128
context_window = 0
BertLayerNorm = torch.nn.LayerNorm

# Inputs: gold training data plus the entity model's dev/test predictions
task = 'nyt'
train_file = nyt_train_norm_data_path
entity_output_dir = os.getcwd() + '/nyt_models/from-scratch/ent-scib-ctx0/'
entity_predictions_dev = 'ent_pred_dev.json'
entity_predictions_test = 'ent_pred_test.json'

# Outputs
output_dir = os.getcwd() + '/nyt_models/from-scratch/rel-scib-ctx0/'
prediction_file = 'predictions.json'

# Optimisation
train_mode = 'random_sorted'
train_batch_size = 8
eval_batch_size = 8
num_train_epochs = 2
eval_per_epoch = 10
learning_rate = 2e-5
bertadam = True
warmup_proportion = 0.1
eval_metric = 'f1'

# Label inventory; negative_label is prepended to the task's relation labels
# when the label list is built.
negative_label = 'no_relation'
task_rel_labels = {
    'ace04': ['PER-SOC', 'OTHER-AFF', 'ART', 'GPE-AFF', 'EMP-ORG', 'PHYS'],
    'ace05': ['ART', 'ORG-AFF', 'GEN-AFF', 'PHYS', 'PER-SOC', 'PART-WHOLE'],
    'scierc': ['PART-OF', 'USED-FOR', 'FEATURE-OF', 'CONJUNCTION', 'EVALUATE-FOR', 'HYPONYM-OF', 'COMPARE'],
    'nyt': ['ADVISORS', 'LOCATION', 'MAJOR-SHAREHOLDER-OF', 'PLACE-OF-DEATH', 'NATIONALITY', 'TEAMS', 'GEOGRAPHIC-DISTRIBUTION', 'INDUSTRY', 'MAJOR-SHAREHOLDERS', 'CAPITAL', 'ETHNICITY', 'COUNTRY', 'CONTAINS', 'CHILDREN', 'PEOPLE', 'COMPANY', 'PLACE-LIVED', 'FOUNDERS', 'ADMINISTRATIVE-DIVISIONS', 'PLACE-FOUNDED', 'NEIGHBORHOOD-OF', 'PROFESSION', 'PLACE-OF-BIRTH', 'RELIGION']
}

In [80]:
# ---- Relation model: data loading, label list, tokenizer, dev features ------
# Helpers used here (BertForRelation, generate_relation_data, setseed,
# convert_examples_to_features, add_marker_tokens, logger, ...) are not defined
# in this notebook; they are expected to come from the %run setup notebooks.
CLS = "[CLS]"
SEP = "[SEP]"

RelationModel = BertForRelation

# Use the GPU unless it is unavailable or explicitly disabled via no_cuda.
device = torch.device("cuda" if torch.cuda.is_available() and not no_cuda else "cpu")
n_gpu = torch.cuda.device_count()

# Training examples are always generated with gold entities (use_gold=True).
if do_train:
    train_dataset, train_examples, train_nrel = generate_relation_data(train_file, use_gold=True, context_window=context_window)
# Dev examples come from the entity model's dev predictions; they are needed
# when evaluating during training, or when evaluating on dev instead of test.
if (do_eval and do_train) or (do_eval and not(eval_test)):
    eval_dataset, eval_examples, eval_nrel = generate_relation_data(os.path.join(entity_output_dir, entity_predictions_dev), use_gold=eval_with_gold, context_window=context_window)
# Test examples come from the entity model's test predictions.
if eval_test:
    test_dataset, test_examples, test_nrel = generate_relation_data(os.path.join(entity_output_dir, entity_predictions_test), use_gold=eval_with_gold, context_window=context_window)

setseed(seed)

if not do_train and not do_eval:
    raise ValueError("At least one of `do_train` or `do_eval` must be True.")

if not os.path.exists(output_dir):
    os.makedirs(output_dir)
# Mirror the log into output_dir; mode 'w' overwrites the log of a previous run.
if do_train:
    logger.addHandler(logging.FileHandler(os.path.join(output_dir, "train.log"), 'w'))
else:
    logger.addHandler(logging.FileHandler(os.path.join(output_dir, "eval.log"), 'w'))

# Reuse the label list stored in output_dir when there is one, so label ids
# stay the same across runs; otherwise build it (negative label first) and store it.
if os.path.exists(os.path.join(output_dir, 'label_list.json')):
    with open(os.path.join(output_dir, 'label_list.json'), 'r') as f:
        label_list = json.load(f)
else:
    label_list = [negative_label] + task_rel_labels[task]
    with open(os.path.join(output_dir, 'label_list.json'), 'w') as f:
        json.dump(label_list, f)
label2id = {label: i for i, label in enumerate(label_list)}
id2label = {i: label for i, label in enumerate(label_list)}
num_labels = len(label_list)

tokenizer = AutoTokenizer.from_pretrained(model_name, do_lower_case=do_lower_case)
if add_new_tokens:
    add_marker_tokens(tokenizer, task_ner_labels[task])

# Load the special-token mapping stored in output_dir if present; otherwise start empty.
if os.path.exists(os.path.join(output_dir, 'special_tokens.json')):
    with open(os.path.join(output_dir, 'special_tokens.json'), 'r') as f:
        special_tokens = json.load(f)
else:
    special_tokens = {}

# Dev features and dataloader (same condition as the dev example generation above).
if do_eval and (do_train or not(eval_test)):
    eval_features = convert_examples_to_features(
        eval_examples, label2id, max_seq_length, tokenizer, special_tokens, unused_tokens=not(add_new_tokens))
    logger.info("***** Dev *****")
    logger.info("  Num examples = %d", len(eval_examples))
    logger.info("  Batch size = %d", eval_batch_size)
    all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
    all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
    all_sub_idx = torch.tensor([f.sub_idx for f in eval_features], dtype=torch.long)
    all_obj_idx = torch.tensor([f.obj_idx for f in eval_features], dtype=torch.long)
    eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids, all_sub_idx, all_obj_idx)
    eval_dataloader = DataLoader(eval_data, batch_size=eval_batch_size)
    eval_label_ids = all_label_ids

    
# ---- Relation model: training ------------------------------------------------
if do_train:
    train_features = convert_examples_to_features(
        train_examples, label2id, max_seq_length, tokenizer, special_tokens, unused_tokens=not(add_new_tokens))
    # 'sorted' / 'random_sorted': order examples by the sum of their input mask
    # so that each batch holds examples of similar length.
    if train_mode == 'sorted' or train_mode == 'random_sorted':
        train_features = sorted(train_features, key=lambda f: np.sum(f.input_mask))
    else:
        random.shuffle(train_features)
    all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
    all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long)
    all_sub_idx = torch.tensor([f.sub_idx for f in train_features], dtype=torch.long)
    all_obj_idx = torch.tensor([f.obj_idx for f in train_features], dtype=torch.long)
    train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids, all_sub_idx, all_obj_idx)
    train_dataloader = DataLoader(train_data, batch_size=train_batch_size)
    # Materialize the batches once so they can be reshuffled at batch level each epoch.
    train_batches = [batch for batch in train_dataloader]

    num_train_optimization_steps = len(train_dataloader) * num_train_epochs

    logger.info("***** Training *****")
    logger.info("  Num examples = %d", len(train_examples))
    logger.info("  Batch size = %d", train_batch_size)
    logger.info("  Num steps = %d", num_train_optimization_steps)

    best_result = None
    eval_step = max(1, len(train_batches) // eval_per_epoch)

    lr = learning_rate
    model = RelationModel.from_pretrained(
        'allenai/scibert_scivocab_uncased', cache_dir=str(PYTORCH_PRETRAINED_BERT_CACHE), num_rel_labels=num_labels)
    if hasattr(model, 'bert'):
        model.bert.resize_token_embeddings(len(tokenizer))
    elif hasattr(model, 'albert'):
        model.albert.resize_token_embeddings(len(tokenizer))
    else:
        raise TypeError("Unknown model class")

    model.to(device)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # No weight decay for bias and LayerNorm parameters.
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=lr, correct_bias=not(bertadam))
    scheduler = get_linear_schedule_with_warmup(optimizer, int(num_train_optimization_steps * warmup_proportion), num_train_optimization_steps)

    start_time = time.time()
    global_step = 0
    tr_loss = 0
    nb_tr_examples = 0
    nb_tr_steps = 0
    for epoch in range(int(num_train_epochs)):
        model.train()
        logger.info("Start epoch #{} (lr = {})...".format(epoch, lr))
        if train_mode == 'random' or train_mode == 'random_sorted':
            random.shuffle(train_batches)
        for step, batch in enumerate(train_batches):
            batch = tuple(t.to(device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids, sub_idx, obj_idx = batch
            loss = model(input_ids, segment_ids, input_mask, label_ids, sub_idx, obj_idx)
            if n_gpu > 1:
                loss = loss.mean()

            loss.backward()

            tr_loss += loss.item()
            nb_tr_examples += input_ids.size(0)
            nb_tr_steps += 1

            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
            global_step += 1

            if (step + 1) % eval_step == 0:
                logger.info('Epoch: {}, Step: {} / {}, used_time = {:.2f}s, loss = {:.6f}'.format(
                            epoch, step + 1, len(train_batches),
                            time.time() - start_time, tr_loss / nb_tr_steps))
                if do_eval:
                    preds, result, logits = evaluate(model, device, eval_dataloader, eval_label_ids, num_labels, e2e_ngold=eval_nrel)
                    # Back to training mode after the evaluation pass.
                    model.train()
                    result['global_step'] = global_step
                    result['epoch'] = epoch
                    result['learning_rate'] = lr
                    result['batch_size'] = train_batch_size

                    if (best_result is None) or (result[eval_metric] > best_result[eval_metric]):
                        best_result = result
                        logger.info("!!! Best dev %s (lr=%s, epoch=%d): %.2f" %
                                    (eval_metric, str(lr), epoch, result[eval_metric] * 100.0))
                        # Persist the best checkpoint: the evaluation below reloads the
                        # model from output_dir. Previously this spot only assigned
                        # `save_model = False`, which wrote nothing and also rebound the
                        # name the entity training cell calls as a function.
                        # Unwrap DataParallel before saving.
                        # NOTE(review): assumes RelationModel is a transformers
                        # PreTrainedModel subclass (it is loaded via from_pretrained).
                        model_to_save = model.module if hasattr(model, 'module') else model
                        model_to_save.save_pretrained(output_dir)

# ---- Relation model: final evaluation ----------------------------------------
# Guarded by do_eval rather than nested under do_train, so an evaluation-only
# run (do_train=False) still evaluates the checkpoint stored in output_dir.
if do_eval:
    if eval_test:
        # Evaluate on the test split: replace the dev evaluation objects.
        eval_dataset = test_dataset
        eval_examples = test_examples
        eval_features = convert_examples_to_features(
            test_examples, label2id, max_seq_length, tokenizer, special_tokens, unused_tokens=not(add_new_tokens))
        eval_nrel = test_nrel
        logger.info(special_tokens)
        logger.info("***** Test *****")
        logger.info("  Num examples = %d", len(test_examples))
        logger.info("  Batch size = %d", eval_batch_size)
        all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
        all_sub_idx = torch.tensor([f.sub_idx for f in eval_features], dtype=torch.long)
        all_obj_idx = torch.tensor([f.obj_idx for f in eval_features], dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids, all_sub_idx, all_obj_idx)
        eval_dataloader = DataLoader(eval_data, batch_size=eval_batch_size)
        eval_label_ids = all_label_ids
    # Score the best saved checkpoint, not the weights of the last training step.
    model = RelationModel.from_pretrained(output_dir, num_rel_labels=num_labels)
    model.to(device)
    preds, result, logits = evaluate(model, device, eval_dataloader, eval_label_ids, num_labels, e2e_ngold=eval_nrel)

    logger.info('*** Evaluation Results ***')
    for key in sorted(result.keys()):
        logger.info("  %s = %s", key, str(result[key]))

    print_pred_json(eval_dataset, eval_examples, preds, id2label, os.path.join(output_dir, prediction_file))

01/27/2024 15:48:04 - INFO - run_relation - Generate relation data from C:\Users\odaim\Documents\PURE reproduction/other_data/nyt_er_dataset/norm_train.json
01/27/2024 15:48:11 - INFO - run_relation - #samples: 497776, max #sent.samples: 380
01/27/2024 15:48:12 - INFO - run_relation - Generate relation data from C:\Users\odaim\Documents\PURE reproduction/nyt_models/from-scratch/ent-scib-ctx0/ent_pred_dev.json
01/27/2024 15:48:13 - INFO - run_relation - #samples: 44856, max #sent.samples: 156
01/27/2024 15:48:13 - INFO - run_relation - Generate relation data from C:\Users\odaim\Documents\PURE reproduction/nyt_models/from-scratch/ent-scib-ctx0/ent_pred_test.json
01/27/2024 15:48:15 - INFO - run_relation - #samples: 44516, max #sent.samples: 306
01/27/2024 15:48:16 - INFO - transformers.configuration_utils - loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/allenai/scibert_scivocab_uncased/config.json from cache at C:\Users\odaim/.cache\torch\transformers\199e