In [1]:
import json
import os
from tqdm import tqdm
import re
from IPython.core.debugger import set_trace
from pprint import pprint
import unicodedata
from transformers import AutoModel, BasicTokenizer, BertTokenizerFast
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
import glob
import time
from layers import LayerNorm
import wandb
from utils import Preprocessor, HandshakingTaggingScheme
import logging
from glove import Glove
import numpy as np

In [2]:
logger = logging.getLogger()
logger.setLevel(logging.WARNING)

In [3]:
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
torch.cuda.is_available()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device {} will be used".format(device))

device cuda:0 will be used


In [4]:
pretrained_model_home = "/data/yubowen/experiments/relextr/pretrained_model"
project_root = "/data/yubowen/experiments/relextr"
data_home = os.path.join(project_root, "data")

experiment_dir = os.path.join(project_root, "exp")
experiment_name = "webnlg_single"
    
webnlg_data_dir = os.path.join(data_home, experiment_name, "triples")
webnlg_train_data_path = os.path.join(webnlg_data_dir, "train_triples.json")
webnlg_valid_data_path = os.path.join(webnlg_data_dir, "valid_triples.json")
webnlg_test_data_path_dict = {
    "test_triples": os.path.join(webnlg_data_dir, "test_triples.json"),
    "test_triples_epo": os.path.join(webnlg_data_dir, "test_split_by_type", "test_triples_epo.json"),
    "test_triples_seo": os.path.join(webnlg_data_dir, "test_split_by_type", "test_triples_seo.json"),
    "test_triples_normal": os.path.join(webnlg_data_dir, "test_split_by_type", "test_triples_normal.json"),
    "test_triples_1": os.path.join(webnlg_data_dir, "test_split_by_num", "test_triples_1.json"),
    "test_triples_2": os.path.join(webnlg_data_dir, "test_split_by_num", "test_triples_2.json"),
    "test_triples_3": os.path.join(webnlg_data_dir, "test_split_by_num", "test_triples_3.json"),
    "test_triples_4": os.path.join(webnlg_data_dir, "test_split_by_num", "test_triples_4.json"),
    "test_triples_5": os.path.join(webnlg_data_dir, "test_split_by_num", "test_triples_5.json"),
}

In [5]:
wandb.init(project = experiment_name, name = "BiLSTM")

Failed to query for notebook name, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable


W&B Run: https://app.wandb.ai/wycheng/webnlg_single/runs/2w4et4vj

In [6]:
# hyperparameters
config = wandb.config          # Initialize config
config.batch_size = 6          # input batch size for training (default: 64)
config.test_batch_size = 100    # input batch size for testing (default: 1000)
config.epochs = 50             # number of epochs to train (default: 10)
config.lr = 1e-3               # learning rate (default: 0.01)
config.seed = 2333               # random seed (default: 42)
config.log_interval = 10  
config.max_seq_len = 100
config.sliding_len = 20
config.loss_weight_recover_steps = 10000

config.word_embedding_dim = 100
config.rnn_hidden_size = 256
config.dropout = 0.1

config.word_embedding_path = os.path.join(data_home, "pretrained_word_embeddings", "glove_100_webnlg.emb")

torch.manual_seed(config.seed) # pytorch random seed
torch.backends.cudnn.deterministic = True

model_state_dict_dir = wandb.run.dir
schedule_state_dict_dir = wandb.run.dir

Failed to query for notebook name, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable


# Load Data

In [7]:
webnlg_train_data = json.load(open(webnlg_train_data_path, "r", encoding = "utf-8"))
webnlg_valid_data = json.load(open(webnlg_valid_data_path, "r", encoding = "utf-8"))
webnlg_test_data_dict = {}
for file_name, path in webnlg_test_data_path_dict.items():
    webnlg_test_data_dict[file_name] = json.load(open(path, "r", encoding = "utf-8"))

# Preprocess

In [8]:
def get_tok2char_span_map(text):
    tokens = text.split(" ")
    tok2char_span = []
    char_num = 0
    for tok in tokens:
        tok2char_span.append((char_num, char_num + len(tok)))
        char_num += len(tok) + 1 # +1: whitespace
    return tok2char_span

In [9]:
def tran2normal_samples(data):
    normal_sample_list = []
    for sample in tqdm(data, desc = "Transforming data format"):
        text = sample["text"]
        spo_list = sample["triple_list"]
        normal_sample = {
            "text": text,
            "id": sample["id"],
        }
        normal_rel_list = []
        for rel in spo_list:
            normal_rel_list.append({
                "subject": rel[0],
                "predicate": rel[1],
                "object": rel[2],
            })
        normal_sample["relation_list"] = normal_rel_list
        normal_sample_list.append(normal_sample)
    return normal_sample_list

In [10]:
preprocessor = Preprocessor(transform_func = tran2normal_samples, 
                            get_tok2char_span_map_func = get_tok2char_span_map)

train_data = preprocessor.get_normal_dataset(webnlg_train_data, add_id = True, dataset_name = "train")
valid_data = preprocessor.get_normal_dataset(webnlg_valid_data, add_id = True, dataset_name = "valid")
test_data_dict = {}
for file_name, data in webnlg_test_data_dict.items():
    preprocessed_data = preprocessor.get_normal_dataset(data, add_id = True, dataset_name = "file_name")
    test_data_dict[file_name] = preprocessed_data

5019it [00:00, 531033.04it/s]
Transforming data format: 100%|██████████| 5019/5019 [00:00<00:00, 154668.91it/s]
Adding token level spans: 100%|██████████| 5019/5019 [00:01<00:00, 4952.29it/s]
500it [00:00, 365548.54it/s]
Transforming data format: 100%|██████████| 500/500 [00:00<00:00, 139623.97it/s]
Adding token level spans: 100%|██████████| 500/500 [00:00<00:00, 4825.71it/s]
703it [00:00, 407940.75it/s]
Transforming data format: 100%|██████████| 703/703 [00:00<00:00, 101679.22it/s]
Adding token level spans: 100%|██████████| 703/703 [00:00<00:00, 4626.74it/s]
26it [00:00, 78398.21it/s]
Transforming data format: 100%|██████████| 26/26 [00:00<00:00, 62854.12it/s]
Adding token level spans: 100%|██████████| 26/26 [00:00<00:00, 3041.64it/s]
457it [00:00, 318246.21it/s]
Transforming data format: 100%|██████████| 457/457 [00:00<00:00, 80254.44it/s]
Adding token level spans: 100%|██████████| 457/457 [00:00<00:00, 4854.97it/s]
246it [00:00, 371685.44it/s]
Transforming data format: 100%|████████

# Split

In [11]:
def split_into_short_samples(sample_list, sliding_len = 50):
    new_sample_list = []
    for sample in tqdm(sample_list, desc = "Splitting"):
        text_id = sample["id"]
        text = sample["text"]
        
        offset_map = get_tok2char_span_map(text)
        tokens = [text[a:b] for a,b in offset_map]
        
        # sliding on token level
        split_sample_list = []
        for start_ind in range(0, len(tokens), sliding_len):
            end_ind = start_ind + config.max_seq_len
#             while "##" in tokens[end_ind]:
#                 end_ind += 1
            char_span_list = offset_map[start_ind:end_ind]
            char_level_span = (char_span_list[0][0], char_span_list[-1][1])
            sub_text = text[char_level_span[0]:char_level_span[1]]

            new_sample = {
                "id": text_id,
                "text": sub_text,
                "relation_list": []
            }
            for rel in sample["relation_list"]:
                subj_span = rel["subj_span"]
                obj_span = rel["obj_span"]
                if subj_span[0] >= start_ind and subj_span[1] <= end_ind \
                    and obj_span[0] >= start_ind and obj_span[1] <= end_ind:
                    new_rel = copy.deepcopy(rel)
                    new_rel["subj_span"] = (subj_span[0] - start_ind, subj_span[1] - start_ind)
                    new_rel["obj_span"] = (obj_span[0] - start_ind, obj_span[1] - start_ind)
                    new_sample["relation_list"].append(new_rel)
#                 else:
#                     set_trace()
            if len(new_sample["relation_list"]) > 0:
                split_sample_list.append(new_sample)
#         if len(split_sample_list) == 0:
#             set_trace()
        new_sample_list.extend(split_sample_list)
    return new_sample_list

In [12]:
short_train_data = split_into_short_samples(train_data, sliding_len = config.sliding_len)
short_valid_data = split_into_short_samples(valid_data, sliding_len = config.sliding_len)

Splitting: 100%|██████████| 5019/5019 [00:00<00:00, 5093.94it/s]
Splitting: 100%|██████████| 500/500 [00:00<00:00, 7473.76it/s]


In [13]:
short_test_data_dict = {}
for file_name, data in test_data_dict.items():
    short_test_data = split_into_short_samples(data, sliding_len = config.sliding_len)
    short_test_data_dict[file_name] = short_test_data

Splitting: 100%|██████████| 703/703 [00:00<00:00, 7418.19it/s]
Splitting: 100%|██████████| 26/26 [00:00<00:00, 3073.18it/s]
Splitting: 100%|██████████| 457/457 [00:00<00:00, 5398.77it/s]
Splitting: 100%|██████████| 246/246 [00:00<00:00, 16120.85it/s]
Splitting: 100%|██████████| 266/266 [00:00<00:00, 15762.94it/s]
Splitting: 100%|██████████| 171/171 [00:00<00:00, 8309.88it/s]
Splitting: 100%|██████████| 131/131 [00:00<00:00, 5844.38it/s]
Splitting: 100%|██████████| 90/90 [00:00<00:00, 4064.82it/s]
Splitting: 100%|██████████| 45/45 [00:00<00:00, 2782.93it/s]


In [14]:
print("train: {}".format(len(short_train_data)), "valid: {}".format(len(short_valid_data)))
for fil_name, data in short_test_data_dict.items():
        print("{}: {}".format(fil_name, len(data)))

train: 5882 valid: 580
test_triples: 827
test_triples_epo: 36
test_triples_seo: 576
test_triples_normal: 251
test_triples_1: 271
test_triples_2: 185
test_triples_3: 154
test_triples_4: 137
test_triples_5: 80


# Tagging

In [15]:
from collections import defaultdict
rel_set = set()
word2num = defaultdict(int)
word2idx = {'<PAD>':0, '<UNK>':1}
idx2word = {}
idx = 2

all_data = train_data + valid_data 
for data in list(short_test_data_dict.values()):
    all_data.extend(data)
    
for sample in tqdm(all_data):
    for rel in sample["relation_list"]:
        rel_set.add(rel["predicate"])
    text = sample['text']
    span_list = get_tok2char_span_map(text)
    for span in span_list:
        word = text[span[0]:span[1]]
        word2num[word] += 1

#过滤出现次数小于3的word
for k,v in word2num.items():
    if v < 3:
        continue
    word2idx[k] = idx
    idx += 1
for k,v in word2idx.items():
    idx2word[v] = k
rel_set = sorted(rel_set)

100%|██████████| 8036/8036 [00:00<00:00, 24785.95it/s]


In [16]:
len(word2idx)

2895

In [17]:
rel2id = {rel:ind for ind, rel in enumerate(rel_set)}

In [18]:
handshaking_tagger = HandshakingTaggingScheme(rel2id = rel2id, max_seq_len = config.max_seq_len)

In [19]:
def sample_equal_to(sample1, sample2):
    assert sample1["id"] == sample2["id"]
    assert sample1["text"] == sample2["text"]
    memory_set = set()
    for rel in sample2["relation_list"]:
        memory = "{}\u2E80{}\u2E80{}\u2E80{}\u2E80{}".format(rel["subject"], 
                                                             rel["predicate"], 
                                                             rel["object"], 
                                                             str(rel["subj_span"]), 
                                                             str(rel["obj_span"]))
        memory_set.add(memory)
    for rel in sample1["relation_list"]:
        memory = "{}\u2E80{}\u2E80{}\u2E80{}\u2E80{}".format(rel["subject"], 
                                                             rel["predicate"], 
                                                             rel["object"], 
                                                             str(rel["subj_span"]), 
                                                             str(rel["obj_span"]))
        if memory not in memory_set:
            set_trace()
            return False
    return True

# Dataset

In [20]:
# @specific
def get_indexed_train_valid_data(data):
    indexed_samples = []
    for ind, sample in tqdm(enumerate(data), desc = "Generate indexed train or valid data"):
        text = sample["text"]
        text_id = sample["id"]
        
        
        # tagging
        spots_tuple = handshaking_tagger.get_spots(sample)
        offset_map = get_tok2char_span_map(text)
        input_ids = []
        for idx in offset_map:
            word = text[idx[0]:idx[1]]
            if word not in word2idx:
                input_ids.append(word2idx['<UNK>'])
            else:
                input_ids.append(word2idx[word])
        if len(input_ids) < config.max_seq_len:
            input_ids.extend([word2idx['<PAD>']] * (config.max_seq_len - len(input_ids)))
        input_ids = torch.tensor(input_ids[:config.max_seq_len])
        
        sample_tp = (text_id,
                     text, 
                     input_ids,
                     offset_map,
                     spots_tuple,
                    )
        indexed_samples.append(sample_tp)       
    return indexed_samples

In [21]:
# @specific
def get_indexed_pred_data(data):
    indexed_samples = []
    for ind, sample in tqdm(enumerate(data), desc = "Generate indexed pred data"):
        text = sample["text"] 
        text_id = sample["id"]
        # @specific
        codes = tokenizer.encode_plus(text, 
                                    return_offsets_mapping = True, 
                                    add_special_tokens = False,
                                    max_length = max_seq_len, 
                                    pad_to_max_length = True)
        
        input_ids = torch.tensor(codes["input_ids"]).long()
        attention_mask = torch.tensor(codes["attention_mask"]).long()
        token_type_ids = torch.tensor(codes["token_type_ids"]).long()
        offset_map = codes["offset_mapping"]

        sample_tp = (text_id,
                     text, 
                     input_ids,
                     attention_mask,
                     token_type_ids,
                     offset_map,
                     )
        indexed_samples.append(sample_tp)       
    return indexed_samples

In [22]:
class MyDataset(Dataset):
    def __init__(self, data):
        self.data = data
        
    def __getitem__(self, index):
        return self.data[index]
    
    def __len__(self):
        return len(self.data)

In [23]:
def generate_train_dev_batch(batch_data):
    text_id_list = []
    text_list = []
    input_ids_list = []
    attention_mask_list = []
    token_type_ids_list = [] 
    offset_map_list = []
    ent_spots_list = []
    head_rel_spots_list = []
    tail_rel_spots_list = []
    
    for sample in batch_data:
        text_id_list.append(sample[0])
        text_list.append(sample[1])
        input_ids_list.append(sample[2])    
        offset_map_list.append(sample[3])
        
        ent_matrix_spots, head_rel_matrix_spots, tail_rel_matrix_spots = sample[4]
        ent_spots_list.append(ent_matrix_spots)
        head_rel_spots_list.append(head_rel_matrix_spots)
        tail_rel_spots_list.append(tail_rel_matrix_spots)
    
    # @specific: codes indexed by bert tokenizer
    batch_input_ids = torch.stack(input_ids_list, dim = 0)
    batch_attention_mask = None
    batch_token_type_ids = None

    batch_ent_shaking_tag = handshaking_tagger.sharing_spots2shaking_tag4batch(ent_spots_list)
    batch_head_rel_shaking_tag = handshaking_tagger.spots2shaking_tag4batch(head_rel_spots_list)
    batch_tail_rel_shaking_tag = handshaking_tagger.spots2shaking_tag4batch(tail_rel_spots_list)

    return text_id_list, text_list, batch_input_ids, batch_attention_mask, batch_token_type_ids, offset_map_list, batch_ent_shaking_tag, batch_head_rel_shaking_tag, batch_tail_rel_shaking_tag

In [24]:
def generate_pred_batch(batch_data):
    text_ids = []
    text_list = []
    input_ids = []
    attention_mask = []
    token_type_ids = [] 
    offset_map = []
    for sample in batch_data:
        text_ids.append(sample[0])
        text_list.append(sample[1])
        input_ids.append(sample[2])
        attention_mask.append(sample[3])        
        token_type_ids.append(sample[4])        
        offset_map.append(sample[5])
    input_ids = torch.stack(input_ids, dim = 0)
    attention_mask = torch.stack(attention_mask, dim = 0)
    token_type_ids = torch.stack(token_type_ids, dim = 0)
    return text_ids, text_list, input_ids, attention_mask, token_type_ids, offset_map

In [25]:
# @uni
def get_train_dev_dataloader_gen(indexed_train_sample_list, indexed_dev_sample_list, batch_size):
    train_dataloader = DataLoader(MyDataset(indexed_train_sample_list), 
                                      batch_size = batch_size, 
                                      shuffle = True, 
                                      num_workers = 6,
                                      drop_last = False,
                                      collate_fn = generate_train_dev_batch,
                                     )
    dev_dataloader = DataLoader(MyDataset(indexed_dev_sample_list), 
                              batch_size = batch_size, 
                              shuffle = True, 
                              num_workers = 6,
                              drop_last = False,
                              collate_fn = generate_train_dev_batch,
                             )
    return train_dataloader, dev_dataloader

In [26]:
indexed_train_data = get_indexed_train_valid_data(short_train_data)

Generate indexed train or valid data: 5882it [00:00, 7797.31it/s] 


In [27]:
indexed_valid_data = get_indexed_train_valid_data(short_valid_data)

Generate indexed train or valid data: 580it [00:00, 11233.93it/s]


In [28]:
# have a look at dataloader
train_dataloader, dev_dataloader = get_train_dev_dataloader_gen(indexed_train_data, indexed_valid_data, 32)

In [29]:
train_data_iter = iter(train_dataloader)
batch_data = next(train_data_iter)
text_id_list, text_list, batch_input_ids, \
batch_attention_mask, batch_token_type_ids, \
offset_map_list, batch_ent_shaking_tag, \
batch_head_rel_shaking_tag, batch_tail_rel_shaking_tag = batch_data

print(text_list[0])
print()
print(batch_input_ids[0].tolist())
print(batch_input_ids.size())
# print(batch_attention_mask.size())
# print(batch_token_type_ids.size())
print(len(offset_map_list))
print(batch_ent_shaking_tag.size())
print(batch_head_rel_shaking_tag.size())
print(batch_tail_rel_shaking_tag.size())

los Reyes , in Spain .

[973, 974, 4, 12, 966, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
torch.Size([32, 100])
32
torch.Size([32, 5050])
torch.Size([32, 171, 5050])
torch.Size([32, 171, 5050])


# Load Word Embedding Matrix

In [30]:
glove = Glove()
glove = glove.load(config.word_embedding_path)

In [31]:
# prepare embedding matrix
word_embedding_init_matrix = np.random.normal(-1, 1, size=(len(word2idx), config.word_embedding_dim))
count_in = 0

# 在预训练词向量中的用该预训练向量
# 不在预训练集里的用随机向量
for ind, tok in tqdm(idx2word.items(), desc="Embedding matrix initializing..."):
    if tok in glove.dictionary:
        count_in += 1
        word_embedding_init_matrix[ind] = glove.word_vectors[glove.dictionary[tok]]
        
print(count_in / len(idx2word)) # 命中预训练词向量的比例

Embedding matrix initializing...: 100%|██████████| 2895/2895 [00:00<00:00, 239956.33it/s]

0.9993091537132988





In [32]:
word_embedding_init_matrix = torch.FloatTensor(word_embedding_init_matrix)
word_embedding_init_matrix.size()

torch.Size([2895, 100])

# Model

In [33]:
class RelExtractor(nn.Module):
    def __init__(self, init_word_embedding_matrix, hidden_size, dropout, rel_size):
        super().__init__()
        # BiLSTM encoder
        self.word_embeds = nn.Embedding.from_pretrained(init_word_embedding_matrix, freeze = False)
        self.dropout = nn.Dropout(dropout)
        self.lstm = nn.LSTM(init_word_embedding_matrix.size()[-1], 
                            hidden_size // 2, 
                            num_layers = 2, 
                            bidirectional = True, 
                            batch_first = True)


        self.ent_fc = nn.Linear(hidden_size, 2)
        self.head_rel_fc_list = [nn.Linear(hidden_size, 3) for _ in range(rel_size)]
        self.tail_rel_fc_list = [nn.Linear(hidden_size, 3) for _ in range(rel_size)]
        
        for ind, fc in enumerate(self.head_rel_fc_list):
            self.register_parameter("weight_4_head_rel{}".format(ind), fc.weight)
            self.register_parameter("bias_4_head_rel{}".format(ind), fc.bias)
        for ind, fc in enumerate(self.tail_rel_fc_list):
            self.register_parameter("weight_4_tail_rel{}".format(ind), fc.weight)
            self.register_parameter("bias_4_tail_rel{}".format(ind), fc.bias)
            
        # conditional layer normaliztion
        fake_inputs = torch.zeros([config.batch_size, config.max_seq_len, hidden_size])
        self.cond_layer_norm = LayerNorm(fake_inputs.size(), hidden_size, conditional = True)
        
    def forward(self, input_ids, attention_mask, token_type_ids):
        # BiLSTM encoder
        embedding = self.word_embeds(input_ids)
        outputs, hidden = self.lstm(embedding)
        last_hidden_state = self.dropout(outputs)  # last_hidden_state: (batch_size, seq_len, hidden_size) 
        
        # shaking_hiddens: (batch_size, 1 + ... + seq_len, hidden_size)
        shaking_hiddens = self.shake_hands_afterwards(last_hidden_state)
        
        ent_shaking_outputs = self.ent_fc(shaking_hiddens)
            
        head_rel_shaking_outputs_list = []
        for fc in self.head_rel_fc_list:
            head_rel_shaking_outputs_list.append(fc(shaking_hiddens))
            
        tail_rel_shaking_outputs_list = []
        for fc in self.tail_rel_fc_list:
            tail_rel_shaking_outputs_list.append(fc(shaking_hiddens))
        
        head_rel_shaking_outputs = torch.stack(head_rel_shaking_outputs_list, dim = 1)
        tail_rel_shaking_outputs = torch.stack(tail_rel_shaking_outputs_list, dim = 1)
        
        return ent_shaking_outputs, head_rel_shaking_outputs, tail_rel_shaking_outputs

    def shake_hands_afterwards(self, seq_hiddens):
        '''
        seq_hiddens: (batch_size, seq_len, hidden_size) (32, 3, 5)
        return shake_hands_matrix_hiddens: (batch_size, (1 + seq_len) * seq_len / 2, hidden_size) (32, 5+4+3+2+1, 5)
        '''
        seq_len = seq_hiddens.size()[-2]
        shake_hands_hidden_list = []
        for ind in range(seq_len):
            hidden_each_step = seq_hiddens[:, ind, :]
            # seq_len - ind: only shake afterwards
            repeat_hidden_each_step = hidden_each_step[:, None, :].repeat(1, seq_len - ind, 1) 
    #         shake_hands_hidden = torch.cat([repeat_hidden_each_step, seq_hiddens[:, ind:, :]], dim = -1)
            shake_hands_hidden = self.cond_layer_norm(seq_hiddens[:, ind:, :], repeat_hidden_each_step)
            shake_hands_hidden_list.append(shake_hands_hidden)
        shake_hands_matrix_hiddens = torch.cat(shake_hands_hidden_list, dim = 1)
        return shake_hands_matrix_hiddens

In [34]:
rel_extractor = RelExtractor(word_embedding_init_matrix, config.rnn_hidden_size, config.dropout, len(rel2id))
rel_extractor = rel_extractor.to(device)

In [35]:
def bias_loss(weights = None):
    if weights is not None:
        weights = torch.FloatTensor(weights).to(device)
    cross_en = nn.CrossEntropyLoss(weight = weights)  
    return lambda pred, target: cross_en(pred.view(-1, pred.size()[-1]), target.view(-1))
loss_func = bias_loss()

In [36]:
def get_sample_accuracy(pred, truth):
    '''
    计算所有抽取字段都正确的样本比例
    即该batch的输出与truth全等的样本比例
    '''
    # (batch_size, ..., seq_len, tag_size) -> (batch_size, ..., seq_len)
    pred_id = torch.argmax(pred, dim = -1)
    # (batch_size, ..., seq_len) -> (batch_size, )，把每个sample压成一条seq
    pred_id = pred_id.view(pred_id.size()[0], -1)
    truth = truth.view(truth.size()[0], -1)
    
    # (batch_size, )，每个元素是pred与truth之间tag相同的数量
    correct_tag_num = torch.sum(torch.eq(truth, pred_id).float(), dim = 1)

    # seq维上所有tag必须正确，所以correct_tag_num必须等于seq的长度才算一个correct的sample
    sample_acc_ = torch.eq(correct_tag_num, torch.ones_like(correct_tag_num) * truth.size()[-1]).float()
    sample_acc = torch.mean(sample_acc_)
    
    return sample_acc

In [37]:
def get_rel_cpg(text_list, offset_map_list, 
                 batch_pred_ent_shaking_outputs,
                 batch_pred_head_rel_shaking_outputs,
                 batch_pred_tail_rel_shaking_outputs,
                 batch_gold_ent_shaking_tag,
                 batch_gold_head_rel_shaking_tag,
                 batch_gold_tail_rel_shaking_tag):
    batch_pred_ent_shaking_tag = torch.argmax(batch_pred_ent_shaking_outputs, dim = -1)
    batch_pred_head_rel_shaking_tag = torch.argmax(batch_pred_head_rel_shaking_outputs, dim = -1)
    batch_pred_tail_rel_shaking_tag = torch.argmax(batch_pred_tail_rel_shaking_outputs, dim = -1)
    
    correct_num, pred_num, gold_num = 0, 0, 0
    for ind in range(len(text_list)):
        text = text_list[ind]
        offset_map = offset_map_list[ind]
        gold_ent_shaking_tag, pred_ent_shaking_tag = batch_gold_ent_shaking_tag[ind], batch_pred_ent_shaking_tag[ind]
        gold_head_rel_shaking_tag, pred_head_rel_shaking_tag = batch_gold_head_rel_shaking_tag[ind], batch_pred_head_rel_shaking_tag[ind]
        gold_tail_rel_shaking_tag, pred_tail_rel_shaking_tag = batch_gold_tail_rel_shaking_tag[ind], batch_pred_tail_rel_shaking_tag[ind]
        
        pred_rel_list = handshaking_tagger.decode_rel_fr_shaking_tag(text, 
                                                  pred_ent_shaking_tag, 
                                                  pred_head_rel_shaking_tag, 
                                                  pred_tail_rel_shaking_tag, 
                                                  offset_map)
        gold_rel_list = handshaking_tagger.decode_rel_fr_shaking_tag(text, 
                                                  gold_ent_shaking_tag, 
                                                  gold_head_rel_shaking_tag, 
                                                  gold_tail_rel_shaking_tag, 
                                                  offset_map)

        gold_rel_set = set(["{}\u2E80{}\u2E80{}".format(rel["subject"], rel["predicate"], rel["object"]) for rel in gold_rel_list])
        pred_rel_set = set(["{}\u2E80{}\u2E80{}".format(rel["subject"], rel["predicate"], rel["object"]) for rel in pred_rel_list])
        
        for rel_str in pred_rel_set:
            if rel_str in gold_rel_set:
                correct_num += 1
        
        pred_num += len(gold_rel_set)
        gold_num += len(pred_rel_set)
        
    return correct_num, pred_num, gold_num

In [38]:
def get_scores(correct_num, pred_num, gold_num):
    minimini = 1e-10
    precision = correct_num / (pred_num + minimini)
    recall = correct_num / (gold_num + minimini)
    f1 = 2 * precision * recall / (precision + recall + minimini)
    return precision, recall, f1

# Train

In [39]:
# train step
def train_step(batch_train_data, optimizer, loss_weights):
    text_id_list, text_list, batch_input_ids, \
    batch_attention_mask, batch_token_type_ids, \
    offset_map_list, batch_ent_shaking_tag, \
    batch_head_rel_shaking_tag, batch_tail_rel_shaking_tag = batch_train_data
    
    batch_input_ids, \
    batch_attention_mask, \
    batch_token_type_ids, \
    batch_ent_shaking_tag, \
    batch_head_rel_shaking_tag, \
    batch_tail_rel_shaking_tag = (batch_input_ids.to(device), 
                                  None,
                                  None,
                              batch_ent_shaking_tag.to(device), 
                              batch_head_rel_shaking_tag.to(device), 
                              batch_tail_rel_shaking_tag.to(device)
                             )

    # zero the parameter gradients
    optimizer.zero_grad()
    
    ent_shaking_outputs, \
    head_rel_shaking_outputs, \
    tail_rel_shaking_outputs = rel_extractor(batch_input_ids, 
                                              batch_attention_mask, 
                                              batch_token_type_ids, 
                                             )

    w_ent, w_rel = loss_weights["ent"], loss_weights["rel"]
    loss = w_ent * loss_func(ent_shaking_outputs, batch_ent_shaking_tag) + \
            w_rel * loss_func(head_rel_shaking_outputs, batch_head_rel_shaking_tag) + \
            w_rel * loss_func(tail_rel_shaking_outputs, batch_tail_rel_shaking_tag)
    
    # bp time: 2s
    loss.backward()
    optimizer.step()
    
    ent_sample_acc = get_sample_accuracy(ent_shaking_outputs, 
                                          batch_ent_shaking_tag)
    head_rel_sample_acc = get_sample_accuracy(head_rel_shaking_outputs, 
                                             batch_head_rel_shaking_tag)
    tail_rel_sample_acc = get_sample_accuracy(tail_rel_shaking_outputs, 
                                             batch_tail_rel_shaking_tag)
    
    return loss.item(), ent_sample_acc.item(), head_rel_sample_acc.item(), tail_rel_sample_acc.item()

# valid step
def valid_step(batch_valid_data):
    text_id_list, text_list, batch_input_ids, \
    batch_attention_mask, batch_token_type_ids, \
    offset_map_list, batch_ent_shaking_tag, \
    batch_head_rel_shaking_tag, batch_tail_rel_shaking_tag = batch_valid_data
    
    batch_input_ids, \
    batch_attention_mask, \
    batch_token_type_ids, \
    batch_ent_shaking_tag, \
    batch_head_rel_shaking_tag, \
    batch_tail_rel_shaking_tag = (batch_input_ids.to(device), 
                                  None,
                                  None,
                              batch_ent_shaking_tag.to(device), 
                              batch_head_rel_shaking_tag.to(device), 
                              batch_tail_rel_shaking_tag.to(device)
                             )
    with torch.no_grad():
        ent_shaking_outputs, \
        head_rel_shaking_outputs, \
        tail_rel_shaking_outputs = rel_extractor(batch_input_ids, 
                                                  batch_attention_mask, 
                                                  batch_token_type_ids, 
                                                 )
    
    ent_sample_acc = get_sample_accuracy(ent_shaking_outputs, 
                                          batch_ent_shaking_tag)
    head_rel_sample_acc = get_sample_accuracy(head_rel_shaking_outputs, 
                                             batch_head_rel_shaking_tag)
    tail_rel_sample_acc = get_sample_accuracy(tail_rel_shaking_outputs, 
                                             batch_tail_rel_shaking_tag)
    
    rel_cpg = get_rel_cpg(text_list, offset_map_list, 
                            ent_shaking_outputs,
                            head_rel_shaking_outputs,
                            tail_rel_shaking_outputs,
                            batch_ent_shaking_tag,
                            batch_head_rel_shaking_tag,
                            batch_tail_rel_shaking_tag)
    
    return ent_sample_acc.item(), head_rel_sample_acc.item(), tail_rel_sample_acc.item(), rel_cpg

In [40]:
max_f1 = 0.
def train_n_valid(train_dataloader, dev_dataloader, optimizer, scheduler, num_epoch):  
    def train(dataloader, ep):
        # train
        rel_extractor.train()
        
        t_ep = time.time()
        start_lr = optimizer.param_groups[0]['lr']
        total_loss, total_ent_sample_acc, total_head_rel_sample_acc, total_tail_rel_sample_acc = 0., 0., 0., 0.
        for batch_ind, batch_train_data in enumerate(dataloader):
            t_batch = time.time()
            z = (2 * len(rel2id) + 1)
            steps_per_ep = len(dataloader)
            total_steps = config.loss_weight_recover_steps
            current_step = steps_per_ep * ep + batch_ind
            w_ent = max(1 / z + 1 - current_step / total_steps, 1 / z)
            w_rel = min((len(rel2id) / z) * current_step / total_steps, (len(rel2id) / z))
            loss_weights = {"ent": w_ent, "rel": w_rel}
            loss, ent_sample_acc, head_rel_sample_acc, tail_rel_sample_acc = train_step(batch_train_data, optimizer, loss_weights)
            scheduler.step()
            
            total_loss += loss
            total_ent_sample_acc += ent_sample_acc
            total_head_rel_sample_acc += head_rel_sample_acc
            total_tail_rel_sample_acc += tail_rel_sample_acc
            
            avg_loss = total_loss / (batch_ind + 1)
            avg_ent_sample_acc = total_ent_sample_acc / (batch_ind + 1)
            avg_head_rel_sample_acc = total_head_rel_sample_acc / (batch_ind + 1)
            avg_tail_rel_sample_acc = total_tail_rel_sample_acc / (batch_ind + 1)
            
            batch_print_format = "\rEpoch: {}/{}, batch: {}/{}, train_loss: {}, " + \
                                "t_ent_sample_acc: {}, t_head_rel_sample_acc: {}, t_tail_rel_sample_acc: {}," + \
                                 "lr: {}, batch_time: {}, total_time: {} -------------"
                    
            print(batch_print_format.format(ep + 1, num_epoch, 
                                            batch_ind + 1, len(dataloader), 
                                            avg_loss, 
                                            avg_ent_sample_acc,
                                            avg_head_rel_sample_acc,
                                            avg_tail_rel_sample_acc,
                                            optimizer.param_groups[0]['lr'],
                                            time.time() - t_batch,
                                            time.time() - t_ep,
                                           ), end="")
            
            if batch_ind % config.log_interval == 0:
                wandb.log({
                    "train_loss": avg_loss,
                    "train_ent_seq_acc": avg_ent_sample_acc,
                    "train_head_rel_acc": avg_head_rel_sample_acc,
                    "train_tail_rel_acc": avg_tail_rel_sample_acc,
                    "learning_rate": optimizer.param_groups[0]['lr'],
                    "time": time.time() - t_ep,
                })
        
    def valid(dataloader, ep):
        # valid
        rel_extractor.eval()
        
        t_ep = time.time()
        total_ent_sample_acc, total_head_rel_sample_acc, total_tail_rel_sample_acc = 0., 0., 0.
        total_rel_correct_num, total_rel_pred_num, total_rel_gold_num = 0, 0, 0
        for batch_ind, batch_valid_data in enumerate(tqdm(dataloader, desc = "Validating")):
            ent_sample_acc, head_rel_sample_acc, tail_rel_sample_acc, rel_cpg = valid_step(batch_valid_data)

            total_ent_sample_acc += ent_sample_acc
            total_head_rel_sample_acc += head_rel_sample_acc
            total_tail_rel_sample_acc += tail_rel_sample_acc
            
            total_rel_correct_num += rel_cpg[0]
            total_rel_pred_num += rel_cpg[1]
            total_rel_gold_num += rel_cpg[2]

        avg_ent_sample_acc = total_ent_sample_acc / len(dataloader)
        avg_head_rel_sample_acc = total_head_rel_sample_acc / len(dataloader)
        avg_tail_rel_sample_acc = total_tail_rel_sample_acc / len(dataloader)
        
        rel_prf = get_scores(total_rel_correct_num, total_rel_pred_num, total_rel_gold_num)
        
        log_dict = {
                        "val_ent_seq_acc": avg_ent_sample_acc,
                        "val_head_rel_acc": avg_head_rel_sample_acc,
                        "val_tail_rel_acc": avg_tail_rel_sample_acc,
                        "val_prec": rel_prf[0],
                        "val_recall": rel_prf[1],
                        "val_f1": rel_prf[2],
                        "time": time.time() - t_ep,
                    }
        pprint(log_dict)
        wandb.log(log_dict)
        
        return rel_prf[2]
        
    for ep in range(num_epoch):
        train(train_dataloader, ep)   
        valid_f1 = valid(dev_dataloader, ep)
        
        global max_f1
        if valid_f1 >= max_f1: 
            max_f1 = valid_f1
            if valid_f1 > 0.5: # save the best model
                modle_state_num = len(glob.glob(model_state_dict_dir + "/model_state_dict_*.pt"))
                torch.save(rel_extractor.state_dict(), os.path.join(model_state_dict_dir, "model_state_dict_{}.pt".format(modle_state_num)))
                scheduler_state_num = len(glob.glob(schedule_state_dict_dir + "/scheduler_state_dict_*.pt"))
                torch.save(scheduler.state_dict(), os.path.join(schedule_state_dict_dir, "scheduler_state_dict_{}.pt".format(scheduler_state_num))) 
        print("Current avf_f1: {}, Best f1: {}".format(valid_f1, max_f1))

In [41]:
def get_last_state_path(state_dir, pre_fix):
    max_file_num = -1
    last_state_path = None
    for path in glob.glob(state_dir + "/{}_*.pt".format(pre_fix)):
        file_num = re.search("state_dict_(\d+)\.pt", path).group(1)
        if int(file_num) > max_file_num:
            max_file_num = int(file_num)
            last_state_path = path
    return last_state_path

def get_model_state_path(state_dict_dir, state_dict_num):
    return os.path.join(state_dict_dir, "model_state_dict_{}.pt".format(state_dict_num))

In [42]:
# dataloader
print("preparing dataloader...")
train_dataloader, \
dev_dataloader = get_train_dev_dataloader_gen(indexed_train_data, 
                                            indexed_valid_data, 
                                            config.batch_size, 
                                            )
print("dataloaders done!")

preparing dataloader...
dataloaders done!


In [45]:
# optimizer
init_learning_rate = config.lr
optimizer = torch.optim.Adam(rel_extractor.parameters(), lr = init_learning_rate)
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, len(train_dataloader) * 2)

# decay_rate = 0.99
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 10, gamma = decay_rate)

In [None]:
epoch_num = config.epochs

# load the last best state (if any)
model_last_state_path = get_last_state_path(model_state_dict_dir, "model_state_dict")
if model_last_state_path is not None:
    rel_extractor.load_state_dict(torch.load(model_last_state_path))
    print("------------model state {} loaded ----------------".format(model_last_state_path.split("/")[-1]))
    
scheduler_last_state_path = get_last_state_path(schedule_state_dict_dir, "scheduler_state_dict")  
if scheduler_last_state_path is not None:
    scheduler.load_state_dict(torch.load(scheduler_last_state_path))
    print("------------scheduler state {} loaded ----------------".format(scheduler_last_state_path.split("/")[-1]))

train_n_valid(train_dataloader, dev_dataloader, optimizer, scheduler, epoch_num)

Failed to query for notebook name, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable


Epoch: 1/50, batch: 648/981, train_loss: 0.10525941145304521, t_ent_sample_acc: 0.0, t_head_rel_sample_acc: 0.0, t_tail_rel_sample_acc: 0.0,lr: 0.0007548376332955956, batch_time: 1.987919807434082, total_time: 1372.7446355819702 --------------

requests_with_retry encountered retryable exception: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')). args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 1, 'content': ['2020-05-31T23:09:50.192969 Epoch: 1/50, batch: 553/981, train_loss: 0.12268471762211236, t_ent_sample_acc: 0.0, t_head_rel_sample_acc: 0.0, t_tail_rel_sample_acc: 0.0,lr: 0.0008170805878433099, batch_time: 1.7877914905548096, total_time: 1162.7392621040344 -------------\r']}, 'wandb-events.jsonl': {'offset': 70, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.64, "system.gpu.0.powerPercent": 15.45, "system.gpu.process.0.powerWatts":

Epoch: 1/50, batch: 981/981, train_loss: 0.07069820342974682, t_ent_sample_acc: 0.0, t_head_rel_sample_acc: 0.0, t_tail_rel_sample_acc: 0.0,lr: 0.000500800609406507, batch_time: 1.0636672973632812, total_time: 2012.4023225307465 --------------

Validating: 100%|██████████| 97/97 [00:42<00:00,  2.30it/s]

{'time': 42.24754452705383,
 'val_ent_seq_acc': 0.0,
 'val_f1': 0.0,
 'val_head_rel_acc': 0.0,
 'val_prec': 0.0,
 'val_recall': 0.0,
 'val_tail_rel_acc': 0.0}
Current avf_f1: 0.0, Best f1: 0.0





Epoch: 2/50, batch: 814/981, train_loss: 0.002853785712999596, t_ent_sample_acc: 0.0010237510542611817, t_head_rel_sample_acc: 0.0, t_tail_rel_sample_acc: 0.0,lr: 1.7982074949055793e-05, batch_time: 2.4107165336608887, total_time: 1814.8526718616486 ---------------

requests_with_retry encountered retryable exception: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')). args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 11, 'content': ['2020-05-31T23:51:26.278219 Epoch: 2/50, batch: 720/981, train_loss: 0.0028847315932378275, t_ent_sample_acc: 0.0009259259535206689, t_head_rel_sample_acc: 0.0, t_tail_rel_sample_acc: 0.0,lr: 4.335761978812763e-05, batch_time: 2.4674410820007324, total_time: 1603.8712542057037 -------------\r']}, 'wandb-events.jsonl': {'offset': 147, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.68, "system.gpu.0.powerPercent": 15.47, "system.gpu.

Epoch: 2/50, batch: 981/981, train_loss: 0.002811742359326125, t_ent_sample_acc: 0.0008494733518538246, t_head_rel_sample_acc: 0.0, t_tail_rel_sample_acc: 0.0,lr: 6.409758326331172e-10, batch_time: 1.2008121013641357, total_time: 2191.7397031784058 ---------------

Validating: 100%|██████████| 97/97 [00:50<00:00,  1.91it/s]

{'time': 50.77512216567993,
 'val_ent_seq_acc': 0.0,
 'val_f1': 0.0,
 'val_head_rel_acc': 0.0,
 'val_prec': 0.0,
 'val_recall': 0.0,
 'val_tail_rel_acc': 0.0}
Current avf_f1: 0.0, Best f1: 0.0





Epoch: 3/50, batch: 981/981, train_loss: 0.0020560365302913523, t_ent_sample_acc: 0.003058104066673769, t_head_rel_sample_acc: 0.0, t_tail_rel_sample_acc: 0.0,lr: 0.000500800609406507, batch_time: 1.2323575019836426, total_time: 2192.593568086624 ----------------

Validating: 100%|██████████| 97/97 [00:46<00:00,  2.10it/s]

{'time': 46.25279355049133,
 'val_ent_seq_acc': 0.0,
 'val_f1': 0.0,
 'val_head_rel_acc': 0.0,
 'val_prec': 0.0,
 'val_recall': 0.0,
 'val_tail_rel_acc': 0.0}
Current avf_f1: 0.0, Best f1: 0.0





Epoch: 4/50, batch: 190/981, train_loss: 0.0016870900008239245, t_ent_sample_acc: 0.0008771930085985284, t_head_rel_sample_acc: 0.0, t_tail_rel_sample_acc: 0.0,lr: 0.00035098391018530817, batch_time: 2.1076157093048096, total_time: 414.7561867237091 --------------

requests_with_retry encountered retryable exception: ('Connection aborted.', OSError("(104, 'ECONNRESET')")). args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 21, 'content': ['2020-06-01T00:32:14.297944 Epoch: 3/50, batch: 806/981, train_loss: 0.0021220223858135594, t_ent_sample_acc: 0.002274607181105365, t_head_rel_sample_acc: 0.0, t_tail_rel_sample_acc: 0.0,lr: 0.0006390495770444032, batch_time: 2.317578077316284, total_time: 1809.062795639038 -------------\r']}, 'wandb-events.jsonl': {'offset': 223, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.69, "system.gpu.0.powerPercent": 15.48, "system.gpu.process.0.powerWatts": 38.69, "system.gp

Epoch: 4/50, batch: 874/981, train_loss: 0.001537167775997193, t_ent_sample_acc: 0.00457665917529667, t_head_rel_sample_acc: 0.0001906941323040279, t_tail_rel_sample_acc: 0.0001906941323040279,lr: 7.4577303674025485e-06, batch_time: 2.2854623794555664, total_time: 1869.7455115318298 ------------------

requests_with_retry encountered retryable exception: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')). args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 31, 'content': ['2020-06-01T01:07:03.407252 Epoch: 4/50, batch: 773/981, train_loss: 0.0015527771789319138, t_ent_sample_acc: 0.00409659348129455, t_head_rel_sample_acc: 0.00021561018322602897, t_tail_rel_sample_acc: 0.00021561018322602897,lr: 2.7738140064872076e-05, batch_time: 2.1358847618103027, total_time: 1659.330600976944 -------------\r']}, 'wandb-events.jsonl': {'offset': 288, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.72, "system.gpu.

Epoch: 4/50, batch: 980/981, train_loss: 0.001523046307942868, t_ent_sample_acc: 0.004591836871541276, t_head_rel_sample_acc: 0.00017006803227930653, t_tail_rel_sample_acc: 0.00017006803227930653,lr: 2.5639016871248365e-09, batch_time: 2.2513272762298584, total_time: 2102.0691912174225 ---------------

Validating:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch: 4/50, batch: 981/981, train_loss: 0.0015220041871550289, t_ent_sample_acc: 0.005096840095933182, t_head_rel_sample_acc: 0.00016989467037076495, t_tail_rel_sample_acc: 0.00016989467037076495,lr: 6.409758326331172e-10, batch_time: 1.1080141067504883, total_time: 2103.2121245861053 -------------

Validating: 100%|██████████| 97/97 [00:41<00:00,  2.33it/s]

{'time': 41.65100598335266,
 'val_ent_seq_acc': 0.0,
 'val_f1': 0.0,
 'val_head_rel_acc': 0.0,
 'val_prec': 0.0,
 'val_recall': 0.0,
 'val_tail_rel_acc': 0.0}
Current avf_f1: 0.0, Best f1: 0.0





Epoch: 5/50, batch: 981/981, train_loss: 0.0008003613781996255, t_ent_sample_acc: 0.1413523646092439, t_head_rel_sample_acc: 0.0005096840111122948, t_tail_rel_sample_acc: 0.0003397893407415299,lr: 0.000500800609406507, batch_time: 1.1717591285705566, total_time: 2024.1995029449463 ------------------

Validating: 100%|██████████| 97/97 [00:54<00:00,  1.79it/s]


{'time': 54.211540937423706,
 'val_ent_seq_acc': 0.402061864733696,
 'val_f1': 0.0,
 'val_head_rel_acc': 0.0,
 'val_prec': 0.0,
 'val_recall': 0.0,
 'val_tail_rel_acc': 0.0}
Current avf_f1: 0.0, Best f1: 0.0
Epoch: 6/50, batch: 981/981, train_loss: 0.00029637683575604325, t_ent_sample_acc: 0.40299015451947967, t_head_rel_sample_acc: 0.0006795786814830598, t_tail_rel_sample_acc: 0.0006795786814830598,lr: 6.409758326331172e-10, batch_time: 1.1538288593292236, total_time: 2202.0729405879974 --------------

Validating: 100%|██████████| 97/97 [01:10<00:00,  1.37it/s]


{'time': 70.8852527141571,
 'val_ent_seq_acc': 0.44501719201348494,
 'val_f1': 0.0,
 'val_head_rel_acc': 0.0,
 'val_prec': 0.0,
 'val_recall': 0.0,
 'val_tail_rel_acc': 0.0}
Current avf_f1: 0.0, Best f1: 0.0
Epoch: 7/50, batch: 70/981, train_loss: 0.0002498073136458905, t_ent_sample_acc: 0.40476191363164354, t_head_rel_sample_acc: 0.0, t_tail_rel_sample_acc: 0.0,lr: 0.0009969514164096546, batch_time: 3.0315797328948975, total_time: 195.3593442440033 ---------------

requests_with_retry encountered retryable exception: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')). args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 50, 'content': ['ERROR 2020-06-01T02:27:27.179828 Validating:  85%|████████▍ | 82/97 [00:58<00:12,  1.20it/s]\r']}, 'wandb-events.jsonl': {'offset': 439, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.67, "system.gpu.0.powerPercent": 15.47, "system.gpu.process.0.powerWatts": 38.67, "system.gpu.process.0.powerPercent": 15.47, "system.gpu.1.gpu": 99.4, "system.gpu.1.memory": 15.93, "system.gpu.1.memoryAllocated": 65.3, "system.gpu.1.temp": 47.27, "s

Epoch: 7/50, batch: 124/981, train_loss: 0.0002444412928584385, t_ent_sample_acc: 0.42876345027358304, t_head_rel_sample_acc: 0.0, t_tail_rel_sample_acc: 0.0,lr: 0.0009903339800947284, batch_time: 2.9977424144744873, total_time: 350.43902564048767 -------------

requests_with_retry encountered retryable exception: 408 Client Error: Request Timeout for url: https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream. args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 60, 'content': ['2020-06-01T02:32:31.479805 Epoch: 7/50, batch: 103/981, train_loss: 0.00024160730475345085, t_ent_sample_acc: 0.43527509084025634, t_head_rel_sample_acc: 0.0, t_tail_rel_sample_acc: 0.0,lr: 0.0009933460967532454, batch_time: 1.9075875282287598, total_time: 290.2091944217682 -------------\r']}, 'wandb-events.jsonl': {'offset': 448, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.72, "system.gpu.0.powerPercent": 1

Epoch: 7/50, batch: 776/981, train_loss: 0.000197830406925521, t_ent_sample_acc: 0.48367698698006956, t_head_rel_sample_acc: 0.0006443299161097439, t_tail_rel_sample_acc: 0.0006443299161097439,lr: 0.0006619511346266997, batch_time: 2.9478836059570312, total_time: 2192.1189062595367 ----------------

requests_with_retry encountered retryable exception: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')). args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 61, 'content': ['2020-06-01T03:00:43.400702 Epoch: 7/50, batch: 701/981, train_loss: 0.00020468214502546985, t_ent_sample_acc: 0.47123158483974603, t_head_rel_sample_acc: 0.00047551118868393834, t_tail_rel_sample_acc: 0.0007132667830259075,lr: 0.0007174569593104108, batch_time: 3.030891180038452, total_time: 1981.3023533821106 -------------\r']}, 'wandb-events.jsonl': {'offset': 501, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.69, "system.gpu.0

Epoch: 7/50, batch: 980/981, train_loss: 0.00018687286682859627, t_ent_sample_acc: 0.4967687188210536, t_head_rel_sample_acc: 0.0010204081936758392, t_tail_rel_sample_acc: 0.0008503401613965326,lr: 0.0005016012167603286, batch_time: 2.786871910095215, total_time: 2761.8176488876343 ---------------

Validating:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch: 7/50, batch: 981/981, train_loss: 0.00018704841763871304, t_ent_sample_acc: 0.4962623286897375, t_head_rel_sample_acc: 0.0010193680222245897, t_tail_rel_sample_acc: 0.0008494733518538246,lr: 0.000500800609406507, batch_time: 1.3990917205810547, total_time: 2763.2507259845734 -------------

Validating: 100%|██████████| 97/97 [01:17<00:00,  1.25it/s]

{'time': 77.77986669540405,
 'val_ent_seq_acc': 0.5584192577096605,
 'val_f1': 0.0,
 'val_head_rel_acc': 0.0,
 'val_prec': 0.0,
 'val_recall': 0.0,
 'val_tail_rel_acc': 0.0}
Current avf_f1: 0.0, Best f1: 0.0





Epoch: 8/50, batch: 980/981, train_loss: 0.0001046459397058447, t_ent_sample_acc: 0.6197279069040503, t_head_rel_sample_acc: 0.0020408163873516785, t_tail_rel_sample_acc: 0.002891156548748211,lr: 2.5639016871248365e-09, batch_time: 3.1375083923339844, total_time: 2765.862104654312 ----------------

Validating:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch: 8/50, batch: 981/981, train_loss: 0.00010455616184330625, t_ent_sample_acc: 0.6201155441039442, t_head_rel_sample_acc: 0.0020387360444491793, t_tail_rel_sample_acc: 0.002888209396303004,lr: 6.409758326331172e-10, batch_time: 1.4464895725250244, total_time: 2767.341563940048 -------------

Validating: 100%|██████████| 97/97 [01:00<00:00,  1.61it/s]

{'time': 60.071879863739014,
 'val_ent_seq_acc': 0.6099656526882624,
 'val_f1': 0.0,
 'val_head_rel_acc': 0.0,
 'val_prec': 0.0,
 'val_recall': 0.0,
 'val_tail_rel_acc': 0.0}
Current avf_f1: 0.0, Best f1: 0.0





Epoch: 9/50, batch: 652/981, train_loss: 7.992781645372782e-05, t_ent_sample_acc: 0.6275562536817021, t_head_rel_sample_acc: 0.0005112474589991423, t_tail_rel_sample_acc: 0.0005112474589991423,lr: 0.0007520771527566093, batch_time: 3.01727294921875, total_time: 1592.9317939281464 ----------------

requests_with_retry encountered retryable exception: 408 Client Error: Request Timeout for url: https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream. args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 80, 'content': ['2020-06-01T04:27:41.855042 Epoch: 9/50, batch: 632/981, train_loss: 8.01295669744728e-05, t_ent_sample_acc: 0.6279008604256036, t_head_rel_sample_acc: 0.0005274261760560772, t_tail_rel_sample_acc: 0.0005274261760560772,lr: 0.0007657738891847679, batch_time: 3.203542709350586, total_time: 1530.804307937622 -------------\r']}, 'wandb-events.jsonl': {'offset': 663, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.69

Epoch: 9/50, batch: 981/981, train_loss: 7.27736074367071e-05, t_ent_sample_acc: 0.6423717464389664, t_head_rel_sample_acc: 0.0013591573629661196, t_tail_rel_sample_acc: 0.0010193680222245897,lr: 0.000500800609406507, batch_time: 1.4261517524719238, total_time: 2522.0325191020966 ---------------

Validating: 100%|██████████| 97/97 [01:14<00:00,  1.30it/s]

{'time': 74.70183658599854,
 'val_ent_seq_acc': 0.694158095367176,
 'val_f1': 0.0,
 'val_head_rel_acc': 0.0,
 'val_prec': 0.0,
 'val_recall': 0.0,
 'val_tail_rel_acc': 0.0}
Current avf_f1: 0.0, Best f1: 0.0





Epoch: 10/50, batch: 980/981, train_loss: 3.78839739981658e-05, t_ent_sample_acc: 0.695578251581411, t_head_rel_sample_acc: 0.0020408163873516785, t_tail_rel_sample_acc: 0.0023809524519102913,lr: 2.5639016871248365e-09, batch_time: 3.1325490474700928, total_time: 2676.5970635414124 ----------------

Validating:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch: 10/50, batch: 981/981, train_loss: 3.7859206969501946e-05, t_ent_sample_acc: 0.6953788853718479, t_head_rel_sample_acc: 0.0025484200403717073, t_tail_rel_sample_acc: 0.002888209381113237,lr: 6.409758326331172e-10, batch_time: 1.4172897338867188, total_time: 2678.0520572662354 -------------

Validating: 100%|██████████| 97/97 [01:10<00:00,  1.37it/s]

{'time': 70.650625705719,
 'val_ent_seq_acc': 0.6769759621202331,
 'val_f1': 0.0,
 'val_head_rel_acc': 0.0,
 'val_prec': 0.0,
 'val_recall': 0.0,
 'val_tail_rel_acc': 0.0}
Current avf_f1: 0.0, Best f1: 0.0





Epoch: 11/50, batch: 290/981, train_loss: 2.5968105678500784e-05, t_ent_sample_acc: 0.6994253060427206, t_head_rel_sample_acc: 0.0045977012864474595, t_tail_rel_sample_acc: 0.002873563304029662,lr: 0.0009474135833469724, batch_time: 3.133308172225952, total_time: 819.8492159843445 ----------------

requests_with_retry encountered retryable exception: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')). args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 99, 'content': ['2020-06-01T05:41:25.086511 Epoch: 11/50, batch: 216/981, train_loss: 2.6851142992941107e-05, t_ent_sample_acc: 0.6921296479801337, t_head_rel_sample_acc: 0.006172839690137793, t_tail_rel_sample_acc: 0.0038580248063361205,lr: 0.000970662360343527, batch_time: 2.151212692260742, total_time: 608.9408457279205 -------------\r']}, 'wandb-events.jsonl': {'offset': 801, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.71, "system.gpu.0.pow

Epoch: 11/50, batch: 619/981, train_loss: 2.4703104059052615e-05, t_ent_sample_acc: 0.6943995870517413, t_head_rel_sample_acc: 0.004038772333611965, t_tail_rel_sample_acc: 0.0032310178668895715,lr: 0.0007745314833479833, batch_time: 3.0763261318206787, total_time: 1750.0499844551086 --------------

requests_with_retry encountered retryable exception: 408 Client Error: Request Timeout for url: https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream. args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 100, 'content': ['2020-06-01T05:59:24.046156 Epoch: 11/50, batch: 595/981, train_loss: 2.473454854324922e-05, t_ent_sample_acc: 0.6955182251559586, t_head_rel_sample_acc: 0.00420168079748875, t_tail_rel_sample_acc: 0.0033613446379909995,lr: 0.0007903840663659185, batch_time: 2.961010217666626, total_time: 1687.1171216964722 -------------\r']}, 'wandb-events.jsonl': {'offset': 835, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.

Epoch: 11/50, batch: 980/981, train_loss: 2.4001289803352996e-05, t_ent_sample_acc: 0.6860544397818799, t_head_rel_sample_acc: 0.004421768839261969, t_tail_rel_sample_acc: 0.003231292613306824,lr: 0.0005016012167603286, batch_time: 3.146695852279663, total_time: 2777.939968109131 -----------------

Validating:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch: 11/50, batch: 981/981, train_loss: 2.4010215785334678e-05, t_ent_sample_acc: 0.6858647818412256, t_head_rel_sample_acc: 0.004417261429639888, t_tail_rel_sample_acc: 0.0032279987370445336,lr: 0.000500800609406507, batch_time: 1.3562374114990234, total_time: 2779.329046010971 -------------

Validating: 100%|██████████| 97/97 [01:03<00:00,  1.52it/s]

{'time': 63.86064577102661,
 'val_ent_seq_acc': 0.7018900533619615,
 'val_f1': 0.006488240064234629,
 'val_head_rel_acc': 0.003436426219251967,
 'val_prec': 0.0032546786004879367,
 'val_recall': 0.999999999975,
 'val_tail_rel_acc': 0.003436426219251967}
Current avf_f1: 0.006488240064234629, Best f1: 0.006488240064234629





Epoch: 12/50, batch: 779/981, train_loss: 2.0716643014149413e-05, t_ent_sample_acc: 0.6668806339641591, t_head_rel_sample_acc: 0.004920838828723452, t_tail_rel_sample_acc: 0.006632434943062044,lr: 2.6182230162504484e-05, batch_time: 3.0075995922088623, total_time: 2073.6457896232605 --------------

requests_with_retry encountered retryable exception: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')). args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 110, 'content': ['2020-06-01T06:49:44.688900 Epoch: 12/50, batch: 689/981, train_loss: 2.0885896206094336e-05, t_ent_sample_acc: 0.665699098470076, t_head_rel_sample_acc: 0.004596033034892144, t_tail_rel_sample_acc: 0.006289308363536619,lr: 5.4025193443136054e-05, batch_time: 2.2961416244506836, total_time: 1865.635240316391 -------------\r']}, 'wandb-events.jsonl': {'offset': 929, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.66, "system.gpu.0.p

Epoch: 12/50, batch: 981/981, train_loss: 2.066678886248963e-05, t_ent_sample_acc: 0.6695548937134786, t_head_rel_sample_acc: 0.005266734781493713, t_tail_rel_sample_acc: 0.0066258921444598325,lr: 6.409758326331172e-10, batch_time: 1.4399232864379883, total_time: 2640.0754656791687 ---------------

Validating: 100%|██████████| 97/97 [01:15<00:00,  1.28it/s]

{'time': 75.75732946395874,
 'val_ent_seq_acc': 0.6804123898756873,
 'val_f1': 0.009700889246894428,
 'val_head_rel_acc': 0.0,
 'val_prec': 0.0048820179007319055,
 'val_recall': 0.749999999990625,
 'val_tail_rel_acc': 0.0}
Current avf_f1: 0.009700889246894428, Best f1: 0.009700889246894428





Epoch: 13/50, batch: 980/981, train_loss: 2.0876739065953393e-05, t_ent_sample_acc: 0.6352040982976251, t_head_rel_sample_acc: 0.008163265549406714, t_tail_rel_sample_acc: 0.008503401613965327,lr: 0.0005016012167603286, batch_time: 3.0763120651245117, total_time: 2775.413981437683 ----------------

Validating:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch: 13/50, batch: 981/981, train_loss: 2.087938082880895e-05, t_ent_sample_acc: 0.6355759595633769, t_head_rel_sample_acc: 0.008154944177796717, t_tail_rel_sample_acc: 0.008494733518538246,lr: 0.000500800609406507, batch_time: 1.442861795425415, total_time: 2776.8914198875427 -------------

Validating: 100%|██████████| 97/97 [01:04<00:00,  1.50it/s]


{'time': 64.77010726928711,
 'val_ent_seq_acc': 0.6580756216012326,
 'val_f1': 0.14587892047739776,
 'val_head_rel_acc': 0.030927835973267703,
 'val_prec': 0.08136696501219842,
 'val_recall': 0.7042253521121802,
 'val_tail_rel_acc': 0.030927835973267703}
Current avf_f1: 0.14587892047739776, Best f1: 0.14587892047739776
Epoch: 14/50, batch: 980/981, train_loss: 1.7615596125695597e-05, t_ent_sample_acc: 0.6309523971865372, t_head_rel_sample_acc: 0.02159864009947193, t_tail_rel_sample_acc: 0.022789116325427076,lr: 2.5639016871248365e-09, batch_time: 2.96329665184021, total_time: 2701.584529876709 -----------------

Validating:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch: 14/50, batch: 981/981, train_loss: 1.761140440389445e-05, t_ent_sample_acc: 0.6313285925003124, t_head_rel_sample_acc: 0.021576623137087147, t_tail_rel_sample_acc: 0.0227658858296825,lr: 6.409758326331172e-10, batch_time: 1.2564895153045654, total_time: 2702.8729269504547 -------------

Validating: 100%|██████████| 97/97 [01:06<00:00,  1.46it/s]


{'time': 66.5579948425293,
 'val_ent_seq_acc': 0.6417525912990275,
 'val_f1': 0.1249059443051015,
 'val_head_rel_acc': 0.017182131096259833,
 'val_prec': 0.06753458096012468,
 'val_recall': 0.82999999999917,
 'val_tail_rel_acc': 0.017182131096259833}
Current avf_f1: 0.1249059443051015, Best f1: 0.14587892047739776
Epoch: 15/50, batch: 139/981, train_loss: 2.775263336509937e-05, t_ent_sample_acc: 0.489208645421824, t_head_rel_sample_acc: 0.008393285621842035, t_tail_rel_sample_acc: 0.010791367228082615,lr: 0.0009878428410862482, batch_time: 2.848259687423706, total_time: 399.06385827064514 -----------------

requests_with_retry encountered retryable exception: 500 Server Error: Internal Server Error for url: https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream. args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 138, 'content': ['2020-06-01T08:43:36.406702 Epoch: 15/50, batch: 129/981, train_loss: 2.8461725145631178e-05, t_ent_sample_acc: 0.48320414657278576, t_head_rel_sample_acc: 0.009043927918108858, t_tail_rel_sample_acc: 0.011627907323282819,lr: 0.0009895349605301633, batch_time: 3.098073959350586, total_time: 368.5563175678253 -------------\r']}, 'wandb-events.jsonl': {'offset': 1141, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWat

Epoch: 15/50, batch: 156/981, train_loss: 2.690809104677054e-05, t_ent_sample_acc: 0.5064102694965326, t_head_rel_sample_acc: 0.007478632701513095, t_tail_rel_sample_acc: 0.010683761002161564,lr: 0.000984679437866531, batch_time: 2.124236583709717, total_time: 442.39676904678345 ----------------

requests_with_retry encountered retryable exception: 500 Server Error: Internal Server Error for url: https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream. args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 138, 'content': ['2020-06-01T08:43:36.406702 Epoch: 15/50, batch: 129/981, train_loss: 2.8461725145631178e-05, t_ent_sample_acc: 0.48320414657278576, t_head_rel_sample_acc: 0.009043927918108858, t_tail_rel_sample_acc: 0.011627907323282819,lr: 0.0009895349605301633, batch_time: 3.098073959350586, total_time: 368.5563175678253 -------------\r']}, 'wandb-events.jsonl': {'offset': 1141, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWat

Epoch: 15/50, batch: 158/981, train_loss: 2.7009059660393845e-05, t_ent_sample_acc: 0.5063291268446778, t_head_rel_sample_acc: 0.007383966464785081, t_tail_rel_sample_acc: 0.010548523521121544,lr: 0.0009842836154290576, batch_time: 2.269378900527954, total_time: 446.4353606700897 ---------------

requests_with_retry encountered retryable exception: 500 Server Error: Internal Server Error for url: https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream. args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 138, 'content': ['2020-06-01T08:43:36.406702 Epoch: 15/50, batch: 129/981, train_loss: 2.8461725145631178e-05, t_ent_sample_acc: 0.48320414657278576, t_head_rel_sample_acc: 0.009043927918108858, t_tail_rel_sample_acc: 0.011627907323282819,lr: 0.0009895349605301633, batch_time: 3.098073959350586, total_time: 368.5563175678253 -------------\r']}, 'wandb-events.jsonl': {'offset': 1141, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWat

Epoch: 15/50, batch: 161/981, train_loss: 2.6805830626325142e-05, t_ent_sample_acc: 0.5072463896148693, t_head_rel_sample_acc: 0.0072463770275530606, t_tail_rel_sample_acc: 0.010351967182218659,lr: 0.0009836805706384983, batch_time: 2.955505847930908, total_time: 455.78450751304626 -------------

requests_with_retry encountered retryable exception: 500 Server Error: Internal Server Error for url: https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream. args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 138, 'content': ['2020-06-01T08:43:36.406702 Epoch: 15/50, batch: 129/981, train_loss: 2.8461725145631178e-05, t_ent_sample_acc: 0.48320414657278576, t_head_rel_sample_acc: 0.009043927918108858, t_tail_rel_sample_acc: 0.011627907323282819,lr: 0.0009895349605301633, batch_time: 3.098073959350586, total_time: 368.5563175678253 -------------\r']}, 'wandb-events.jsonl': {'offset': 1141, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWat

Epoch: 15/50, batch: 167/981, train_loss: 2.6375003662525013e-05, t_ent_sample_acc: 0.5169660813972622, t_head_rel_sample_acc: 0.007984032174070439, t_tail_rel_sample_acc: 0.010978044239346852,lr: 0.0009824410123113634, batch_time: 2.9752581119537354, total_time: 473.1154000759125 -------------

requests_with_retry encountered retryable exception: 500 Server Error: Internal Server Error for url: https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream. args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 138, 'content': ['2020-06-01T08:43:36.406702 Epoch: 15/50, batch: 129/981, train_loss: 2.8461725145631178e-05, t_ent_sample_acc: 0.48320414657278576, t_head_rel_sample_acc: 0.009043927918108858, t_tail_rel_sample_acc: 0.011627907323282819,lr: 0.0009895349605301633, batch_time: 3.098073959350586, total_time: 368.5563175678253 -------------\r']}, 'wandb-events.jsonl': {'offset': 1141, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWat

Epoch: 15/50, batch: 179/981, train_loss: 2.5768681715609044e-05, t_ent_sample_acc: 0.5204841849191228, t_head_rel_sample_acc: 0.010242085966318013, t_tail_rel_sample_acc: 0.013035382138950198,lr: 0.000979828426508364, batch_time: 2.884810209274292, total_time: 509.4636404514313 ----------------

requests_with_retry encountered retryable exception: 500 Server Error: Internal Server Error for url: https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream. args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 138, 'content': ['2020-06-01T08:43:36.406702 Epoch: 15/50, batch: 129/981, train_loss: 2.8461725145631178e-05, t_ent_sample_acc: 0.48320414657278576, t_head_rel_sample_acc: 0.009043927918108858, t_tail_rel_sample_acc: 0.011627907323282819,lr: 0.0009895349605301633, batch_time: 3.098073959350586, total_time: 368.5563175678253 -------------\r']}, 'wandb-events.jsonl': {'offset': 1141, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWat

Epoch: 15/50, batch: 367/981, train_loss: 2.240297205615167e-05, t_ent_sample_acc: 0.5540417946408165, t_head_rel_sample_acc: 0.014986376427824556, t_tail_rel_sample_acc: 0.015894641668335294,lr: 0.0009165669217613919, batch_time: 1.853548526763916, total_time: 1044.6151278018951 --------------

requests_with_retry encountered retryable exception: 408 Client Error: Request Timeout for url: https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream. args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 144, 'content': ['2020-06-01T08:53:49.966679 Epoch: 15/50, batch: 345/981, train_loss: 2.25895992986372e-05, t_ent_sample_acc: 0.5545893865219061, t_head_rel_sample_acc: 0.014009662210077479, t_tail_rel_sample_acc: 0.015458937615588092,lr: 0.0009260479449817143, batch_time: 2.8797380924224854, total_time: 982.1079840660095 -------------\r']}, 'wandb-events.jsonl': {'offset': 1160, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.

Epoch: 15/50, batch: 554/981, train_loss: 2.1370504134214216e-05, t_ent_sample_acc: 0.560469328200559, t_head_rel_sample_acc: 0.01624548782187679, t_tail_rel_sample_acc: 0.016847172557016572,lr: 0.0008164611494624226, batch_time: 2.948953866958618, total_time: 1565.9805595874786 ----------------

requests_with_retry encountered retryable exception: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')). args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 145, 'content': ['2020-06-01T09:00:04.563032 Epoch: 15/50, batch: 478/981, train_loss: 2.196285001130236e-05, t_ent_sample_acc: 0.5568340446597363, t_head_rel_sample_acc: 0.016387727095990998, t_tail_rel_sample_acc: 0.01673640214124983,lr: 0.0008611127697839648, batch_time: 3.087697744369507, total_time: 1356.717055797577 -------------\r']}, 'wandb-events.jsonl': {'offset': 1172, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.53, "system.gpu.0.pow

Epoch: 15/50, batch: 980/981, train_loss: 1.984196282072454e-05, t_ent_sample_acc: 0.5683673606995417, t_head_rel_sample_acc: 0.01836734747095984, t_tail_rel_sample_acc: 0.01836734747095984,lr: 0.0005016012167603286, batch_time: 2.993910312652588, total_time: 2791.19801902771 -------------------

Validating:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch: 15/50, batch: 981/981, train_loss: 1.983039379516602e-05, t_ent_sample_acc: 0.5688073531962802, t_head_rel_sample_acc: 0.018348624384852845, t_tail_rel_sample_acc: 0.018348624384852845,lr: 0.000500800609406507, batch_time: 1.3164246082305908, total_time: 2792.5558507442474 -------------

Validating: 100%|██████████| 97/97 [01:17<00:00,  1.26it/s]

{'time': 77.21701288223267,
 'val_ent_seq_acc': 0.6340206357621655,
 'val_f1': 0.26588845651754445,
 'val_head_rel_acc': 0.042955327740649585,
 'val_prec': 0.16680227827500677,
 'val_recall': 0.6549520766771071,
 'val_tail_rel_acc': 0.04467354085027557}
Current avf_f1: 0.26588845651754445, Best f1: 0.26588845651754445





Epoch: 16/50, batch: 980/981, train_loss: 1.4562589794831164e-05, t_ent_sample_acc: 0.6108843694536054, t_head_rel_sample_acc: 0.05272108993055869, t_tail_rel_sample_acc: 0.05255102188307412,lr: 2.5639016871248365e-09, batch_time: 3.1410889625549316, total_time: 2535.740625858307 ----------------

Validating:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch: 16/50, batch: 981/981, train_loss: 1.4554619984719596e-05, t_ent_sample_acc: 0.6107713374765884, t_head_rel_sample_acc: 0.0526673477389883, t_tail_rel_sample_acc: 0.05300713704935029,lr: 6.409758326331172e-10, batch_time: 1.4912447929382324, total_time: 2537.2656486034393 -------------

Validating:  56%|█████▌    | 54/97 [00:42<00:29,  1.43it/s]requests_with_retry encountered retryable exception: 408 Client Error: Request Timeout for url: https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream. args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 155, 'content': ['2020-06-01T10:07:15.252006 Epoch: 16/50, batch: 974/981, train_loss: 1.4583659321709388e-05, t_ent_sample_acc: 0.6108829725449579, t_head_rel_sample_acc: 0.052532513467193384, t_tail_rel_sample_acc: 0.05236139777259905,lr: 4.102190110921722e-08, batch_time: 3.001534938812256, total_time: 2517.2938804626465 -------------\r']}, 'wandb-events.jsonl': {'offset': 1297, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 49.98, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 49.98, "syst

{'time': 65.42906427383423,
 'val_ent_seq_acc': 0.6202749298098161,
 'val_f1': 0.2961487383497547,
 'val_head_rel_acc': 0.06357388505616139,
 'val_prec': 0.18144833197720248,
 'val_recall': 0.8050541516242581,
 'val_tail_rel_acc': 0.06357388505616139}
Current avf_f1: 0.2961487383497547, Best f1: 0.2961487383497547
Epoch: 17/50, batch: 981/981, train_loss: 1.590171156738295e-05, t_ent_sample_acc: 0.5732246146866792, t_head_rel_sample_acc: 0.05793408250529224, t_tail_rel_sample_acc: 0.059802923909750186,lr: 0.000500800609406507, batch_time: 0.9456641674041748, total_time: 2772.3775458335876 ----------------

Validating: 100%|██████████| 97/97 [01:02<00:00,  1.55it/s]

{'time': 62.57852220535278,
 'val_ent_seq_acc': 0.6219931436875432,
 'val_f1': 0.4789961810796015,
 'val_head_rel_acc': 0.13316151576558338,
 'val_prec': 0.3572009764035511,
 'val_recall': 0.7268211920528598,
 'val_tail_rel_acc': 0.13487972872158915}
Current avf_f1: 0.4789961810796015, Best f1: 0.4789961810796015





Epoch: 18/50, batch: 865/981, train_loss: 1.130558852352117e-05, t_ent_sample_acc: 0.619653195986858, t_head_rel_sample_acc: 0.13082851984597355, t_tail_rel_sample_acc: 0.1265895987693974,lr: 8.748687154702673e-06, batch_time: 3.0294017791748047, total_time: 2366.555083990097 ------------------

requests_with_retry encountered retryable exception: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')). args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 174, 'content': ['2020-06-01T11:31:51.453527 Epoch: 18/50, batch: 788/981, train_loss: 1.140740793056152e-05, t_ent_sample_acc: 0.6188663447909246, t_head_rel_sample_acc: 0.1292301218975619, t_tail_rel_sample_acc: 0.12415397965212159,lr: 2.3930409020223086e-05, batch_time: 3.08392333984375, total_time: 2155.293935060501 -------------\r']}, 'wandb-events.jsonl': {'offset': 1456, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 31.67, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 31.67, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.68, "system.gpu.0.power

Epoch: 18/50, batch: 981/981, train_loss: 1.1131099118579459e-05, t_ent_sample_acc: 0.6228338599235397, t_head_rel_sample_acc: 0.1357458411553584, t_tail_rel_sample_acc: 0.1326877371038744,lr: 6.409758326331172e-10, batch_time: 0.9210188388824463, total_time: 2694.391699552536 -----------------

Validating: 100%|██████████| 97/97 [01:18<00:00,  1.23it/s]

{'time': 78.76416850090027,
 'val_ent_seq_acc': 0.6529209838085568,
 'val_f1': 0.549544724111352,
 'val_head_rel_acc': 0.1958762934220206,
 'val_prec': 0.41741253051257793,
 'val_recall': 0.8040752351095918,
 'val_tail_rel_acc': 0.1941580803123946}
Current avf_f1: 0.549544724111352, Best f1: 0.549544724111352





Epoch: 19/50, batch: 505/981, train_loss: 1.5132100409096209e-05, t_ent_sample_acc: 0.5511551285144126, t_head_rel_sample_acc: 0.07623762588689822, t_tail_rel_sample_acc: 0.07458746082121782,lr: 0.0008458288124187359, batch_time: 3.171355724334717, total_time: 1434.5346801280975 ---------------

requests_with_retry encountered retryable exception: 408 Client Error: Request Timeout for url: https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream. args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 184, 'content': ['2020-06-01T12:05:03.536730 Epoch: 19/50, batch: 484/981, train_loss: 1.5347061382407228e-05, t_ent_sample_acc: 0.5475206739338468, t_head_rel_sample_acc: 0.07403581475546538, t_tail_rel_sample_acc: 0.07162534636406859,lr: 0.0008577736685484625, batch_time: 2.0537681579589844, total_time: 1374.8670127391815 -------------\r']}, 'wandb-events.jsonl': {'offset': 1518, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 31.67, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 31.67, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38

Epoch: 19/50, batch: 981/981, train_loss: 1.3027037906262997e-05, t_ent_sample_acc: 0.5834182945747501, t_head_rel_sample_acc: 0.11331974474236628, t_tail_rel_sample_acc: 0.11196058739458992,lr: 0.000500800609406507, batch_time: 1.452570915222168, total_time: 2662.3712134361267 ---------------

Validating:  14%|█▍        | 14/97 [00:13<01:08,  1.21it/s]requests_with_retry encountered retryable exception: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')). args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 185, 'content': ['2020-06-01T12:23:12.740534 Epoch: 19/50, batch: 911/981, train_loss: 1.3269220068523822e-05, t_ent_sample_acc: 0.5793999403747847, t_head_rel_sample_acc: 0.10867179212140984, t_tail_rel_sample_acc: 0.10757409726421867,lr: 0.0005567209247472357, batch_time: 3.0268280506134033, total_time: 2463.068875312805 -------------\r']}, 'wandb-events.jsonl': {'offset': 1552, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 31.67, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 31.67, "system.gpu.process.0.temp

{'time': 80.07452726364136,
 'val_ent_seq_acc': 0.6357388479500702,
 'val_f1': 0.6420966420465916,
 'val_head_rel_acc': 0.25171821894719426,
 'val_prec': 0.6379170056956356,
 'val_recall': 0.6463314097278939,
 'val_tail_rel_acc': 0.24828179288156255}
Current avf_f1: 0.6420966420465916, Best f1: 0.6420966420465916
Epoch: 20/50, batch: 631/981, train_loss: 8.55115417824085e-06, t_ent_sample_acc: 0.645800332944669, t_head_rel_sample_acc: 0.2150026458993011, t_tail_rel_sample_acc: 0.21368199105119176,lr: 7.691194967200099e-05, batch_time: 2.9376981258392334, total_time: 1790.3107388019562 -----------------

requests_with_retry encountered retryable exception: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')). args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 195, 'content': ['2020-06-01T12:54:09.797235 Epoch: 20/50, batch: 554/981, train_loss: 8.645250599229159e-06, t_ent_sample_acc: 0.6419976091933595, t_head_rel_sample_acc: 0.2099879706809667, t_tail_rel_sample_acc: 0.20908544390102587,lr: 0.00011289233941844785, batch_time: 2.855889320373535, total_time: 1577.9740104675293 -------------\r']}, 'wandb-events.jsonl': {'offset': 1610, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 31.67, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 31.67, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.57, "system.gpu.0.pow

Epoch: 20/50, batch: 980/981, train_loss: 8.34426320883889e-06, t_ent_sample_acc: 0.6534013772041214, t_head_rel_sample_acc: 0.2270408212378317, t_tail_rel_sample_acc: 0.22602041315059274,lr: 2.5639016871248365e-09, batch_time: 2.951669692993164, total_time: 2777.3143944740295 ----------------

Validating:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch: 20/50, batch: 981/981, train_loss: 8.343699143726228e-06, t_ent_sample_acc: 0.6532450047502946, t_head_rel_sample_acc: 0.22680938309181964, t_tail_rel_sample_acc: 0.22579001517592343,lr: 6.409758326331172e-10, batch_time: 1.4291002750396729, total_time: 2778.778292655945 -------------

Validating: 100%|██████████| 97/97 [01:06<00:00,  1.47it/s]


{'time': 66.01268863677979,
 'val_ent_seq_acc': 0.6829897080807342,
 'val_f1': 0.6929274842834756,
 'val_head_rel_acc': 0.34793815124280675,
 'val_prec': 0.6297803091944157,
 'val_recall': 0.7701492537312666,
 'val_tail_rel_acc': 0.3290378068833007}
Current avf_f1: 0.6929274842834756, Best f1: 0.6929274842834756
Epoch: 21/50, batch: 305/981, train_loss: 1.3231296951162818e-05, t_ent_sample_acc: 0.580327883704764, t_head_rel_sample_acc: 0.12295082294550098, t_tail_rel_sample_acc: 0.12677595976923334,lr: 0.0009419240173986226, batch_time: 2.7729990482330322, total_time: 868.5096707344055 ---------------

requests_with_retry encountered retryable exception: ('Connection aborted.', OSError("(104, 'ECONNRESET')")). args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 205, 'content': ['2020-06-01T13:27:44.685050 Epoch: 21/50, batch: 262/981, train_loss: 1.2913321347001912e-05, t_ent_sample_acc: 0.5820610830915793, t_head_rel_sample_acc: 0.13167939278233143, t_tail_rel_sample_acc: 0.13422392223400015,lr: 0.0009569679003823542, batch_time: 2.9285004138946533, total_time: 747.1467778682709 -------------\r']}, 'wandb-events.jsonl': {'offset': 1673, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 31.67, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 31.67, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.64, "system.gpu.0.powerPercent": 15.46, "system.gpu.proce

Epoch: 21/50, batch: 461/981, train_loss: 1.1996811291636083e-05, t_ent_sample_acc: 0.59942156273605, t_head_rel_sample_acc: 0.14099783451981243, t_tail_rel_sample_acc: 0.1456977623533013,lr: 0.0008703915422323985, batch_time: 1.9895858764648438, total_time: 1307.0373861789703 ----------------

requests_with_retry encountered retryable exception: 408 Client Error: Request Timeout for url: https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream. args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 206, 'content': ['2020-06-01T13:36:02.567549 Epoch: 21/50, batch: 439/981, train_loss: 1.2162563579438188e-05, t_ent_sample_acc: 0.5983295522138035, t_head_rel_sample_acc: 0.13591496188168103, t_tail_rel_sample_acc: 0.14123007210484942,lr: 0.0008819910152028872, batch_time: 3.076282501220703, total_time: 1245.001119852066 -------------\r']}, 'wandb-history.jsonl': {'offset': 2091, 'content': ['{"train_loss": 1.2227758973889535e-05, "train_ent_seq_acc": 0.5974478111051351, "train_head_rel_acc": 0.13534416451946488, "train_tail_rel_acc": 0.1411446286948817, "learning_rate": 0.0008860922297765599, "time": 1220.7413876056671, "_runtime": 55249.24163269997, "_timestamp": 1591018535.240225, "_st

Epoch: 21/50, batch: 980/981, train_loss: 1.0594500347405283e-05, t_ent_sample_acc: 0.622789132351778, t_head_rel_sample_acc: 0.1690476233101621, t_tail_rel_sample_acc: 0.17057823557026533,lr: 0.0005016012167603286, batch_time: 2.415224313735962, total_time: 2667.322118997574 -----------------

Validating:   0%|          | 0/97 [00:00<?, ?it/s]

Epoch: 21/50, batch: 981/981, train_loss: 1.0586330994978955e-05, t_ent_sample_acc: 0.6231736490364347, t_head_rel_sample_acc: 0.16887530157386224, t_tail_rel_sample_acc: 0.1704043535768196,lr: 0.000500800609406507, batch_time: 1.211759328842163, total_time: 2668.568452358246 -------------

Validating: 100%|██████████| 97/97 [01:11<00:00,  1.35it/s]


{'time': 71.9177839756012,
 'val_ent_seq_acc': 0.7036082646281449,
 'val_f1': 0.7212967131427601,
 'val_head_rel_acc': 0.32302406123004007,
 'val_prec': 0.6517493897477094,
 'val_recall': 0.8074596774192734,
 'val_tail_rel_acc': 0.319587635625269}
Current avf_f1: 0.7212967131427601, Best f1: 0.7212967131427601
Epoch: 22/50, batch: 981/981, train_loss: 6.272242306514809e-06, t_ent_sample_acc: 0.718314664899756, t_head_rel_sample_acc: 0.31889229350619847, t_tail_rel_sample_acc: 0.31396534811101556,lr: 6.409758326331172e-10, batch_time: 1.3453576564788818, total_time: 2779.4664232730865 ----------------

Validating: 100%|██████████| 97/97 [01:19<00:00,  1.22it/s]


{'time': 79.23456358909607,
 'val_ent_seq_acc': 0.7173539716558358,
 'val_f1': 0.7733887733387332,
 'val_head_rel_acc': 0.4072165017582707,
 'val_prec': 0.7567127746134453,
 'val_recall': 0.790816326530545,
 'val_tail_rel_acc': 0.4140893540431544}
Current avf_f1: 0.7733887733387332, Best f1: 0.7733887733387332
Epoch: 23/50, batch: 516/981, train_loss: 9.887276856829004e-06, t_ent_sample_acc: 0.6469638410002686, t_head_rel_sample_acc: 0.17990956504562106, t_tail_rel_sample_acc: 0.18572351876501889,lr: 0.0008394150701385248, batch_time: 3.14919114112854, total_time: 1375.4810934066772 ----------------

requests_with_retry encountered retryable exception: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')). args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 225, 'content': ['2020-06-01T15:08:04.427272 Epoch: 23/50, batch: 447/981, train_loss: 9.73571263123568e-06, t_ent_sample_acc: 0.647278167257373, t_head_rel_sample_acc: 0.18269948244628223, t_tail_rel_sample_acc: 0.1890380360202768,lr: 0.0008778271206386134, batch_time: 2.218101739883423, total_time: 1166.8901913166046 -------------\r']}, 'wandb-events.jsonl': {'offset': 1861, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 31.67, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 31.67, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.62, "system.gpu.0.powerP

Epoch: 23/50, batch: 808/981, train_loss: 8.926457196048207e-06, t_ent_sample_acc: 0.6608911064628622, t_head_rel_sample_acc: 0.21699670469849416, t_tail_rel_sample_acc: 0.22174092911478907,lr: 0.0006375108114673424, batch_time: 2.6507489681243896, total_time: 2159.0561690330505 -------------

requests_with_retry encountered retryable exception: 408 Client Error: Request Timeout for url: https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream. args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 226, 'content': ['2020-06-01T15:23:36.058724 Epoch: 23/50, batch: 783/981, train_loss: 8.991190117065327e-06, t_ent_sample_acc: 0.6600681314409007, t_head_rel_sample_acc: 0.21306939615295917, t_tail_rel_sample_acc: 0.2181779530046848,lr: 0.0006566389274910309, batch_time: 2.312624454498291, total_time: 2099.249093770981 -------------\r']}, 'wandb-events.jsonl': {'offset': 1890, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 40.43, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 40.43, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.67,

Epoch: 23/50, batch: 981/981, train_loss: 8.54486269467576e-06, t_ent_sample_acc: 0.6690452095504686, t_head_rel_sample_acc: 0.2351342221678336, t_tail_rel_sample_acc: 0.2375127474770755,lr: 0.000500800609406507, batch_time: 1.289499282836914, total_time: 2580.276881456375 -------------------

Validating: 100%|██████████| 97/97 [01:04<00:00,  1.50it/s]


{'time': 64.4647421836853,
 'val_ent_seq_acc': 0.712199330329895,
 'val_f1': 0.7547018807022539,
 'val_head_rel_acc': 0.3591065360713251,
 'val_prec': 0.7672904800650311,
 'val_recall': 0.7425196850393115,
 'val_tail_rel_acc': 0.3745704540579589}
Current avf_f1: 0.7547018807022539, Best f1: 0.7733887733387332
Epoch: 24/50, batch: 425/981, train_loss: 5.38493978076397e-06, t_ent_sample_acc: 0.7274509994422689, t_head_rel_sample_acc: 0.3737254974070717, t_tail_rel_sample_acc: 0.3701960854319965,lr: 0.00018602470190609671, batch_time: 3.2740142345428467, total_time: 1227.0891292095184 -----------------

requests_with_retry encountered retryable exception: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')). args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 236, 'content': ['2020-06-01T15:49:40.582194 Epoch: 24/50, batch: 353/981, train_loss: 5.438036308377024e-06, t_ent_sample_acc: 0.7304060623345227, t_head_rel_sample_acc: 0.3701605357377455, t_tail_rel_sample_acc: 0.3663833871610442,lr: 0.00023287119772373905, batch_time: 2.3651013374328613, total_time: 1018.1215300559998 -------------\r']}, 'wandb-events.jsonl': {'offset': 1938, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 40.43, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 40.43, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.62, "system.gpu.0.pow

Epoch: 24/50, batch: 682/981, train_loss: 5.271426680491114e-06, t_ent_sample_acc: 0.7304496772128466, t_head_rel_sample_acc: 0.3844086098487426, t_tail_rel_sample_acc: 0.3829423338830296,lr: 5.658703905325174e-05, batch_time: 3.1486706733703613, total_time: 1974.964429616928 ------------------

requests_with_retry encountered retryable exception: 408 Client Error: Request Timeout for url: https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream. args: ('https://api.wandb.ai/files/wycheng/webnlg_single/2w4et4vj/file_stream',), kwargs: {'json': {'files': {'output.log': {'offset': 237, 'content': ['2020-06-01T16:04:37.268462 Epoch: 24/50, batch: 663/981, train_loss: 5.28766242114161e-06, t_ent_sample_acc: 0.7300151022850658, t_head_rel_sample_acc: 0.3821015661838008, t_tail_rel_sample_acc: 0.3803418876089122,lr: 6.382047270233738e-05, batch_time: 3.1649386882781982, total_time: 1914.6098518371582 -------------\r']}, 'wandb-events.jsonl': {'offset': 1966, 'content': ['{"system.gpu.0.gpu": 0.0, "system.gpu.0.memory": 0.0, "system.gpu.0.memoryAllocated": 40.43, "system.gpu.0.temp": 39.0, "system.gpu.process.0.gpu": 0.0, "system.gpu.process.0.memory": 0.0, "system.gpu.process.0.memoryAllocated": 40.43, "system.gpu.process.0.temp": 39.0, "system.gpu.0.powerWatts": 38.68,

Epoch: 24/50, batch: 981/981, train_loss: 5.016570748659459e-06, t_ent_sample_acc: 0.73836223510731, t_head_rel_sample_acc: 0.4023105755342013, t_tail_rel_sample_acc: 0.4009514178978194,lr: 6.409758326331172e-10, batch_time: 1.1667487621307373, total_time: 2802.278261899948 --------------------

Validating: 100%|██████████| 97/97 [00:51<00:00,  1.89it/s]

{'time': 51.28088450431824,
 'val_ent_seq_acc': 0.7362543172443036,
 'val_f1': 0.8160965794268014,
 'val_head_rel_acc': 0.45017182949892026,
 'val_prec': 0.825061025223692,
 'val_recall': 0.8073248407642669,
 'val_tail_rel_acc': 0.4587628953542906}
Current avf_f1: 0.8160965794268014, Best f1: 0.8160965794268014





Epoch: 25/50, batch: 596/981, train_loss: 8.00038925078279e-06, t_ent_sample_acc: 0.6926733969191977, t_head_rel_sample_acc: 0.2732102967198663, t_tail_rel_sample_acc: 0.2732102964448449,lr: 0.0007897319427893388, batch_time: 1.8562812805175781, total_time: 1252.2970128059387 -----------------

# Prediction

In [None]:
model_state_path = get_last_state_path(model_state_dict_dir)
# model_state_path = get_state_path(model_state_dict_dir, 16)
rel_extractor.load_state_dict(torch.load(model_state_path))
rel_extractor.eval()
print("------------model state {} loaded ----------------".format(model_state_path.split("/")[-1]))

In [None]:
def filter_duplicates(rel_list):
    rel_memory_set = set()
    filtered_rel_list = []
    for rel in rel_list:
        rel_memory = "{}\u2E80\{}\u2E80\{}\u2E80\{}\u2E80{}".format(*rel.values())
        if rel_memory not in rel_memory_set:
            filtered_rel_list.append(rel)
            rel_memory_set.add(rel_memory)
    return filtered_rel_list

In [None]:
def predict(short_test_data):
    '''
    short_test_data: seq_len <= max_seq_len
    '''
    indexed_test_data = get_indexed_train_valid_data(short_test_data)
    test_dataloader = DataLoader(MyDataset(indexed_test_data), 
                              batch_size = batch_size, 
                              shuffle = False, 
                              num_workers = 0,
                              drop_last = False,
                              collate_fn = generate_pred_batch,
                             )
    short_pred_sample_list = []
    for batch_test_data in tqdm(test_dataloader, desc = "Predicting"):
        text_id_list, text_list, batch_input_ids, \
        batch_attention_mask, batch_token_type_ids, \
        offset_map_list = batch_test_data

        batch_input_ids, \
        batch_attention_mask, \
        batch_token_type_ids = (batch_input_ids.to(device), 
                                  batch_attention_mask.to(device), 
                                  batch_token_type_ids.to(device)
                                 )
        with torch.no_grad():
            batch_ent_shaking_outputs, \
            batch_head_rel_shaking_outputs, \
            batch_tail_rel_shaking_outputs = rel_extractor(batch_input_ids, 
                                                              batch_attention_mask, 
                                                              batch_token_type_ids, 
                                                             )

        batch_ent_shaking_tag, \
        batch_head_rel_shaking_tag, \
        batch_tail_rel_shaking_tag = torch.argmax(batch_ent_shaking_outputs, dim = -1), \
                                     torch.argmax(batch_head_rel_shaking_outputs, dim = -1), \
                                     torch.argmax(batch_tail_rel_shaking_outputs, dim = -1)

        for ind in range(len(text_list)):
            text, offset_map = text_list[ind], offset_map_list[ind]
            ent_shaking_tag, \
            head_rel_shaking_tag, \
            tail_rel_shaking_tag = batch_ent_shaking_tag[ind], \
                                    batch_head_rel_shaking_tag[ind], \
                                    batch_tail_rel_shaking_tag[ind]
            rel_list = handshaking_tagger.decode_rel_fr_shaking_tag(text, 
                                                  ent_shaking_tag, 
                                                  head_rel_shaking_tag, 
                                                  tail_rel_shaking_tag, 
                                                  offset_map)
            short_pred_sample_list.append({
                "text": text,
                "id": text_id_list[ind],
                "relation_list": rel_list,
            })
    # merge
    text_id2rel_list = {}
    for sample in short_pred_sample_list:
        text_id = sample["id"]
        if text_id not in text_id2rel_list:
            text_id2rel_list[text_id] = sample["relation_list"]
        else:
            text_id2rel_list[text_id].extend(sample["relation_list"])

    text_id2text = {sample["id"]:sample["text"] for sample in test_data}
    merged_pred_sample_list = []
    for text_id, rel_list in text_id2rel_list.items():
        merged_pred_sample_list.append({
            "id": text_id,
            "text": text_id2text[text_id],
            "relation_list": filter_duplicates(rel_list),
        })
    return merged_pred_sample_list

In [None]:
pred_sample_list = predict(short_test_data)

In [None]:
len([s for s in pred_sample_list if len(s["relation_list"]) > 0])

In [None]:
text_id2gold_n_pred = {}
for sample in test_data:
    text_id = sample["id"]
    text_id2gold_n_pred[text_id] = {
        "gold_relation_list": sample["relation_list"],
    }
def get_test_prf(pred_sample_list):
    for sample in pred_sample_list:
        text_id = sample["id"]
        text_id2gold_n_pred[text_id]["pred_relation_list"] = sample["relation_list"]

    correct_num, pred_num, gold_num = 0, 0, 0
    for gold_n_pred in text_id2gold_n_pred.values():
        gold_rel_list = gold_n_pred["gold_relation_list"]
        pred_rel_list = gold_n_pred["pred_relation_list"] if "pred_relation_list" in gold_n_pred else []
        gold_rel_set = set(["{}\u2E80{}\u2E80{}".format(rel["subject"], rel["predicate"], rel["object"]) for rel in gold_rel_list])
        pred_rel_set = set(["{}\u2E80{}\u2E80{}".format(rel["subject"], rel["predicate"], rel["object"]) for rel in pred_rel_list])

        for rel_str in pred_rel_set:
            if rel_str in gold_rel_set:
                correct_num += 1

        pred_num += len(gold_rel_set)
        gold_num += len(pred_rel_set)

    prf = get_scores(correct_num, pred_num, gold_num)
#     print(prf)
    return prf

In [None]:
# model state 16: (0.9112068965517129, 0.9034188034187924, 0.9072961372890456)
# model state 17: (0.9060344827586095, 0.9096191889218483, 0.9078232970872052)
# 18: (0.9178571428571316, 0.904600072824361, 0.9111803899493801)
get_test_prf(pred_sample_list)