The whole notebook should run in less than 3 minutes due to the loading of the drive folder, imports, loading of weights and test runs.
The training cells are commented to avoid running it if the notebook it's run from the sections collapsed.


# Folder setup, libraries and utils functions

In [1]:
%pip install huggingface_hub tokenizers pytorch_lightning torchmetrics transformers --quiet
#huggingface_hub tokenizers sentencepiece sacremoses importlib_metadata safetensors regex pytorch_lightning torchmetrics transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m727.0/727.0 kB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m763.4/763.4 kB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m37.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m58.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m55.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import gdown
url = "https://drive.google.com/drive/folders/1i9dIdCZBuibTSJ6Ku4hypP9qObUN91zl?usp=sharing"
gdown.download_folder(url, quiet=True, use_cookies=False)

['/content/DL_BonaiutiAndrea/checkpoints/GRU/epoch=36-step=5772.ckpt',
 '/content/DL_BonaiutiAndrea/checkpoints/LSTM/epoch=50-step=7956.ckpt',
 '/content/DL_BonaiutiAndrea/checkpoints/PFN/epoch=51-step=8112.ckpt',
 '/content/DL_BonaiutiAndrea/checkpoints/PFN/lightning_logs/version_0/events.out.tfevents.1694163192.d88f4d641c0f.243.0',
 '/content/DL_BonaiutiAndrea/checkpoints/PFN/lightning_logs/version_0/hparams.yaml',
 '/content/DL_BonaiutiAndrea/data/webnlg_github/dev_triples.json',
 '/content/DL_BonaiutiAndrea/data/webnlg_github/rel2id.json',
 '/content/DL_BonaiutiAndrea/data/webnlg_github/test_triples.json',
 '/content/DL_BonaiutiAndrea/data/webnlg_github/train_triples.json',
 '/content/DL_BonaiutiAndrea/LSTM_checkpoints/state_dict.pt',
 '/content/DL_BonaiutiAndrea/Relation_Extraction_Project.ipynb',
 '/content/DL_BonaiutiAndrea/Report.pdf']

In [3]:
%cd DL_BonaiutiAndrea/

/content/DL_BonaiutiAndrea


In [4]:
import json
import torchmetrics
from torchmetrics import Metric
import torch
from torch.utils.data import Dataset
from torch.nn.functional import pad
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
from torch.utils.data import DataLoader
from transformers import AutoModel, AutoTokenizer

In [5]:
# Data Loading

# Reading data json files
def read_file(file_name):
  path = "data/webnlg_github/"+file_name+"_triples.json"
  with open(path, 'r') as read_data:
    sentences = []
    relations = []
    print(f"Reading {file_name} sentences")
    json_loaded = json.load(read_data)
    for i in json_loaded:
      sentences.append(i['text'])
      relations.append(i['triple_list'])
    print(f"Reading {file_name} finished")
    return sentences, relations

# Reading relations dict
def read_rels():
  path = "data/webnlg_github/rel2id.json"
  with open(path, 'r') as read_data:
    rel2id = json.load(read_data)
    rel_num = len(rel2id)
    return rel2id

# Inverse dict of relations for inference
def decode_rels(rel2id):
  id2rel = {}
  for r in rel2id:
    id2rel[rel2id[r]]= r
  return id2rel

def where_in_sentence(sentence, entity):
  for i in range(len(sentence)):
    if sentence[i] == entity[0]:
      end = 0
      for k in range(len(entity)):
        if sentence[i+k] == entity[k]:
          end = i+k
      return i, end
  return -1, -1

def triplets(s_output, o_output, id2rel, tokenizer, entities, treshold=0.5):
  # Apply sigmoid to compute probabilities
  sigmoid = torch.nn.Sigmoid()
  s_output = sigmoid(s_output)
  o_output = sigmoid(o_output)
  pred_subjects = []
  pred_objects = []

  # Filter probabilities with threshold
  for s in range(s_output.size(0)):
    pred_subjects.append([])
    s_preds = torch.where(s_output[s]>treshold, 1, 0)
    pred_subjects[s] = torch.nonzero(s_preds)
  for o in range(o_output.size(0)):
    pred_objects.append([])
    o_preds = torch.where(o_output[o]>treshold, 1, 0)
    pred_objects[o] = torch.nonzero(o_preds)
  pred_relations = []
  # Create predicted triplets
  for s in range(len(pred_subjects)):
    #if s in s_nonzeros:
      if len(pred_subjects[s]) != 0:
        for r_s in range(len(pred_subjects[s])):
          rel = pred_subjects[s][r_s].item()
          for o in range(len(pred_objects)):
            #if o in o_nonzeros:
              if len(pred_objects[o]) != 0:
                for r_o in range(len(pred_objects[o])):
                  if rel == pred_objects[o][r_o].item():
                    pred_relations.append((s, rel, o))
  output = []
  for r in pred_relations:
    s, rel, o = r
    s = tokenizer.decode(entities[s])
    rel = id2rel[rel]
    o = tokenizer.decode(entities[o])
    output.append((s, rel, o))

  return output

# Metrics

In [6]:
class CustomMetricsPFN(Metric):
  def __init__(self, threshold = 0.5):
    super().__init__()
    full_state_update: bool = True
    self.add_state("correct", default=torch.tensor(0))
    self.add_state("pred", default=torch.tensor(0))
    self.add_state("truth", default=torch.tensor(0))
    self.threshold = threshold

  def update(self, e_output, r_output, e_labels, r_labels):

    self.truth += torch.sum(r_labels, dtype=torch.long)

    e_output = (e_output>self.threshold).long()
    r_output = (r_output>self.threshold).long()

    e_mask = torch.sum(e_output, dim=-1).float()
    e_mask = (e_mask>0).long()

    length = e_mask.size(-1)
    e1 = e_mask.unsqueeze(1).repeat(1, length, 1)
    e2 = e_mask.unsqueeze(-1).repeat(1, 1, length)
    e_mask = e1*e2
    e_mask = e_mask.unsqueeze(-1).repeat(1, 1, 1, r_labels.size(-1))

    final_r = r_output * e_mask

    self.pred += final_r.sum().item()

    correct_r = final_r + r_labels
    correct_r = (correct_r==2)

    e_mask = e_labels * e_output
    e_mask = torch.sum(e_output, dim=-1).float()
    e_mask = (e_mask>0).long()
    length = e_mask.size(-1)
    e1 = e_mask.unsqueeze(1).repeat(1, length, 1)
    e2 = e_mask.unsqueeze(-1).repeat(1, 1, length)
    e_mask = e1*e2
    e_mask = e_mask.unsqueeze(-1).repeat(1, 1, 1, r_labels.size(-1))
    correct_r = correct_r*e_mask
    self.correct += correct_r.sum().item()

  def compute(self):
    precision = self.correct / (self.pred + 1e-10)
    recall = self.correct / (self.truth)
    f1_score = (2 * precision * recall) / (precision + recall + 1e-10)
    return precision, recall, f1_score

class CustomMetrics(Metric):
  def __init__(self, threshold = 0.5):
    super().__init__()
    full_state_update: bool = True
    self.add_state("correct", default=torch.tensor(0))
    self.add_state("pred", default=torch.tensor(0))
    self.add_state("truth", default=torch.tensor(0))
    self.threshold = threshold

  def update(self, s_output, o_output, s_labels, o_labels, relations):
    # Filter rows with all zeros
    pred_num = 0
    correct_num = 0
    truth_num = 0
    s_nonzeros = []
    o_nonzeros = []

    s_output = torch.sigmoid(s_output)
    o_output = torch.sigmoid(o_output)

    s_output = torch.where(s_output>=self.threshold,
                           torch.ones_like(s_output),
                           torch.zeros_like(s_output))
    o_output = torch.where(o_output>=self.threshold,
                          torch.ones_like(o_output),
                          torch.zeros_like(o_output))

    s_output = torch.nonzero(s_output)
    o_output = torch.nonzero(o_output)
    rel_output = []
    for s in s_output:
        for o in o_output:
            if s[1] == o[1]:
                rel_output.append((s[0], s[1], o[0]))

    self.pred += len(rel_output)
    rels = torch.nonzero(relations[0])
    rel_labels = []
    for r in rels:
        rel_labels.append((r[0], relations[0][r[0],r[1]], r[1]))

    self.truth += len(rel_labels)

    for r in rel_output:
        if r in rel_labels:
            self.correct += 1

  def compute(self):
    precision = self.correct / (self.pred + 1e-10)
    recall = self.correct / (self.truth)
    f1_score = (2 * precision * recall) / (precision + recall + 1e-10)
    return precision, recall, f1_score

# Dataset

In [7]:
class RelDataset(Dataset):
  def __init__(self, sentences, relations, tokenizer, rel2id, max_length=105, sota=False):
    self.sentences = []
    self.masks = []
    self.entities = []
    self.relations = []
    self.tokenizer = tokenizer
    self.rel2id = rel2id
    self.max_l = max_length
    self.sota = sota

    for s in sentences:
      tokenized = tokenizer(s)
      self.sentences.append(torch.LongTensor(tokenized.input_ids))
      self.masks.append(torch.LongTensor(tokenized.attention_mask))
      if len(tokenized.input_ids) > self.max_l:
        self.max_l = len(tokenized.input_ids)
    for i in range(len(self.sentences)):
      l = self.sentences[i].size(-1)
      self.sentences[i] = pad(self.sentences[i],
                              (0,self.max_l - l), "constant", 0)
      self.masks[i] = pad(self.masks[i],
                          (0,self.max_l - l), "constant", 0)
      self.entities.append([])
      for r in relations[i]:
        sub = tokenizer(r[0], add_special_tokens=False).input_ids
        obj = tokenizer(r[2], add_special_tokens=False).input_ids
        r[0] = sub
        r[2] = obj
        r[1] = self.rel2id[r[1]]
        if sub not in self.entities[i]:
          self.entities[i].append(sub)
        if obj not in self.entities[i]:
          self.entities[i].append(obj)
      matrix = torch.zeros((len(self.entities[i]),
                            len(self.entities[i])), dtype=torch.long)
      for r in relations[i]:
        s, o = self.entities[i].index(r[0]), self.entities[i].index(r[2])
        matrix[s,o] = r[1]
      self.relations.append(matrix)

  def max_length(self):
    return self.max_l

  def __len__(self):
    return len(self.sentences)

  def __getitem__(self, idx):
    sentence = self.sentences[idx]
    mask = self.masks[idx]
    entities = self.entities[idx]
    relations = self.relations[idx]
    nonzeros = torch.nonzero(relations)
    if self.sota:
      entity_labels = torch.zeros((len(sentence),
                                   len(sentence)), dtype=torch.float)
      relation_labels = torch.zeros((len(sentence),
                                     len(sentence), len(self.rel2id)), dtype=torch.float)
      for i in nonzeros:
        start_s, end_s = where_in_sentence(sentence, entities[i[0]])
        if start_s != -1:
          entity_labels[start_s, end_s] = 1
        start_o, end_o = where_in_sentence(sentence, entities[i[1]])
        if start_o != -1:
          entity_labels[start_o, end_o] = 1
        relation_labels[start_s, start_o, relations[i[0], i[1]].item()] = 1
      return sentence, mask, entities, relations, entity_labels, relation_labels
    else:
      s_labels = torch.zeros((len(entities),
                              len(self.rel2id)), dtype=torch.float)
      o_labels = torch.zeros((len(entities),
                              len(self.rel2id)), dtype=torch.float)
      for i in nonzeros:
        s_labels[i[0], relations[i[0], i[1]]] = 1
        o_labels[i[1], relations[i[0], i[1]]] = 1
      return sentence, mask, entities, relations, s_labels, o_labels

# Modifications with batch size more than one
def custom_collate_sota(batch):
  batch = list(batch)
  tokens, mask, entities, relations, entity_labels, relation_labels = zip(*batch)
  sentence_length = tokens[0].size(0)
  rel_num = relation_labels[0].size(-1)
  batch_size = len(tokens)
  t = torch.zeros((batch_size, sentence_length), dtype=torch.long)
  m = torch.zeros((batch_size, sentence_length), dtype=torch.long)
  e_labels = torch.zeros((batch_size, sentence_length, sentence_length))
  r_labels = torch.zeros((batch_size, sentence_length, sentence_length, rel_num))
  for i in range(len(tokens)):
    t[i,:].copy_(tokens[i])
    m[i,:].copy_(mask[i])
    e_labels[i,:].copy_(entity_labels[i])
    r_labels[i,:].copy_(relation_labels[i])

  return t, m, entities, relations, e_labels, r_labels

# Modifications with batch size more than one
def custom_collate(batch):
  batch = list(batch)
  tokens, mask, entities, relations, s_labels, o_labels = zip(*batch)
  t = torch.zeros((len(tokens), tokens[0].size(0)), dtype=torch.long)
  m = torch.zeros((len(tokens), tokens[0].size(0)), dtype=torch.long)
  for i in range(len(tokens)):
    t[i,:].copy_(tokens[i])
    m[i,:].copy_(mask[i])
  return t, m, entities, relations, s_labels, o_labels

# PFN Model (SOTA)

In [8]:
class LinearDropConnect(nn.Linear):
  def __init__(self, in_features, out_features, bias=True, dropout=0.):
    super(LinearDropConnect, self).__init__(in_features=in_features,
                                            out_features=out_features,
                                            bias=bias)
    self.dropout = dropout

  def sample_mask(self):
    if self.dropout == 0.:
      self._weight = self.weight
    else:
      mask = self.weight.new_empty(self.weight.size(),
                                   dtype=torch.bool)
      mask.bernoulli_(self.dropout)
      self._weight = self.weight.masked_fill(mask, 0.)

  def forward(self, input, sample_mask=False):
    if self.training:
      if sample_mask:
        self.sample_mask()
      return F.linear(input, self._weight, self.bias)
    else:
      return F.linear(input, self.weight * (1 - self.dropout),
                      self.bias)

class PFN(pl.LightningModule):
  def __init__(self, max_length = 105, hidden_size = 768, cell_size = 300):
    super().__init__()
    self.save_hyperparameters()
    # attributes
    self.rel2id = read_rels()
    self.rel_num = len(self.rel2id)
    self.id2rel = decode_rels(self.rel2id)
    print("Dict preparation done")
    self.max_length = max_length
    self.hidden_size = hidden_size
    self.cell_size = cell_size
    self.trained = False
    self.epoch = 0
    # layers
    self.embedding = AutoModel.from_pretrained("bert-base-cased")
    self.tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    self.linear_cell_candidate = nn.Linear(hidden_size, 5*cell_size)
    self.linear_drop_connect = LinearDropConnect(cell_size, 5*cell_size, bias=True, dropout=0.1)
    self.transform = nn.Linear(cell_size*3, cell_size)
    self.global_ner = nn.Linear(2*cell_size, cell_size)
    self.global_re = nn.Linear(2*cell_size, cell_size)
    self.ner = nn.Linear(3*cell_size, 1)
    self.rel1 = nn.Linear(3*cell_size, cell_size)
    self.rel2 = nn.Linear(cell_size, self.rel_num)
    self.dropout = nn.Dropout(0.1)
    self.layer_norm = nn.LayerNorm(cell_size)
    # activations
    self.tanh = nn.Tanh()
    self.sigmoid = nn.Sigmoid()
    self.softmax = nn.Softmax(dim=-1)
    self.elu = nn.ELU()
    # loss
    self.loss = nn.BCELoss(reduction="sum")
    # metrics
    self.custom_metrics = CustomMetricsPFN()
    self.custom_metrics = self.custom_metrics.set_dtype(torch.float32)
    self.f1_score = torchmetrics.F1Score(task="binary")

  def reset_hidden(self, batch_size):
    h = torch.zeros(batch_size, self.cell_size).requires_grad_(False).to(self.device)
    c = torch.zeros(batch_size, self.cell_size).requires_grad_(False).to(self.device)
    return h, c

  def recurrent(self, input, h_prev, c_prev):
    gates = self.linear_cell_candidate(input) + self.linear_drop_connect(h_prev)
    #combined = torch.cat([input, h_prev], dim=-1)
    #cell = self.tanh(self.linear_cell_candidate(combined))
    cell, ent_gate_prev, rel_gate_prev, ent_gate_curr, rel_gate_curr = gates.chunk(5, -1)
    cell = self.tanh(cell)
    ent_gate_prev = torch.cumsum(self.softmax(ent_gate_prev), dim=-1)
    rel_gate_prev = 1 - torch.cumsum(self.softmax(rel_gate_prev), dim=-1)
    ent_gate_curr = torch.cumsum(self.softmax(ent_gate_curr), dim=-1)
    rel_gate_curr = 1 - torch.cumsum(self.softmax(rel_gate_curr), dim=-1)
    sha_part_prev = ent_gate_prev * rel_gate_prev
    rel_part_prev = rel_gate_prev - sha_part_prev
    ent_part_prev = ent_gate_prev - sha_part_prev
    sha_part_curr = ent_gate_curr * rel_gate_curr
    rel_part_curr = rel_gate_curr - sha_part_curr
    ent_part_curr = ent_gate_curr - sha_part_curr
    ent_part = ent_part_prev * c_prev + ent_part_curr * cell
    rel_part = rel_part_prev * c_prev + rel_part_curr * cell
    sha_part = sha_part_prev * c_prev + sha_part_curr * cell
    ent_mem = ent_part + sha_part
    rel_mem = rel_part + sha_part
    sha_mem = sha_part
    cell = torch.cat([rel_mem, ent_mem, sha_mem], dim=-1)
    ent_mem = self.tanh(ent_mem)
    rel_mem = self.tanh(rel_mem)
    sha_mem = self.tanh(sha_mem)
    cell = self.transform(cell)
    hid = self.tanh(cell)

    return hid, cell, ent_mem, rel_mem, sha_mem

  def forward(self, sentence, mask):
    batch_size = sentence.size(0)

    # Embedding
    #with torch.no_grad():
    embedded = self.embedding(sentence, mask)[0]
    embedded = self.dropout(embedded)
    entity_partition, relation_partition, shared_partition = [], [], []

    # Reset for recurrent layer
    h_prev, c_prev = self.reset_hidden(batch_size)

    if self.training:
      self.linear_drop_connect.sample_mask()

    #print("EMBEDDED SIZE: ", embedded.size())

    embedded = embedded.transpose(0,1)

    # Recurrent loop
    for i in range(self.max_length):
      h_prev, c_prev, ent, rel, sha = self.recurrent(embedded[i,:,:], h_prev, c_prev)
      entity_partition.append(ent)
      relation_partition.append(rel)
      shared_partition.append(sha)

    # Stacking partition [batch, length, feat]
    entity_partition = torch.stack(entity_partition, dim=0).transpose(0,1)
    relation_partition = torch.stack(relation_partition, dim=0).transpose(0,1)
    shared_partition = torch.stack(shared_partition, dim=0).transpose(0,1)
    #print("PARTITION SIZE: ", entity_partition.size())

    # Global max pooling
    global_e = self.tanh(self.global_ner(torch.cat([shared_partition, entity_partition], dim=-1)))
    global_r = self.tanh(self.global_re(torch.cat([shared_partition, relation_partition], dim=-1)))
    #global_e = global_e.transpose(1,2)
    #global_r = global_r.transpose(1,2)
    global_e = torch.max(global_e, dim=1)[0] # ex dim=2
    global_r = torch.max(global_r, dim=1)[0] # ex dim=2
    global_e = global_e.unsqueeze(1).repeat(1, self.max_length, 1)
    global_e = global_e.unsqueeze(1).repeat(1, self.max_length, 1, 1)
    #print("GLOBAL_E SIZE: ", global_e.size())

    # NER prediction
    ner_start = entity_partition.unsqueeze(1).repeat(1, self.max_length, 1, 1)
    ner_end = entity_partition.unsqueeze(2).repeat(1, 1, self.max_length, 1)
    #print("NER_START: ", ner_start.size())
    ner_output = torch.cat([ner_start, ner_end, global_e], dim=-1)
    #ner_output = torch.cat([entity_partition, global_e], dim=-1)
    ner_output = self.sigmoid(self.ner(ner_output))
    ner_output = ner_output.squeeze()
    #print("NER_OUTPUT: ", ner_output.size())

    # NER masking
    diag_mask = torch.triu(torch.ones(batch_size, self.max_length, self.max_length)).to(self.device)
    #m = mask.unsqueeze(-1).repeat(1,1,self.max_length).transpose(1,2)
    #diag_mask = diag_mask.permute(1, 2, 0)
    ner_mask = mask
    mask_start = ner_mask.unsqueeze(1).repeat(1, self.max_length, 1)
    mask_end = ner_mask.unsqueeze(2).repeat(1, 1, self.max_length)
    ner_mask = mask_start * mask_end
    ner_mask = diag_mask * ner_mask
    #print("MASK_NER SIZE: ", ner_mask.size())
    #ner_mask = ner_mask*m
    ner_output = ner_output*ner_mask

    # RE prediction
    global_r = global_r.unsqueeze(1).repeat(1, self.max_length, 1)
    global_r = global_r.unsqueeze(1).repeat(1, self.max_length, 1, 1)
    #global_r = global_r.unsqueeze(1) # da fare un'altra volta
    #global_r = global_r.repeat(1, self.max_length, self.max_length, 1)
    r1 = relation_partition.unsqueeze(1).repeat(1, self.max_length, 1, 1)
    r2 = relation_partition.unsqueeze(2).repeat(1, 1, self.max_length, 1)
    #print("R1_SIZE: ", r1.size())
    #relation_partition = relation_partition.unsqueeze(2)
    #relation_partition = relation_partition.repeat(1, 1, self.max_length, 1)
    re_output = torch.cat([r1, r2, global_r], dim=-1)
    re_output = self.layer_norm(self.rel1(re_output))
    re_output = self.elu(self.dropout(re_output))
    re_output = self.sigmoid(self.rel2(re_output))
    #print("RE_OUTPUT SIZE: ", re_output.size())

    # RE masking
    mask = mask.unsqueeze(-1).repeat(1, 1, self.rel_num)
    mask_1 = mask.unsqueeze(1).repeat(1, self.max_length, 1, 1)
    mask_2 = mask.unsqueeze(2).repeat(1, 1, self.max_length, 1)
    re_mask = mask_1 * mask_2
    re_output = re_output*re_mask

    return ner_output, re_output

  def configure_optimizers(self, lr=2e-5):
    optimizer = torch.optim.Adam(self.parameters(), lr=lr)
    return optimizer

  def training_step(self, train_batch, batch_idx):
    sentence, mask, entities, relations, entity_labels, relation_labels = train_batch
    ner_output, re_output = self(sentence, mask)
    ner_loss = self.loss(ner_output.reshape(-1), entity_labels.reshape(-1))/self.max_length
    #ner_f1 = self.f1_score(ner_output, entity_labels)
    re_loss = self.loss(re_output.reshape(-1), relation_labels.reshape(-1))/self.max_length
    #re_f1 = self.f1_score(re_output, relation_labels)
    if self.epoch > 5:
        loss = ner_loss + re_loss*(0.05*self.epoch)
        self.log('train_re_loss', re_loss, prog_bar=True, logger=True,
                 on_step=True, on_epoch=True)
        #self.log('train_re_f1', re_f1, prog_bar=True, logger=True,
        #         on_step=True, on_epoch=True)
    else:
        loss = ner_loss
    self.log('train_loss', loss, prog_bar=True, logger=True,
             on_step=True, on_epoch=True)
    self.log('train_ner_loss', ner_loss, prog_bar=True, logger=True,
             on_step=True, on_epoch=True)
    #self.log('train_ner_f1', ner_f1, prog_bar=True, logger=True,
    #         on_step=True, on_epoch=True)
    return loss

  def validation_step(self, val_batch, batch_idx):
    sentence, mask, entities, relations, entity_labels, relation_labels = val_batch
    ner_output, re_output = self(sentence, mask)
    ner_loss = self.loss(ner_output.reshape(-1), entity_labels.reshape(-1))/self.max_length
    ner_f1 = self.f1_score(ner_output, entity_labels)
    re_loss = self.loss(re_output.reshape(-1), relation_labels.reshape(-1))/self.max_length
    re_f1 = self.f1_score(re_output, relation_labels)
    if self.epoch > 5:
        loss = ner_loss + re_loss
        self.log('val_re_loss', re_loss, prog_bar=True, logger=True,
                 on_step=True, on_epoch=True)
        self.log('val_re_f1', re_f1, prog_bar=True, logger=True,
                 on_step=True, on_epoch=True)
        self.custom_metrics.update(ner_output, re_output, entity_labels, relation_labels)
    else:
        loss = ner_loss
    self.log('val_loss', loss, prog_bar=True, logger=True,
             on_step=True, on_epoch=True)
    self.log('val_ner_loss', ner_loss, prog_bar=True, logger=True,
             on_step=True, on_epoch=True)
    self.log('val_ner_f1', ner_f1, prog_bar=True, logger=True,
             on_step=True, on_epoch=True)
    return loss

  def on_validation_epoch_end(self):
    self.epoch +=1
    if self.epoch>5:
        precision, recall, f1 = self.custom_metrics.compute()
        self.log_dict({"precision": precision, "recall": recall, "f1": f1}, prog_bar=True,
                  logger=True, on_step=False, on_epoch=True)
        self.custom_metrics.reset()

  def test_step(self, test_batch, batch_idx):
    sentence, mask, entities, relations, entity_labels, relation_labels = test_batch
    ner_output, re_output = self(sentence, mask)
    self.custom_metrics.update(ner_output, re_output, entity_labels, relation_labels)

  def on_test_epoch_end(self):
    precision, recall, f1 = self.custom_metrics.compute()
    self.log_dict({"precision": precision, "recall": recall, "f1": f1}, prog_bar=True,
                  logger=True, on_step=False, on_epoch=True)
    self.custom_metrics.reset()

  #def prepare_data(self):

  def setup(self, stage: str):
    print("Setup..")
    if stage == "fit" or None:
      sentences, relations = read_file("train")
      self.dataset_train = RelDataset(sentences, relations,
                                      self.tokenizer, self.rel2id, sota=True)
      sentences, relations = read_file("dev")
      self.dataset_dev = RelDataset(sentences, relations,
                                    self.tokenizer, self.rel2id, sota=True)
      self.trained = True

    if stage == "validate" and not self.trained:
      sentences, relations = read_file("dev")
      self.dataset_dev = RelDataset(sentences, relations,
                                    self.tokenizer, self.rel2id, sota=True)

    if stage == "test":
      sentences, relations = read_file("test")
      self.dataset_test = RelDataset(sentences, relations,
                                     self.tokenizer, self.rel2id, sota=True)
    print("Setup done")

  def train_dataloader(self):
    return DataLoader(self.dataset_train, batch_size=32,
                      collate_fn=custom_collate_sota, shuffle=True, drop_last=True)

  def val_dataloader(self):
    return DataLoader(self.dataset_dev, batch_size=32,
                      collate_fn=custom_collate_sota, shuffle=False, drop_last=True)

  def test_dataloader(self):
    return DataLoader(self.dataset_test, batch_size=32,
                      collate_fn=custom_collate_sota, shuffle=False, drop_last=True)

In [9]:
model = PFN()
trainer = pl.Trainer(max_epochs=100, accelerator="auto", devices=1,
                     num_sanity_val_steps=0, default_root_dir="checkpoints/PFN/", gradient_clip_val=0.25)

Dict preparation done


Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
#trainer.fit(model=model)

In [10]:
trainer.test(model=model, ckpt_path="checkpoints/PFN/epoch=51-step=8112.ckpt")

Setup..
Reading test sentences
Reading test finished


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at checkpoints/PFN/epoch=51-step=8112.ckpt


Setup done


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at checkpoints/PFN/epoch=51-step=8112.ckpt


Testing: 0it [00:00, ?it/s]

[{'precision': 0.9186875820159912,
  'recall': 0.879180908203125,
  'f1': 0.8985002040863037}]

# GRU Model (Baseline 1)

In [11]:
class GRU_PFN(pl.LightningModule):
  def __init__(self, max_length = 105, hidden_size = 768, cell_size = 300):
    super().__init__()
    self.save_hyperparameters()
    # attributes
    self.rel2id = read_rels()
    self.rel_num = len(self.rel2id)
    self.id2rel = decode_rels(self.rel2id)
    print("Dict preparation done")
    self.max_length = max_length
    self.hidden_size = hidden_size
    self.cell_size = cell_size
    self.trained = False
    self.epoch = 0
    # layers
    self.embedding = AutoModel.from_pretrained("bert-base-cased")
    self.tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    self.gru = nn.GRU(input_size=hidden_size,
                      hidden_size=3*cell_size, batch_first=True, bidirectional=True)
    self.global_ner = nn.Linear(2*cell_size, cell_size)
    self.global_re = nn.Linear(2*cell_size, cell_size)
    self.ner = nn.Linear(3*cell_size, 1)
    self.rel1 = nn.Linear(3*cell_size, cell_size)
    self.rel2 = nn.Linear(cell_size, self.rel_num)
    self.dropout = nn.Dropout(0.1)
    self.layer_norm = nn.LayerNorm(cell_size)
    # activations
    self.tanh = nn.Tanh()
    self.sigmoid = nn.Sigmoid()
    self.softmax = nn.Softmax(dim=-1)
    self.elu = nn.ELU()
    # loss
    self.loss = nn.BCELoss(reduction="sum")
    # metrics
    self.custom_metrics = CustomMetricsPFN()
    self.custom_metrics = self.custom_metrics.set_dtype(torch.float32)
    self.f1_score = torchmetrics.F1Score(task="binary")

  def forward(self, sentence, mask):
    batch_size = sentence.size(0)

    # Embedding
    embedded = self.embedding(sentence, mask)[0]
    if self.training:
      embedded = self.dropout(embedded)

    # Bidirectional Gated Recurrent Unit
    output, _ = self.gru(embedded)
    forward = output[:,:,:3*self.cell_size]
    reverse = output[:,:,3*self.cell_size:]
    output = forward + reverse

    # Creating partition inspired from SOTA
    entity_partition = output[:,:,:self.cell_size]
    shared_partition = output[:,:,self.cell_size:self.cell_size*2]
    relation_partition = output[:,:,self.cell_size*2:]

    # Global max pooling
    global_e = self.tanh(self.global_ner(torch.cat([shared_partition, entity_partition], dim=-1)))
    global_r = self.tanh(self.global_re(torch.cat([shared_partition, relation_partition], dim=-1)))
    global_e = torch.max(global_e, dim=1)[0]
    global_r = torch.max(global_r, dim=1)[0]
    global_e = global_e.unsqueeze(1).repeat(1, self.max_length, 1)
    global_e = global_e.unsqueeze(1).repeat(1, self.max_length, 1, 1)
    #print("GLOBAL_E SIZE: ", global_e.size())

    # NER prediction
    ner_start = entity_partition.unsqueeze(1).repeat(1, self.max_length, 1, 1)
    ner_end = entity_partition.unsqueeze(2).repeat(1, 1, self.max_length, 1)
    #print("NER_START: ", ner_start.size())
    ner_output = torch.cat([ner_start, ner_end, global_e], dim=-1)
    ner_output = self.sigmoid(self.ner(ner_output))
    ner_output = ner_output.squeeze()
    #print("NER_OUTPUT: ", ner_output.size())

    # NER masking
    diag_mask = torch.triu(torch.ones(batch_size, self.max_length, self.max_length)).to(self.device)
    ner_mask = mask
    mask_start = ner_mask.unsqueeze(1).repeat(1, self.max_length, 1)
    mask_end = ner_mask.unsqueeze(2).repeat(1, 1, self.max_length)
    ner_mask = mask_start * mask_end
    ner_mask = diag_mask * ner_mask
    #print("MASK_NER SIZE: ", ner_mask.size())
    ner_output = ner_output*ner_mask

    # RE prediction
    global_r = global_r.unsqueeze(1).repeat(1, self.max_length, 1)
    global_r = global_r.unsqueeze(1).repeat(1, self.max_length, 1, 1)
    r1 = relation_partition.unsqueeze(1).repeat(1, self.max_length, 1, 1)
    r2 = relation_partition.unsqueeze(2).repeat(1, 1, self.max_length, 1)
    #print("R1_SIZE: ", r1.size())
    re_output = torch.cat([r1, r2, global_r], dim=-1)
    re_output = self.layer_norm(self.rel1(re_output))
    re_output = self.elu(self.dropout(re_output))
    re_output = self.sigmoid(self.rel2(re_output))
    #print("RE_OUTPUT SIZE: ", re_output.size())

    # RE masking
    mask = mask.unsqueeze(-1).repeat(1, 1, self.rel_num)
    mask_1 = mask.unsqueeze(1).repeat(1, self.max_length, 1, 1)
    mask_2 = mask.unsqueeze(2).repeat(1, 1, self.max_length, 1)
    re_mask = mask_1 * mask_2
    re_output = re_output*re_mask

    return ner_output, re_output

  def configure_optimizers(self, lr=2e-5):
    optimizer = torch.optim.Adam(self.parameters(), lr=lr)
    return optimizer

  def training_step(self, train_batch, batch_idx):
    sentence, mask, entities, relations, entity_labels, relation_labels = train_batch
    ner_output, re_output = self(sentence, mask)
    ner_loss = self.loss(ner_output.reshape(-1), entity_labels.reshape(-1))/self.max_length
    #ner_f1 = self.f1_score(ner_output, entity_labels)
    re_loss = self.loss(re_output.reshape(-1), relation_labels.reshape(-1))/self.max_length
    #re_f1 = self.f1_score(re_output, relation_labels)
    if self.epoch > 5:
        loss = ner_loss + re_loss*(0.05*self.epoch)
        self.log('train_re_loss', re_loss, prog_bar=True, logger=True,
                 on_step=True, on_epoch=True)
        #self.log('train_re_f1', re_f1, prog_bar=True, logger=True,
        #         on_step=True, on_epoch=True)
    else:
        loss = ner_loss
    self.log('train_loss', loss, prog_bar=True, logger=True,
             on_step=True, on_epoch=True)
    self.log('train_ner_loss', ner_loss, prog_bar=True, logger=True,
             on_step=True, on_epoch=True)
    #self.log('train_ner_f1', ner_f1, prog_bar=True, logger=True,
    #         on_step=True, on_epoch=True)
    return loss

  def validation_step(self, val_batch, batch_idx):
    sentence, mask, entities, relations, entity_labels, relation_labels = val_batch
    ner_output, re_output = self(sentence, mask)
    ner_loss = self.loss(ner_output.reshape(-1), entity_labels.reshape(-1))/self.max_length
    ner_f1 = self.f1_score(ner_output, entity_labels)
    re_loss = self.loss(re_output.reshape(-1), relation_labels.reshape(-1))/self.max_length
    re_f1 = self.f1_score(re_output, relation_labels)
    if self.epoch > 5:
        loss = ner_loss + re_loss
        self.log('val_re_loss', re_loss, prog_bar=True, logger=True,
                 on_step=True, on_epoch=True)
        self.log('val_re_f1', re_f1, prog_bar=True, logger=True,
                 on_step=True, on_epoch=True)
        self.custom_metrics.update(ner_output, re_output, entity_labels, relation_labels)
    else:
        loss = ner_loss
    self.log('val_loss', loss, prog_bar=True, logger=True,
             on_step=True, on_epoch=True)
    self.log('val_ner_loss', ner_loss, prog_bar=True, logger=True,
             on_step=True, on_epoch=True)
    self.log('val_ner_f1', ner_f1, prog_bar=True, logger=True,
             on_step=True, on_epoch=True)
    return loss

  def on_validation_epoch_end(self):
    self.epoch +=1
    if self.epoch>5:
        precision, recall, f1 = self.custom_metrics.compute()
        self.log_dict({"precision": precision, "recall": recall, "f1": f1}, prog_bar=True,
                  logger=True, on_step=False, on_epoch=True)
        self.custom_metrics.reset()

  def test_step(self, test_batch, batch_idx):
    sentence, mask, entities, relations, entity_labels, relation_labels = test_batch
    ner_output, re_output = self(sentence, mask)
    self.custom_metrics.update(ner_output, re_output, entity_labels, relation_labels)

  def on_test_epoch_end(self):
    precision, recall, f1 = self.custom_metrics.compute()
    self.log_dict({"precision": precision, "recall": recall, "f1": f1}, prog_bar=True,
                  logger=True, on_step=False, on_epoch=True)
    self.custom_metrics.reset()

  #def prepare_data(self):

  def setup(self, stage: str):
    print("Setup..")
    if stage == "fit" or None:
      sentences, relations = read_file("train")
      self.dataset_train = RelDataset(sentences, relations,
                                      self.tokenizer, self.rel2id, sota=True)
      sentences, relations = read_file("dev")
      self.dataset_dev = RelDataset(sentences, relations,
                                    self.tokenizer, self.rel2id, sota=True)
      self.trained = True

    if stage == "validate" and not self.trained:
      sentences, relations = read_file("dev")
      self.dataset_dev = RelDataset(sentences, relations,
                                    self.tokenizer, self.rel2id, sota=True)

    if stage == "test":
      sentences, relations = read_file("test")
      self.dataset_test = RelDataset(sentences, relations,
                                     self.tokenizer, self.rel2id, sota=True)

    print("Setup done")

  def train_dataloader(self):
    return DataLoader(self.dataset_train, batch_size=32,
                      collate_fn=custom_collate_sota, shuffle=True, drop_last=True)

  def val_dataloader(self):
    return DataLoader(self.dataset_dev, batch_size=32,
                      collate_fn=custom_collate_sota, shuffle=False, drop_last=True)

  def test_dataloader(self):
    return DataLoader(self.dataset_test, batch_size=32,
                      collate_fn=custom_collate_sota, shuffle=False, drop_last=True)

In [12]:
model = GRU_PFN()
trainer = pl.Trainer(max_epochs=100, accelerator="auto", devices=1,
                     num_sanity_val_steps=0, default_root_dir="checkpoints/GRU", gradient_clip_val=0.25)

Dict preparation done


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
#trainer.fit(model=model)

In [13]:
trainer.test(model=model, ckpt_path="checkpoints/GRU/epoch=36-step=5772.ckpt")



Setup..
Reading test sentences
Reading test finished


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at checkpoints/GRU/epoch=36-step=5772.ckpt


Setup done


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at checkpoints/GRU/epoch=36-step=5772.ckpt


Testing: 0it [00:00, ?it/s]

[{'precision': 0.9319334030151367,
  'recall': 0.8784983158111572,
  'f1': 0.9044272899627686}]

# LSTM Model (Baseline 2)

In [14]:
class BiLSTMNet(pl.LightningModule):
  def __init__(self, max_len = 105):
    super(BiLSTMNet, self).__init__()
    self.save_hyperparameters()
    # attributes
    self.rel2id = read_rels()
    self.rel_num = len(self.rel2id)
    self.id2rel = decode_rels(self.rel2id)
    print("Dict preparation done")
    self.trained = False
    self.max_len = max_len
    # layers
    #self.embedding = AutoModel.from_pretrained("prajjwal1/bert-tiny")
    #self.tokenizer = AutoTokenizer.from_pretrained("prajjwal1/bert-tiny")
    self.embedding = AutoModel.from_pretrained("bert-base-cased")
    self.tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    self.lstm_subject = nn.LSTM(768, self.rel_num, 1, batch_first=True, bidirectional=True) # 768 if bert-base, 128 if tiny-bert
    self.lstm_object = nn.LSTM(768, self.rel_num, 1, batch_first=True, bidirectional=True)
    self.drop = nn.Dropout(p=0.2)
    self.linear_subject = nn.Linear(max_len, 1)
    self.linear_object = nn.Linear(max_len, 1)
    # activations
    self.tanh = nn.Tanh()
    # loss
    self.criterion = nn.BCEWithLogitsLoss()
    # metrics
    self.custom_metrics = CustomMetrics()
    self.custom_metrics = self.custom_metrics.set_dtype(torch.float32)
    self.f1_score = torchmetrics.F1Score(task="binary")

  def configure_optimizers(self, lr=2e-5):
    optimizer = torch.optim.Adam(self.parameters(), lr=lr)
    return optimizer

  def entity_features(self, sentence, entities):
    featured = []
    for b in range(len(entities)): #16 i.e. batch_size
      text = sentence[b]
      for i in range(len(entities[b])):
        e = torch.tensor(entities[b][i], device=self.device)#.to(device)
        e = torch.unsqueeze(e, dim=0)
        e = self.embedding(e)[0]
        e = torch.sum(e, 1)/e.size(1)
        e = text + e
        featured.append(e)
    featured = torch.stack(featured, dim=0)
    return featured

  def input_linear(self, input):
    forward = input[:,:,:self.rel_num]
    reverse = input[:,:,self.rel_num:]
    # SUM OR MEAN?
    #return (forward + reverse) / 2
    return forward + reverse

  def forward(self, sentence, mask, entities):
    embed_sentence = self.embedding(sentence, mask)[0]
    sentence_features = self.entity_features(embed_sentence, entities)
    s_output, _ = self.lstm_subject(sentence_features)
    o_output, _ = self.lstm_object(sentence_features)
    s_output = self.input_linear(s_output)
    o_output = self.input_linear(o_output)
    s_output = s_output.transpose(1,2)
    o_output = o_output.transpose(1,2)
    s_output = self.tanh(s_output)
    o_output = self.tanh(o_output)
    s_output = self.drop(s_output)
    o_output = self.drop(o_output)
    s_output = self.linear_subject(s_output)
    o_output = self.linear_object(o_output)
    s_output = s_output.squeeze(-1)
    o_output = o_output.squeeze(-1)
    return s_output, o_output

  def training_step(self, train_batch, batch_idx):
    sentence, mask, entities, relations, s_labels, o_labels = train_batch
    #sentence = sentence.to(device)
    #mask = mask.to(device)
    s_output, o_output = self(sentence, mask, entities)
    s_labels = torch.vstack(s_labels)#.to(device)
    o_labels = torch.vstack(o_labels)#.to(device)
    s_loss = self.criterion(s_output, s_labels)
    o_loss = self.criterion(o_output, o_labels)
    s_f1 = self.f1_score(s_output, s_labels)
    o_f1 = self.f1_score(o_output, o_labels)
    total_loss = s_loss + o_loss
    self.log('train_loss', total_loss,
             prog_bar=True, logger=True,
             on_step=True, on_epoch=True)
    self.log('train_sub_f1', s_f1, prog_bar=True, logger=True,
             on_step=True, on_epoch=True)
    self.log('train_obj_f1', o_f1, prog_bar=True, logger=True,
             on_step=True, on_epoch=True)

    return total_loss

  def validation_step(self, val_batch, batch_idx):
    sentence, mask, entities, relations, s_labels, o_labels = val_batch
    #sentence = sentence.to(device)
    #mask = mask.to(device)
    s_output, o_output = self(sentence, mask, entities)
    s_labels = torch.vstack(s_labels)#.to(device)
    o_labels = torch.vstack(o_labels)#.to(device)
    s_loss = self.criterion(s_output, s_labels)
    o_loss = self.criterion(o_output, o_labels)
    s_f1 = self.f1_score(s_output, s_labels)
    o_f1 = self.f1_score(o_output, o_labels)
    total_loss = s_loss + o_loss
    self.custom_metrics.update(s_output, o_output, s_labels, o_labels, relations)
    self.log("val_loss", total_loss, prog_bar=True,
             logger=True, on_step=False, on_epoch=True)
    self.log('val_sub_f1', s_f1, prog_bar=True, logger=True,
             on_step=True, on_epoch=True)
    self.log('val_obj_f1', o_f1, prog_bar=True, logger=True,
             on_step=True, on_epoch=True)

  def on_validation_epoch_end(self):
    precision, recall, f1 = self.custom_metrics.compute()
    self.log_dict({"precision": precision, "recall": recall, "f1": f1},
                  prog_bar=True, logger=True, on_step=False, on_epoch=True)
    self.custom_metrics.reset()

  def test_step(self, test_batch, batch_idx):
    sentence, mask, entities, relations, s_labels, o_labels = test_batch
    #sentence = sentence.to(device)
    #mask = mask.to(device)
    s_output, o_output = self(sentence, mask, entities)
    s_labels = torch.vstack(s_labels)#.to(device)
    o_labels = torch.vstack(o_labels)#.to(device)
    self.custom_metrics.update(s_output, o_output, s_labels, o_labels, relations)

  def on_test_epoch_end(self):
    precision, recall, f1 = self.custom_metrics.compute()
    self.log_dict({"precision": precision, "recall": recall, "f1": f1},
                  prog_bar=True, logger=True, on_step=False, on_epoch=True)
    self.custom_metrics.reset()

  #def prepare_data(self):

  def setup(self, stage=None):
    # Assign train/val datasets for use in dataloaders
    if stage == "fit" or None:
      sentences, relations = read_file("train")
      self.dataset_train = RelDataset(sentences, relations, self.tokenizer, self.rel2id)
      sentences, relations = read_file("dev")
      self.dataset_dev = RelDataset(sentences, relations, self.tokenizer, self.rel2id)
      self.trained = True

    if stage == "validate" and not self.trained:
      sentences, relations = read_file("dev")
      self.dataset_dev = RelDataset(sentences, relations, self.tokenizer, self.rel2id)

    if stage == "test":
      sentences, relations = read_file("dev")
      self.dataset_test = RelDataset(sentences, relations, self.tokenizer, self.rel2id)

  def train_dataloader(self):
    return DataLoader(self.dataset_train, batch_size=32, shuffle=True,
                      collate_fn=custom_collate, drop_last=True)

  def val_dataloader(self):
    return DataLoader(self.dataset_dev, batch_size=1, shuffle=False,
                      collate_fn=custom_collate, drop_last=True)

  def test_dataloader(self):
    return DataLoader(self.dataset_test, batch_size=1, shuffle=False,
                      collate_fn=custom_collate, drop_last=True)

In [15]:
model = BiLSTMNet()
trainer = pl.Trainer(max_epochs=100, accelerator="auto", devices=1,
                     num_sanity_val_steps=0, default_root_dir="checkpoints/LSTM")

Dict preparation done


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
#trainer.fit(model=model)

In [16]:
trainer.test(model=model, ckpt_path="checkpoints/LSTM/epoch=50-step=7956.ckpt")



Reading dev sentences
Reading dev finished


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at checkpoints/LSTM/epoch=50-step=7956.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at checkpoints/LSTM/epoch=50-step=7956.ckpt


Testing: 0it [00:00, ?it/s]

[{'precision': 0.9073724150657654,
  'recall': 0.874316930770874,
  'f1': 0.8905380368232727}]

# Inference examples

In [17]:
def infer_lstm(model, sentence, entities):
  tokenized = model.tokenizer(sentence, max_length=105, padding='max_length', return_tensors='pt')
  text = tokenized.input_ids.to(model.device)
  mask = tokenized.attention_mask.to(model.device)
  for i in range(len(entities)):
    entities[i] = torch.tensor(model.tokenizer(entities[i], add_special_tokens=False).input_ids)
  s_output, o_output = model(text, mask, [entities])
  trips = triplets(s_output, o_output, model.id2rel, model.tokenizer, entities)
  print(trips)

def infer(model, sentence):
  tokenized = model.tokenizer(sentence, max_length=105, padding='max_length', return_tensors='pt')
  text = tokenized.input_ids.to(model.device)
  mask = tokenized.attention_mask.to(model.device)
  ner_output, re_output = model(text, mask)
  ner_output = (ner_output>0.5).long()
  re_output = (re_output>0.5).long()
  entities = ner_output[0].nonzero()
  relations = re_output[0].nonzero()
  words = []
  starts = []
  ends = []
  for e in entities:
    word = model.tokenizer.decode(text[0, e[0]:e[1]+1])
    words.append(word)
    starts.append(e[0])
    ends.append(e[1])
  print("Entities detected: ", words)
  triplets = []
  for r in relations:
    if r[0] in starts and r[1] in starts:
      idx = starts.index(r[0])
      sub = model.tokenizer.decode(text[0, r[0]:ends[idx]+1])
      idx = starts.index(r[1])
      obj = model.tokenizer.decode(text[0, r[1]:ends[idx]+1])
      rel = model.id2rel[r[2].item()]
      triplets.append((sub, rel, obj))
  print("Triplets detected: ", triplets)

In [22]:
model = PFN.load_from_checkpoint("checkpoints/PFN/epoch=51-step=8112.ckpt")#, map_location="cpu")
model.eval()

Dict preparation done


PFN(
  (embedding): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=T

In [23]:
sentence = "Elliot See attended the University of Texas in Austin . He became a test pilot . He died in St. Louis on 1966-02-28 ."
infer(model, sentence)
print("-------------------")
sentence = "Andrea is a pilot of Apollo 12, Alan Shepard was born in USA"
infer(model, sentence)
print("-------------------")
sentence = "Andrea is the best student in Italy, he is the leader of Deep Learning and will be the pilot of Engineering"
infer(model, sentence)

Entities detected:  ['See', 'Austin', 'pilot', 'Louis']
Triplets detected:  [('See', 'almaMater', 'Austin'), ('See', 'occupation', 'pilot'), ('See', 'deathPlace', 'Louis')]
-------------------
Entities detected:  ['pilot', '12', 'Shepard']
Triplets detected:  [('Shepard', 'was a crew member of', '12')]
-------------------
Entities detected:  ['Andrea', 'Italy', 'Learning']
Triplets detected:  [('Learning', 'country', 'Italy')]


In [24]:
model = GRU_PFN.load_from_checkpoint("checkpoints/GRU/epoch=36-step=5772.ckpt")#, map_location="cpu")
model.eval()

Dict preparation done


GRU_PFN(
  (embedding): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affi

In [25]:
sentence = "Elliot See attended the University of Texas in Austin . He became a test pilot . He died in St. Louis on 1966-02-28 ."
infer(model, sentence)
print("-------------------")
sentence = "Andrea is a pilot of Apollo 12, Alan Shepard was born in USA"
infer(model, sentence)
print("-------------------")
sentence = "Andrea is the best student in Italy, he is the leader of Deep Learning and will be the pilot of Engineering"
infer(model, sentence)

Entities detected:  ['See', 'Austin', 'pilot', 'Louis']
Triplets detected:  [('See', 'almaMater', 'Austin'), ('See', 'occupation', 'pilot'), ('See', 'deathPlace', 'Louis')]
-------------------
Entities detected:  ['pilot', '12', 'Shepard']
Triplets detected:  [('Shepard', 'occupation', 'pilot'), ('Shepard', 'was a crew member of', '12')]
-------------------
Entities detected:  ['Andrea', 'Italy', 'Learning']
Triplets detected:  []


In [26]:
model  = BiLSTMNet.load_from_checkpoint("checkpoints/LSTM/epoch=50-step=7956.ckpt")#, map_location="cpu")
model.eval()

Dict preparation done


BiLSTMNet(
  (embedding): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_af

In [27]:
sentence = "Elliot See attended the University of Texas in Austin . He became a test pilot . He died in St. Louis on 1966-02-28 ."
entities = ["See", "Austin", "Louis", "pilot"]
infer_lstm(model, sentence, entities)
print("-------------------")
sentence = "Andrea is a pilot of Apollo 12, Alan Shepard was born in USA"
entities = ["Andrea", "12", "Shepard", "USA"]
infer_lstm(model, sentence, entities)
print("-------------------")
sentence = "Andrea is a pilot of Apollo 12, Alan Shepard was born in USA"
entities = ["Andrea", "Apollo 12", "Alan Shepard", "USA"]
infer_lstm(model, sentence, entities)
print("-------------------")
sentence = "Andrea is the best student in Italy, he is the leader of Deep Learning and will be the pilot of Engineering"
entities = ["Andrea", "Italy", "Deep Learning", "Engineering"]
infer_lstm(model, sentence, entities)

  e = torch.tensor(entities[b][i], device=self.device)#.to(device)


[('See', 'occupation', 'pilot'), ('See', 'deathPlace', 'Louis')]
-------------------
[('Shepard', 'was a crew member of', '12')]
-------------------
[]
-------------------
[('Italy', 'leaderName', 'Andrea')]
