In [None]:
%matplotlib inline

In [None]:
# Author: Qingzhou Li and Leo Zhang based on https://pytorch.org/tutorials/beginner/nlp/advanced_tutorial.html by Robert Guthrie

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import json
from sklearn.model_selection import train_test_split
torch.manual_seed(1)
from tqdm import tqdm
import pickle

Helper functions to make the code more readable.



In [None]:
def argmax(vec):
    """Return the index of the largest entry along dimension 1 as a Python int.

    The result is converted with `.item()`, which only works for a
    single-element tensor, so `vec` must have exactly one row.
    """
    best_index = torch.max(vec, 1).indices
    return best_index.item()


def prepare_sequence(seq, to_ix):
    """Convert a token sequence into a 1-D long tensor of vocabulary indices.

    Each token is looked up in `to_ix`; a token that is missing from the
    mapping raises KeyError.
    """
    indices = []
    for token in seq:
        indices.append(to_ix[token])
    return torch.tensor(indices, dtype=torch.long)


In [None]:
# Load the training data: the entity table (CSV) plus the sentence dictionary
# and the relation records, both of which are parsed as JSON.
training_entity = pd.read_csv('entity.csv')
with open("sents_dict.txt", "r") as sents_file:
  sents_dict = json.load(sents_file)
with open("relation.txt", "r") as relation_file:
  relation_data = json.load(relation_file)


In [None]:
# Build the training set. Every sentence becomes an ('ner', [tokens, IOB tags])
# item and every relation record is appended unchanged as an ('re', record) item.
training_data = []
for key, sentence in sents_dict.items():
  tokens = sentence.split()
  iob = ["O"] * len(tokens)
  entity_rows = training_entity[training_entity["sent_num"] == key]
  for row_index, row in entity_rows.iterrows():
    start, end, concept = row['start'], row['end'], row['concept']
    # One B- tag followed by (end - start - 1) I- tags. For a single-token
    # entity that count is zero, so only the B- tag is written.
    inside_tags = ['I-' + concept] * (end - start - 1)
    iob = iob[:start] + ['B-' + concept] + inside_tags + iob[end:]
  training_data.append(('ner', [tokens, iob]))
training_data.extend(('re', record) for record in relation_data)

In [None]:
# Load the gold-standard data: the entity table (CSV) plus the sentence
# dictionary and the relation records, both of which are parsed as JSON.
test_entity = pd.read_csv('test_entity.csv')
with open("test_sents_dict.txt", "r") as sents_file:
  test_sents_dict = json.load(sents_file)
with open("test_relation.txt", "r") as relation_file:
  test_relation = json.load(relation_file)

In [None]:
# Build the test set with IOB tagging.
#   test_ner  : (key, tokens, IOB tags) per sentence
#   test_re   : the relation records as loaded
#   test_data : the same content as ('ner', ...) / ('re', ...) items
test_data = []
test_ner = []
test_re = []
for key, sentence in test_sents_dict.items():
  tokens = sentence.split()
  iob = ["O"] * len(tokens)
  entity_rows = test_entity[test_entity["sent_num"] == key]
  for row_index, row in entity_rows.iterrows():
    start, end, concept = row['start'], row['end'], row['concept']
    # One B- tag followed by (end - start - 1) I- tags. For a single-token
    # entity that count is zero, so only the B- tag is written.
    inside_tags = ['I-' + concept] * (end - start - 1)
    iob = iob[:start] + ['B-' + concept] + inside_tags + iob[end:]
  # Each consumer gets its own copy of the token list.
  test_ner.append((key, list(tokens), iob))
  test_data.append(('ner', [list(tokens), iob]))
for record in test_relation:
  test_re.append(record)
  test_data.append(('re', record))

In [None]:
# Hold out 20% of the combined 'ner' and 're' items for validation;
# random_state is fixed so the split is the same on every run.
train, val = train_test_split(training_data, test_size=0.2, random_state=1234)

## Bi-LSTM for joint training

Create model



In [None]:
class BiLSTM(nn.Module):
    """Single-layer bidirectional LSTM encoder shared by an NER head and an RE head.

    Both tasks run the same embedding + BiLSTM over one sentence. The 'ner'
    head scores every token against the tag set; the 're' head scores one
    (head span, tail span) pair against the relation set.

    Args:
        vocab_size: number of rows in the word-embedding table.
        tag_to_ix: mapping from tag string to class index (NER output space).
        relation_to_ix: mapping from relation string to class index (RE output space).
        embedding_dim: size of each word embedding.
        hidden_dim: size of the concatenated forward+backward LSTM output;
            must be even because each direction gets hidden_dim // 2 units.

    Raises:
        ValueError: if hidden_dim is odd.
    """

    TASKS = ('ner', 're')

    def __init__(self, vocab_size, tag_to_ix,relation_to_ix, embedding_dim, hidden_dim):
        super(BiLSTM, self).__init__()
        if hidden_dim % 2 != 0:
            # Each direction has hidden_dim // 2 units; with an odd value the
            # LSTM output no longer has hidden_dim features and forward() breaks.
            raise ValueError("hidden_dim must be even, got %r" % (hidden_dim,))
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.tag_to_ix = tag_to_ix
        self.tagset_size = len(tag_to_ix)
        self.relation_to_ix = relation_to_ix
        self.relation_size = len(relation_to_ix)
        self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim // 2,
                            num_layers=1, bidirectional=True)

        # Maps the output of the LSTM into tag space (one score vector per token).
        self.hidden2tag = nn.Linear(hidden_dim, self.tagset_size)
        # Maps the concatenated head/tail span representations into relation space.
        self.hidden2rel = nn.Linear(hidden_dim*2, self.relation_size)

        self.hidden = self.init_hidden()

    def init_hidden(self):
        """Return a fresh random (h0, c0) pair for a batch of one sentence.

        NOTE(review): forward() calls this on every pass, so the initial state
        is random at prediction time as well; repeated predictions on the same
        input can differ unless the RNG state is identical.
        """
        return (torch.randn(2, 1, self.hidden_dim // 2),
                torch.randn(2, 1, self.hidden_dim // 2))

    def cross_entropy_loss(self, task, input, tags):
        """Run forward() for `task` and return the cross-entropy loss against `tags`."""
        feats = self.forward(task, input)
        return nn.functional.cross_entropy(feats, tags)

    def forward(self, task, input):
        """Return unnormalised class scores for one sentence.

        Args:
            task: 'ner' or 're'.
            input: for 'ner', ``[sentence]``; for 're', ``[sentence, head, tail]``
                where `sentence` is a 1-D tensor of word indices and `head` /
                `tail` are ``(start, end)`` token offsets (end exclusive).

        Returns:
            'ner': tensor of shape (len(sentence), tagset_size).
            're': tensor of shape (relation_size,).

        Raises:
            ValueError: if `task` is neither 'ner' nor 're'.
        """
        if task not in self.TASKS:
            raise ValueError("unknown task %r; expected one of %r" % (task, self.TASKS))

        sentence = input[0]
        self.hidden = self.init_hidden()
        embeds = self.word_embeds(sentence).view(len(sentence), 1, -1)
        lstm_out, self.hidden = self.lstm(embeds, self.hidden)
        lstm_out = lstm_out.view(len(sentence), self.hidden_dim)

        if task == 'ner':
            return self.hidden2tag(lstm_out)

        # task == 're': represent each entity by the mean of its token states.
        # An empty span (start >= end) would make the mean NaN.
        head, tail = input[1], input[2]
        lstm_head = torch.mean(lstm_out[head[0]:head[1]], dim=0)
        lstm_tail = torch.mean(lstm_out[tail[0]:tail[1]], dim=0)
        # Concatenate head and tail representations and score the relation.
        lstm_entities = torch.cat([lstm_head, lstm_tail])
        return self.hidden2rel(lstm_entities)

    def predict(self, task, input):
        """Return the highest-scoring label(s) for `input`.

        Gradients are disabled internally, so callers do not need their own
        ``torch.no_grad()`` block.

        Returns:
            'ner': list with one tag string per token.
            're': the relation string for the given head/tail pair.

        Raises:
            ValueError: if `task` is neither 'ner' nor 're'.
        """
        with torch.no_grad():
            feats = self.forward(task, input)
        if task == 'ner':
            ix_pred = np.argmax(feats.numpy(), axis=1)
            ix_to_tag = {v: k for k, v in self.tag_to_ix.items()}
            return [ix_to_tag[i] for i in ix_pred]
        ix_to_relation = {v: k for k, v in self.relation_to_ix.items()}
        return ix_to_relation[int(np.argmax(feats.numpy()))]

In [None]:
class EarlyStopping:
    """Signal when the validation loss has stopped improving.

    Args:
        tolerance: number of non-improving checks after which to stop.
        min_delta: margin above the best loss that a value must exceed to
            count as a non-improving check.
    """

    def __init__(self, tolerance=5, min_delta=0):
        self.tolerance = tolerance
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = np.inf

    def early_stop(self, validation_loss):
        """Record `validation_loss` and return True once training should stop.

        A new minimum resets the counter. A value above ``minimum + min_delta``
        increments it. Anything in between leaves the counter untouched.
        """
        if validation_loss < self.min_validation_loss:
            # New best value: remember it and start counting from zero again.
            self.min_validation_loss = validation_loss
            self.counter = 0
            return False

        if validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            return self.counter >= self.tolerance

        return False

Run training



In [None]:
START_TAG = "<START>"
STOP_TAG = "<STOP>"
EMBEDDING_DIM = 5
HIDDEN_DIM = 64
MAX_EPOCHS = 50


train_data = train
val_data = val


def extend_vocab(dataset, vocab):
    """Add every unseen token of `dataset` to `vocab`, numbered in order of first appearance.

    'ner' samples carry their tokens at position 0, 're' samples at position 1.
    """
    for task, sample in dataset:
        if task == 'ner':
            tokens = sample[0]
        elif task == 're':
            tokens = sample[1]
        else:
            continue
        for word in tokens:
            if word not in vocab:
                vocab[word] = len(vocab)


# The vocabulary covers the training AND the test sentences because
# prepare_sequence has no unknown-word fallback: a token that is missing from
# word_to_ix raises KeyError.
word_to_ix = {}
extend_vocab(training_data, word_to_ix)
extend_vocab(test_data, word_to_ix)

tag_to_ix = {"O": 0,  "B-Drug": 1, "B-Route": 2, "B-Reason": 3, "B-Duration": 4, "B-Dosage": 5, "B-Frequency": 6, "B-Strength": 7,
             "B-Form": 8, "B-ADE": 9, "I-Drug": 10, "I-Route": 11, "I-Reason": 12, "I-Duration": 13, "I-Dosage": 14, "I-Frequency": 15,
             "I-Strength": 16, "I-Form": 17, "I-ADE": 18, START_TAG: 19, STOP_TAG: 20}
# Inverse lookup, derived so it cannot drift out of sync with tag_to_ix.
ix_to_tag = {ix: tag for tag, ix in tag_to_ix.items()}

relation_to_ix = {START_TAG: 0, STOP_TAG: 1,'Strength-Drug': 2, 'Form-Drug': 3, 'Route-Drug': 4, 'Frequency-Drug': 5, 'Reason-Drug': 6, 'ADE-Drug': 7,
                  'Dosage-Drug': 8, 'Duration-Drug': 9}
ix_to_relation = {ix: relation for relation, ix in relation_to_ix.items()}


def iter_examples(task, sample):
    """Yield one (model_input, targets) pair per example contained in `sample`.

    An 'ner' sample is ``(tokens, tags)`` and yields a single pair whose target
    holds one class index per token. An 're' sample is
    ``(key, tokens, relations)`` and yields one pair per
    ``(head, tail, relation)`` triple, all sharing the same sentence tensor.
    """
    if task == 'ner':
        sentence, tags = sample
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = torch.tensor([tag_to_ix[t] for t in tags], dtype=torch.long)
        yield [sentence_in], targets
    elif task == 're':
        key, sentence, relations = sample
        sentence_in = prepare_sequence(sentence, word_to_ix)
        for head, tail, rel in relations:
            targets = torch.tensor(relation_to_ix[rel], dtype=torch.long)
            yield [sentence_in, head, tail], targets


model = BiLSTM(len(word_to_ix), tag_to_ix, relation_to_ix, EMBEDDING_DIM, HIDDEN_DIM)
optimizer = optim.SGD(model.parameters(), lr=0.1, weight_decay=1e-4) #you can change it to dynamic optimisers such as Adam
early_stopping = EarlyStopping(tolerance=10, min_delta=0)
train_loss = []       # mean training loss per epoch
validation_loss = []  # mean validation loss per epoch
epoch_i = 0

# Requires prepare_sequence, BiLSTM and EarlyStopping from the cells above.
for _epoch in tqdm(range(MAX_EPOCHS)):
    # ---- training pass: one optimizer step per example ----
    train_total, n_train = 0.0, 0
    for task, sample in train_data:
        for model_input, targets in iter_examples(task, sample):
            # PyTorch accumulates gradients, so clear them before each example.
            model.zero_grad()
            loss = model.cross_entropy_loss(task, model_input, targets)
            loss.backward()
            optimizer.step()
            train_total += loss.item()
            n_train += 1
    train_loss.append(train_total / n_train if n_train else float('nan'))

    # ---- validation pass: iterate the held-out items themselves ----
    val_total, n_val = 0.0, 0
    with torch.no_grad():
        for task, sample in val_data:
            for model_input, targets in iter_examples(task, sample):
                val_total += model.cross_entropy_loss(task, model_input, targets).item()
                n_val += 1
    # With no validation examples the loss is NaN, which never triggers early stopping.
    val_loss = val_total / n_val if n_val else float('nan')
    validation_loss.append(val_loss)

    epoch_i += 1
    # early stopping on the mean validation loss (a plain float)
    if early_stopping.early_stop(val_loss):
        print("We are at epoch:", epoch_i)
        break

 62%|██████▏   | 31/50 [49:12<30:09, 95.23s/it]

We are at epoch: 32





In [None]:
# NER performance measure: collect the predicted and the gold tag sequence
# of every 'ner' item in the test set.
pred = []
true = []
with torch.no_grad():
  for task, sample in test_data:
    if task != 'ner':
      continue
    tokens, gold_tags = sample[0], sample[1]
    sentence_in = [prepare_sequence(tokens, word_to_ix)]
    pred.append(model.predict(task, sentence_in))
    true.append(gold_tags)

In [None]:
# Show the predicted and the gold tag sequences collected above.
# NOTE(review): both lists are printed in full, one entry per test sentence,
# which produces a very large output.
print(pred)
print(true)

[['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Reason', 'O', 'B-Drug', 'I-Drug'], ['O', 'O', 'B-Strength', 'B-Route', 'B-Drug', 'O', 'B-Reason', 'O', 'B-Drug'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Strength', 'I-Strength', 'B-Route', 'B-Drug'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Drug', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Drug'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Route', 'B-Drug'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Reason', 'O', 'B-Reason', 'O', 'O', 'O', 'B-Reason', 'I-Reason'], ['O', 'O', 'B-Reason', 'O', 'B-Reason', 'I-Reason'], ['O', 'O', 'O', 'O', 'B-Drug', 'B-Route', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Drug', 'O', 'O', 'B-Drug', 'O', 'O', 'O', 'B-Drug'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Drug'

In [None]:
!pip install seqeval
from seqeval.metrics import f1_score, classification_report

Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m911.9 kB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: seqeval
  Building wheel for seqeval (setup.py) ... [?25l[?25hdone
  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16161 sha256=87649beebe9d7141243671fef4ed322979f9e5e7c0b96fbae0affc1d7352a91d
  Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa
Successfully built seqeval
Installing collected packages: seqeval
Successfully installed seqeval-1.2.2


In [None]:
# Parameter sharing NER prediction performance measure.
# This cell expects f1_score / classification_report to be the seqeval
# functions imported in the previous cell. A later cell rebinds both names to
# the scikit-learn functions, so re-running this cell after that one will not
# use seqeval.
print(f1_score(true, pred))
print(classification_report(true, pred))

0.7933337362504532
              precision    recall  f1-score   support

         ADE       0.39      0.20      0.26       625
      Dosage       0.84      0.82      0.83      2681
        Drug       0.80      0.84      0.82     10575
    Duration       0.56      0.46      0.51       378
        Form       0.90      0.89      0.89      4354
   Frequency       0.80      0.77      0.78      4012
      Reason       0.49      0.56      0.52      2545
       Route       0.87      0.88      0.87      3513
    Strength       0.79      0.81      0.80      4230

   micro avg       0.79      0.80      0.79     32913
   macro avg       0.72      0.69      0.70     32913
weighted avg       0.79      0.80      0.79     32913



In [None]:
# Parameter sharing RE performance measure: for every gold (head, tail) pair of
# every 're' item in the test set, record the predicted and the gold relation.
pred = []
true = []
with torch.no_grad():
  for task, sample in test_data:
    if task != 're':
      continue
    key, sentence, relations = sample
    sentence_in = prepare_sequence(sentence, word_to_ix)
    for head, tail, gold_relation in relations:
      scores = model(task, [sentence_in, head, tail])
      pred.append(ix_to_relation[np.argmax(scores.numpy())])
      true.append(gold_relation)

In [None]:
# NOTE: this import rebinds f1_score and classification_report to the
# scikit-learn functions, replacing the seqeval functions that were imported
# earlier under the same names.
from sklearn.metrics import f1_score, classification_report
# Scores for the relation labels collected in the previous cell.
print(f1_score(true, pred, average='weighted'))
print(classification_report(true, pred))

0.9522357168247767
                precision    recall  f1-score   support

      ADE-Drug       0.69      0.56      0.62       607
   Dosage-Drug       0.96      0.97      0.96      2657
 Duration-Drug       0.95      0.88      0.91       393
     Form-Drug       0.97      0.97      0.97      4304
Frequency-Drug       0.98      0.98      0.98      3961
   Reason-Drug       0.84      0.95      0.89      2442
    Route-Drug       0.98      0.95      0.97      3503
 Strength-Drug       0.98      0.96      0.97      4211

      accuracy                           0.95     22078
     macro avg       0.92      0.90      0.91     22078
  weighted avg       0.95      0.95      0.95     22078



From here on, we measure end-to-end performance.


In [None]:
# Format the prediction with original sentence
# NOTE(review): output_entity is initialised but never filled in this cell, and
# pred_ner is overwritten on every iteration without being stored. Confirm
# whether this cell is still needed.
output_entity = []
for i in test_data:
  with torch.no_grad():
    task, sample = i
    if task == 'ner':
      precheck_sent = [prepare_sequence(sample[0], word_to_ix)]
      pred_ner = model.predict(task,precheck_sent)

In [None]:
from collections import defaultdict

def list_duplicates(seq):
    """Group the positions of `seq` by item.

    Returns a generator of ``(item, positions)`` pairs in order of first
    appearance, where `positions` lists every index at which `item` occurs.
    Despite the name, items that occur only once are included as well.
    """
    positions_by_item = {}
    for position, item in enumerate(seq):
        positions_by_item.setdefault(item, []).append(position)
    return (entry for entry in positions_by_item.items())

In [None]:
# Identify predicted entities with their respective position in a sentence and
# pair every predicted non-drug entity with every predicted drug of the same
# sentence as a candidate relation "<EntityType>-Drug".
def _span_end(tags, start, inside_tag):
  """Return the exclusive end index of the entity whose B- tag sits at `start`."""
  end = start + 1
  while end < len(tags) and tags[end] == inside_tag:
    end += 1
  return end


out = []
task = 'ner'
with torch.no_grad():
  for key, sentence, gold_tags in test_ner:
    pred_entity = model.predict(task, [prepare_sequence(sentence, word_to_ix)])

    # The non-drug entity spans do not depend on the drug, so compute them once
    # per sentence. Order: B- tags sorted alphabetically, then by position.
    entity_spans = []
    for tag, positions in sorted(list_duplicates(pred_entity)):
      if tag[0] != 'B' or tag == 'B-Drug':
        continue
      entity_type = tag[2:]
      for position in positions:
        span_end = _span_end(pred_entity, position, 'I-' + entity_type)
        entity_spans.append((position, span_end, entity_type))

    candidates = []
    for drug_start, tag in enumerate(pred_entity):
      if tag != "B-Drug":
        continue
      drug_index = [drug_start, _span_end(pred_entity, drug_start, 'I-Drug')]
      for entity_start, entity_end, entity_type in entity_spans:
        candidates.append([[entity_start, entity_end], drug_index, entity_type + '-' + 'Drug'])

    out.append([key, sentence, candidates])

In [None]:
# Format TP, FP, FN for end-to-end evaluation.
# For every predicted sentence and every gold record with the same key and the
# same tokens: a candidate found in the gold relations is a TP, a candidate not
# found is an FP, and a gold relation missing from the candidates is an FN.
TP = []
FP = []
FN = []
for key1, sentence1, relation1 in out:
  for key2, sentence2, relation2 in test_relation:
    if key1 != key2 or sentence1 != sentence2:
      continue
    for candidate in relation1:
      if candidate in relation2:
        TP.append(candidate)
      else:
        FP.append(candidate)
    FN.extend(gold for gold in relation2 if gold not in relation1)

In [None]:
# Number of true positives, false positives and false negatives, in that order.
print(len(TP))
print(len(FP))
print(len(FN))

15593
39218
6485


In [None]:
# Relations for end-to-end evaluation: for each predicted sentence, keep the
# candidate relations that also appear in a gold record with the same key and
# the same tokens, wrapped as ('re', [key, tokens, relations]) items.
relation_out = []
for key1, sentence1, relation1 in out:
  rela = []
  for key2, sentence2, relation2 in test_relation:
    if key1 != key2 or sentence1 != sentence2:
      continue
    rela.extend(candidate for candidate in relation1 if candidate in relation2)
    relation_out.append(('re', [key1, sentence1, rela]))

In [None]:
# End-to-end parameter sharing performance measure.
# NOTE(review): relation_out holds only the candidate pairs that also appear in
# the gold relations, so the pairs counted in FP and FN do not contribute to
# the scores printed here.
pred = []
true = []
with torch.no_grad():
  for task, sample in relation_out:
    if task != 're':
      continue
    key, sentence, relations = sample
    sentence_in = prepare_sequence(sentence, word_to_ix)
    for head, tail, gold_relation in relations:
      scores = model(task, [sentence_in, head, tail])
      pred.append(ix_to_relation[np.argmax(scores.numpy())])
      true.append(gold_relation)
print(f1_score(true, pred, average='weighted'))
print(classification_report(true, pred))

0.9977642499262873
                precision    recall  f1-score   support

      ADE-Drug       0.91      0.95      0.93        95
   Dosage-Drug       1.00      1.00      1.00      2040
 Duration-Drug       0.99      0.99      0.99       153
     Form-Drug       1.00      1.00      1.00      3567
Frequency-Drug       1.00      1.00      1.00      2672
   Reason-Drug       1.00      0.99      0.99      1240
    Route-Drug       1.00      1.00      1.00      2736
 Strength-Drug       1.00      1.00      1.00      3090

      accuracy                           1.00     15593
     macro avg       0.99      0.99      0.99     15593
  weighted avg       1.00      1.00      1.00     15593

