<a href="https://colab.research.google.com/github/KatBCN/MUD-Lab-DDI-NN/blob/main/experiments/codemap_lower_pos/MUDLab6_DDI_lower_POS_tagging.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Change Runtime Type to GPU before running.

In [1]:
#!pip install tensorflow_addons

In [2]:
from google.colab import drive
# mount drive
drive.mount('/content/drive/')
%cd /content/drive/MyDrive/MUD-Lab-6
%ls 

Mounted at /content/drive/
/content/drive/MyDrive/MUD-Lab-6
codemaps_lw_pos.py  [0m[01;34mdata[0m/       devel.out     [01;34mExperiments[0m/  [01;34m__pycache__[0m/
codemaps_pos.py     dataset.py  devel.stats   [01;34mmodel[0m/        [01;34mutil[0m/
codemaps.py         deptree.py  evaluator.py  model.idx


In [3]:
import sys
import time
sys.path.insert(1, "/content/drive/MyDrive/MUD-Lab-6")

import random
from contextlib import redirect_stdout

from tensorflow.keras import regularizers, Input
from tensorflow.keras.models import Model,load_model
from tensorflow.keras.layers import Embedding, Dense, Dropout, Conv1D, MaxPool1D, Reshape, Concatenate, Flatten, Bidirectional, LSTM


from deptree import *
from dataset import *
# codemaps for lowercase words and pos tags
from codemaps_lw_pos import *
import evaluator

import tensorflow as tf
tf.random.set_seed(23)

## Experiment with POS tagging & changing words to lower case

- Uses 150 as the `output_dim` of the word embedding instead of 100 (the POS embedding keeps `output_dim=100`).

This model uses code provided by the course.

In [4]:
#this is building the neural network.
def build_network(idx) :
   """Build and compile the two-input (lowercase word + POS tag) CNN classifier.

   Both inputs are embedded, the embeddings are concatenated, passed through
   a single Conv1D layer, flattened, and fed to a softmax output layer.

   Args:
      idx: index object providing get_n_lc_words(), get_n_pos(),
           get_n_labels() and the attribute maxlen. All sizes are read
           from this argument (not from a notebook-level global), so the
           function works no matter which cells have been run before.

   Returns:
      A compiled Keras Model taking [word_input, pos_input] and producing
      one softmax distribution over the labels per example.
   """

   # sizes
   n_words = idx.get_n_lc_words() # number of lowercase words in vocabulary
   n_pos = idx.get_n_pos() # number of pos tags
   max_len = idx.maxlen
   n_labels = idx.get_n_labels()

   # word input layer & embeddings
   inptW = Input(shape=(max_len,)) # shape of input: all vectors need to be the same length.
   embW = Embedding(input_dim=n_words, output_dim=150, # output_dim is a tunable hyperparameter.
                      input_length=max_len, mask_zero=False)(inptW)

   # POS input layer & embeddings
   inptPOS = Input(shape=(max_len,)) # shape of input: all vectors need to be the same length.
   embPOS = Embedding(input_dim=n_pos, output_dim=100, # output_dim is a tunable hyperparameter.
                      input_length=max_len, mask_zero=False)(inptPOS)

   # join word and POS embeddings along the feature axis
   conc = Concatenate()([embW, embPOS])

   # the number of filters is the output dimension of the convolution and can be tuned;
   # kernel_size is the width of the word context seen by each filter.
   conv = Conv1D(filters=30, kernel_size=2, strides=1, activation='relu', padding='same')(conc)
   # the dense layer needs a single vector per example, so the per-position outputs are flattened
   flat = Flatten()(conv)

   out = Dense(n_labels, activation='softmax')(flat)

   model = Model([inptW, inptPOS], out)
   model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

   return model

In [5]:
# Paths of the training and validation dataset files (.pck), and the name
# under which the trained model and its index are saved by the training cell.
trainfile = "/content/drive/MyDrive/MUD-Lab-6/data/train.pck"
validationfile = "/content/drive/MyDrive/MUD-Lab-6/data/devel.pck"
modelname = "model"

In [6]:
## --------- MAIN PROGRAM ----------- 
## --
## -- Notebook version of train.py.
## -- Original usage:  train.py ../data/Train ../data/Devel  modelname
## --


# In the script version these values came from the command line; in the
# notebook trainfile, validationfile and modelname are assigned in an earlier cell.
# trainfile = sys.argv[1]
# validationfile = sys.argv[2]
# modelname = sys.argv[3]

# load train and validation data
traindata = Dataset(trainfile)
valdata = Dataset(validationfile)

# create indexes from training data
max_len = 150
# NOTE(review): suf_len is not referenced anywhere else in the visible notebook -- possibly a leftover.
suf_len = 5
codes = Codemaps(traindata, max_len)

# build network
model = build_network(codes)
# the layer summary is sent to stderr instead of stdout
with redirect_stdout(sys.stderr) :
   model.summary()

# encode datasets
# NOTE(review): the network has two Input layers (words and POS), so encode_words
# presumably returns both encodings -- confirm in codemaps_lw_pos.
Xt = codes.encode_words(traindata)
Yt = codes.encode_labels(traindata)
Xv = codes.encode_words(valdata)
Yv = codes.encode_labels(valdata)

#track time to train model (wall-clock seconds around model.fit only)
start_time = time.time()

# train model; progress output is sent to stderr instead of stdout
with redirect_stdout(sys.stderr) :
   model.fit(Xt, Yt, batch_size=32, epochs=10, validation_data=(Xv,Yv), verbose=1) # add class_weight here for experiments.

train_time = time.time() - start_time

# save model and indexes under the same name
model.save(modelname)
codes.save(modelname)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 150)]        0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, 150)]        0           []                               
                                                                                                  
 embedding (Embedding)          (None, 150, 150)     597450      ['input_1[0][0]']                
                                                                                                  
 embedding_1 (Embedding)        (None, 150, 100)     5100        ['input_2[0][0]']                
                                                                                              

INFO:tensorflow:Assets written to: model/assets


In [11]:
print("Amount of time to train the model is:", train_time)

Amount of time to train the model is: 82.60335373878479


In [7]:
# fname: path the model and index are loaded from by the prediction cell
# datafile: dataset file (.pck) to run predictions on
# outfile: file the predicted interactions are written to
fname = "/content/drive/MyDrive/MUD-Lab-6/model"
datafile = "/content/drive/MyDrive/MUD-Lab-6/data/devel.pck"
outfile = "devel.out"

The following code chunk is from predict.py

In [8]:
# predict.py

## --------- Interaction output ----------- 
## -- Write every predicted interaction whose tag is not 'null' to the
## -- output file, one per line, in the form "sid|e1|e2|tag"

def output_interactions(data, preds, outfile) :
   """Write the predicted interactions to outfile, one per line.

   Args:
      data: object whose sentences() method yields one mapping per example,
            each with the keys 'sid', 'e1' and 'e2'.
      preds: predicted tags, aligned one-to-one with data.sentences().
      outfile: path of the file to (over)write.

   Examples tagged 'null' are skipped; every other example is written as
   "sid|e1|e2|tag".
   """

   # 'with' guarantees the file is closed even if an example is malformed
   # and raises in the middle of the loop.
   with open(outfile, 'w') as outf :
      for exmp,tag in zip(data.sentences(),preds) :
         sid = exmp['sid']
         e1 = exmp['e1']
         e2 = exmp['e2']
         if tag!='null' :
            print(sid, e1, e2, tag, sep="|", file=outf)

   
## --------- MAIN PROGRAM ----------- 
## --
## -- Notebook version of predict.py: load the saved model and index,
## -- predict one label per example in datafile, and write the
## -- non-null predictions to outfile.
## --

# In the script version these values came from the command line.
#fname = sys.argv[1]
#datafile = sys.argv[2]
#outfile = sys.argv[3]

# reload the model and build the index from the same path
model = load_model(fname)
codes = Codemaps(fname)

# load and encode the dataset to predict on
testdata = Dataset(datafile)
X = codes.encode_words(testdata)

# take the highest-scoring label index of each prediction and map it back to its label
# NOTE(review): np is not imported explicitly in the visible notebook; it is
# presumably brought in by one of the star imports -- confirm.
Y = model.predict(X)
Y = [codes.idx2label(np.argmax(s)) for s in Y]

# extract relations
output_interactions(testdata, Y, outfile)


In [9]:
# task: which evaluation to run
# golddir: directory with the gold-standard XML files
# outfile: predictions file to evaluate
task = "DDI"
golddir = "/content/drive/MyDrive/MUD-Lab-6/data/devel"
outfile = "devel.out"

The following code chunk is from evaluator.py and creates file devel.stats

In [10]:
# evaluator.py

#! /usr/bin/python3

import sys
from os import listdir

from xml.dom.minidom import parse

## --
## -- auxiliary to insert an instance in given instance_set
## --

def add_instance(instance_set, einfo, etype) :
    """Register one instance in instance_set.

    The instance is stored three ways: as "einfo|etype" under "CLASS",
    as plain einfo under "NOCLASS", and as plain einfo under its own
    type key, which is created on first use.
    """
    typed_key = einfo + "|" + etype
    instance_set["CLASS"].add(typed_key)
    instance_set["NOCLASS"].add(einfo)
    # per-type bucket is created lazily the first time a type is seen
    instance_set.setdefault(etype, set()).add(einfo)

    
## --
## -- Load entities from XML files in given golddir
## --

"""
def load_gold_NER(golddir) :
    entities = { "CLASS" : set([]), "NOCLASS" : set([]) }

    # process each file in directory
    for f in listdir(golddir) :

        # parse XML file, obtaining a DOM tree
        tree = parse(golddir+"/"+f)

        # process each sentence in the file
        sentences = tree.getElementsByTagName("sentence")
        for s in sentences :
            sid = s.attributes["id"].value   # get sentence id
            
            # load sentence entities
            ents = s.getElementsByTagName("entity")
            for e in ents :
                einfo = sid + "|" + e.attributes["charOffset"].value  + "|" + e.attributes["text"].value
                etype = e.attributes["type"].value
                add_instance(entities, einfo, etype)
            
    return entities
"""
## --
## -- Load relations from XML files in given golddir
## --

def load_gold_DDI(golddir) :
    """Collect the gold-standard interactions from every file in golddir.

    Each file is parsed as XML. For every "pair" element with ddi="true"
    inside a "sentence" element, the relation "sid|e1|e2" is registered
    with its "type" attribute through add_instance.

    Returns the resulting dict of sets (keys "CLASS", "NOCLASS" and one
    key per relation type found).
    """
    gold = { "CLASS" : set(), "NOCLASS" : set() }

    for xml_name in listdir(golddir) :
        # parse XML file, obtaining a DOM tree
        dom = parse(golddir+"/"+xml_name)

        for sent in dom.getElementsByTagName("sentence") :
            sent_id = sent.attributes["id"].value

            for pair in sent.getElementsByTagName("pair") :
                first = pair.attributes["e1"].value
                second = pair.attributes["e2"].value

                # only pairs marked as true interactions are part of the gold standard
                if pair.attributes["ddi"].value != "true" :
                    continue

                rel_type = pair.attributes["type"].value
                add_instance(gold, sent_id + "|" + first + "|" +  second, rel_type)

    return gold


## --
## -- Load entities/relations from given system output file
## --

def load_predicted(task, outfile) :
    """Load the system predictions stored in outfile.

    Each non-blank line has the form "info|type", where everything before
    the last "|" identifies the instance and the last field is its type.
    Lines repeated verbatim are reported on stdout and ignored. Blank
    lines are skipped rather than registered as an empty prediction.

    Args:
        task: unused; kept so existing callers keep working.
        outfile: path of the predictions file.

    Returns:
        dict of sets with the keys "CLASS", "NOCLASS" and one key per
        predicted type, filled through add_instance.
    """
    predicted = { "CLASS" : set(), "NOCLASS" : set() }

    # read everything up front; 'with' closes the file exactly once,
    # including when the file is empty or reading fails
    with open(outfile,"r") as outf :
        lines = outf.readlines()

    for line in lines :
        line = line.strip()
        if not line :
            continue
        if line in predicted["CLASS"] :
            print("Ignoring duplicated entity in system predictions file: "+line)
            continue

        # split on the LAST separator: the type is the final field
        einfo, _, etype = line.rpartition("|")
        add_instance(predicted, einfo, etype)

    return predicted
    


## --
## -- Compare given sets and compute tp,fp,fn,P,R,F1
## --

def statistics(gold,predicted,kind) :
    """Compare gold[kind] with predicted[kind].

    Returns the tuple (tp, fp, fn, npred, nexp, P, R, F1). When kind is
    missing from predicted, nothing counts as predicted and every expected
    instance is a false negative. P, R and F1 are 0 whenever their
    denominator is 0.
    """
    expected = gold[kind]
    nexp = len(expected)

    if kind not in predicted :
        # nothing predicted for this kind: all expected instances are missed
        tp, fp, fn, npred = 0, 0, nexp, 0
    else :
        found = predicted[kind]
        npred = len(found)
        tp = sum(1 for item in found if item in expected)
        fp = sum(1 for item in found if item not in expected)
        fn = sum(1 for item in expected if item not in found)

    P = 0 if npred == 0 else tp/npred
    R = 0 if nexp == 0 else tp/nexp
    F1 = 0 if P+R == 0 else 2*P*R/(P+R)

    return tp,fp,fn,npred,nexp,P,R,F1

## --
## -- Compute and print statistics table
## --

def row(txt) :
   """Pad txt on the right with spaces to 17 characters; longer text is returned unchanged."""
   return txt.ljust(17)


def print_statistics(gold,predicted, statfile) :
    """Write the evaluation table to the open text file statfile.

    One row is written per key of gold other than "CLASS"/"NOCLASS"
    (in sorted order), followed by:
      - M.avg: the mean of the per-kind P, R and F1 values;
      - m.avg: the figures computed over the "CLASS" sets;
      - m.avg(no class): the figures computed over the "NOCLASS" sets.

    If gold contains no kind besides "CLASS"/"NOCLASS", the M.avg row
    reports 0 instead of failing with a division by zero.
    """
    rule = "------------------------------------------------------------------------------"
    counts_fmt = "{:>4}\t{:>4}\t{:>4}\t{:>4}\t{:>4}\t{:2.1%}\t{:2.1%}\t{:2.1%}"

    print(row("")+"  tp\t  fp\t  fn\t#pred\t#exp\tP\tR\tF1", file=statfile)
    print(rule, file=statfile)
    (nk,sP,sR,sF1) = (0,0,0,0)
    for kind in sorted(gold) :
        if kind=="CLASS" or kind=="NOCLASS" : continue
        (tp,fp,fn,npred,nexp,P,R,F1) = statistics(gold, predicted, kind)
        print(row(kind)+counts_fmt.format(tp,fp,fn,npred,nexp, P, R, F1), file=statfile)
        (nk,sP,sR,sF1) = (nk+1, sP+P, sR+R, sF1+F1)

    # guard against a gold standard without any relation type
    if nk != 0 :
        (sP, sR, sF1) = (sP/nk, sR/nk, sF1/nk)
    print(rule, file=statfile)
    print(row("M.avg")+"-\t-\t-\t-\t-\t{:2.1%}\t{:2.1%}\t{:2.1%}".format(sP, sR, sF1), file=statfile)

    print(rule, file=statfile)
    (tp,fp,fn,npred,nexp,P,R,F1) = statistics(gold, predicted, "CLASS")
    print(row("m.avg")+counts_fmt.format(tp,fp,fn,npred,nexp, P, R, F1), file=statfile)
    (tp,fp,fn,npred,nexp,P,R,F1) = statistics(gold, predicted, "NOCLASS")
    print(row("m.avg(no class)")+counts_fmt.format(tp,fp,fn,npred,nexp, P, R, F1), file=statfile)

## --
## -- Evaluates results in outfile comparing them with gold standard in golddir.
## -- 'task' is either NER or DDI
## -- This function can be called from any program requesting evaluation.
## --
 
def evaluate(task, golddir, outfile, statsfile="devel.stats"):
    """Evaluate the predictions in outfile against the gold standard in golddir.

    Args:
        task: "NER" or "DDI".
        golddir: directory containing the gold-standard XML files.
        outfile: predictions file, as read by load_predicted.
        statsfile: path the statistics table is written to. Defaults to
            "devel.stats", the file name that was previously hard-coded.

    Raises:
        ValueError: if task is neither "NER" nor "DDI". Previously a
            message was printed and the function then failed on the
            undefined gold set.

    NOTE(review): in this notebook load_gold_NER is wrapped in a string
    literal, so the "NER" branch fails with NameError unless that function
    is made available elsewhere.
    """

    if task=="NER" :
        # get set of expected entities in the whole golddir
        gold = load_gold_NER(golddir)
    elif task == "DDI" :
        # get set of expected relations in the whole golddir
        gold = load_gold_DDI(golddir)
    else :
        raise ValueError("Invalid task '"+task+"'. Please specify 'NER' or 'DDI'.")

    # Load entities/relations predicted by the system
    predicted = load_predicted(task, outfile)

    # compare both sets and compute statistics; 'with' closes the file
    # even if writing the table fails
    with open(statsfile, 'w') as statfile :
        print_statistics(gold,predicted, statfile)

         
        
## --
## -- Usage as standalone program:  evaluator.py (NER|DDI) golddir outfile
## --
## -- Evaluates results in outfile comparing them with gold standard in golddir
## --


if __name__ == "__main__":

    # The command-line argument check of the standalone script is disabled
    # here; the arguments are assigned directly below instead.
    #if len(sys.argv) != 4 :
     #   print("\n  Usage: evaluator.py (NER|DDI) golddir outfile\n")
      #  exit()
        
    task = "DDI"
    golddir = "/content/drive/MyDrive/MUD-Lab-6/data/devel"
    outfile = "devel.out"

    # compare devel.out with the gold annotations; evaluate() writes the table to devel.stats
    evaluate(task, golddir, outfile)


### Results

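Lowercasing the words and adding POS tags made little difference compared with the baseline: macro-averaged F1 dropped slightly (51.8% vs. 53.2%), micro-averaged F1 was essentially unchanged (50.9% vs. 51.2%), and the micro-averaged F1 that ignores the interaction type improved slightly (58.0% vs. 56.1%).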

devel.stats:


```
                   tp	  fp	  fn	#pred	#exp	P	R	F1
------------------------------------------------------------------------------
advise             96	  61	  45	 157	 141	61.1%	68.1%	64.4%
effect            153	 113	 159	 266	 312	57.5%	49.0%	52.9%
int                14	  10	  14	  24	  28	58.3%	50.0%	53.8%
mechanism          66	  38	 195	 104	 261	63.5%	25.3%	36.2%
------------------------------------------------------------------------------
M.avg            -	-	-	-	-	60.1%	48.1%	51.8%
------------------------------------------------------------------------------
m.avg             329	 222	 413	 551	 742	59.7%	44.3%	50.9%
m.avg(no class)   375	 176	 367	 551	 742	68.1%	50.5%	58.0%

```



### Baseline Results:

devel.stats:

```
                   tp	  fp	  fn	#pred	#exp	P	R	F1
------------------------------------------------------------------------------
advise             90	  54	  51	 144	 141	62.5%	63.8%	63.2%
effect            160	  98	 152	 258	 312	62.0%	51.3%	56.1%
int                14	   5	  14	  19	  28	73.7%	50.0%	59.6%
mechanism          64	  53	 197	 117	 261	54.7%	24.5%	33.9%
------------------------------------------------------------------------------
M.avg            -	-	-	-	-	63.2%	47.4%	53.2%
------------------------------------------------------------------------------
m.avg             328	 210	 414	 538	 742	61.0%	44.2%	51.2%
m.avg(no class)   359	 179	 383	 538	 742	66.7%	48.4%	56.1%

```

