This notebook implements and trains the baseline BiLSTM-CRF model for biomedical named entity recognition.

Reference: Gao S, Kotevska O, Sorokine A, Christian JB (2021) A pre-training and self-training approach for biomedical named entity recognition. PLoS ONE 16(2): e0246310. https://doi.org/10.1371/journal.pone.0246310

Code: https://code.ornl.gov/biomedner/biomedner

Mounting Google Drive and selecting the appropriate TensorFlow version (1.x)

In [None]:
# Mount Google Drive (Colab only) so that the data and checkpoint paths
# under /content/drive used by the later cells are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Select TensorFlow 1.x: the model below is built on tf.contrib (CRF ops,
# LSTMCell), which the TF deprecation notices say is not part of TensorFlow 2.0.
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [None]:
# Sanity check: confirm which TensorFlow version the runtime actually loaded.
import tensorflow as tf
print(tf.__version__)

1.15.2


Pre-training the BiLSTM-CRF on the MedMentions dataset

In [None]:
pip install nervaluate

Collecting nervaluate
  Downloading nervaluate-0.1.8-py3-none-any.whl (24 kB)
Installing collected packages: nervaluate
Successfully installed nervaluate-0.1.8


In [None]:
import os
# Ask TensorFlow's native (C++) logging to stay quiet.
# NOTE(review): tensorflow is already imported in an earlier cell, so this
# variable may be set too late to have any effect - confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import numpy as np

# NOTE(review): GRUCell, BasicRNNCell and the sklearn metric functions are not
# referenced by any cell visible here; they are kept in case other cells use them.
from tensorflow.contrib.crf import crf_log_likelihood, crf_decode
from tensorflow.contrib.rnn import LSTMCell, GRUCell, BasicRNNCell
from sklearn.metrics import f1_score, precision_score, recall_score
from nervaluate import Evaluator
import sys
import random
import time
import pickle

In [None]:
class lstm_crf(object):

    '''
    lstm-crf for ner tagging
    
    parameters:
      - embedding_matrix: numpy array[float]
        numpy array of word embeddings
        each row should represent a word embedding; rows are looked up
        directly by word index, and the embedding width is taken from
        the number of columns
        NOTE: word index 0 appears to be reserved for padding (TODO confirm);
        padded positions are masked out through doc_lens
      - num_classes: int
        number of output classes
        NOTE: tags are decoded through a fixed 3-entry O/B-ENT/I-ENT map
      - max_len: int (default: 50)
        maximum number of input tokens in any sequence
      - rnn_size: int (default: 300)
        total number of rnn units in the bidirectional RNN layer
        (each direction gets rnn_size // 2 units)
      - dropout_keep: float (default: 0.9)
        dropout keep rate applied to the rnn inputs and rnn outputs
      - learning_rate: float (default: 1E-4)
        learning rate for adam optimizer
       
    methods:
      - train(X,y,doc_lens,batch_size=128,epochs=500,patience=10,
              validation_data=None,savebest=False,filepath=None)
        train network on given data
      - predict(X,doc_lens,batch_size=128)
        return the predicted labels, flattened labels ignoring padding tokens, 
        and confidence scores for given data
      - score(X,y,doc_lens,batch_size=128)
        return the entity-level exact F1 score for given input sequences
      - save(filename)
        save the model weights to a file
      - load(filename)
        load model weights from a file
    '''

    def __init__(self,embedding_matrix,num_classes,max_len=50,rnn_size=300,dropout_keep=0.9,learning_rate=1E-4):
    
        #model params
        self.num_classes = num_classes
        self.embeddings = embedding_matrix.astype(np.float32)
        self.max_len = max_len
        self.rnn_size = rnn_size
        self.dropout_keep = dropout_keep
        self.idx2label = {0:'O',1:'B-ENT',2:'I-ENT'}
        self.lr = learning_rate
        #embedding width comes from the matrix instead of a hard-coded 200
        embed_dim = self.embeddings.shape[1]
        
        #model inputs (embeddings are looked up in numpy and fed in directly)
        self.doc_inputs = tf.placeholder(tf.float32,shape=[None,max_len,embed_dim])
        self.doc_lens = tf.placeholder(tf.int32,shape=[None])
        self.labels = tf.placeholder(tf.int32,shape=[None,max_len])
        self.doc_idx = tf.placeholder(tf.int32,shape=[None,2])
        self.dropout = tf.placeholder(tf.float32)
        rnn_input = tf.nn.dropout(self.doc_inputs,self.dropout)
                      
        #bi-lstm; integer division keeps the unit count an int
        units_per_direction = self.rnn_size // 2
        with tf.variable_scope('rnn',initializer=tf.contrib.layers.xavier_initializer()):
            [outputs_fw,outputs_bw],_ = tf.nn.bidirectional_dynamic_rnn(
                            LSTMCell(units_per_direction),LSTMCell(units_per_direction),
                            rnn_input,sequence_length=self.doc_lens,dtype=tf.float32)
            outputs = tf.concat((outputs_fw,outputs_bw),2)
            outputs = tf.nn.dropout(outputs,self.dropout)
        
        #linear chain conditional random field
        unary_scores = tf.layers.dense(outputs,self.num_classes,
                       kernel_initializer=tf.contrib.layers.xavier_initializer())
        log_likelihood, self.transition_params = \
                       crf_log_likelihood(unary_scores,self.labels,self.doc_lens)
        self.pred, viterbi_score = crf_decode(unary_scores,self.transition_params,self.doc_lens)
        #doc_idx holds (row, position) pairs, so this picks the non-padding predictions
        self.pred_flat = tf.gather_nd(self.pred,self.doc_idx)
        #log-likelihood of the decoded sequence, normalised by sequence length
        seq_score,_ = crf_log_likelihood(unary_scores,self.pred,self.doc_lens,
                                         self.transition_params)
        self.seq_score = seq_score/tf.cast(self.doc_lens,tf.float32)

        #loss and training functions
        self.loss = tf.reduce_mean(-log_likelihood)
        self.optimizer = tf.train.AdamOptimizer(self.lr,0.9,0.99).minimize(self.loss)
        
        #init ops
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.saver = tf.train.Saver()
        self.sess = tf.Session(config=config)
        self.sess.run(tf.global_variables_initializer())

    def _gen_doc_idx(self,doc_lens):
    
        '''
        build the (row, position) index pairs of every non-padding token
        
        parameters:
          - doc_lens: list[int]
            true sequence length of each sample in the batch
        
        outputs:
          - 2d int32 numpy array of shape (sum(doc_lens), 2); shape (0, 2)
            when there are no tokens, so it always matches the doc_idx placeholder
        '''
    
        doc_idx = [[row,pos] for row,length in enumerate(doc_lens)
                             for pos in range(length)]

        return np.array(doc_idx,dtype=np.int32).reshape(-1,2)
        
    def _flatten_y(self,y,doc_lens):
    
        '''
        concatenate the first doc_lens[i] labels of every sequence into one list
        '''
    
        y_flattened = []
        for doc,length in zip(y,doc_lens):
            y_flattened.extend(doc[:length])
            
        return y_flattened

    def _decode_tags(self,label_seqs,doc_lens):
    
        '''
        convert label-id sequences to tag strings, dropping padding positions
        
        parameters:
          - label_seqs: iterable of label-id sequences
          - doc_lens: list[int]
            true sequence length of each sample
        
        outputs:
          - list[list[str]] with one tag list per sequence, truncated to its length
        '''
    
        return [[self.idx2label[idx] for idx in seq[:length]]
                for seq,length in zip(label_seqs,doc_lens)]
                    
    def train(self,X,y,doc_lens,batch_size=128,epochs=500,patience=10,
              validation_data=None,savebest=False,filepath=None):
    
        '''
        train network on given data
        
        parameters:
          - X: numpy array[int]
            2d numpy array (doc x word ids) of input data
          - y: numpy array[int]
            2d numpy array (doc x ner labels) of labels for given data
          - doc_lens: list[int]
            true sequence length of each sample
          - batch size: int (default: 128)
            batch size to use for training
          - epochs: int (default: 500)
            number of epochs to train for
          - patience: int (default: 10)
            training stops after no improvement in validation score
            for this number of epochs
          - validation_data: tuple (optional)
            tuple (X,y,doc_lens) representing validation data; when omitted
            the training data itself is scored after each epoch
          - savebest: boolean (default: False)
            set to True to save the best model based on validation score per epoch
          - filepath: string (optional)
            path to save model if savebest is set to True
        
        outputs:
            None
        
        raises:
            ValueError if savebest is set but no filepath is given
        '''
    
        if savebest and filepath is None:
            raise ValueError("Please enter a path to save the network")

        if validation_data:
            validation_size = len(validation_data[0])
        else:
            validation_size = len(X)

        print('training network on %i documents, validation on %i documents' \
              % (len(X), validation_size))

        #track best model for saving
        prevbest = 0
        pat_count = 0

        for ep in range(epochs):

            #shuffle data
            xyz = list(zip(X,y,doc_lens))            
            random.shuffle(xyz)
            X,y,doc_lens = zip(*xyz)
            X = list(X)
            y = list(y)
            doc_lens = list(doc_lens)

            y_pred = []
            y_true = []
            start_time = time.time()

            #train
            for start in range(0,len(X),batch_size):

                #get batch index
                stop = min(start+batch_size,len(X))
                batch_y = y[start:stop]
                batch_lens = doc_lens[start:stop]

                #doc_idx is only needed by pred_flat, which is not fetched here
                embeds = np.take(self.embeddings,X[start:stop],0)
                feed_dict = {self.doc_inputs:embeds,
                             self.labels:batch_y,
                             self.doc_lens:batch_lens,
                             self.dropout:self.dropout_keep}
                preds,loss,_ = self.sess.run([self.pred,self.loss,self.optimizer],
                              feed_dict=feed_dict)

                #track predictions for the training f1
                y_pred.extend(self._decode_tags(preds,batch_lens))
                y_true.extend(self._decode_tags(batch_y,batch_lens))
                    
                sys.stdout.write("epoch %i, sample %i of %i, loss: %f        \r"\
                                 % (ep+1,stop,len(X),loss))
                sys.stdout.flush()

            #checkpoint after every epoch
            print("\ntraining time: %.2f" % (time.time()-start_time))
            evaluator = Evaluator(y_true, y_pred, tags=['ENT'], loader="list")
            results, results_by_tag = evaluator.evaluate()
            train_f1 = results['exact']['f1']
            print("epoch %i training f1: %.4f" % (ep+1,train_f1))

            #fall back to the training data when no validation set was given
            if validation_data:
                val_X,val_y,val_lens = validation_data[0],validation_data[1],validation_data[2]
            else:
                val_X,val_y,val_lens = X,y,doc_lens
            f1 = self.score(val_X,val_y,val_lens,batch_size=batch_size)
            print("epoch %i validation f1: %.4f" % (ep+1,f1))

            #save if performance better than previous best
            if f1 >= prevbest:
                prevbest = f1
                pat_count = 0
                if savebest:
                    self.save(filepath)
            else:
                pat_count += 1
                if pat_count >= patience:
                    break

    def predict(self,X,doc_lens,batch_size=128):
    
        '''
        return the predicted labels, flattened labels ignoring padding tokens, 
        and confidence scores for given data
        
        parameters:
          - X: numpy array[int]
            2d numpy array (doc x word ids) of input data
          - doc_lens: list[int]
            true sequence length of each sample
          - batch size: int (default: 128)
            batch size to use for inference
            
        outputs:
          - y_pred: numpy_array[int]
            2d numpy array of predicted labels for input data
          - y_pred_flat: list[int]
            flattened list of all predicted labels ignoring padding tokens
          - scores: numpy_array[float]
            one confidence score per input sequence: exp of the length-normalised
            CRF log-likelihood of the decoded sequence
        '''
    
        y_pred_flat = []
        y_pred = []
        scores = []
        
        for start in range(0,len(X),batch_size):

            #get batch index
            stop = min(start+batch_size,len(X))
            batch_lens = doc_lens[start:stop]

            #dropout keep rate of 1.0 disables dropout at inference time
            embeds = np.take(self.embeddings,X[start:stop],0)
            feed_dict = {self.doc_inputs:embeds,
                         self.doc_lens:batch_lens,
                         self.doc_idx:self._gen_doc_idx(batch_lens),
                         self.dropout:1.0}
            pred,pred_flat,score = self.sess.run([self.pred,self.pred_flat,self.seq_score],
                                   feed_dict=feed_dict)

            score = np.exp(score)
            y_pred.append(pred)
            y_pred_flat.extend(pred_flat)
            scores.extend(score)

            sys.stdout.write("processed %i of %i records        \r" \
                             % (stop,len(X)))
            sys.stdout.flush()

        print()
        #np.vstack rejects an empty list, so return an empty result explicitly
        if not y_pred:
            return np.zeros((0,self.max_len),dtype=np.int32),y_pred_flat,np.array(scores)
        y_pred = np.vstack(y_pred)
        return y_pred,y_pred_flat,np.array(scores)

    def score(self,X,y,doc_lens,batch_size=128):
    
        '''
        return the entity-level exact F1 score for given input sequences
        
        parameters:
          - X: numpy array[int]
            2d numpy array (doc x word ids) of input data
          - y: numpy array[int]
            2d numpy array (doc x ner labels) of labels for given data
          - doc_lens: list[int]
            true sequence length of each sample
          - batch size: int (default: 128)
            batch size to use for inference
            
        outputs:
          - entity-level exact F1 score for given input sequences
        '''
        
        y_preds_,_,_ = self.predict(X,doc_lens,batch_size)
        y_pred = self._decode_tags(y_preds_,doc_lens)
        y_true = self._decode_tags(y,doc_lens)
        
        evaluator = Evaluator(y_true, y_pred, tags=['ENT'], loader="list")
        results, results_by_tag = evaluator.evaluate()
        f1 = results['exact']['f1']
        
        return f1

    def save(self,filename):
    
        '''
        save the model weights to a file
        
        parameters:
          - filename: string
            path to save model weights
        
        outputs:
            None
        '''
        
        self.saver.save(self.sess,filename)

    def load(self,filename):
    
        '''
        load model weights from a file
        
        parameters:
          - filename: string
            path from which to load model weights
        
        outputs:
            None
        '''
        
        self.saver.restore(self.sess,filename)


In [None]:
# Pre-training configuration: corpus name (used to build the data/checkpoint
# file names), mini-batch size, and early-stopping patience in epochs.
pretrain_dataset = 'medmentions'
batch_size, patience = 128, 5

In [None]:
# `vocab` is the word-embedding matrix handed to lstm_crf as embedding_matrix.
# Casting it to int16 would truncate the float embeddings, so keep it as float32.
vocab = np.load('/content/drive/MyDrive/Pubmed/vocab.npy').astype(np.float32)

In [None]:
# Load the embedding matrix and the pre-training token-id / label arrays.
vocab = np.load('/content/drive/MyDrive/Pubmed/vocab.npy')

X = np.load('/content/drive/MyDrive/Pubmed/X_%s.npy' % pretrain_dataset)
X = X.astype(np.int32)
y = np.load('/content/drive/MyDrive/Pubmed/y_%s.npy' % pretrain_dataset)
y = y.astype(np.int32)

# Sequence width is the second axis of X; label ids run from 0 up to the largest id in y.
max_len = X.shape[1]
num_classes = y.max() + 1

In [None]:
# Read the true sentence lengths and cap each one at max_len.
with open('/content/drive/MyDrive/Pubmed/sentence_lens_%s.pkl' % pretrain_dataset,'rb') as f:
    doc_len = pickle.load(f)
doc_len = [min(l, max_len) for l in doc_len]

In [None]:
# Label <-> id and word <-> id lookup tables (the inverse maps are derived
# by swapping keys and values).
label2idx = {'O':0, 'B':1, 'I': 2}
idx2label = dict((idx, label) for label, idx in label2idx.items())

with open('/content/drive/MyDrive/Pubmed/word2idx.pkl','rb') as f:
    word2idx = pickle.load(f)
idx2word = dict((idx, word) for word, idx in word2idx.items())

In [None]:
# Ordered (unshuffled) 80/20 split: the first 80% of documents train the
# model, the remainder is held out for validation.
num_docs = len(X)
train_size = int(num_docs * 0.8)

X_train, X_val = X[:train_size], X[train_size:]
y_train, y_val = y[:train_size], y[train_size:]
doc_len_train, doc_len_val = doc_len[:train_size], doc_len[train_size:]

In [None]:
# Build the network and pre-train it; the best checkpoint (by validation F1)
# is written to Drive.
model = lstm_crf(vocab,num_classes,max_len)
pretrain_ckpt = '/content/drive/MyDrive/Pubmed/PretrainModel/lstm_crf_%s.ckpt' % pretrain_dataset
pretrain_validation = (X_val,y_val,doc_len_val)
model.train(X_train,y_train,doc_len_train,
            batch_size=batch_size,
            patience=patience,
            validation_data=pretrain_validation,
            savebest=True,
            filepath=pretrain_ckpt)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.


Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.


Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API


Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API


Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API


Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API


Instructions for updating:
Please use `layer.add_weight` method instead.


Instructions for updating:
Please use `layer.add_weight` method instead.


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use keras.layers.Dense instead.


Instructions for updating:
Use keras.layers.Dense instead.


Instructions for updating:
Please use `layer.__call__` method instead.


Instructions for updating:
Please use `layer.__call__` method instead.


training network on 38184 documents, validation on 9546 documents

training time: 133.28
epoch 1 training f1: 0.3511

epoch 1 validation f1: 0.5259

training time: 130.68
epoch 2 training f1: 0.5476

epoch 2 validation f1: 0.5762

training time: 128.85
epoch 3 training f1: 0.5748

epoch 3 validation f1: 0.5909

training time: 131.37
epoch 4 training f1: 0.5915

epoch 4 validation f1: 0.6062

training time: 130.77
epoch 5 training f1: 0.6042

epoch 5 validation f1: 0.6148

training time: 130.23
epoch 6 training f1: 0.6141

epoch 6 validation f1: 0.6251

training time: 129.91
epoch 7 training f1: 0.6216

epoch 7 validation f1: 0.6291

training time: 130.31
epoch 8 training f1: 0.6299

epoch 8 validation f1: 0.6333

training time: 129.44
epoch 9 training f1: 0.6349

epoch 9 validation f1: 0.6383

training time: 129.61
epoch 10 training f1: 0.6410

epoch 10 validation f1: 0.6420

training time: 128.54
epoch 11 training f1: 0.6461

epoch 11 validation f1: 0.6436

training time: 130.29
epoch

In [None]:
# load best model and show some examples to check that model is learning
model.load('/content/drive/MyDrive/Pubmed/PretrainModel/lstm_crf_%s.ckpt' % pretrain_dataset)
for i in range(10):
    # Slices of length 1 keep the batch dimension that predict() expects.
    # Dedicated names are used so the notebook-level `doc_len` list (all
    # sentence lengths) is not overwritten by a one-element slice.
    sample_tokens = X_val[i:i+1]
    sample_labels = y_val[i:i+1]
    sample_len = doc_len_val[i:i+1]
    _,preds,score = model.predict(sample_tokens,sample_len)
    # word id 0 and label id -1 are skipped when printing
    print('sentence:',[idx2word.get(w,'UNK') for w in sample_tokens[0] if w != 0])
    print('true labels:',[idx2label[t] for t in sample_labels[0] if t != -1])
    print('pred labels:',[idx2label[t] for t in preds])
    print('score:', score)
    print()


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/PretrainModel/lstm_crf_medmentions.ckpt


2022-04-20 11:31:42 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/PretrainModel/lstm_crf_medmentions.ckpt


processed 1 of 1 records        
sentence: ['To', 'identify', 'key', 'genes', 'and', 'microRNA', 's', 'in', 'MM', ',', 'we', 'downloaded', 'two', 'gene', 'expression', 'profiles', '(', 'UNK', 'and', 'UNK', ')', 'and', 'two', 'microRNA', 'expression', 'profiles', '(', 'UNK', 'and', 'UNK', ')', 'from', 'the', 'Gene', 'Expression', 'Omnibus', '(', 'GEO', ')', 'database', '.']
true labels: ['O', 'O', 'O', 'B', 'O', 'B', 'O', 'O', 'B', 'O', 'O', 'O', 'O', 'B', 'I', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'B', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'I', 'I', 'I', 'I', 'I', 'O']
pred labels: ['O', 'O', 'O', 'B', 'O', 'B', 'O', 'O', 'B', 'O', 'O', 'O', 'O', 'B', 'I', 'I', 'O', 'B', 'O', 'B', 'O', 'O', 'O', 'B', 'I', 'I', 'O', 'B', 'O', 'B', 'O', 'O', 'O', 'B', 'I', 'I', 'O', 'B', 'O', 'B', 'O']
score: [0.906443]


sentence: ['A', 'total', 'of', '596', 'differentially', 'expressed', 'genes', '(', 'DEGs', ')', 'and', '39', 'differentially', 'expressed', 'microRNAs', '(', 'DEMs',

Training BiLSTM-CRF on NER datasets without using semi-supervised learning

In [None]:
# Settings for the supervised runs: mini-batch size and early-stopping
# patience in epochs.
batch_size_f, patience_f = 128, 15

In [None]:
# datasets to test on
datasets = [
            'BC2GM',
            'BC4CHEMD', 
            'NCBI-disease',
            's800'
           ]

# supervised dataset sizes to test on
data_sizes = [
              2000
             ]


In [None]:
# Train a fresh BiLSTM-CRF on each corpus / data-size setting, restore its best
# checkpoint, and report exact and partial entity-level scores on the test split.
for dataset in datasets:

    # iterate over each data size setting
    for data_size in data_sizes:

        print('training',dataset,data_size)
        
        # load data (only the first data_size sentences are used)
        X = np.load('/content/drive/MyDrive/Pubmed/%s_X_train.npy' % dataset).astype(np.int32)[:data_size]
        y = np.load('/content/drive/MyDrive/Pubmed/%s_y_train.npy' % dataset).astype(np.int32)[:data_size]
        # NOTE(review): assumes the saved arrays are padded to 50 tokens - confirm
        max_len = 50
        num_classes = np.max(y) + 1
        
        with open('/content/drive/MyDrive/Pubmed/%s_senlens_train.pkl' % dataset,'rb') as f:
            doc_len = pickle.load(f)[:data_size]
        doc_len = [min(n, max_len) for n in doc_len]
        
        # load conversion dictionaries (tag names carry the ENT type expected by the evaluator)
        label2idx = {'O':0, 'B':1, 'I': 2}
        idx2label = {0:'O',1:'B-ENT',2:'I-ENT'}
            
        # train val split
        num_docs = len(X)
        train_size = int(num_docs * 0.8)
        X_train = X[:train_size]
        X_val = X[train_size:]
        y_train = y[:train_size]
        y_val = y[train_size:]
        doc_len_train = doc_len[:train_size]
        doc_len_val = doc_len[train_size:]
        
        # load test data
        X_test = np.load('/content/drive/MyDrive/Pubmed/%s_X_test.npy' % dataset).astype(np.int32)
        y_test = np.load('/content/drive/MyDrive/Pubmed/%s_y_test.npy' % dataset).astype(np.int32)
        with open('/content/drive/MyDrive/Pubmed/%s_senlens_test.pkl' % dataset,'rb') as f:
            doc_len_test = pickle.load(f)
        doc_len_test = [min(n, max_len) for n in doc_len_test]

        # gold tags, truncated to each sentence's true length; built without
        # rebinding `y`, which still holds the training label array
        y_true = [[idx2label[t] for t in seq[:n]]
                  for seq,n in zip(y_test,doc_len_test)]

        # train model from scratch
        tf.reset_default_graph()
        model = lstm_crf(vocab,num_classes,max_len)

        # one path expression shared by training (save) and evaluation (restore)
        ckpt_path = '/content/drive/MyDrive/Pubmed/FinetuneNotSemisup/%s_lstm_crf_%s_%i.ckpt' % (dataset,pretrain_dataset,train_size)
        
        model.train(X_train,y_train,doc_len_train,
                    batch_size=batch_size_f,patience=patience_f,
                    validation_data=(X_val,y_val,doc_len_val),
                    savebest=True,filepath=ckpt_path)
        
        # evaluate on test set using the best checkpoint
        model.load(ckpt_path)
        y_preds_,_,_ = model.predict(X_test,doc_len_test)
        y_pred = [[idx2label[t] for t in seq[:n]]
                  for seq,n in zip(y_preds_,doc_len_test)]
        
        evaluator = Evaluator(y_true, y_pred, tags=['ENT'], loader="list")
        results, results_by_tag = evaluator.evaluate()
        
        exact_p = results['exact']['precision']
        exact_r = results['exact']['recall']
        exact_f = results['exact']['f1']
        partial_p = results['partial']['precision']
        partial_r = results['partial']['recall']
        partial_f = results['partial']['f1']
    
        print(dataset, data_size, 'no pretrain')
        print('exact p: %.4f' % exact_p)
        print('exact r: %.4f' % exact_r)
        print('exact f: %.4f' % exact_f)
        print('partial p: %.4f' % partial_p)
        print('partial r: %.4f' % partial_r)
        print('partial f: %.4f' % partial_f)
        print()


training BC2GM 2000
training network on 1600 documents, validation on 400 documents

training time: 3.56
epoch 1 training f1: 0.0325

epoch 1 validation f1: 0.0324

training time: 2.90
epoch 2 training f1: 0.0320

epoch 2 validation f1: 0.0305

training time: 2.93
epoch 3 training f1: 0.0311

epoch 3 validation f1: 0.0266

training time: 2.97
epoch 4 training f1: 0.0272

epoch 4 validation f1: 0.0161

training time: 3.12
epoch 5 training f1: 0.0157

epoch 5 validation f1: 0.0089

training time: 2.93
epoch 6 training f1: 0.0079

epoch 6 validation f1: 0.0109

training time: 2.88
epoch 7 training f1: 0.0136

epoch 7 validation f1: 0.0108

training time: 2.92
epoch 8 training f1: 0.0214

epoch 8 validation f1: 0.0218

training time: 2.86
epoch 9 training f1: 0.0316

epoch 9 validation f1: 0.0255

training time: 2.83
epoch 10 training f1: 0.0386

epoch 10 validation f1: 0.0290

training time: 2.93
epoch 11 training f1: 0.0549

epoch 11 validation f1: 0.0429

training time: 3.04
epoch 12 tr

2022-05-01 04:38:29 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/FinetuneNotSemisup/BC2GM_lstm_crf_medmentions_1600.ckpt



BC2GM 2000 no pretrain
exact p: 0.2400
exact r: 0.3038
exact f: 0.2682
partial p: 0.3931
partial r: 0.4977
partial f: 0.4393

training BC4CHEMD 2000
training network on 1600 documents, validation on 400 documents

training time: 3.68
epoch 1 training f1: 0.0387

epoch 1 validation f1: 0.0372

training time: 3.17
epoch 2 training f1: 0.0344

epoch 2 validation f1: 0.0321

training time: 3.01
epoch 3 training f1: 0.0253

epoch 3 validation f1: 0.0103

training time: 3.02
epoch 4 training f1: 0.0028

epoch 4 validation f1: 0.0000

training time: 3.04
epoch 5 training f1: 0.0000

epoch 5 validation f1: 0.0000

training time: 2.99
epoch 6 training f1: 0.0015

epoch 6 validation f1: 0.0000

training time: 3.04
epoch 7 training f1: 0.0015

epoch 7 validation f1: 0.0000

training time: 3.15
epoch 8 training f1: 0.0029

epoch 8 validation f1: 0.0066

training time: 2.98
epoch 9 training f1: 0.0086

epoch 9 validation f1: 0.0261

training time: 2.96
epoch 10 training f1: 0.0126

epoch 10 valida

2022-05-01 04:39:45 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/FinetuneNotSemisup/BC4CHEMD_lstm_crf_medmentions_1600.ckpt



BC4CHEMD 2000 no pretrain
exact p: 0.0209
exact r: 0.2747
exact f: 0.0388
partial p: 0.0305
partial r: 0.4012
partial f: 0.0566

training NCBI-disease 2000
training network on 1600 documents, validation on 400 documents

training time: 3.84
epoch 1 training f1: 0.0305

epoch 1 validation f1: 0.0318

training time: 3.01
epoch 2 training f1: 0.0291

epoch 2 validation f1: 0.0227

training time: 2.88
epoch 3 training f1: 0.0266

epoch 3 validation f1: 0.0290

training time: 2.98
epoch 4 training f1: 0.0186

epoch 4 validation f1: 0.0092

training time: 3.01
epoch 5 training f1: 0.0097

epoch 5 validation f1: 0.0089

training time: 2.90
epoch 6 training f1: 0.0078

epoch 6 validation f1: 0.0091

training time: 2.92
epoch 7 training f1: 0.0078

epoch 7 validation f1: 0.0135

training time: 2.95
epoch 8 training f1: 0.0143

epoch 8 validation f1: 0.0181

training time: 2.91
epoch 9 training f1: 0.0152

epoch 9 validation f1: 0.0181

training time: 3.01
epoch 10 training f1: 0.0201

epoch 10

2022-05-01 04:52:44 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/FinetuneNotSemisup/NCBI-disease_lstm_crf_medmentions_1600.ckpt



NCBI-disease 2000 no pretrain
exact p: 0.5386
exact r: 0.4937
exact f: 0.5152
partial p: 0.6569
partial r: 0.6020
partial f: 0.6282

training s800 2000
training network on 1600 documents, validation on 400 documents

training time: 3.61
epoch 1 training f1: 0.0083

epoch 1 validation f1: 0.0096

training time: 2.94
epoch 2 training f1: 0.0085

epoch 2 validation f1: 0.0159

training time: 3.11
epoch 3 training f1: 0.0091

epoch 3 validation f1: 0.0120

training time: 2.98
epoch 4 training f1: 0.0076

epoch 4 validation f1: 0.0000

training time: 2.99
epoch 5 training f1: 0.0000

epoch 5 validation f1: 0.0000

training time: 2.99
epoch 6 training f1: 0.0022

epoch 6 validation f1: 0.0000

training time: 2.95
epoch 7 training f1: 0.0000

epoch 7 validation f1: 0.0000

training time: 2.96
epoch 8 training f1: 0.0020

epoch 8 validation f1: 0.0000

training time: 2.90
epoch 9 training f1: 0.0000

epoch 9 validation f1: 0.0000

training time: 2.96
epoch 10 training f1: 0.0020

epoch 10 val

2022-05-01 04:53:57 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/FinetuneNotSemisup/s800_lstm_crf_medmentions_1600.ckpt



s800 2000 no pretrain
exact p: 0.0023
exact r: 0.0293
exact f: 0.0042
partial p: 0.0076
partial r: 0.0987
partial f: 0.0141

