Mounting Google Drive and selecting the appropriate version of TensorFlow

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [None]:
# Import TensorFlow and print the version that was actually loaded.
import tensorflow as tf
print(tf.__version__)

1.15.2


In [None]:
pip install nervaluate

Collecting nervaluate
  Downloading nervaluate-0.1.8-py3-none-any.whl (24 kB)
Installing collected packages: nervaluate
Successfully installed nervaluate-0.1.8


In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import numpy as np
from tensorflow.contrib.crf import crf_log_likelihood, crf_decode
from tensorflow.contrib.rnn import LSTMCell, GRUCell, BasicRNNCell
from sklearn.metrics import f1_score, precision_score, recall_score
from nervaluate import Evaluator
import sys
import random
import time
import pickle

In [None]:
class lstm_crf(object):

    '''
    bidirectional lstm-crf for ner tagging

    parameters:
      - embedding_matrix: numpy array[float]
        numpy array of word embeddings
        each row should represent a word embedding; word ids index the rows
        directly and the row width sets the model input size
        NOTE(review): an earlier note said word index 0 is dropped, but nothing
        in this class special-cases row 0 - confirm against the preprocessing
      - num_classes: int
        number of output classes
        NOTE: tag names are fixed to {0:'O',1:'B-ENT',2:'I-ENT'} for scoring
      - max_len: int (default: 50)
        maximum number of input tokens in any sequence
      - rnn_size: int (default: 300)
        total number of rnn units; each direction gets rnn_size//2
      - dropout_keep: float (default: 0.9)
        dropout keep rate applied to the rnn input and the rnn output
      - learning_rate: float (default: 1E-4)
        learning rate for adam optimizer

    methods:
      - train(X,y,doc_lens,batch_size=128,epochs=500,patience=20,
              validation_data=None,savebest=False,filepath=None)
        train network on given data
      - predict(X,doc_lens,batch_size=128)
        return the predicted labels, flattened labels ignoring padding tokens,
        and confidence scores for given data
      - score(X,y,doc_lens,batch_size=128)
        return the entity-level exact F1 score for given input sequences
      - save(filename)
        save the model weights to a file
      - load(filename)
        load model weights from a file
    '''

    def __init__(self,embedding_matrix,num_classes,max_len=50,rnn_size=300,dropout_keep=0.9,learning_rate=1E-4):

        '''
        build the tensorflow graph, open a session and initialise all variables

        NOTE(review): ops are created in the current default graph under the
        fixed scope name 'rnn'; building a second model in the same graph
        probably needs tf.reset_default_graph() first - confirm
        '''

        #model params
        self.num_classes = num_classes
        self.embeddings = embedding_matrix.astype(np.float32)
        self.max_len = max_len
        self.rnn_size = rnn_size
        self.dropout_keep = dropout_keep
        self.idx2label = {0:'O',1:'B-ENT',2:'I-ENT'}
        self.lr = learning_rate

        #input width follows the embedding matrix rather than a fixed 200
        embedding_size = self.embeddings.shape[1]
        #integer division so each cell gets an int unit count under python 3
        cell_size = self.rnn_size // 2

        #model inputs
        self.doc_inputs = tf.placeholder(tf.float32,shape=[None,max_len,embedding_size])
        self.doc_lens = tf.placeholder(tf.int32,shape=[None])
        self.labels = tf.placeholder(tf.int32,shape=[None,max_len])
        self.doc_idx = tf.placeholder(tf.int32,shape=[None,2])
        self.dropout = tf.placeholder(tf.float32)
        rnn_input = tf.nn.dropout(self.doc_inputs,self.dropout)

        #bi-lstm
        with tf.variable_scope('rnn',initializer=tf.contrib.layers.xavier_initializer()):
            [outputs_fw,outputs_bw],_ = tf.nn.bidirectional_dynamic_rnn(
                            LSTMCell(cell_size),LSTMCell(cell_size),
                            rnn_input,sequence_length=self.doc_lens,dtype=tf.float32)
            outputs = tf.concat((outputs_fw,outputs_bw),2)
            outputs = tf.nn.dropout(outputs,self.dropout)

        #linear chain conditional random field
        unary_scores = tf.layers.dense(outputs,self.num_classes,
                       kernel_initializer=tf.contrib.layers.xavier_initializer())
        log_likelihood, self.transition_params = \
                       crf_log_likelihood(unary_scores,self.labels,self.doc_lens)
        self.pred, viterbi_score = crf_decode(unary_scores,self.transition_params,self.doc_lens)
        self.pred_flat = tf.gather_nd(self.pred,self.doc_idx)
        #log-likelihood of the decoded sequence, normalised by its length
        seq_score,_ = crf_log_likelihood(unary_scores,self.pred,self.doc_lens,
                                         self.transition_params)
        self.seq_score = seq_score/tf.cast(self.doc_lens,tf.float32)

        #loss and training functions
        self.loss = tf.reduce_mean(-log_likelihood)
        self.optimizer = tf.train.AdamOptimizer(self.lr,0.9,0.99).minimize(self.loss)

        #init ops
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.saver = tf.train.Saver()
        self.sess = tf.Session(config=config)
        self.sess.run(tf.global_variables_initializer())

    def _gen_doc_idx(self,doc_lens):

        '''
        build [document, position] index pairs for every non-padding token

        parameters:
          - doc_lens: list[int]
            true sequence length of each sample in the batch

        outputs:
          - numpy array[int] of shape (sum(doc_lens), 2)
        '''

        doc_idx = [[doc,pos] for doc,length in enumerate(doc_lens)
                   for pos in range(length)]

        #reshape keeps the (n,2) layout even when there are no tokens at all
        return np.array(doc_idx,dtype=np.int32).reshape(-1,2)

    def _flatten_y(self,y,doc_lens):

        '''
        concatenate the non-padding labels of every document into one list

        parameters:
          - y: 2d sequence (doc x ner labels)
          - doc_lens: list[int]
            true sequence length of each sample

        outputs:
          - list of labels, padding positions removed
        '''

        y_flattened = []
        for doc,length in zip(y,doc_lens):
            y_flattened.extend(doc[:length])

        return y_flattened

    def _to_tags(self,sequences,doc_lens):

        '''
        map padded label-id sequences to lists of tag strings, dropping padding

        parameters:
          - sequences: 2d sequence (doc x label ids)
          - doc_lens: list[int]
            true sequence length of each sample

        outputs:
          - list[list[str]] with one tag list per document
        '''

        return [[self.idx2label[label] for label in seq[:length]]
                for seq,length in zip(sequences,doc_lens)]

    def train(self,X,y,doc_lens,batch_size=128,epochs=500,patience=20,
              validation_data=None,savebest=False,filepath=None):

        '''
        train network on given data

        parameters:
          - X: numpy array[int]
            2d numpy array (doc x word ids) of input data
          - y: numpy array[int]
            2d numpy array (doc x ner labels) of labels for given data
          - doc_lens: list[int]
            true sequence length of each sample
          - batch size: int (default: 128)
            batch size to use for training
          - epochs: int (default: 500)
            number of epochs to train for
          - patience: int (default: 20)
            training stops after no improvement in validation score
            for this number of epochs
          - validation_data: tuple (optional)
            tuple (X,y,doc_lens) representing validation data
            when omitted, the training data is used for the validation score
          - savebest: boolean (default: False)
            set to True to save the best model based on validation score per epoch
          - filepath: string (optional)
            path to save model if savebest is set to True

        outputs:
            None
        '''

        if savebest and filepath is None:
            raise Exception("Please enter a path to save the network")

        #validation_data is optional: without it, score on the training data
        if validation_data:
            X_val,y_val,lens_val = validation_data[0],validation_data[1],validation_data[2]
        else:
            X_val,y_val,lens_val = X,y,doc_lens

        print('training network on %i documents, validation on %i documents' \
              % (len(X), len(X_val)))

        #track best model for saving
        prevbest = 0
        pat_count = 0

        for ep in range(epochs):

            #shuffle data
            xyz = list(zip(X,y,doc_lens))
            random.shuffle(xyz)
            X,y,doc_lens = zip(*xyz)
            X = list(X)
            y = list(y)
            doc_lens = list(doc_lens)

            y_pred = []
            y_true = []
            start_time = time.time()

            #train
            for start in range(0,len(X),batch_size):

                stop = min(start+batch_size,len(X))
                batch_y = y[start:stop]
                batch_lens = doc_lens[start:stop]

                embeds = np.take(self.embeddings,X[start:stop],0)
                feed_dict = {self.doc_inputs:embeds,
                             self.labels:batch_y,
                             self.doc_lens:batch_lens,
                             self.doc_idx:self._gen_doc_idx(batch_lens),
                             self.dropout:self.dropout_keep}
                preds,loss,_ = self.sess.run([self.pred,self.loss,self.optimizer],
                              feed_dict=feed_dict)

                #collect tag sequences for the epoch-level training f1
                y_pred.extend(self._to_tags(preds,batch_lens))
                y_true.extend(self._to_tags(batch_y,batch_lens))

                sys.stdout.write("epoch %i, sample %i of %i, loss: %f        \r"\
                                 % (ep+1,stop,len(X),loss))
                sys.stdout.flush()

            #checkpoint after every epoch
            print("\ntraining time: %.2f" % (time.time()-start_time))
            evaluator = Evaluator(y_true, y_pred, tags=['ENT'], loader="list")
            results, results_by_tag = evaluator.evaluate()
            f1 = results['exact']['f1']
            print("epoch %i training f1: %.4f" % (ep+1,f1))

            f1 = self.score(X_val,y_val,lens_val,batch_size=batch_size)
            print("epoch %i validation f1: %.4f" % (ep+1,f1))

            #save if performance better than previous best
            if f1 >= prevbest:
                prevbest = f1
                pat_count = 0
                if savebest:
                    self.save(filepath)
            else:
                pat_count += 1
                if pat_count >= patience:
                    break

    def predict(self,X,doc_lens,batch_size=128):

        '''
        return the predicted labels, flattened labels ignoring padding tokens,
        and confidence scores for given data

        parameters:
          - X: numpy array[int]
            2d numpy array (doc x word ids) of input data
          - doc_lens: list[int]
            true sequence length of each sample
          - batch size: int (default: 128)
            batch size to use for inference

        outputs:
          - y_pred: numpy_array[int]
            2d numpy array of predicted labels for input data
          - y_pred_flat: list[int]
            flattened list of all predicted labels ignoring padding tokens
          - scores: numpy_array[float]
            one confidence score per document: exp of the length-normalised
            crf log-likelihood of the predicted sequence
        '''

        y_pred_flat = []
        y_pred = []
        scores = []

        for start in range(0,len(X),batch_size):

            stop = min(start+batch_size,len(X))
            batch_lens = doc_lens[start:stop]

            embeds = np.take(self.embeddings,X[start:stop],0)
            feed_dict = {self.doc_inputs:embeds,
                         self.doc_lens:batch_lens,
                         self.doc_idx:self._gen_doc_idx(batch_lens),
                         self.dropout:1.0}
            pred,pred_flat,score = self.sess.run([self.pred,self.pred_flat,self.seq_score],
                                   feed_dict=feed_dict)

            y_pred.append(pred)
            y_pred_flat.extend(pred_flat)
            scores.extend(np.exp(score))

            sys.stdout.write("processed %i of %i records        \r" \
                             % (stop,len(X)))
            sys.stdout.flush()

        print()

        #np.vstack rejects an empty list, so handle empty input explicitly
        if not y_pred:
            return np.zeros((0,self.max_len),dtype=np.int32),y_pred_flat,np.array(scores)

        return np.vstack(y_pred),y_pred_flat,np.array(scores)

    def score(self,X,y,doc_lens,batch_size=128):

        '''
        return the entity-level exact F1 score for given input sequences

        parameters:
          - X: numpy array[int]
            2d numpy array (doc x word ids) of input data
          - y: numpy array[int]
            2d numpy array (doc x ner labels) of labels for given data
          - doc_lens: list[int]
            true sequence length of each sample
          - batch size: int (default: 128)
            batch size to use for inference

        outputs:
          - entity-level exact F1 score for given input sequences
        '''

        y_preds_,_,_ = self.predict(X,doc_lens,batch_size)
        y_pred = self._to_tags(y_preds_,doc_lens)
        y_true = self._to_tags(y,doc_lens)

        evaluator = Evaluator(y_true, y_pred, tags=['ENT'], loader="list")
        results, results_by_tag = evaluator.evaluate()

        return results['exact']['f1']

    def save(self,filename):

        '''
        save the model weights to a file

        parameters:
          - filename: string
            path to save model weights

        outputs:
            None
        '''

        self.saver.save(self.sess,filename)

    def load(self,filename):

        '''
        load model weights from a file

        parameters:
          - filename: string
            path from which to load model weights

        outputs:
            None
        '''

        self.saver.restore(self.sess,filename)


In [None]:
class tt_bilstm_crf(object):

    '''
    bidirectional lstm-crf for ner tagging

    parameters:
      - embedding_matrix: numpy array[float]
        numpy array of word embeddings
        each row should represent a word embedding; word ids index the rows
        directly and the row width sets the model input size
        NOTE(review): an earlier note said word index 0 is dropped, but nothing
        in this class special-cases row 0 - confirm against the preprocessing
      - num_classes: int
        number of output classes
        NOTE: tag names are fixed to {0:'O',1:'B-ENT',2:'I-ENT'} for scoring
      - max_len: int (default: 50)
        maximum number of input tokens in any sequence
      - rnn_size: int (default: 300)
        total number of rnn units; each direction gets rnn_size//2
      - dropout_keep: float (default: 0.9)
        dropout keep rate applied to the rnn input and the rnn output
      - learning_rate: float (default: 1E-4)
        learning rate for adam optimizer

    methods:
      - train(X,y,doc_lens,batch_size=128,epochs=500,patience=20,
              validation_data=None,savebest=False,filepath=None)
        train network on given data
      - predict(X,doc_lens,batch_size=128)
        return the predicted labels, flattened labels ignoring padding tokens,
        and confidence scores for given data
      - score(X,y,doc_lens,batch_size=128)
        return the entity-level exact F1 score for given input sequences
      - save(filename)
        save the model weights to a file
      - load(filename)
        load model weights from a file
    '''

    def __init__(self,embedding_matrix,num_classes,max_len=50,rnn_size=300,dropout_keep=0.9,learning_rate=1E-4):

        '''
        build the tensorflow graph, open a session and initialise all variables

        NOTE(review): ops are created in the current default graph under the
        fixed scope name 'rnn'; building a second model in the same graph
        probably needs tf.reset_default_graph() first - confirm
        '''

        #model params
        self.num_classes = num_classes
        self.embeddings = embedding_matrix.astype(np.float32)
        self.max_len = max_len
        self.rnn_size = rnn_size
        self.dropout_keep = dropout_keep
        self.idx2label = {0:'O',1:'B-ENT',2:'I-ENT'}
        self.lr = learning_rate

        #input width follows the embedding matrix rather than a fixed 200
        embedding_size = self.embeddings.shape[1]
        #integer division so each cell gets an int unit count under python 3
        cell_size = self.rnn_size // 2

        #model inputs
        self.doc_inputs = tf.placeholder(tf.float32,shape=[None,max_len,embedding_size])
        self.doc_lens = tf.placeholder(tf.int32,shape=[None])
        self.labels = tf.placeholder(tf.int32,shape=[None,max_len])
        self.doc_idx = tf.placeholder(tf.int32,shape=[None,2])
        self.dropout = tf.placeholder(tf.float32)
        rnn_input = tf.nn.dropout(self.doc_inputs,self.dropout)

        #bi-lstm
        with tf.variable_scope('rnn',initializer=tf.contrib.layers.xavier_initializer()):
            [outputs_fw,outputs_bw],_ = tf.nn.bidirectional_dynamic_rnn(
                            LSTMCell(cell_size),LSTMCell(cell_size),
                            rnn_input,sequence_length=self.doc_lens,dtype=tf.float32)
            outputs = tf.concat((outputs_fw,outputs_bw),2)
            outputs = tf.nn.dropout(outputs,self.dropout)

        #linear chain conditional random field
        unary_scores = tf.layers.dense(outputs,self.num_classes,
                       kernel_initializer=tf.contrib.layers.xavier_initializer())
        log_likelihood, self.transition_params = \
                       crf_log_likelihood(unary_scores,self.labels,self.doc_lens)
        self.pred, viterbi_score = crf_decode(unary_scores,self.transition_params,self.doc_lens)
        self.pred_flat = tf.gather_nd(self.pred,self.doc_idx)
        #log-likelihood of the decoded sequence, normalised by its length
        seq_score,_ = crf_log_likelihood(unary_scores,self.pred,self.doc_lens,
                                         self.transition_params)
        self.seq_score = seq_score/tf.cast(self.doc_lens,tf.float32)

        #loss and training functions
        self.loss = tf.reduce_mean(-log_likelihood)
        self.optimizer = tf.train.AdamOptimizer(self.lr,0.9,0.99).minimize(self.loss)

        #init ops
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.saver = tf.train.Saver()
        self.sess = tf.Session(config=config)
        self.sess.run(tf.global_variables_initializer())

    def _gen_doc_idx(self,doc_lens):

        '''
        build [document, position] index pairs for every non-padding token

        parameters:
          - doc_lens: list[int]
            true sequence length of each sample in the batch

        outputs:
          - numpy array[int] of shape (sum(doc_lens), 2)
        '''

        doc_idx = [[doc,pos] for doc,length in enumerate(doc_lens)
                   for pos in range(length)]

        #reshape keeps the (n,2) layout even when there are no tokens at all
        return np.array(doc_idx,dtype=np.int32).reshape(-1,2)

    def _flatten_y(self,y,doc_lens):

        '''
        concatenate the non-padding labels of every document into one list

        parameters:
          - y: 2d sequence (doc x ner labels)
          - doc_lens: list[int]
            true sequence length of each sample

        outputs:
          - list of labels, padding positions removed
        '''

        y_flattened = []
        for doc,length in zip(y,doc_lens):
            y_flattened.extend(doc[:length])

        return y_flattened

    def _to_tags(self,sequences,doc_lens):

        '''
        map padded label-id sequences to lists of tag strings, dropping padding

        parameters:
          - sequences: 2d sequence (doc x label ids)
          - doc_lens: list[int]
            true sequence length of each sample

        outputs:
          - list[list[str]] with one tag list per document
        '''

        return [[self.idx2label[label] for label in seq[:length]]
                for seq,length in zip(sequences,doc_lens)]

    def train(self,X,y,doc_lens,batch_size=128,epochs=500,patience=20,
              validation_data=None,savebest=False,filepath=None):

        '''
        train network on given data

        parameters:
          - X: numpy array[int]
            2d numpy array (doc x word ids) of input data
          - y: numpy array[int]
            2d numpy array (doc x ner labels) of labels for given data
          - doc_lens: list[int]
            true sequence length of each sample
          - batch size: int (default: 128)
            batch size to use for training
          - epochs: int (default: 500)
            number of epochs to train for
          - patience: int (default: 20)
            training stops after no improvement in validation score
            for this number of epochs
          - validation_data: tuple (optional)
            tuple (X,y,doc_lens) representing validation data
            when omitted, the training data is used for the validation score
          - savebest: boolean (default: False)
            set to True to save the best model based on validation score per epoch
          - filepath: string (optional)
            path to save model if savebest is set to True

        outputs:
            None
        '''

        if savebest and filepath is None:
            raise Exception("Please enter a path to save the network")

        #validation_data is optional: without it, score on the training data
        if validation_data:
            X_val,y_val,lens_val = validation_data[0],validation_data[1],validation_data[2]
        else:
            X_val,y_val,lens_val = X,y,doc_lens

        print('Entered BiLSTM-CRF. Training network on %i documents, validation on %i documents' \
              % (len(X), len(X_val)))

        #track best model for saving
        prevbest = 0
        pat_count = 0

        for ep in range(epochs):

            #shuffle data
            xyz = list(zip(X,y,doc_lens))
            random.shuffle(xyz)
            X,y,doc_lens = zip(*xyz)
            X = list(X)
            y = list(y)
            doc_lens = list(doc_lens)

            y_pred = []
            y_true = []
            start_time = time.time()

            #train
            for start in range(0,len(X),batch_size):

                stop = min(start+batch_size,len(X))
                batch_y = y[start:stop]
                batch_lens = doc_lens[start:stop]

                embeds = np.take(self.embeddings,X[start:stop],0)
                feed_dict = {self.doc_inputs:embeds,
                             self.labels:batch_y,
                             self.doc_lens:batch_lens,
                             self.doc_idx:self._gen_doc_idx(batch_lens),
                             self.dropout:self.dropout_keep}
                preds,loss,_ = self.sess.run([self.pred,self.loss,self.optimizer],
                              feed_dict=feed_dict)

                #collect tag sequences for the epoch-level training f1
                y_pred.extend(self._to_tags(preds,batch_lens))
                y_true.extend(self._to_tags(batch_y,batch_lens))

                sys.stdout.write("epoch %i, sample %i of %i, loss: %f        \r"\
                                 % (ep+1,stop,len(X),loss))
                sys.stdout.flush()

            #checkpoint after every epoch
            print("\ntraining time: %.2f" % (time.time()-start_time))
            evaluator = Evaluator(y_true, y_pred, tags=['ENT'], loader="list")
            results, results_by_tag = evaluator.evaluate()
            f1 = results['exact']['f1']
            print("epoch %i training f1: %.4f" % (ep+1,f1))

            f1 = self.score(X_val,y_val,lens_val,batch_size=batch_size)
            print("epoch %i validation f1: %.4f" % (ep+1,f1))

            #save if performance better than previous best
            if f1 >= prevbest:
                prevbest = f1
                pat_count = 0
                if savebest:
                    self.save(filepath)
            else:
                pat_count += 1
                if pat_count >= patience:
                    break

    def predict(self,X,doc_lens,batch_size=128):

        '''
        return the predicted labels, flattened labels ignoring padding tokens,
        and confidence scores for given data

        parameters:
          - X: numpy array[int]
            2d numpy array (doc x word ids) of input data
          - doc_lens: list[int]
            true sequence length of each sample
          - batch size: int (default: 128)
            batch size to use for inference

        outputs:
          - y_pred: numpy_array[int]
            2d numpy array of predicted labels for input data
          - y_pred_flat: list[int]
            flattened list of all predicted labels ignoring padding tokens
          - scores: numpy_array[float]
            one confidence score per document: exp of the length-normalised
            crf log-likelihood of the predicted sequence
        '''

        y_pred_flat = []
        y_pred = []
        scores = []

        for start in range(0,len(X),batch_size):

            stop = min(start+batch_size,len(X))
            batch_lens = doc_lens[start:stop]

            embeds = np.take(self.embeddings,X[start:stop],0)
            feed_dict = {self.doc_inputs:embeds,
                         self.doc_lens:batch_lens,
                         self.doc_idx:self._gen_doc_idx(batch_lens),
                         self.dropout:1.0}
            pred,pred_flat,score = self.sess.run([self.pred,self.pred_flat,self.seq_score],
                                   feed_dict=feed_dict)

            y_pred.append(pred)
            y_pred_flat.extend(pred_flat)
            scores.extend(np.exp(score))

            sys.stdout.write("processed %i of %i records        \r" \
                             % (stop,len(X)))
            sys.stdout.flush()

        print()

        #np.vstack rejects an empty list, so handle empty input explicitly
        if not y_pred:
            return np.zeros((0,self.max_len),dtype=np.int32),y_pred_flat,np.array(scores)

        return np.vstack(y_pred),y_pred_flat,np.array(scores)

    def score(self,X,y,doc_lens,batch_size=128):

        '''
        return the entity-level exact F1 score for given input sequences

        parameters:
          - X: numpy array[int]
            2d numpy array (doc x word ids) of input data
          - y: numpy array[int]
            2d numpy array (doc x ner labels) of labels for given data
          - doc_lens: list[int]
            true sequence length of each sample
          - batch size: int (default: 128)
            batch size to use for inference

        outputs:
          - entity-level exact F1 score for given input sequences
        '''

        y_preds_,_,_ = self.predict(X,doc_lens,batch_size)
        y_pred = self._to_tags(y_preds_,doc_lens)
        y_true = self._to_tags(y,doc_lens)

        evaluator = Evaluator(y_true, y_pred, tags=['ENT'], loader="list")
        results, results_by_tag = evaluator.evaluate()

        return results['exact']['f1']

    def save(self,filename):

        '''
        save the model weights to a file

        parameters:
          - filename: string
            path to save model weights

        outputs:
            None
        '''

        self.saver.save(self.sess,filename)

    def load(self,filename):

        '''
        load model weights from a file

        parameters:
          - filename: string
            path from which to load model weights

        outputs:
            None
        '''

        self.saver.restore(self.sess,filename)


In [None]:
class tt_birnn_crf(object):

    '''
    birnn-crf for ner tagging
    
    parameters:
      - embedding_matrix: numpy array[float]
        numpy array of word embeddings
        each row should represent a word embedding
        NOTE: the word index 0 is dropped, so the first row is ignored
      - num_classes: int
        number of output classes
      - max_len: int (default: 50)
        maximum number of input tokens in any sequence
      - rnn_size: int (default: 300)
        number of rnn units in RNN layer
      - dropout_keep: float (default: 0.9)
        dropout keep rate after rnn layer
      - learning_rate: float (default: 1E-4)
        learning rate for adam optimizer
       
    methods:
      - train(X,y,doc_lens,batch_size=128,epochs=500,patience=20,
              validation_data=None,savebest=False,filepath=None)
        train network on given data
      - predict(X,doc_lens,batch_size=128)
        return the predicted labels, flattened labels ignoring padding tokens, 
        and confidence scores for given data
      - score(X,y,doc_lens,batch_size=128)
        return the entity-level exact F1 score for given input sequences
      - save(filepath)
        save the model weights to a file
      - load(filepath)
        load model weights from a file
    '''

    def __init__(self,embedding_matrix,num_classes,max_len=50,rnn_size=300,dropout_keep=0.9,learning_rate=1E-4):

        '''
        build the tensorflow graph, open a session and initialise all variables

        parameters:
          - embedding_matrix: numpy array[float]
            word embeddings, one row per word id; the row width sets the
            model input size
          - num_classes: int
            number of output classes
          - max_len: int (default: 50)
            maximum number of input tokens in any sequence
          - rnn_size: int (default: 300)
            total number of rnn units; each direction gets rnn_size//2
          - dropout_keep: float (default: 0.9)
            dropout keep rate applied to the rnn input and the rnn output
          - learning_rate: float (default: 1E-4)
            learning rate for adam optimizer
        '''

        #model params
        self.num_classes = num_classes
        self.embeddings = embedding_matrix.astype(np.float32)
        self.max_len = max_len
        self.rnn_size = rnn_size
        self.dropout_keep = dropout_keep
        self.idx2label = {0:'O',1:'B-ENT',2:'I-ENT'}
        self.lr = learning_rate

        #input width follows the embedding matrix rather than a fixed 200
        embedding_size = self.embeddings.shape[1]
        #integer division so each cell gets an int unit count under python 3
        cell_size = self.rnn_size // 2

        #model inputs
        self.doc_inputs = tf.placeholder(tf.float32,shape=[None,max_len,embedding_size])
        self.doc_lens = tf.placeholder(tf.int32,shape=[None])
        self.labels = tf.placeholder(tf.int32,shape=[None,max_len])
        self.doc_idx = tf.placeholder(tf.int32,shape=[None,2])
        self.dropout = tf.placeholder(tf.float32)
        rnn_input = tf.nn.dropout(self.doc_inputs,self.dropout)

        #bi-rnn
        with tf.variable_scope('rnn',initializer=tf.contrib.layers.xavier_initializer()):
            [outputs_fw,outputs_bw],_ = tf.nn.bidirectional_dynamic_rnn(
                            BasicRNNCell(cell_size),BasicRNNCell(cell_size),
                            rnn_input,sequence_length=self.doc_lens,dtype=tf.float32)
            outputs = tf.concat((outputs_fw,outputs_bw),2)
            outputs = tf.nn.dropout(outputs,self.dropout)

        #linear chain conditional random field
        unary_scores = tf.layers.dense(outputs,self.num_classes,
                       kernel_initializer=tf.contrib.layers.xavier_initializer())
        log_likelihood, self.transition_params = \
                       crf_log_likelihood(unary_scores,self.labels,self.doc_lens)
        self.pred, viterbi_score = crf_decode(unary_scores,self.transition_params,self.doc_lens)
        self.pred_flat = tf.gather_nd(self.pred,self.doc_idx)
        #log-likelihood of the decoded sequence, normalised by its length
        seq_score,_ = crf_log_likelihood(unary_scores,self.pred,self.doc_lens,
                                         self.transition_params)
        self.seq_score = seq_score/tf.cast(self.doc_lens,tf.float32)

        #loss and training functions
        self.loss = tf.reduce_mean(-log_likelihood)
        self.optimizer = tf.train.AdamOptimizer(self.lr,0.9,0.99).minimize(self.loss)

        #init ops
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.saver = tf.train.Saver()
        self.sess = tf.Session(config=config)
        self.sess.run(tf.global_variables_initializer())

    def _gen_doc_idx(self,doc_lens):
    
        doc_idx = []
        for i,l in enumerate(doc_lens):
            for j in range(l):
                doc_idx.append([i,j])
        doc_idx = np.array(doc_idx)

        return doc_idx
        
    def _flatten_y(self,y,doc_lens):
    
        y_flattened = []
        for i,(doc,l) in enumerate(zip(y,doc_lens)):       
            y_flattened.extend(doc[:l])
            
        return y_flattened
                    
    def train(self,X,y,doc_lens,batch_size=128,epochs=500,patience=20,
              validation_data=None,savebest=False,filepath=None):
    
        '''
        train network on given data
        
        parameters:
          - X: numpy array[int]
            2d numpy array (doc x word ids) of input data
          - y: numpy array[int]
            2d numpy array (doc x ner labels) of labels for given data
          - doc_lens: list[int]
            true sequence length of each sample
          - batch size: int (default: 128)
            batch size to use for training
          - epochs: int (default: 500)
            number of epochs to train for
          - patience: int (default: 10)
            training stops after no improvement in validation score
            for this number of epochs
          - validation_data: tuple (optional)
            tuple of numpy arrays (X,y) representing validation data
          - savebest: boolean (default: False)
            set to True to save the best model based on validation score per epoch
          - filepath: string (optional)
            path to save model if savebest is set to True
        
        outputs:
            None
        '''
    
        if savebest==True and filepath==None:
            raise Exception("Please enter a path to save the network")

        if validation_data:
            validation_size = len(validation_data[0])
        else:
            validation_size = len(X)

        print('Entered BiRNN-CRF. Training network on %i documents, validation on %i documents' \
              % (len(X), validation_size))

        #track best model for saving
        prevbest = 0
        pat_count = 0

        for ep in range(epochs):

            #shuffle data
            xyz = list(zip(X,y,doc_lens))            
            random.shuffle(xyz)
            X,y,doc_lens = zip(*xyz)
            X = list(X)
            y = list(y)
            doc_lens = list(doc_lens)

            y_pred = []
            y_true = []
            start_time = time.time()

            #train
            for start in range(0,len(X),batch_size):

                #get batch index
                if start+batch_size < len(X):
                    stop = start+batch_size
                else:
                    stop = len(X)

                embeds = np.take(self.embeddings,X[start:stop],0)
                feed_dict = {self.doc_inputs:embeds,
                             self.labels:y[start:stop],
                             self.doc_lens:doc_lens[start:stop],
                             self.doc_idx:self._gen_doc_idx(doc_lens[start:stop]),
                             self.dropout:self.dropout_keep}
                preds,loss,_ = self.sess.run([self.pred,self.loss,self.optimizer],
                              feed_dict=feed_dict)

                #track correct predictions
                for y_pred_,y_true_,l in zip(preds,y[start:stop],doc_lens[start:stop]):
                    y_p = [self.idx2label[l] for l in y_pred_[:l]]
                    y_t = [self.idx2label[l] for l in y_true_[:l]]
                    y_pred.append(y_p)
                    y_true.append(y_t)
                    
                sys.stdout.write("epoch %i, sample %i of %i, loss: %f        \r"\
                                 % (ep+1,stop,len(X),loss))
                sys.stdout.flush()

            #checkpoint after every epoch
            print("\ntraining time: %.2f" % (time.time()-start_time))
            evaluator = Evaluator(y_true, y_pred, tags=['ENT'], loader="list")
            results, results_by_tag = evaluator.evaluate()
            f1 = results['exact']['f1']
            print("epoch %i training f1: %.4f" % (ep+1,f1))

            f1 = self.score(validation_data[0],validation_data[1],
                            validation_data[2],batch_size=batch_size)
            print("epoch %i validation f1: %.4f" % (ep+1,f1))

            #save if performance better than previous best
            if f1 >= prevbest:
                prevbest = f1
                pat_count = 0
                if savebest:
                    self.save(filepath)
            else:
                pat_count += 1
                if pat_count >= patience:
                    break

            #reset timer
            start_time = time.time()

    def predict(self,X,doc_lens,batch_size=128):
    
        '''
        run inference on the given sequences, one batch at a time
        
        parameters:
          - X: numpy array[int]
            2d numpy array (doc x word ids) of input data
          - doc_lens: list[int]
            true sequence length of each sample
          - batch size: int (default: 128)
            batch size to use for inference
            
        outputs:
          - y_pred: numpy_array[int]
            2d numpy array of predicted labels for input data, one row per
            sequence (zero rows when X is empty)
          - y_pred_flat: list[int]
            flattened list of all predicted labels ignoring padding tokens
          - scores: numpy_array[float]
            one confidence score per input sequence: exp() of the value
            fetched from self.seq_score
        '''
    
        y_pred_flat = []
        y_pred = []
        scores = []
        
        for start in range(0,len(X),batch_size):

            #last batch may be shorter than batch_size
            stop = min(start+batch_size,len(X))

            #look up the embedding row of every word id in the batch
            embeds = np.take(self.embeddings,X[start:stop],0)
            feed_dict = {self.doc_inputs:embeds,
                         self.doc_lens:doc_lens[start:stop],
                         self.doc_idx:self._gen_doc_idx(doc_lens[start:stop]),
                         self.dropout:1.0}    #keep everything at inference time
            pred,pred_flat,score = self.sess.run([self.pred,self.pred_flat,self.seq_score],
                                   feed_dict=feed_dict)

            #seq_score is fetched in log space; convert back before returning
            score = np.exp(score)
            y_pred.append(pred)
            y_pred_flat.extend(pred_flat)
            scores.extend(score)

            sys.stdout.write("processed %i of %i records        \r" \
                             % (stop,len(X)))
            sys.stdout.flush()

        print()
        if y_pred:
            y_pred = np.vstack(y_pred)
        else:
            #np.vstack raises on an empty list, so an empty X (e.g. an exhausted
            #holdout pool) gets an explicit zero-row result instead
            y_pred = np.zeros((0,)+tuple(np.shape(X)[1:]),dtype=np.int32)
        return y_pred,y_pred_flat,np.array(scores)

    def score(self,X,y,doc_lens,batch_size=128):
    
        '''
        entity-level exact-match F1 of the model on the given sequences
        
        parameters:
          - X: numpy array[int]
            2d numpy array (doc x word ids) of input data
          - y: numpy array[int]
            2d numpy array (doc x ner labels) of gold labels for X
          - doc_lens: list[int]
            true sequence length of each sample
          - batch size: int (default: 128)
            batch size to use for inference
            
        outputs:
          - the 'exact' F1 value reported by nervaluate's Evaluator for tag 'ENT'
        '''
        
        predicted_ids,_flat,_conf = self.predict(X,doc_lens,batch_size)

        #translate label ids to tag strings, dropping the padded tail of each row
        tags_pred = []
        tags_true = []
        for pred_row,gold_row,n_tokens in zip(predicted_ids,y,doc_lens):
            pred_tags = [self.idx2label[idx] for idx in pred_row[:n_tokens]]
            gold_tags = [self.idx2label[idx] for idx in gold_row[:n_tokens]]
            tags_pred.append(pred_tags)
            tags_true.append(gold_tags)
        
        overall,_per_tag = Evaluator(tags_true, tags_pred, tags=['ENT'], loader="list").evaluate()
        
        return overall['exact']['f1']

    def save(self,filename):
    
        '''
        write the current session's variables to a checkpoint
        
        parameters:
          - filename: string
            checkpoint path handed to self.saver.save
        
        outputs:
            None
        '''
        
        checkpoint_writer = self.saver
        checkpoint_writer.save(self.sess,filename)
        return None

    def load(self,filename):
    
        '''
        restore the session's variables from a checkpoint
        
        parameters:
          - filename: string
            checkpoint path handed to self.saver.restore
        
        outputs:
            None
        '''
        
        checkpoint_reader = self.saver
        checkpoint_reader.restore(self.sess,filename)
        return None


In [None]:
class tt_bigru_crf(object):

    '''
    bigru-crf for ner tagging
    
    parameters:
      - embedding_matrix: numpy array[float]
        2d numpy array of word embeddings
        row i is looked up for word id i; the number of columns
        sets the width of the network input
      - num_classes: int
        number of output classes
      - max_len: int (default: 50)
        maximum number of input tokens in any sequence
      - rnn_size: int (default: 300)
        total number of rnn units; half go to each direction
      - dropout_keep: float (default: 0.9)
        dropout keep rate used during training, applied to the
        rnn input and to the rnn output
      - learning_rate: float (default: 1E-4)
        learning rate for adam optimizer
       
    methods:
      - train(X,y,doc_lens,batch_size=128,epochs=500,patience=20,
              validation_data=None,savebest=False,filepath=None)
        train network on given data
      - predict(X,doc_lens,batch_size=128)
        return the predicted labels, flattened labels ignoring padding tokens, 
        and one confidence score per sequence for given data
      - score(X,y,doc_lens,batch_size=128)
        return the entity-level exact F1 score for given input sequences
      - save(filename)
        save the model weights to a file
      - load(filename)
        load model weights from a file
    '''

    def __init__(self,embedding_matrix,num_classes,max_len=50,rnn_size=300,dropout_keep=0.9,learning_rate=1E-4):
    
        #model params
        self.num_classes = num_classes
        self.embeddings = embedding_matrix.astype(np.float32)
        self.max_len = max_len
        self.rnn_size = rnn_size
        self.dropout_keep = dropout_keep
        self.idx2label = {0:'O',1:'B-ENT',2:'I-ENT'}
        self.lr = learning_rate
        
        #model inputs
        #input width follows the embedding matrix instead of a fixed 200
        embedding_dim = self.embeddings.shape[1]
        self.doc_inputs = tf.placeholder(tf.float32,shape=[None,max_len,embedding_dim])
        self.doc_lens = tf.placeholder(tf.int32,shape=[None])
        self.labels = tf.placeholder(tf.int32,shape=[None,max_len])
        self.doc_idx = tf.placeholder(tf.int32,shape=[None,2])
        self.dropout = tf.placeholder(tf.float32)
        rnn_input = tf.nn.dropout(self.doc_inputs,self.dropout)

        #bi-rnn
        #floor division keeps the per-direction unit count an int
        #(rnn_size/2 is a float under python 3)
        units_per_direction = self.rnn_size//2
        with tf.variable_scope('rnn',initializer=tf.contrib.layers.xavier_initializer()):
            #no keep probabilities are passed to DropoutWrapper, so the
            #wrappers run with their library defaults
            with tf.variable_scope('forward'):
                gru_fw_cell = tf.contrib.rnn.GRUCell(units_per_direction)
                gru_fw_cell = tf.contrib.rnn.DropoutWrapper(gru_fw_cell)
            with tf.variable_scope('backward'):
                gru_bw_cell = tf.contrib.rnn.GRUCell(units_per_direction)
                gru_bw_cell = tf.contrib.rnn.DropoutWrapper(gru_bw_cell)

            [outputs_fw,outputs_bw],_ = tf.nn.bidirectional_dynamic_rnn(
                            gru_fw_cell,gru_bw_cell,
                            rnn_input,sequence_length=self.doc_lens,dtype=tf.float32)
            outputs = tf.concat((outputs_fw,outputs_bw),2)
            outputs = tf.nn.dropout(outputs,self.dropout)
        
        #linear chain conditional random field
        unary_scores = tf.layers.dense(outputs,self.num_classes,
                       kernel_initializer=tf.contrib.layers.xavier_initializer())
        log_likelihood, self.transition_params = \
                       crf_log_likelihood(unary_scores,self.labels,self.doc_lens)
        self.pred, viterbi_score = crf_decode(unary_scores,self.transition_params,self.doc_lens)
        #doc_idx lists the (doc,token) positions of real tokens, so this
        #gathers the predictions without padding
        self.pred_flat = tf.gather_nd(self.pred,self.doc_idx)
        #confidence: log-likelihood of the decoded sequence itself,
        #normalised by sequence length (predict() applies exp)
        seq_score,_ = crf_log_likelihood(unary_scores,self.pred,self.doc_lens,
                                         self.transition_params)
        self.seq_score = seq_score/tf.cast(self.doc_lens,tf.float32)

        #loss, accuracy, and training functions
        self.loss = tf.reduce_mean(-log_likelihood)
        self.optimizer = tf.train.AdamOptimizer(self.lr,0.9,0.99).minimize(self.loss)
        
        #init ops
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.saver = tf.train.Saver()
        self.sess = tf.Session(config=config)
        self.sess.run(tf.global_variables_initializer())

    def _gen_doc_idx(self,doc_lens):
    
        '''
        build the [doc,token] index pairs of every non-padding token
        
        parameters:
          - doc_lens: list[int]
            true sequence length of each sample in the batch
        
        outputs:
          - numpy array with one [doc index, token index] row per real token
        '''
    
        doc_idx = []
        for i,l in enumerate(doc_lens):
            for j in range(l):
                doc_idx.append([i,j])
        doc_idx = np.array(doc_idx)

        return doc_idx
        
    def _flatten_y(self,y,doc_lens):
    
        '''
        concatenate the first doc_lens[i] labels of every row of y into one list
        '''
    
        y_flattened = []
        for doc,l in zip(y,doc_lens):
            y_flattened.extend(doc[:l])
            
        return y_flattened
                    
    def train(self,X,y,doc_lens,batch_size=128,epochs=500,patience=20,
              validation_data=None,savebest=False,filepath=None):
    
        '''
        train network on given data
        
        parameters:
          - X: numpy array[int]
            2d numpy array (doc x word ids) of input data
          - y: numpy array[int]
            2d numpy array (doc x ner labels) of labels for given data
          - doc_lens: list[int]
            true sequence length of each sample
          - batch size: int (default: 128)
            batch size to use for training
          - epochs: int (default: 500)
            number of epochs to train for
          - patience: int (default: 20)
            training stops after no improvement in validation score
            for this number of epochs
          - validation_data: tuple (optional)
            tuple (X,y,doc_lens) representing validation data;
            when omitted, the training data is used for validation
          - savebest: boolean (default: False)
            set to True to save the best model based on validation score per epoch
          - filepath: string (optional)
            path to save model if savebest is set to True
        
        outputs:
            None
        '''
    
        if savebest and filepath is None:
            raise Exception("Please enter a path to save the network")

        #the per-epoch validation below always reads validation_data, so fall
        #back to the training set (as the banner already reports) when none is given
        if not validation_data:
            validation_data = (X,y,doc_lens)
        validation_size = len(validation_data[0])

        print('Entered BiGRU-CRF. Training network on %i documents, validation on %i documents' \
              % (len(X), validation_size))

        #track best model for saving
        prevbest = 0
        pat_count = 0

        for ep in range(epochs):

            #shuffle data, keeping inputs, labels and lengths aligned
            xyz = list(zip(X,y,doc_lens))            
            random.shuffle(xyz)
            X,y,doc_lens = zip(*xyz)
            X = list(X)
            y = list(y)
            doc_lens = list(doc_lens)

            y_pred = []
            y_true = []
            start_time = time.time()

            #train
            for start in range(0,len(X),batch_size):

                #last batch may be shorter than batch_size
                stop = min(start+batch_size,len(X))

                embeds = np.take(self.embeddings,X[start:stop],0)
                feed_dict = {self.doc_inputs:embeds,
                             self.labels:y[start:stop],
                             self.doc_lens:doc_lens[start:stop],
                             self.doc_idx:self._gen_doc_idx(doc_lens[start:stop]),
                             self.dropout:self.dropout_keep}
                preds,loss,_ = self.sess.run([self.pred,self.loss,self.optimizer],
                              feed_dict=feed_dict)

                #collect tag strings (padding dropped) for the training f1
                for pred_row,true_row,n_tokens in zip(preds,y[start:stop],doc_lens[start:stop]):
                    y_pred.append([self.idx2label[idx] for idx in pred_row[:n_tokens]])
                    y_true.append([self.idx2label[idx] for idx in true_row[:n_tokens]])
                    
                sys.stdout.write("epoch %i, sample %i of %i, loss: %f        \r"\
                                 % (ep+1,stop,len(X),loss))
                sys.stdout.flush()

            #checkpoint after every epoch
            print("\ntraining time: %.2f" % (time.time()-start_time))
            evaluator = Evaluator(y_true, y_pred, tags=['ENT'], loader="list")
            results, results_by_tag = evaluator.evaluate()
            f1 = results['exact']['f1']
            print("epoch %i training f1: %.4f" % (ep+1,f1))

            f1 = self.score(validation_data[0],validation_data[1],
                            validation_data[2],batch_size=batch_size)
            print("epoch %i validation f1: %.4f" % (ep+1,f1))

            #save if performance better than (or equal to) previous best,
            #otherwise count towards early stopping
            if f1 >= prevbest:
                prevbest = f1
                pat_count = 0
                if savebest:
                    self.save(filepath)
            else:
                pat_count += 1
                if pat_count >= patience:
                    break

    def predict(self,X,doc_lens,batch_size=128):
    
        '''
        run inference on the given sequences, one batch at a time
        
        parameters:
          - X: numpy array[int]
            2d numpy array (doc x word ids) of input data
          - doc_lens: list[int]
            true sequence length of each sample
          - batch size: int (default: 128)
            batch size to use for inference
            
        outputs:
          - y_pred: numpy_array[int]
            2d numpy array of predicted labels for input data, one row per
            sequence (zero rows when X is empty)
          - y_pred_flat: list[int]
            flattened list of all predicted labels ignoring padding tokens
          - scores: numpy_array[float]
            one confidence score per input sequence: exp() of the
            length-normalised log-likelihood of the decoded labels
        '''
    
        y_pred_flat = []
        y_pred = []
        scores = []
        
        for start in range(0,len(X),batch_size):

            #last batch may be shorter than batch_size
            stop = min(start+batch_size,len(X))

            embeds = np.take(self.embeddings,X[start:stop],0)
            feed_dict = {self.doc_inputs:embeds,
                         self.doc_lens:doc_lens[start:stop],
                         self.doc_idx:self._gen_doc_idx(doc_lens[start:stop]),
                         self.dropout:1.0}    #keep everything at inference time
            pred,pred_flat,score = self.sess.run([self.pred,self.pred_flat,self.seq_score],
                                   feed_dict=feed_dict)

            score = np.exp(score)
            y_pred.append(pred)
            y_pred_flat.extend(pred_flat)
            scores.extend(score)

            sys.stdout.write("processed %i of %i records        \r" \
                             % (stop,len(X)))
            sys.stdout.flush()

        print()
        if y_pred:
            y_pred = np.vstack(y_pred)
        else:
            #np.vstack raises on an empty list, so an empty X (e.g. an exhausted
            #holdout pool) gets an explicit zero-row result instead
            y_pred = np.zeros((0,)+tuple(np.shape(X)[1:]),dtype=np.int32)
        return y_pred,y_pred_flat,np.array(scores)

    def score(self,X,y,doc_lens,batch_size=128):
    
        '''
        return the entity-level exact F1 score for given input sequences
        
        parameters:
          - X: numpy array[int]
            2d numpy array (doc x word ids) of input data
          - y: numpy array[int]
            2d numpy array (doc x ner labels) of labels for given data
          - doc_lens: list[int]
            true sequence length of each sample
          - batch size: int (default: 128)
            batch size to use for inference
            
        outputs:
          - entity-level exact F1 score for given input sequences
        '''
        
        y_preds_,_,_ = self.predict(X,doc_lens,batch_size)

        #translate label ids to tag strings, dropping the padded tail of each row
        y_pred = []
        y_true = []
        for pred_row,true_row,n_tokens in zip(y_preds_,y,doc_lens):
            y_pred.append([self.idx2label[idx] for idx in pred_row[:n_tokens]])
            y_true.append([self.idx2label[idx] for idx in true_row[:n_tokens]])
        
        evaluator = Evaluator(y_true, y_pred, tags=['ENT'], loader="list")
        results, results_by_tag = evaluator.evaluate()
        f1 = results['exact']['f1']
        
        return f1

    def save(self,filename):
    
        '''
        save the model weights to a file
        
        parameters:
          - filename: string
            path to save model weights
        
        outputs:
            None
        '''
        
        self.saver.save(self.sess,filename)

    def load(self,filename):
    
        '''
        load model weights from a file
        
        parameters:
          - filename: string
            path from which to load model weights
        
        outputs:
            None
        '''
        
        self.saver.restore(self.sess,filename)


In [None]:
# corpora to run the experiment on; commented-out entries are skipped
datasets = [
    #'BC2GM',
    #'BC4CHEMD',
    #'NCBI-disease',
    's800',
]

# supervised dataset sizes to test on: each value is the number of labelled
# samples taken from the head of a corpus' training file
data_sizes = [2000]


In [None]:
# name of the corpus used for pretraining
# NOTE(review): not referenced by any of the cells visible in this part of the notebook - confirm it is still needed
pretrain_dataset = 'medmentions'

In [None]:
# minimum per-sequence confidence both agreeing models must reach before a
# holdout sample is moved into the training set
holdout_confidence = 0.98

# passed to every model's train() as batch_size / patience
batch_size_s = 128
patience_s = 15

In [None]:
# array handed to each model below as its embedding_matrix
# NOTE(review): the int16 cast happens before the models convert the matrix to float32,
# so any fractional values stored in vocab.npy would be truncated here - confirm the file's dtype
vocab = np.load('/content/drive/MyDrive/Pubmed/vocab.npy').astype(np.int16)

Asymmetric tri-training on the datasets listed above, for each of the specified labeled training-set sizes

In [None]:
for dataset in datasets:

    # iterate over each data size setting
    for data_size in data_sizes:

        print('Training in the dataset loop',dataset,data_size)
        
        # load data
        X = np.load('/content/drive/MyDrive/Pubmed/%s_X_train.npy' % dataset).astype(np.int16)
        y = np.load('/content/drive/MyDrive/Pubmed/%s_y_train.npy' % dataset).astype(np.int16)
        max_len = 50
        num_classes = np.max(y) + 1
        
        with open('/content/drive/MyDrive/Pubmed/%s_senlens_train.pkl' % dataset,'rb') as f:
            doc_len = pickle.load(f)
        doc_len = [l if l <=max_len else max_len for l in doc_len]
        
        # load conversion dictionaries
        label2idx = {'O':0, 'B':1, 'I': 2}
        idx2label = {0:'O',1:'B-ENT',2:'I-ENT'}
            
        # train val split
        start_size = data_size
        train_size = int(0.8 * data_size)
        X_start = X[:start_size]
        y_start = y[:start_size]
        doc_len_start = np.array(doc_len[:start_size])
        X_holdout = X[start_size:]
        y_holdout = y[start_size:]
        doc_len_holdout = np.array(doc_len[start_size:])
        
        X_train = X_start[:train_size]
        X_val = X_start[train_size:]
        y_train = y_start[:train_size]
        y_val = y_start[train_size:]
        doc_len_train = np.array(doc_len_start[:train_size])
        doc_len_val = np.array(doc_len_start[train_size:])
        
        # load test data
        X_test = np.load('/content/drive/MyDrive/Pubmed/%s_X_test.npy' % dataset).astype(np.int16)
        y_test = np.load('/content/drive/MyDrive/Pubmed/%s_y_test.npy' % dataset).astype(np.int16)
        with open('/content/drive/MyDrive/Pubmed/%s_senlens_test.pkl' % dataset,'rb') as f:
            doc_len_test = pickle.load(f)
        doc_len_test = [l if l <=max_len else max_len for l in doc_len_test]
        y_true = []
        for y_true_,l in zip(y_test,doc_len_test):
            y = [idx2label[l] for l in y_true_[:l]]
            y_true.append(y)
        y_true_val = []
        for y_true_,l in zip(y_val,doc_len_val):
            y = [idx2label[l] for l in y_true_[:l]]
            y_true_val.append(y)
        

        i = 0
        training_sizes = []
        filter_size = np.inf
        
        # loop through holdout set until no new samples are added
        while filter_size > 0:
                                
            num_samples = len(X_train)
            training_sizes.append(num_samples)
            print('train size:',num_samples)
            
            # train model
            tf.reset_default_graph()
            model1 = tt_bilstm_crf(vocab, num_classes, max_len)
            tf.reset_default_graph()
            model2 = tt_birnn_crf(vocab, num_classes, max_len)
            tf.reset_default_graph()
            model3 = tt_bigru_crf(vocab, num_classes, max_len)
            
            
            model1.train(X_train,y_train,doc_len_train,
                        batch_size=batch_size_s,patience=patience_s,
                        validation_data=(X_val,y_val,doc_len_val),
                        savebest=True,filepath='/content/drive/MyDrive/Pubmed/SavedModels/%s_tt_lstm_crf_%i_%i.ckpt' % (dataset,train_size,i))
            
            model2.train(X_train,y_train,doc_len_train,
                        batch_size=batch_size_s,patience=patience_s,
                        validation_data=(X_val,y_val,doc_len_val),
                        savebest=True,filepath='/content/drive/MyDrive/Pubmed/SavedModels/%s_tt_rnn_crf_%i_%i.ckpt' % (dataset,train_size,i))
            
            
            # add high conf from holdout to test
            model1.load('/content/drive/MyDrive/Pubmed/SavedModels/%s_tt_lstm_crf_%i_%i.ckpt' % (dataset,train_size,i))
            model2.load('/content/drive/MyDrive/Pubmed/SavedModels/%s_tt_rnn_crf_%i_%i.ckpt' % (dataset,train_size,i))
            
            # Procedure for adding samples to training set via M1-M2 combination
            y_pred_lstm,_,scores_lstm = model1.predict(X_holdout,doc_len_holdout)
            #print('y_pred_lstm: ', y_pred_lstm)
            #print('scores lstm: ', scores_lstm)
            
            y_pred_rnn,_,scores_rnn = model2.predict(X_holdout,doc_len_holdout)
            #print('y_pred_rnn: ', y_pred_rnn)
            #print('scores rnn: ', scores_rnn)

            print('Before M1-M2 iteration, doc_len_holdout array size:', doc_len_holdout.size)
            print('Before M1-M2 iteration, X_train array:', X_train.size)

            val1 = []
            # For adding samples to training set through predictions of M1 and M2 on set S
            for x in range(y_pred_lstm.shape[0]):
              if(np.array_equiv(y_pred_lstm[x], y_pred_rnn[x]) and (min(scores_lstm[x], scores_rnn[x])>=holdout_confidence)):
                val1.insert(x, True)
              else:
                val1.insert(x, False)   
            
            val_size1 = np.sum(val1)
            print('Val size 1: ', val_size1)
            print('For M1-M2 combination, iteration %i: adding %i samples from holdout to train' % (i+1, val_size1))
            X_train = np.vstack((X_train,X_holdout[val1]))
            print('Printing X_train after adding from holdout', X_train.shape)
            y_train = np.vstack((y_train,y_pred_lstm[val1]))
            print('Printing y_train after adding from holdout', y_train.shape)
            doc_len_train = np.hstack((doc_len_train,doc_len_holdout[val1]))
            print('Printing doc_len_train after adding from holdout', doc_len_train.shape)
            val_inv1 = np.invert(val1)
            invert_size1 = np.sum(val_inv1)
            print('Printing inverted val size: ', invert_size1)
            X_holdout = X_holdout[val_inv1]
            print('Printing X_holdout after removing from holdout', X_holdout.shape)
            doc_len_holdout = doc_len_holdout[val_inv1]
            print('Printing doc_len_holdout after removing from holdout', doc_len_holdout.shape)

            print('After M1-M2 iteration, doc_len_holdout array:', doc_len_holdout.size)
            print('After M1-M2 iteration, X_train array:', X_train.size)
            

            model3.train(X_train,y_train,doc_len_train,
                        batch_size=batch_size_s,patience=patience_s,
                        validation_data=(X_val,y_val,doc_len_val),
                        savebest=True,filepath='/content/drive/MyDrive/Pubmed/SavedModels/%s_tt_gru_crf_%i_%i.ckpt' % (dataset,train_size,i))
            model3.load('/content/drive/MyDrive/Pubmed/SavedModels/%s_tt_gru_crf_%i_%i.ckpt' % (dataset,train_size,i))
            
            # Procedure for adding samples to training set via M1-M3 combination
            y_pred_gru,_,scores_gru = model3.predict(X_holdout,doc_len_holdout)
            #print('y_pred_gru: ', y_pred_gru.shape)
            #print('scores gru: ', scores_gru.shape)

            y_pred_lstm,_,scores_lstm = model1.predict(X_holdout,doc_len_holdout)
            #print('y_pred_lstm: ', y_pred_lstm.shape)
            #print('scores_lstm: ', scores_lstm.shape)

            print('Before M1-M3 iteration, doc_len_holdout array:', doc_len_holdout.size)
            print('Before M1-M3 iteration, X_train array:', X_train.size)

            val2 = []
            for x in range(y_pred_gru.shape[0]):
              if(np.array_equiv(y_pred_lstm[x], y_pred_gru[x]) and (min(scores_lstm[x], scores_gru[x])>=holdout_confidence)):
                val2.insert(x, True)
              else:
                val2.insert(x, False)
                
            val_size2 = np.sum(val2)
            print('Val size 2: ', val_size2)
            print('iteration %i: adding %i samples from holdout to train' % (i+1, val_size2))
            X_train = np.vstack((X_train,X_holdout[val2]))
            print('Printing X_train after adding from holdout', X_train.shape)
            y_train = np.vstack((y_train,y_pred_gru[val2]))
            print('Printing y_train after adding from holdout', y_train.shape)
            doc_len_train = np.hstack((doc_len_train,doc_len_holdout[val2]))
            print('Printing doc_len_train after adding from holdout', doc_len_train.shape)
            val_inv2 = np.invert(val2)
            invert_size2 = np.sum(val_inv2)
            print('Printing inverted val size: ', invert_size2)
            X_holdout = X_holdout[val_inv2]
            print('Printing X_holdout after adding from holdout', X_holdout.shape)
            doc_len_holdout = doc_len_holdout[val_inv2]
            print('Printing doc_len_holdout after adding from holdout', doc_len_holdout.shape)    

            print('After M1-M3 iteration, doc_len_holdout array:', doc_len_holdout.size)
            print('After M1-M3 iteration, X_train array:', X_train.size)  
            
            # Procedure for adding samples to training set via M2-M3 combination
            y_pred_gru,_,scores_gru = model3.predict(X_holdout,doc_len_holdout)
            print('y_pred_gru: ', y_pred_gru.shape)
            print('scores gru: ', scores_gru.shape)

            y_pred_rnn,_,scores_rnn = model2.predict(X_holdout,doc_len_holdout)
            print('y_pred_rnn: ', y_pred_rnn.shape)
            print('scores_rnn: ', scores_rnn.shape)

            print('Before M2-M3 iteration, doc_len_holdout array:', doc_len_holdout.size)
            print('Before M2-M3 iteration, X_train array:', X_train.size)
            
            val3 = []
            for x in range(y_pred_gru.shape[0]):
              if(np.array_equiv(y_pred_rnn[x], y_pred_gru[x]) and (min(scores_rnn[x], scores_gru[x])>=holdout_confidence)):
                val3.insert(x, True)
              else:
                val3.insert(x, False)
            
            val_size3 = np.sum(val3)
            print('Val size-3: ', val_size3)
            print('iteration %i: adding %i samples from holdout to train' % (i+1, val_size3))
            X_train = np.vstack((X_train,X_holdout[val3]))
            print('Printing X_train after adding from holdout', X_train.shape)
            y_train = np.vstack((y_train,y_pred_gru[val3]))
            print('Printing y_train after adding from holdout', y_train.shape)
            doc_len_train = np.hstack((doc_len_train,doc_len_holdout[val3]))
            print('Printing doc_len_train after adding from holdout', doc_len_train.shape)
            val_inv3 = np.invert(val3)
            invert_size3 = np.sum(val_inv3)
            print('Printing inverted val size: ', invert_size3)
            X_holdout = X_holdout[val_inv3]
            print('Printing X_holdout after adding from holdout', X_holdout.shape)
            doc_len_holdout = doc_len_holdout[val_inv3]
            print('Printing doc_len_holdout after adding from holdout', doc_len_holdout.shape)
            
            print('After M2-M3 iteration, doc_len_holdout array:', doc_len_holdout.size)
            print('After M2-M3 iteration, X_train array:', X_train.size)

            filter_size = val_size1 + val_size2 + val_size3
            print('Filter size for this iteration: ', filter_size)

            # evaluate on test set for M1
            y_preds_l_,_,_ = model1.predict(X_test,doc_len_test)
            y_predl = []
            for y_pred1_,l in zip(y_preds_l_,doc_len_test):
                y = [idx2label[l] for l in y_pred1_[:l]]
                y_predl.append(y)
            
            evaluator1 = Evaluator(y_true, y_predl, tags=['ENT'], loader="list")
            results1, results_by_tag1 = evaluator1.evaluate()
            
            precision1 = results1['exact']['precision']
            recall1 = results1['exact']['recall']
            f1_1 = results1['exact']['f1']

            print('Test set evaluation for BiLSTM CRF iteration %i results on %i samples' % (i+1,num_samples))
            print('precision:',precision1)
            print('recall:',recall1)
            print('f1:',f1_1)
            print()

            # evaluate on test set for M2
            y_preds_r_,_,_ = model2.predict(X_test,doc_len_test)
            y_predr = []
            for y_pred2_,l in zip(y_preds_r_,doc_len_test):
                y = [idx2label[l] for l in y_pred2_[:l]]
                y_predr.append(y)
            
            evaluator2 = Evaluator(y_true, y_predr, tags=['ENT'], loader="list")
            results2, results_by_tag2 = evaluator2.evaluate()
            
            precision2 = results2['exact']['precision']
            recall2 = results2['exact']['recall']
            f1_2 = results2['exact']['f1']

            print('Test set evaluation for BiRNN CRF iteration %i results on %i samples' % (i+1,num_samples))
            print('precision:',precision2)
            print('recall:',recall2)
            print('f1:',f1_2)
            print()

            # Evaluate M3 (reported below as "BiGRU CRF") on the test set.
            # Same pipeline as for M1 and M2: predict, truncate, map to label
            # strings, score. Only the first value returned by predict() is used.
            y_preds_g_,_,_ = model3.predict(X_test,doc_len_test)
            y_predg = []
            for y_pred3_,l in zip(y_preds_g_,doc_len_test):
                # The slice bound `[:l]` uses the outer `l` (the entry from
                # doc_len_test); inside the comprehension `l` is rebound to each
                # predicted index, which is then looked up in idx2label.
                y = [idx2label[l] for l in y_pred3_[:l]]
                y_predg.append(y)
            
            # Score against y_true with nervaluate, restricted to the 'ENT' tag.
            evaluator3 = Evaluator(y_true, y_predg, tags=['ENT'], loader="list")
            results3, results_by_tag3 = evaluator3.evaluate()
            
            # Metrics stored under the 'exact' key of the overall results.
            precision3 = results3['exact']['precision']
            recall3 = results3['exact']['recall']
            f1_3 = results3['exact']['f1']

            # i+1 is the 1-based number of the iteration that just ran.
            print('Test set evaluation for BiGRU CRF iteration %i results on %i samples' % (i+1,num_samples))
            print('precision:',precision3)
            print('recall:',recall3)
            print('f1:',f1_3)
            print()
            
            # Advance the iteration counter before writing this pass's artefacts.
            i += 1

            # Persist the running record of training-set sizes and a snapshot of the
            # current train/validation arrays and their sequence lengths to Drive.
            # NOTE(review): this path has no iteration component, so as long as
            # `dataset` and `train_size` do not change between passes the same file
            # is rewritten each time with the latest `training_sizes`.
            # NOTE(review): this filename is keyed on `train_size` while every file
            # below is keyed on `data_size` -- confirm the difference is intended.
            with open('/content/drive/MyDrive/Pubmed/Asymmetric/%s_tritrain_datasize_%i.pkl' % (dataset,train_size),'wb') as f:
              pickle.dump(training_sizes,f)
            # NOTE(review): `i` was already incremented above, so the `i+1` used in
            # the names below is one higher than the iteration number printed in
            # the evaluation messages for this same pass -- confirm this numbering
            # is what downstream loaders expect.
            np.save("/content/drive/MyDrive/Pubmed/Asymmetric/%s_X_train_%i_it_%i.npy" % (dataset,data_size,i+1),X_train)
            np.save("/content/drive/MyDrive/Pubmed/Asymmetric/%s_y_train_%i_it_%i.npy" % (dataset,data_size,i+1),y_train)
            np.save("/content/drive/MyDrive/Pubmed/Asymmetric/%s_X_val_%i_it_%i.npy" % (dataset,data_size,i+1),X_val)
            np.save("/content/drive/MyDrive/Pubmed/Asymmetric/%s_y_val_%i_it_%i.npy" % (dataset,data_size,i+1),y_val)
            # Sequence lengths are pickled rather than saved with np.save.
            with open('/content/drive/MyDrive/Pubmed/Asymmetric/%s_senlens_%i_it_%i.pkl' % (dataset,data_size,i+1),'wb') as f:
              pickle.dump(doc_len_train,f)
            with open('/content/drive/MyDrive/Pubmed/Asymmetric/%s_senlens_val_%i_it_%i.pkl' % (dataset,data_size,i+1),'wb') as f:
              pickle.dump(doc_len_val,f)
        

Training in the dataset loop BC2GM 2000
train size: 1600
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.


Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.


Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API


Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API


Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API


Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API


Instructions for updating:
Please use `layer.add_weight` method instead.


Instructions for updating:
Please use `layer.add_weight` method instead.


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use keras.layers.Dense instead.


Instructions for updating:
Use keras.layers.Dense instead.


Instructions for updating:
Please use `layer.__call__` method instead.


Instructions for updating:
Please use `layer.__call__` method instead.


Instructions for updating:
This class is equivalent as tf.keras.layers.SimpleRNNCell, and will be replaced by that in Tensorflow 2.0.


Instructions for updating:
This class is equivalent as tf.keras.layers.SimpleRNNCell, and will be replaced by that in Tensorflow 2.0.


Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.


Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Entered BiLSTM-CRF. Training network on 1600 documents, validation on 400 documents

training time: 3.61
epoch 1 training f1: 0.0264

epoch 1 validation f1: 0.0298

training time: 1.95
epoch 2 training f1: 0.0255

epoch 2 validation f1: 0.0257

training time: 1.96
epoch 3 training f1: 0.0230

epoch 3 validation f1: 0.0246

training time: 1.98
epoch 4 training f1: 0.0137

epoch 4 validation f1: 0.0000

training time: 1.98
epoch 5 training f1: 0.0034

epoch 5 validation f1: 0.0000

training time: 2.05
epoch 6 training f1: 0.0000

epoch 6 validation f1: 0.0000

training time: 1.96
epoch 7 training f1: 0.0010

epoch 7 validation f1: 0.0000

training time: 1.96
epoch 8 training f1: 0.0020

epoch 8 validation f1: 0.0000

training time: 1.94
epoch 9 training f1: 0.0021

epoch 9 validation f1: 0.0041

training time: 1.96
epoch 10 training f1: 0.0031

epoch 10 validation f1: 0.0082

training time: 1.97
epoch 11 training f1: 0.0112

epoch 11 validation f1: 0.0123

training time: 2.00
epoch 12 tr

2022-04-28 06:05:28 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_0.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_0.ckpt


2022-04-28 06:05:28 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_0.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 10574
Before M1-M2 iteration, X_train array: 80000
Val size 1:  2510
For M1-M2 combination, iteration 1: adding 2510 samples from holdout to train
Printing X_train after adding from holdout (4110, 50)
Printing y_train after adding from holdout (4110, 50)
Printing doc_len_train after adding from holdout (4110,)
Printing inverted val size:  8064
Printing X_holdout after removing from holdout (8064, 50)
Printing doc_len_holdout after removing from holdout (8064,)
After M1-M2 iteration, doc_len_holdout array: 8064
After M1-M2 iteration, X_train array: 205500
Entered BiGRU-CRF. Training network on 4110 documents, validation on 400 documents

training time: 5.67
epoch 1 training f1: 0.0116

epoch 1 validation f1: 0.0283

training time: 5.02
epoch 2 training f1: 0.0122

epoch 2 validation f1: 0.0347

training time: 4.95
epoch 3 training f1: 0.0171

epoch 3 validation f1: 0.0063

training time: 5.10
epoch 4 training f1: 0.0198

epoch 4 vali

2022-04-28 06:26:17 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_0.ckpt




Before M1-M3 iteration, doc_len_holdout array: 8064
Before M1-M3 iteration, X_train array: 205500
Val size 2:  749
iteration 1: adding 749 samples from holdout to train
Printing X_train after adding from holdout (4859, 50)
Printing y_train after adding from holdout (4859, 50)
Printing doc_len_train after adding from holdout (4859,)
Printing inverted val size:  7315
Printing X_holdout after adding from holdout (7315, 50)
Printing doc_len_holdout after adding from holdout (7315,)
After M1-M3 iteration, doc_len_holdout array: 7315
After M1-M3 iteration, X_train array: 242950

y_pred_gru:  (7315, 50)
scores gru:  (7315,)

y_pred_rnn:  (7315, 50)
scores_rnn:  (7315,)
Before M2-M3 iteration, doc_len_holdout array: 7315
Before M2-M3 iteration, X_train array: 242950
Val size-3:  428
iteration 1: adding 428 samples from holdout to train
Printing X_train after adding from holdout (5287, 50)
Printing y_train after adding from holdout (5287, 50)
Printing doc_len_train after adding from holdout (

2022-04-28 06:45:39 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_1.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_1.ckpt


2022-04-28 06:45:39 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_1.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 6887
Before M1-M2 iteration, X_train array: 264350
Val size 1:  625
For M1-M2 combination, iteration 2: adding 625 samples from holdout to train
Printing X_train after adding from holdout (5912, 50)
Printing y_train after adding from holdout (5912, 50)
Printing doc_len_train after adding from holdout (5912,)
Printing inverted val size:  6262
Printing X_holdout after removing from holdout (6262, 50)
Printing doc_len_holdout after removing from holdout (6262,)
After M1-M2 iteration, doc_len_holdout array: 6262
After M1-M2 iteration, X_train array: 295600
Entered BiGRU-CRF. Training network on 5912 documents, validation on 400 documents

training time: 7.78
epoch 1 training f1: 0.0096

epoch 1 validation f1: 0.0308

training time: 7.04
epoch 2 training f1: 0.0121

epoch 2 validation f1: 0.0000

training time: 7.39
epoch 3 training f1: 0.0091

epoch 3 validation f1: 0.0069

training time: 7.01
epoch 4 training f1: 0.0166

epoch 4 valida

2022-04-28 07:04:38 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_1.ckpt




Before M1-M3 iteration, doc_len_holdout array: 6262
Before M1-M3 iteration, X_train array: 295600
Val size 2:  349
iteration 2: adding 349 samples from holdout to train
Printing X_train after adding from holdout (6261, 50)
Printing y_train after adding from holdout (6261, 50)
Printing doc_len_train after adding from holdout (6261,)
Printing inverted val size:  5913
Printing X_holdout after adding from holdout (5913, 50)
Printing doc_len_holdout after adding from holdout (5913,)
After M1-M3 iteration, doc_len_holdout array: 5913
After M1-M3 iteration, X_train array: 313050

y_pred_gru:  (5913, 50)
scores gru:  (5913,)

y_pred_rnn:  (5913, 50)
scores_rnn:  (5913,)
Before M2-M3 iteration, doc_len_holdout array: 5913
Before M2-M3 iteration, X_train array: 313050
Val size-3:  251
iteration 2: adding 251 samples from holdout to train
Printing X_train after adding from holdout (6512, 50)
Printing y_train after adding from holdout (6512, 50)
Printing doc_len_train after adding from holdout (

2022-04-28 07:26:58 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_2.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_2.ckpt


2022-04-28 07:26:58 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_2.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 5662
Before M1-M2 iteration, X_train array: 325600
Val size 1:  211
For M1-M2 combination, iteration 3: adding 211 samples from holdout to train
Printing X_train after adding from holdout (6723, 50)
Printing y_train after adding from holdout (6723, 50)
Printing doc_len_train after adding from holdout (6723,)
Printing inverted val size:  5451
Printing X_holdout after removing from holdout (5451, 50)
Printing doc_len_holdout after removing from holdout (5451,)
After M1-M2 iteration, doc_len_holdout array: 5451
After M1-M2 iteration, X_train array: 336150
Entered BiGRU-CRF. Training network on 6723 documents, validation on 400 documents

training time: 8.62
epoch 1 training f1: 0.0030

epoch 1 validation f1: 0.0178

training time: 8.14
epoch 2 training f1: 0.0023

epoch 2 validation f1: 0.0043

training time: 7.95
epoch 3 training f1: 0.0000

epoch 3 validation f1: 0.0043

training time: 8.16
epoch 4 training f1: 0.0068

epoch 4 valida

2022-04-28 07:39:52 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_2.ckpt




Before M1-M3 iteration, doc_len_holdout array: 5451
Before M1-M3 iteration, X_train array: 336150
Val size 2:  210
iteration 3: adding 210 samples from holdout to train
Printing X_train after adding from holdout (6933, 50)
Printing y_train after adding from holdout (6933, 50)
Printing doc_len_train after adding from holdout (6933,)
Printing inverted val size:  5241
Printing X_holdout after adding from holdout (5241, 50)
Printing doc_len_holdout after adding from holdout (5241,)
After M1-M3 iteration, doc_len_holdout array: 5241
After M1-M3 iteration, X_train array: 346650

y_pred_gru:  (5241, 50)
scores gru:  (5241,)

y_pred_rnn:  (5241, 50)
scores_rnn:  (5241,)
Before M2-M3 iteration, doc_len_holdout array: 5241
Before M2-M3 iteration, X_train array: 346650
Val size-3:  62
iteration 3: adding 62 samples from holdout to train
Printing X_train after adding from holdout (6995, 50)
Printing y_train after adding from holdout (6995, 50)
Printing doc_len_train after adding from holdout (69

2022-04-28 08:08:10 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_3.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_3.ckpt


2022-04-28 08:08:10 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_3.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 5179
Before M1-M2 iteration, X_train array: 349750
Val size 1:  133
For M1-M2 combination, iteration 4: adding 133 samples from holdout to train
Printing X_train after adding from holdout (7128, 50)
Printing y_train after adding from holdout (7128, 50)
Printing doc_len_train after adding from holdout (7128,)
Printing inverted val size:  5046
Printing X_holdout after removing from holdout (5046, 50)
Printing doc_len_holdout after removing from holdout (5046,)
After M1-M2 iteration, doc_len_holdout array: 5046
After M1-M2 iteration, X_train array: 356400
Entered BiGRU-CRF. Training network on 7128 documents, validation on 400 documents

training time: 9.17
epoch 1 training f1: 0.0089

epoch 1 validation f1: 0.0000

training time: 8.51
epoch 2 training f1: 0.0000

epoch 2 validation f1: 0.0000

training time: 8.82
epoch 3 training f1: 0.0000

epoch 3 validation f1: 0.0044

training time: 8.70
epoch 4 training f1: 0.0183

epoch 4 valida

2022-04-28 08:39:08 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_3.ckpt




Before M1-M3 iteration, doc_len_holdout array: 5046
Before M1-M3 iteration, X_train array: 356400
Val size 2:  317
iteration 4: adding 317 samples from holdout to train
Printing X_train after adding from holdout (7445, 50)
Printing y_train after adding from holdout (7445, 50)
Printing doc_len_train after adding from holdout (7445,)
Printing inverted val size:  4729
Printing X_holdout after adding from holdout (4729, 50)
Printing doc_len_holdout after adding from holdout (4729,)
After M1-M3 iteration, doc_len_holdout array: 4729
After M1-M3 iteration, X_train array: 372250

y_pred_gru:  (4729, 50)
scores gru:  (4729,)

y_pred_rnn:  (4729, 50)
scores_rnn:  (4729,)
Before M2-M3 iteration, doc_len_holdout array: 4729
Before M2-M3 iteration, X_train array: 372250
Val size-3:  84
iteration 4: adding 84 samples from holdout to train
Printing X_train after adding from holdout (7529, 50)
Printing y_train after adding from holdout (7529, 50)
Printing doc_len_train after adding from holdout (75

2022-04-28 09:36:26 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_4.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_4.ckpt


2022-04-28 09:36:26 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_4.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 4645
Before M1-M2 iteration, X_train array: 376450
Val size 1:  189
For M1-M2 combination, iteration 5: adding 189 samples from holdout to train
Printing X_train after adding from holdout (7718, 50)
Printing y_train after adding from holdout (7718, 50)
Printing doc_len_train after adding from holdout (7718,)
Printing inverted val size:  4456
Printing X_holdout after removing from holdout (4456, 50)
Printing doc_len_holdout after removing from holdout (4456,)
After M1-M2 iteration, doc_len_holdout array: 4456
After M1-M2 iteration, X_train array: 385900
Entered BiGRU-CRF. Training network on 7718 documents, validation on 400 documents

training time: 10.03
epoch 1 training f1: 0.0078

epoch 1 validation f1: 0.0030

training time: 9.41
epoch 2 training f1: 0.0021

epoch 2 validation f1: 0.0000

training time: 9.33
epoch 3 training f1: 0.0022

epoch 3 validation f1: 0.0000

training time: 9.34
epoch 4 training f1: 0.0025

epoch 4 valid

2022-04-28 10:01:03 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_4.ckpt




Before M1-M3 iteration, doc_len_holdout array: 4456
Before M1-M3 iteration, X_train array: 385900
Val size 2:  234
iteration 5: adding 234 samples from holdout to train
Printing X_train after adding from holdout (7952, 50)
Printing y_train after adding from holdout (7952, 50)
Printing doc_len_train after adding from holdout (7952,)
Printing inverted val size:  4222
Printing X_holdout after adding from holdout (4222, 50)
Printing doc_len_holdout after adding from holdout (4222,)
After M1-M3 iteration, doc_len_holdout array: 4222
After M1-M3 iteration, X_train array: 397600

y_pred_gru:  (4222, 50)
scores gru:  (4222,)

y_pred_rnn:  (4222, 50)
scores_rnn:  (4222,)
Before M2-M3 iteration, doc_len_holdout array: 4222
Before M2-M3 iteration, X_train array: 397600
Val size-3:  71
iteration 5: adding 71 samples from holdout to train
Printing X_train after adding from holdout (8023, 50)
Printing y_train after adding from holdout (8023, 50)
Printing doc_len_train after adding from holdout (80

2022-04-28 10:34:54 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_5.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_5.ckpt


2022-04-28 10:34:54 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_5.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 4151
Before M1-M2 iteration, X_train array: 401150
Val size 1:  113
For M1-M2 combination, iteration 6: adding 113 samples from holdout to train
Printing X_train after adding from holdout (8136, 50)
Printing y_train after adding from holdout (8136, 50)
Printing doc_len_train after adding from holdout (8136,)
Printing inverted val size:  4038
Printing X_holdout after removing from holdout (4038, 50)
Printing doc_len_holdout after removing from holdout (4038,)
After M1-M2 iteration, doc_len_holdout array: 4038
After M1-M2 iteration, X_train array: 406800
Entered BiGRU-CRF. Training network on 8136 documents, validation on 400 documents

training time: 10.46
epoch 1 training f1: 0.0042

epoch 1 validation f1: 0.0000

training time: 9.96
epoch 2 training f1: 0.0000

epoch 2 validation f1: 0.0000

training time: 10.00
epoch 3 training f1: 0.0009

epoch 3 validation f1: 0.0000

training time: 9.74
epoch 4 training f1: 0.0018

epoch 4 vali

2022-04-28 10:48:58 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_5.ckpt




Before M1-M3 iteration, doc_len_holdout array: 4038
Before M1-M3 iteration, X_train array: 406800
Val size 2:  81
iteration 6: adding 81 samples from holdout to train
Printing X_train after adding from holdout (8217, 50)
Printing y_train after adding from holdout (8217, 50)
Printing doc_len_train after adding from holdout (8217,)
Printing inverted val size:  3957
Printing X_holdout after adding from holdout (3957, 50)
Printing doc_len_holdout after adding from holdout (3957,)
After M1-M3 iteration, doc_len_holdout array: 3957
After M1-M3 iteration, X_train array: 410850

y_pred_gru:  (3957, 50)
scores gru:  (3957,)

y_pred_rnn:  (3957, 50)
scores_rnn:  (3957,)
Before M2-M3 iteration, doc_len_holdout array: 3957
Before M2-M3 iteration, X_train array: 410850
Val size-3:  26
iteration 6: adding 26 samples from holdout to train
Printing X_train after adding from holdout (8243, 50)
Printing y_train after adding from holdout (8243, 50)
Printing doc_len_train after adding from holdout (8243

2022-04-28 11:40:02 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_6.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_6.ckpt


2022-04-28 11:40:02 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_6.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 3931
Before M1-M2 iteration, X_train array: 412150
Val size 1:  33
For M1-M2 combination, iteration 7: adding 33 samples from holdout to train
Printing X_train after adding from holdout (8276, 50)
Printing y_train after adding from holdout (8276, 50)
Printing doc_len_train after adding from holdout (8276,)
Printing inverted val size:  3898
Printing X_holdout after removing from holdout (3898, 50)
Printing doc_len_holdout after removing from holdout (3898,)
After M1-M2 iteration, doc_len_holdout array: 3898
After M1-M2 iteration, X_train array: 413800
Entered BiGRU-CRF. Training network on 8276 documents, validation on 400 documents

training time: 10.67
epoch 1 training f1: 0.0040

epoch 1 validation f1: 0.0000

training time: 10.12
epoch 2 training f1: 0.0006

epoch 2 validation f1: 0.0000

training time: 10.08
epoch 3 training f1: 0.0000

epoch 3 validation f1: 0.0000

training time: 10.01
epoch 4 training f1: 0.0000

epoch 4 vali

2022-04-28 12:03:31 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_6.ckpt




Before M1-M3 iteration, doc_len_holdout array: 3898
Before M1-M3 iteration, X_train array: 413800
Val size 2:  243
iteration 7: adding 243 samples from holdout to train
Printing X_train after adding from holdout (8519, 50)
Printing y_train after adding from holdout (8519, 50)
Printing doc_len_train after adding from holdout (8519,)
Printing inverted val size:  3655
Printing X_holdout after adding from holdout (3655, 50)
Printing doc_len_holdout after adding from holdout (3655,)
After M1-M3 iteration, doc_len_holdout array: 3655
After M1-M3 iteration, X_train array: 425950

y_pred_gru:  (3655, 50)
scores gru:  (3655,)

y_pred_rnn:  (3655, 50)
scores_rnn:  (3655,)
Before M2-M3 iteration, doc_len_holdout array: 3655
Before M2-M3 iteration, X_train array: 425950
Val size-3:  18
iteration 7: adding 18 samples from holdout to train
Printing X_train after adding from holdout (8537, 50)
Printing y_train after adding from holdout (8537, 50)
Printing doc_len_train after adding from holdout (85

2022-04-28 12:47:10 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_7.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_7.ckpt


2022-04-28 12:47:10 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_7.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 3637
Before M1-M2 iteration, X_train array: 426850
Val size 1:  63
For M1-M2 combination, iteration 8: adding 63 samples from holdout to train
Printing X_train after adding from holdout (8600, 50)
Printing y_train after adding from holdout (8600, 50)
Printing doc_len_train after adding from holdout (8600,)
Printing inverted val size:  3574
Printing X_holdout after removing from holdout (3574, 50)
Printing doc_len_holdout after removing from holdout (3574,)
After M1-M2 iteration, doc_len_holdout array: 3574
After M1-M2 iteration, X_train array: 430000
Entered BiGRU-CRF. Training network on 8600 documents, validation on 400 documents

training time: 11.07
epoch 1 training f1: 0.0082

epoch 1 validation f1: 0.0000

training time: 10.37
epoch 2 training f1: 0.0000

epoch 2 validation f1: 0.0000

training time: 10.52
epoch 3 training f1: 0.0000

epoch 3 validation f1: 0.0000

training time: 10.60
epoch 4 training f1: 0.0127

epoch 4 vali

2022-04-28 13:08:25 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_7.ckpt




Before M1-M3 iteration, doc_len_holdout array: 3574
Before M1-M3 iteration, X_train array: 430000
Val size 2:  76
iteration 8: adding 76 samples from holdout to train
Printing X_train after adding from holdout (8676, 50)
Printing y_train after adding from holdout (8676, 50)
Printing doc_len_train after adding from holdout (8676,)
Printing inverted val size:  3498
Printing X_holdout after adding from holdout (3498, 50)
Printing doc_len_holdout after adding from holdout (3498,)
After M1-M3 iteration, doc_len_holdout array: 3498
After M1-M3 iteration, X_train array: 433800

y_pred_gru:  (3498, 50)
scores gru:  (3498,)

y_pred_rnn:  (3498, 50)
scores_rnn:  (3498,)
Before M2-M3 iteration, doc_len_holdout array: 3498
Before M2-M3 iteration, X_train array: 433800
Val size-3:  25
iteration 8: adding 25 samples from holdout to train
Printing X_train after adding from holdout (8701, 50)
Printing y_train after adding from holdout (8701, 50)
Printing doc_len_train after adding from holdout (8701

2022-04-28 13:43:12 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_8.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_8.ckpt


2022-04-28 13:43:13 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_8.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 3473
Before M1-M2 iteration, X_train array: 435050
Val size 1:  13
For M1-M2 combination, iteration 9: adding 13 samples from holdout to train
Printing X_train after adding from holdout (8714, 50)
Printing y_train after adding from holdout (8714, 50)
Printing doc_len_train after adding from holdout (8714,)
Printing inverted val size:  3460
Printing X_holdout after removing from holdout (3460, 50)
Printing doc_len_holdout after removing from holdout (3460,)
After M1-M2 iteration, doc_len_holdout array: 3460
After M1-M2 iteration, X_train array: 435700
Entered BiGRU-CRF. Training network on 8714 documents, validation on 400 documents

training time: 11.56
epoch 1 training f1: 0.0100

epoch 1 validation f1: 0.0000

training time: 10.77
epoch 2 training f1: 0.0000

epoch 2 validation f1: 0.0000

training time: 10.77
epoch 3 training f1: 0.0076

epoch 3 validation f1: 0.0084

training time: 10.63
epoch 4 training f1: 0.0150

epoch 4 vali

2022-04-28 14:06:28 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_8.ckpt




Before M1-M3 iteration, doc_len_holdout array: 3460
Before M1-M3 iteration, X_train array: 435700
Val size 2:  49
iteration 9: adding 49 samples from holdout to train
Printing X_train after adding from holdout (8763, 50)
Printing y_train after adding from holdout (8763, 50)
Printing doc_len_train after adding from holdout (8763,)
Printing inverted val size:  3411
Printing X_holdout after adding from holdout (3411, 50)
Printing doc_len_holdout after adding from holdout (3411,)
After M1-M3 iteration, doc_len_holdout array: 3411
After M1-M3 iteration, X_train array: 438150

y_pred_gru:  (3411, 50)
scores gru:  (3411,)

y_pred_rnn:  (3411, 50)
scores_rnn:  (3411,)
Before M2-M3 iteration, doc_len_holdout array: 3411
Before M2-M3 iteration, X_train array: 438150
Val size-3:  14
iteration 9: adding 14 samples from holdout to train
Printing X_train after adding from holdout (8777, 50)
Printing y_train after adding from holdout (8777, 50)
Printing doc_len_train after adding from holdout (8777

2022-04-28 14:43:08 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_9.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_9.ckpt


2022-04-28 14:43:08 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_9.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 3397
Before M1-M2 iteration, X_train array: 438850
Val size 1:  24
For M1-M2 combination, iteration 10: adding 24 samples from holdout to train
Printing X_train after adding from holdout (8801, 50)
Printing y_train after adding from holdout (8801, 50)
Printing doc_len_train after adding from holdout (8801,)
Printing inverted val size:  3373
Printing X_holdout after removing from holdout (3373, 50)
Printing doc_len_holdout after removing from holdout (3373,)
After M1-M2 iteration, doc_len_holdout array: 3373
After M1-M2 iteration, X_train array: 440050
Entered BiGRU-CRF. Training network on 8801 documents, validation on 400 documents

training time: 11.51
epoch 1 training f1: 0.0076

epoch 1 validation f1: 0.0000

training time: 10.80
epoch 2 training f1: 0.0000

epoch 2 validation f1: 0.0000

training time: 10.72
epoch 3 training f1: 0.0015

epoch 3 validation f1: 0.0042

training time: 10.69
epoch 4 training f1: 0.0068

epoch 4 val

2022-04-28 15:04:17 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_9.ckpt




Before M1-M3 iteration, doc_len_holdout array: 3373
Before M1-M3 iteration, X_train array: 440050
Val size 2:  36
iteration 10: adding 36 samples from holdout to train
Printing X_train after adding from holdout (8837, 50)
Printing y_train after adding from holdout (8837, 50)
Printing doc_len_train after adding from holdout (8837,)
Printing inverted val size:  3337
Printing X_holdout after adding from holdout (3337, 50)
Printing doc_len_holdout after adding from holdout (3337,)
After M1-M3 iteration, doc_len_holdout array: 3337
After M1-M3 iteration, X_train array: 441850

y_pred_gru:  (3337, 50)
scores gru:  (3337,)

y_pred_rnn:  (3337, 50)
scores_rnn:  (3337,)
Before M2-M3 iteration, doc_len_holdout array: 3337
Before M2-M3 iteration, X_train array: 441850
Val size-3:  3
iteration 10: adding 3 samples from holdout to train
Printing X_train after adding from holdout (8840, 50)
Printing y_train after adding from holdout (8840, 50)
Printing doc_len_train after adding from holdout (8840

2022-04-28 15:46:58 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_10.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_10.ckpt


2022-04-28 15:46:58 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_10.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 3334
Before M1-M2 iteration, X_train array: 442000
Val size 1:  26
For M1-M2 combination, iteration 11: adding 26 samples from holdout to train
Printing X_train after adding from holdout (8866, 50)
Printing y_train after adding from holdout (8866, 50)
Printing doc_len_train after adding from holdout (8866,)
Printing inverted val size:  3308
Printing X_holdout after removing from holdout (3308, 50)
Printing doc_len_holdout after removing from holdout (3308,)
After M1-M2 iteration, doc_len_holdout array: 3308
After M1-M2 iteration, X_train array: 443300
Entered BiGRU-CRF. Training network on 8866 documents, validation on 400 documents

training time: 11.75
epoch 1 training f1: 0.0064

epoch 1 validation f1: 0.0000

training time: 10.91
epoch 2 training f1: 0.0000

epoch 2 validation f1: 0.0000

training time: 10.96
epoch 3 training f1: 0.0008

epoch 3 validation f1: 0.0044

training time: 11.12
epoch 4 training f1: 0.0033

epoch 4 val

2022-04-28 16:11:38 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_10.ckpt




Before M1-M3 iteration, doc_len_holdout array: 3308
Before M1-M3 iteration, X_train array: 443300
Val size 2:  52
iteration 11: adding 52 samples from holdout to train
Printing X_train after adding from holdout (8918, 50)
Printing y_train after adding from holdout (8918, 50)
Printing doc_len_train after adding from holdout (8918,)
Printing inverted val size:  3256
Printing X_holdout after adding from holdout (3256, 50)
Printing doc_len_holdout after adding from holdout (3256,)
After M1-M3 iteration, doc_len_holdout array: 3256
After M1-M3 iteration, X_train array: 445900

y_pred_gru:  (3256, 50)
scores gru:  (3256,)

y_pred_rnn:  (3256, 50)
scores_rnn:  (3256,)
Before M2-M3 iteration, doc_len_holdout array: 3256
Before M2-M3 iteration, X_train array: 445900
Val size-3:  5
iteration 11: adding 5 samples from holdout to train
Printing X_train after adding from holdout (8923, 50)
Printing y_train after adding from holdout (8923, 50)
Printing doc_len_train after adding from holdout (8923

2022-04-28 16:34:39 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_11.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_11.ckpt


2022-04-28 16:34:40 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_11.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 3251
Before M1-M2 iteration, X_train array: 446150
Val size 1:  5
For M1-M2 combination, iteration 12: adding 5 samples from holdout to train
Printing X_train after adding from holdout (8928, 50)
Printing y_train after adding from holdout (8928, 50)
Printing doc_len_train after adding from holdout (8928,)
Printing inverted val size:  3246
Printing X_holdout after removing from holdout (3246, 50)
Printing doc_len_holdout after removing from holdout (3246,)
After M1-M2 iteration, doc_len_holdout array: 3246
After M1-M2 iteration, X_train array: 446400
Entered BiGRU-CRF. Training network on 8928 documents, validation on 400 documents

training time: 11.81
epoch 1 training f1: 0.0076

epoch 1 validation f1: 0.0145

training time: 11.00
epoch 2 training f1: 0.0035

epoch 2 validation f1: 0.0000

training time: 10.94
epoch 3 training f1: 0.0012

epoch 3 validation f1: 0.0000

training time: 10.93
epoch 4 training f1: 0.0067

epoch 4 valid

2022-04-28 16:57:43 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_11.ckpt




Before M1-M3 iteration, doc_len_holdout array: 3246
Before M1-M3 iteration, X_train array: 446400
Val size 2:  0
iteration 12: adding 0 samples from holdout to train
Printing X_train after adding from holdout (8928, 50)
Printing y_train after adding from holdout (8928, 50)
Printing doc_len_train after adding from holdout (8928,)
Printing inverted val size:  3246
Printing X_holdout after adding from holdout (3246, 50)
Printing doc_len_holdout after adding from holdout (3246,)
After M1-M3 iteration, doc_len_holdout array: 3246
After M1-M3 iteration, X_train array: 446400

y_pred_gru:  (3246, 50)
scores gru:  (3246,)

y_pred_rnn:  (3246, 50)
scores_rnn:  (3246,)
Before M2-M3 iteration, doc_len_holdout array: 3246
Before M2-M3 iteration, X_train array: 446400
Val size-3:  7
iteration 12: adding 7 samples from holdout to train
Printing X_train after adding from holdout (8935, 50)
Printing y_train after adding from holdout (8935, 50)
Printing doc_len_train after adding from holdout (8935,)

2022-04-28 17:37:49 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_12.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_12.ckpt


2022-04-28 17:37:49 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_12.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 3239
Before M1-M2 iteration, X_train array: 446750
Val size 1:  8
For M1-M2 combination, iteration 13: adding 8 samples from holdout to train
Printing X_train after adding from holdout (8943, 50)
Printing y_train after adding from holdout (8943, 50)
Printing doc_len_train after adding from holdout (8943,)
Printing inverted val size:  3231
Printing X_holdout after removing from holdout (3231, 50)
Printing doc_len_holdout after removing from holdout (3231,)
After M1-M2 iteration, doc_len_holdout array: 3231
After M1-M2 iteration, X_train array: 447150
Entered BiGRU-CRF. Training network on 8943 documents, validation on 400 documents

training time: 11.67
epoch 1 training f1: 0.0000

epoch 1 validation f1: 0.0000

training time: 10.81
epoch 2 training f1: 0.0000

epoch 2 validation f1: 0.0000

training time: 10.94
epoch 3 training f1: 0.0017

epoch 3 validation f1: 0.0042

training time: 11.00
epoch 4 training f1: 0.0055

epoch 4 valid

2022-04-28 17:56:20 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_12.ckpt




Before M1-M3 iteration, doc_len_holdout array: 3231
Before M1-M3 iteration, X_train array: 447150
Val size 2:  29
iteration 13: adding 29 samples from holdout to train
Printing X_train after adding from holdout (8972, 50)
Printing y_train after adding from holdout (8972, 50)
Printing doc_len_train after adding from holdout (8972,)
Printing inverted val size:  3202
Printing X_holdout after adding from holdout (3202, 50)
Printing doc_len_holdout after adding from holdout (3202,)
After M1-M3 iteration, doc_len_holdout array: 3202
After M1-M3 iteration, X_train array: 448600

y_pred_gru:  (3202, 50)
scores gru:  (3202,)

y_pred_rnn:  (3202, 50)
scores_rnn:  (3202,)
Before M2-M3 iteration, doc_len_holdout array: 3202
Before M2-M3 iteration, X_train array: 448600
Val size-3:  7
iteration 13: adding 7 samples from holdout to train
Printing X_train after adding from holdout (8979, 50)
Printing y_train after adding from holdout (8979, 50)
Printing doc_len_train after adding from holdout (8979

2022-04-28 18:23:44 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_13.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_13.ckpt


2022-04-28 18:23:44 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_13.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 3195
Before M1-M2 iteration, X_train array: 448950
Val size 1:  15
For M1-M2 combination, iteration 14: adding 15 samples from holdout to train
Printing X_train after adding from holdout (8994, 50)
Printing y_train after adding from holdout (8994, 50)
Printing doc_len_train after adding from holdout (8994,)
Printing inverted val size:  3180
Printing X_holdout after removing from holdout (3180, 50)
Printing doc_len_holdout after removing from holdout (3180,)
After M1-M2 iteration, doc_len_holdout array: 3180
After M1-M2 iteration, X_train array: 449700
Entered BiGRU-CRF. Training network on 8994 documents, validation on 400 documents

training time: 12.05
epoch 1 training f1: 0.0093

epoch 1 validation f1: 0.0000

training time: 11.16
epoch 2 training f1: 0.0000

epoch 2 validation f1: 0.0000

training time: 11.21
epoch 3 training f1: 0.0016

epoch 3 validation f1: 0.0044

training time: 11.26
epoch 4 training f1: 0.0087

epoch 4 val

2022-04-28 18:51:47 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_13.ckpt




Before M1-M3 iteration, doc_len_holdout array: 3180
Before M1-M3 iteration, X_train array: 449700
Val size 2:  30
iteration 14: adding 30 samples from holdout to train
Printing X_train after adding from holdout (9024, 50)
Printing y_train after adding from holdout (9024, 50)
Printing doc_len_train after adding from holdout (9024,)
Printing inverted val size:  3150
Printing X_holdout after adding from holdout (3150, 50)
Printing doc_len_holdout after adding from holdout (3150,)
After M1-M3 iteration, doc_len_holdout array: 3150
After M1-M3 iteration, X_train array: 451200

y_pred_gru:  (3150, 50)
scores gru:  (3150,)

y_pred_rnn:  (3150, 50)
scores_rnn:  (3150,)
Before M2-M3 iteration, doc_len_holdout array: 3150
Before M2-M3 iteration, X_train array: 451200
Val size-3:  7
iteration 14: adding 7 samples from holdout to train
Printing X_train after adding from holdout (9031, 50)
Printing y_train after adding from holdout (9031, 50)
Printing doc_len_train after adding from holdout (9031

2022-04-28 19:29:16 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_14.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_14.ckpt


2022-04-28 19:29:16 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_14.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 3143
Before M1-M2 iteration, X_train array: 451550
Val size 1:  2
For M1-M2 combination, iteration 15: adding 2 samples from holdout to train
Printing X_train after adding from holdout (9033, 50)
Printing y_train after adding from holdout (9033, 50)
Printing doc_len_train after adding from holdout (9033,)
Printing inverted val size:  3141
Printing X_holdout after removing from holdout (3141, 50)
Printing doc_len_holdout after removing from holdout (3141,)
After M1-M2 iteration, doc_len_holdout array: 3141
After M1-M2 iteration, X_train array: 451650
Entered BiGRU-CRF. Training network on 9033 documents, validation on 400 documents

training time: 11.82
epoch 1 training f1: 0.0072

epoch 1 validation f1: 0.0035

training time: 11.21
epoch 2 training f1: 0.0000

epoch 2 validation f1: 0.0000

training time: 11.13
epoch 3 training f1: 0.0006

epoch 3 validation f1: 0.0000

training time: 11.22
epoch 4 training f1: 0.0063

epoch 4 valid

2022-04-28 19:54:35 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_14.ckpt




Before M1-M3 iteration, doc_len_holdout array: 3141
Before M1-M3 iteration, X_train array: 451650
Val size 2:  33
iteration 15: adding 33 samples from holdout to train
Printing X_train after adding from holdout (9066, 50)
Printing y_train after adding from holdout (9066, 50)
Printing doc_len_train after adding from holdout (9066,)
Printing inverted val size:  3108
Printing X_holdout after adding from holdout (3108, 50)
Printing doc_len_holdout after adding from holdout (3108,)
After M1-M3 iteration, doc_len_holdout array: 3108
After M1-M3 iteration, X_train array: 453300

y_pred_gru:  (3108, 50)
scores gru:  (3108,)

y_pred_rnn:  (3108, 50)
scores_rnn:  (3108,)
Before M2-M3 iteration, doc_len_holdout array: 3108
Before M2-M3 iteration, X_train array: 453300
Val size-3:  2
iteration 15: adding 2 samples from holdout to train
Printing X_train after adding from holdout (9068, 50)
Printing y_train after adding from holdout (9068, 50)
Printing doc_len_train after adding from holdout (9068

2022-04-28 20:23:30 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_15.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_15.ckpt


2022-04-28 20:23:31 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_15.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 3106
Before M1-M2 iteration, X_train array: 453400
Val size 1:  9
For M1-M2 combination, iteration 16: adding 9 samples from holdout to train
Printing X_train after adding from holdout (9077, 50)
Printing y_train after adding from holdout (9077, 50)
Printing doc_len_train after adding from holdout (9077,)
Printing inverted val size:  3097
Printing X_holdout after removing from holdout (3097, 50)
Printing doc_len_holdout after removing from holdout (3097,)
After M1-M2 iteration, doc_len_holdout array: 3097
After M1-M2 iteration, X_train array: 453850
Entered BiGRU-CRF. Training network on 9077 documents, validation on 400 documents

training time: 11.92
epoch 1 training f1: 0.0006

epoch 1 validation f1: 0.0000

training time: 11.08
epoch 2 training f1: 0.0000

epoch 2 validation f1: 0.0000

training time: 11.27
epoch 3 training f1: 0.0017

epoch 3 validation f1: 0.0044

training time: 11.19
epoch 4 training f1: 0.0047

epoch 4 valid

2022-04-28 20:55:59 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_15.ckpt




Before M1-M3 iteration, doc_len_holdout array: 3097
Before M1-M3 iteration, X_train array: 453850
Val size 2:  15
iteration 16: adding 15 samples from holdout to train
Printing X_train after adding from holdout (9092, 50)
Printing y_train after adding from holdout (9092, 50)
Printing doc_len_train after adding from holdout (9092,)
Printing inverted val size:  3082
Printing X_holdout after adding from holdout (3082, 50)
Printing doc_len_holdout after adding from holdout (3082,)
After M1-M3 iteration, doc_len_holdout array: 3082
After M1-M3 iteration, X_train array: 454600

y_pred_gru:  (3082, 50)
scores gru:  (3082,)

y_pred_rnn:  (3082, 50)
scores_rnn:  (3082,)
Before M2-M3 iteration, doc_len_holdout array: 3082
Before M2-M3 iteration, X_train array: 454600
Val size-3:  28
iteration 16: adding 28 samples from holdout to train
Printing X_train after adding from holdout (9120, 50)
Printing y_train after adding from holdout (9120, 50)
Printing doc_len_train after adding from holdout (91

2022-04-28 21:30:14 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_16.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_16.ckpt


2022-04-28 21:30:14 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_16.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 3054
Before M1-M2 iteration, X_train array: 456000
Val size 1:  2
For M1-M2 combination, iteration 17: adding 2 samples from holdout to train
Printing X_train after adding from holdout (9122, 50)
Printing y_train after adding from holdout (9122, 50)
Printing doc_len_train after adding from holdout (9122,)
Printing inverted val size:  3052
Printing X_holdout after removing from holdout (3052, 50)
Printing doc_len_holdout after removing from holdout (3052,)
After M1-M2 iteration, doc_len_holdout array: 3052
After M1-M2 iteration, X_train array: 456100
Entered BiGRU-CRF. Training network on 9122 documents, validation on 400 documents

training time: 12.10
epoch 1 training f1: 0.0071

epoch 1 validation f1: 0.0181

training time: 11.42
epoch 2 training f1: 0.0038

epoch 2 validation f1: 0.0000

training time: 11.25
epoch 3 training f1: 0.0040

epoch 3 validation f1: 0.0000

training time: 11.62
epoch 4 training f1: 0.0080

epoch 4 valid

2022-04-28 21:54:01 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_16.ckpt




Before M1-M3 iteration, doc_len_holdout array: 3052
Before M1-M3 iteration, X_train array: 456100
Val size 2:  23
iteration 17: adding 23 samples from holdout to train
Printing X_train after adding from holdout (9145, 50)
Printing y_train after adding from holdout (9145, 50)
Printing doc_len_train after adding from holdout (9145,)
Printing inverted val size:  3029
Printing X_holdout after adding from holdout (3029, 50)
Printing doc_len_holdout after adding from holdout (3029,)
After M1-M3 iteration, doc_len_holdout array: 3029
After M1-M3 iteration, X_train array: 457250

y_pred_gru:  (3029, 50)
scores gru:  (3029,)

y_pred_rnn:  (3029, 50)
scores_rnn:  (3029,)
Before M2-M3 iteration, doc_len_holdout array: 3029
Before M2-M3 iteration, X_train array: 457250
Val size-3:  1
iteration 17: adding 1 samples from holdout to train
Printing X_train after adding from holdout (9146, 50)
Printing y_train after adding from holdout (9146, 50)
Printing doc_len_train after adding from holdout (9146

2022-04-28 22:23:15 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_17.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_17.ckpt


2022-04-28 22:23:16 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_17.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 3028
Before M1-M2 iteration, X_train array: 457300
Val size 1:  1
For M1-M2 combination, iteration 18: adding 1 samples from holdout to train
Printing X_train after adding from holdout (9147, 50)
Printing y_train after adding from holdout (9147, 50)
Printing doc_len_train after adding from holdout (9147,)
Printing inverted val size:  3027
Printing X_holdout after removing from holdout (3027, 50)
Printing doc_len_holdout after removing from holdout (3027,)
After M1-M2 iteration, doc_len_holdout array: 3027
After M1-M2 iteration, X_train array: 457350
Entered BiGRU-CRF. Training network on 9147 documents, validation on 400 documents

training time: 11.99
epoch 1 training f1: 0.0021

epoch 1 validation f1: 0.0000

training time: 11.47
epoch 2 training f1: 0.0000

epoch 2 validation f1: 0.0000

training time: 11.60
epoch 3 training f1: 0.0008

epoch 3 validation f1: 0.0044

training time: 11.62
epoch 4 training f1: 0.0049

epoch 4 valid

2022-04-28 22:41:36 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_17.ckpt




Before M1-M3 iteration, doc_len_holdout array: 3027
Before M1-M3 iteration, X_train array: 457350
Val size 2:  14
iteration 18: adding 14 samples from holdout to train
Printing X_train after adding from holdout (9161, 50)
Printing y_train after adding from holdout (9161, 50)
Printing doc_len_train after adding from holdout (9161,)
Printing inverted val size:  3013
Printing X_holdout after adding from holdout (3013, 50)
Printing doc_len_holdout after adding from holdout (3013,)
After M1-M3 iteration, doc_len_holdout array: 3013
After M1-M3 iteration, X_train array: 458050

y_pred_gru:  (3013, 50)
scores gru:  (3013,)

y_pred_rnn:  (3013, 50)
scores_rnn:  (3013,)
Before M2-M3 iteration, doc_len_holdout array: 3013
Before M2-M3 iteration, X_train array: 458050
Val size-3:  3
iteration 18: adding 3 samples from holdout to train
Printing X_train after adding from holdout (9164, 50)
Printing y_train after adding from holdout (9164, 50)
Printing doc_len_train after adding from holdout (9164

2022-04-28 23:22:49 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_18.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_18.ckpt


2022-04-28 23:22:49 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_18.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 3010
Before M1-M2 iteration, X_train array: 458200
Val size 1:  9
For M1-M2 combination, iteration 19: adding 9 samples from holdout to train
Printing X_train after adding from holdout (9173, 50)
Printing y_train after adding from holdout (9173, 50)
Printing doc_len_train after adding from holdout (9173,)
Printing inverted val size:  3001
Printing X_holdout after removing from holdout (3001, 50)
Printing doc_len_holdout after removing from holdout (3001,)
After M1-M2 iteration, doc_len_holdout array: 3001
After M1-M2 iteration, X_train array: 458650
Entered BiGRU-CRF. Training network on 9173 documents, validation on 400 documents

training time: 12.08
epoch 1 training f1: 0.0100

epoch 1 validation f1: 0.0000

training time: 11.75
epoch 2 training f1: 0.0000

epoch 2 validation f1: 0.0000

training time: 11.38
epoch 3 training f1: 0.0057

epoch 3 validation f1: 0.0044

training time: 11.41
epoch 4 training f1: 0.0161

epoch 4 valid

2022-04-29 00:00:23 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_18.ckpt




Before M1-M3 iteration, doc_len_holdout array: 3001
Before M1-M3 iteration, X_train array: 458650
Val size 2:  93
iteration 19: adding 93 samples from holdout to train
Printing X_train after adding from holdout (9266, 50)
Printing y_train after adding from holdout (9266, 50)
Printing doc_len_train after adding from holdout (9266,)
Printing inverted val size:  2908
Printing X_holdout after adding from holdout (2908, 50)
Printing doc_len_holdout after adding from holdout (2908,)
After M1-M3 iteration, doc_len_holdout array: 2908
After M1-M3 iteration, X_train array: 463300

y_pred_gru:  (2908, 50)
scores gru:  (2908,)

y_pred_rnn:  (2908, 50)
scores_rnn:  (2908,)
Before M2-M3 iteration, doc_len_holdout array: 2908
Before M2-M3 iteration, X_train array: 463300
Val size-3:  1
iteration 19: adding 1 samples from holdout to train
Printing X_train after adding from holdout (9267, 50)
Printing y_train after adding from holdout (9267, 50)
Printing doc_len_train after adding from holdout (9267

2022-04-29 00:50:06 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_19.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_19.ckpt


2022-04-29 00:50:06 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_19.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 2907
Before M1-M2 iteration, X_train array: 463350
Val size 1:  8
For M1-M2 combination, iteration 20: adding 8 samples from holdout to train
Printing X_train after adding from holdout (9275, 50)
Printing y_train after adding from holdout (9275, 50)
Printing doc_len_train after adding from holdout (9275,)
Printing inverted val size:  2899
Printing X_holdout after removing from holdout (2899, 50)
Printing doc_len_holdout after removing from holdout (2899,)
After M1-M2 iteration, doc_len_holdout array: 2899
After M1-M2 iteration, X_train array: 463750
Entered BiGRU-CRF. Training network on 9275 documents, validation on 400 documents

training time: 12.20
epoch 1 training f1: 0.0041

epoch 1 validation f1: 0.0000

training time: 11.66
epoch 2 training f1: 0.0000

epoch 2 validation f1: 0.0000

training time: 11.79
epoch 3 training f1: 0.0000

epoch 3 validation f1: 0.0044

training time: 11.77
epoch 4 training f1: 0.0016

epoch 4 valid

2022-04-29 01:18:39 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_19.ckpt




Before M1-M3 iteration, doc_len_holdout array: 2899
Before M1-M3 iteration, X_train array: 463750
Val size 2:  36
iteration 20: adding 36 samples from holdout to train
Printing X_train after adding from holdout (9311, 50)
Printing y_train after adding from holdout (9311, 50)
Printing doc_len_train after adding from holdout (9311,)
Printing inverted val size:  2863
Printing X_holdout after adding from holdout (2863, 50)
Printing doc_len_holdout after adding from holdout (2863,)
After M1-M3 iteration, doc_len_holdout array: 2863
After M1-M3 iteration, X_train array: 465550

y_pred_gru:  (2863, 50)
scores gru:  (2863,)

y_pred_rnn:  (2863, 50)
scores_rnn:  (2863,)
Before M2-M3 iteration, doc_len_holdout array: 2863
Before M2-M3 iteration, X_train array: 465550
Val size-3:  8
iteration 20: adding 8 samples from holdout to train
Printing X_train after adding from holdout (9319, 50)
Printing y_train after adding from holdout (9319, 50)
Printing doc_len_train after adding from holdout (9319

2022-04-29 01:59:15 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_20.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_20.ckpt


2022-04-29 01:59:15 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_20.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 2855
Before M1-M2 iteration, X_train array: 465950
Val size 1:  11
For M1-M2 combination, iteration 21: adding 11 samples from holdout to train
Printing X_train after adding from holdout (9330, 50)
Printing y_train after adding from holdout (9330, 50)
Printing doc_len_train after adding from holdout (9330,)
Printing inverted val size:  2844
Printing X_holdout after removing from holdout (2844, 50)
Printing doc_len_holdout after removing from holdout (2844,)
After M1-M2 iteration, doc_len_holdout array: 2844
After M1-M2 iteration, X_train array: 466500
Entered BiGRU-CRF. Training network on 9330 documents, validation on 400 documents

training time: 14.41
epoch 1 training f1: 0.0000

epoch 1 validation f1: 0.0000

training time: 13.75
epoch 2 training f1: 0.0000

epoch 2 validation f1: 0.0000

training time: 12.49
epoch 3 training f1: 0.0046

epoch 3 validation f1: 0.0084

training time: 12.30
epoch 4 training f1: 0.0098

epoch 4 val

2022-04-29 02:20:21 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_20.ckpt




Before M1-M3 iteration, doc_len_holdout array: 2844
Before M1-M3 iteration, X_train array: 466500
Val size 2:  17
iteration 21: adding 17 samples from holdout to train
Printing X_train after adding from holdout (9347, 50)
Printing y_train after adding from holdout (9347, 50)
Printing doc_len_train after adding from holdout (9347,)
Printing inverted val size:  2827
Printing X_holdout after adding from holdout (2827, 50)
Printing doc_len_holdout after adding from holdout (2827,)
After M1-M3 iteration, doc_len_holdout array: 2827
After M1-M3 iteration, X_train array: 467350

y_pred_gru:  (2827, 50)
scores gru:  (2827,)

y_pred_rnn:  (2827, 50)
scores_rnn:  (2827,)
Before M2-M3 iteration, doc_len_holdout array: 2827
Before M2-M3 iteration, X_train array: 467350
Val size-3:  7
iteration 21: adding 7 samples from holdout to train
Printing X_train after adding from holdout (9354, 50)
Printing y_train after adding from holdout (9354, 50)
Printing doc_len_train after adding from holdout (9354

2022-04-29 03:04:01 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_21.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_21.ckpt


2022-04-29 03:04:01 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_21.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 2820
Before M1-M2 iteration, X_train array: 467700
Val size 1:  11
For M1-M2 combination, iteration 22: adding 11 samples from holdout to train
Printing X_train after adding from holdout (9365, 50)
Printing y_train after adding from holdout (9365, 50)
Printing doc_len_train after adding from holdout (9365,)
Printing inverted val size:  2809
Printing X_holdout after removing from holdout (2809, 50)
Printing doc_len_holdout after removing from holdout (2809,)
After M1-M2 iteration, doc_len_holdout array: 2809
After M1-M2 iteration, X_train array: 468250
Entered BiGRU-CRF. Training network on 9365 documents, validation on 400 documents

training time: 12.33
epoch 1 training f1: 0.0090

epoch 1 validation f1: 0.0000

training time: 12.00
epoch 2 training f1: 0.0000

epoch 2 validation f1: 0.0000

training time: 12.01
epoch 3 training f1: 0.0023

epoch 3 validation f1: 0.0086

training time: 12.05
epoch 4 training f1: 0.0112

epoch 4 val

2022-04-29 03:27:48 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_21.ckpt




Before M1-M3 iteration, doc_len_holdout array: 2809
Before M1-M3 iteration, X_train array: 468250
Val size 2:  9
iteration 22: adding 9 samples from holdout to train
Printing X_train after adding from holdout (9374, 50)
Printing y_train after adding from holdout (9374, 50)
Printing doc_len_train after adding from holdout (9374,)
Printing inverted val size:  2800
Printing X_holdout after adding from holdout (2800, 50)
Printing doc_len_holdout after adding from holdout (2800,)
After M1-M3 iteration, doc_len_holdout array: 2800
After M1-M3 iteration, X_train array: 468700

y_pred_gru:  (2800, 50)
scores gru:  (2800,)

y_pred_rnn:  (2800, 50)
scores_rnn:  (2800,)
Before M2-M3 iteration, doc_len_holdout array: 2800
Before M2-M3 iteration, X_train array: 468700
Val size-3:  6
iteration 22: adding 6 samples from holdout to train
Printing X_train after adding from holdout (9380, 50)
Printing y_train after adding from holdout (9380, 50)
Printing doc_len_train after adding from holdout (9380,)

2022-04-29 04:05:31 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_22.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_22.ckpt


2022-04-29 04:05:31 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_22.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 2794
Before M1-M2 iteration, X_train array: 469000
Val size 1:  3
For M1-M2 combination, iteration 23: adding 3 samples from holdout to train
Printing X_train after adding from holdout (9383, 50)
Printing y_train after adding from holdout (9383, 50)
Printing doc_len_train after adding from holdout (9383,)
Printing inverted val size:  2791
Printing X_holdout after removing from holdout (2791, 50)
Printing doc_len_holdout after removing from holdout (2791,)
After M1-M2 iteration, doc_len_holdout array: 2791
After M1-M2 iteration, X_train array: 469150
Entered BiGRU-CRF. Training network on 9383 documents, validation on 400 documents

training time: 12.41
epoch 1 training f1: 0.0080

epoch 1 validation f1: 0.0000

training time: 12.06
epoch 2 training f1: 0.0000

epoch 2 validation f1: 0.0000

training time: 11.82
epoch 3 training f1: 0.0000

epoch 3 validation f1: 0.0044

training time: 11.92
epoch 4 training f1: 0.0062

epoch 4 valid

2022-04-29 04:28:37 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_22.ckpt




Before M1-M3 iteration, doc_len_holdout array: 2791
Before M1-M3 iteration, X_train array: 469150
Val size 2:  21
iteration 23: adding 21 samples from holdout to train
Printing X_train after adding from holdout (9404, 50)
Printing y_train after adding from holdout (9404, 50)
Printing doc_len_train after adding from holdout (9404,)
Printing inverted val size:  2770
Printing X_holdout after adding from holdout (2770, 50)
Printing doc_len_holdout after adding from holdout (2770,)
After M1-M3 iteration, doc_len_holdout array: 2770
After M1-M3 iteration, X_train array: 470200

y_pred_gru:  (2770, 50)
scores gru:  (2770,)

y_pred_rnn:  (2770, 50)
scores_rnn:  (2770,)
Before M2-M3 iteration, doc_len_holdout array: 2770
Before M2-M3 iteration, X_train array: 470200
Val size-3:  1
iteration 23: adding 1 samples from holdout to train
Printing X_train after adding from holdout (9405, 50)
Printing y_train after adding from holdout (9405, 50)
Printing doc_len_train after adding from holdout (9405

2022-04-29 05:10:47 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_lstm_crf_1600_23.ckpt


INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_23.ckpt


2022-04-29 05:10:48 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_rnn_crf_1600_23.ckpt




Before M1-M2 iteration, doc_len_holdout array size: 2769
Before M1-M2 iteration, X_train array: 470250
Val size 1:  4
For M1-M2 combination, iteration 24: adding 4 samples from holdout to train
Printing X_train after adding from holdout (9409, 50)
Printing y_train after adding from holdout (9409, 50)
Printing doc_len_train after adding from holdout (9409,)
Printing inverted val size:  2765
Printing X_holdout after removing from holdout (2765, 50)
Printing doc_len_holdout after removing from holdout (2765,)
After M1-M2 iteration, doc_len_holdout array: 2765
After M1-M2 iteration, X_train array: 470450
Entered BiGRU-CRF. Training network on 9409 documents, validation on 400 documents

training time: 12.74
epoch 1 training f1: 0.0079

epoch 1 validation f1: 0.0057

training time: 11.93
epoch 2 training f1: 0.0023

epoch 2 validation f1: 0.0000

training time: 12.08
epoch 3 training f1: 0.0036

epoch 3 validation f1: 0.0000

training time: 11.79
epoch 4 training f1: 0.0106

epoch 4 valid

2022-04-29 05:28:59 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/SavedModels/BC2GM_tt_gru_crf_1600_23.ckpt




Before M1-M3 iteration, doc_len_holdout array: 2765
Before M1-M3 iteration, X_train array: 470450
Val size 2:  4
iteration 24: adding 4 samples from holdout to train
Printing X_train after adding from holdout (9413, 50)
Printing y_train after adding from holdout (9413, 50)
Printing doc_len_train after adding from holdout (9413,)
Printing inverted val size:  2761
Printing X_holdout after adding from holdout (2761, 50)
Printing doc_len_holdout after adding from holdout (2761,)
After M1-M3 iteration, doc_len_holdout array: 2761
After M1-M3 iteration, X_train array: 470650

y_pred_gru:  (2761, 50)
scores gru:  (2761,)

y_pred_rnn:  (2761, 50)
scores_rnn:  (2761,)
Before M2-M3 iteration, doc_len_holdout array: 2761
Before M2-M3 iteration, X_train array: 470650
Val size-3:  2
iteration 24: adding 2 samples from holdout to train
Printing X_train after adding from holdout (9415, 50)
Printing y_train after adding from holdout (9415, 50)
Printing doc_len_train after adding from holdout (9415,)

Fine-tuning the pretrained model on the training data obtained from semi-supervised learning

In [None]:
# Settings for the fine-tuning run in the next cell.
patience_as = 15      # handed to model.train(...) as `patience`
batch_size_as = 128   # handed to model.train(...) as `batch_size`

In [None]:
# Fine-tune the pretrained LSTM-CRF on every (dataset, data_size) training set
# and print exact / partial precision, recall and F1 on that dataset's test split.

# Root folder on the mounted Drive that holds every input file and checkpoint used below.
PUBMED_DIR = '/content/drive/MyDrive/Pubmed'


def _load_int16_array(path):
    """Load the .npy file at `path` and return it cast to int16."""
    # NOTE(review): astype(int16) wraps silently for values above 32767 -
    # confirm that every index stored in these files fits.
    return np.load(path).astype(np.int16)


def _load_clipped_lengths(path, max_len):
    """Unpickle the lengths stored at `path` and return them as a list capped at `max_len`."""
    # pickle.load can execute arbitrary code: only point this at trusted files.
    with open(path, 'rb') as f:
        lengths = pickle.load(f)
    return [min(n, max_len) for n in lengths]


def _indices_to_tags(label_rows, lengths, idx2label):
    """Translate label indices to tag strings, keeping only the first `n` items of each row.

    `label_rows` and `lengths` are walked in parallel; the result is one list of
    tag strings per row, which is the input passed to the Evaluator below.
    """
    return [[idx2label[idx] for idx in row[:n]]
            for row, n in zip(label_rows, lengths)]


for dataset in datasets:

    # iterate over each data size setting
    for data_size in data_sizes:

        print('Training in the dataset loop',dataset,data_size)

        max_len = 50

        # load training data
        X_train = _load_int16_array('%s/Asymmetric/%s_X_train_%i.npy' % (PUBMED_DIR, dataset, data_size))
        y_train = _load_int16_array('%s/Asymmetric/%s_y_train_%i.npy' % (PUBMED_DIR, dataset, data_size))
        doc_len_train = _load_clipped_lengths(
            '%s/Asymmetric/%s_senlens_%i.pkl' % (PUBMED_DIR, dataset, data_size), max_len)
        # number of classes is derived from the largest label index in the training labels
        num_classes = np.max(y_train) + 1

        # load validation data
        X_val = _load_int16_array('%s/Asymmetric/%s_X_val_%i.npy' % (PUBMED_DIR, dataset, data_size))
        y_val = _load_int16_array('%s/Asymmetric/%s_y_val_%i.npy' % (PUBMED_DIR, dataset, data_size))
        doc_len_val = _load_clipped_lengths(
            '%s/Asymmetric/%s_senlens_val_%i.pkl' % (PUBMED_DIR, dataset, data_size), max_len)

        # conversion dictionaries (label2idx is not used in this cell)
        label2idx = {'O':0, 'B':1, 'I': 2}
        idx2label = {0:'O',1:'B-ENT',2:'I-ENT'}

        # load test data
        X_test = _load_int16_array('%s/%s_X_test.npy' % (PUBMED_DIR, dataset))
        y_test = _load_int16_array('%s/%s_y_test.npy' % (PUBMED_DIR, dataset))
        doc_len_test = _load_clipped_lengths(
            '%s/%s_senlens_test.pkl' % (PUBMED_DIR, dataset), max_len)

        # gold tag sequences, truncated to each sentence's (clipped) length
        y_true = _indices_to_tags(y_test, doc_len_test, idx2label)
        # not read again in this cell; kept in case a later cell relies on it
        y_true_val = _indices_to_tags(y_val, doc_len_val, idx2label)

        tf.reset_default_graph()
        model = lstm_crf(vocab,num_classes,max_len)

        # Here load the pretrained model and train it on the new training set obtained after semi-supervised learning
        model.load('%s/PretrainModel/lstm_crf_%s.ckpt' % (PUBMED_DIR, pretrain_dataset))

        # Build the checkpoint path once so the save and the reload below cannot drift apart.
        finetune_ckpt = '%s/FinetuneAsymmetric/%s_finetune_%s_%i.ckpt' % (
            PUBMED_DIR, dataset, pretrain_dataset, data_size)

        model.train(X_train,y_train,doc_len_train,
                    batch_size=batch_size_as,patience=patience_as,
                    validation_data=(X_val,y_val,doc_len_val),
                    savebest=True,filepath=finetune_ckpt)

        # Finally evaluate on test set, using the checkpoint written during training
        model.load(finetune_ckpt)
        y_preds_,_,_ = model.predict(X_test,doc_len_test)
        y_pred = _indices_to_tags(y_preds_, doc_len_test, idx2label)

        evaluator = Evaluator(y_true, y_pred, tags=['ENT'], loader="list")
        results, results_by_tag = evaluator.evaluate()

        exact_p = results['exact']['precision']
        exact_r = results['exact']['recall']
        exact_f = results['exact']['f1']
        partial_p = results['partial']['precision']
        partial_r = results['partial']['recall']
        partial_f = results['partial']['f1']

        print(dataset, data_size)
        print('exact p: %.4f' % exact_p)
        print('exact r: %.4f' % exact_r)
        print('exact f: %.4f' % exact_f)
        print('partial p: %.4f' % partial_p)
        print('partial r: %.4f' % partial_r)
        print('partial f: %.4f' % partial_f)
        print()



Training in the dataset loop s800 3000
INFO:tensorflow:Restoring parameters from /content/drive/MyDrive/Pubmed/PretrainModel/lstm_crf_medmentions.ckpt


2022-05-01 10:50:45 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/PretrainModel/lstm_crf_medmentions.ckpt


training network on 4684 documents, validation on 600 documents

training time: 11.66
epoch 1 training f1: 0.0260

epoch 1 validation f1: 0.0336

training time: 10.95
epoch 2 training f1: 0.0479

epoch 2 validation f1: 0.1163

training time: 10.84
epoch 3 training f1: 0.0969

epoch 3 validation f1: 0.1390

training time: 10.84
epoch 4 training f1: 0.1262

epoch 4 validation f1: 0.1603

training time: 10.74
epoch 5 training f1: 0.1345

epoch 5 validation f1: 0.1993

training time: 10.88
epoch 6 training f1: 0.1503

epoch 6 validation f1: 0.1955

training time: 11.13
epoch 7 training f1: 0.1676

epoch 7 validation f1: 0.2109

training time: 10.96
epoch 8 training f1: 0.1818

epoch 8 validation f1: 0.2132

training time: 10.96
epoch 9 training f1: 0.1975

epoch 9 validation f1: 0.2340

training time: 11.14
epoch 10 training f1: 0.2159

epoch 10 validation f1: 0.2612

training time: 10.99
epoch 11 training f1: 0.2469

epoch 11 validation f1: 0.2609

training time: 10.80
epoch 12 training f

2022-05-01 10:59:51 tensorflow INFO: Restoring parameters from /content/drive/MyDrive/Pubmed/FinetuneAsymmetric/s800_finetune_medmentions_3000.ckpt



s800 3000
exact p: 0.4538
exact r: 0.2227
exact f: 0.2987
partial p: 0.6005
partial r: 0.2947
partial f: 0.3953

