# Evaluation using the BeFree corpus 

### GAD dataset
To obtain a large benchmark of gene–disease associations together with the supporting sentences from the literature, we used the corpus generated by the BeFree system, which is based on the Genetic Association Database (GAD).



#  -------------------------------------------------------------------------------------------

# imports

In [10]:
import tensorflow as tf
from keras.models import load_model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras_tqdm import TQDMNotebookCallback
import numpy as np
np.random.seed(1337)
from keras_tqdm import TQDMNotebookCallback
import nltk
import xml.etree.ElementTree as ET
import pandas as pd
import os
import string
from nltk.tokenize import TreebankWordTokenizer
from numpy.random import random_sample
import re
import pickle
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

from keras.layers import Embedding, Flatten,LSTM
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.utils import to_categorical
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation,  Input, merge,Conv1D,MaxPooling1D,GlobalMaxPooling1D,Convolution1D
from keras import regularizers
from sklearn.metrics import precision_recall_fscore_support
from sklearn.cross_validation import StratifiedKFold
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt
from keras.layers import Concatenate, concatenate
from keras import backend as K
from keras.layers import multiply
from keras.layers import merge
from keras.layers.core import *
from keras.layers.recurrent import LSTM
from keras.models import *
random_seed=1337

###  Define the metric function (batch-wise F1) reported during training

In [11]:
from keras import backend as K

def f1(y_true, y_pred):
    """Batch-wise F1 score expressed with Keras backend operations.

    Every count is taken after clipping values into [0, 1] and rounding, and
    is summed over all elements of the tensors passed in, so the score pools
    every entry of the batch together.

    Args:
        y_true: ground-truth tensor.
        y_pred: prediction tensor; multiplied element-wise with ``y_true``.

    Returns:
        Scalar ``2 * P * R / (P + R + eps)`` where ``eps`` is ``K.epsilon()``.
    """
    eps = K.epsilon()

    def _count_ones(values):
        # Number of entries that round to 1 once clipped into [0, 1].
        return K.sum(K.round(K.clip(values, 0, 1)))

    hits = _count_ones(y_true * y_pred)
    prec = hits / (_count_ones(y_pred) + eps)
    rec = hits / (_count_ones(y_true) + eps)
    # eps in the denominator keeps the ratio finite when prec and rec are both 0.
    return 2 * ((prec * rec) / (prec + rec + eps))


# Experiments to reproduce the results of Table 7 

### Evaluation results for multi-class classification

### Load Preprocessed Data

In [19]:
# Restore the preprocessed BeFree 3-class corpus. The file holds several
# objects pickled back-to-back on one stream, so each `pickle.load` call
# consumes the next object: the order of the statements below is part of the
# file format and must not be changed.
# NOTE(review): unpickling can execute arbitrary code - only load this file
# from a trusted source.
with open('../data/pickles/befree_3class_crawl-300d-2M.pickle', 'rb') as handle:
    # Identifier lists; not referenced again in the visible part of the notebook.
    gene_id_list = pickle.load(handle)
    gene_symbol_list = pickle.load(handle)
    disease_id_list = pickle.load(handle)    
    # Model inputs: X_train feeds the word-embedding input, the two distance
    # arrays are converted to position embeddings further down.
    X_train = pickle.load(handle)
    distance1_vectors = pickle.load(handle)
    distance2_vectors = pickle.load(handle)
    # Targets; argmax over axis 1 is taken later, so presumably one-hot rows - TODO confirm.
    Y_train = pickle.load(handle)
    word_list = pickle.load(handle)
    # Pretrained vectors (used as the embedding matrix) and the vocabulary
    # mapping whose length is used as the Embedding input dimension.
    word_vectors = pickle.load(handle)
    word_dict = pickle.load(handle)
    # Lookup tables; not referenced again in the visible part of the notebook.
    distance1_dict = pickle.load(handle)
    distance2_dict = pickle.load(handle)
    label_dict = pickle.load(handle)
    # Sequence length used for every model input.
    MAX_SEQUENCE_LENGTH = pickle.load(handle)

    

### Create Position Embedding Vectors

In [6]:
import keras
from keras_pos_embd import TrigPosEmbedding

# Stand-alone single-layer model used only as a feature transformer: it maps
# each distance value to a 20-dimensional trigonometric position embedding.
# The layer has no trainable parameters, so the model is never fitted.
model = keras.models.Sequential([
    TrigPosEmbedding(
        input_shape=(None,),
        output_dim=20,                      # embedding width
        mode=TrigPosEmbedding.MODE_EXPAND,  # `expand` mode
        name='Pos-Embd',
    ),
])
model.compile(optimizer='adam', loss=keras.losses.mae, metrics={})
model.summary()

# Embed both relative-distance arrays with the same model.
d1_train_embedded, d2_train_embedded = (
    model.predict(distances)
    for distances in (distance1_vectors, distance2_vectors)
)

# Only the last expression of a cell is rendered, so a single shape is shown.
d2_train_embedded.shape

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Pos-Embd (TrigPosEmbedding)  (None, None, 20)          0         
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________


(5330, 81, 20)

### Prepare Word Embedding Layer

In [9]:
# The pretrained vectors double as the initial Embedding weights; the
# embedding width is the size of their second axis.
embedding_matrix = word_vectors
EMBEDDING_DIM = embedding_matrix.shape[1]

def create_embedding_layer(l2_reg=0.01,use_pretrained=True,is_trainable=False):
    """Build the word Embedding layer used as the first layer of the model.

    The layer always has ``len(word_dict)`` rows, ``EMBEDDING_DIM`` columns
    and an input length of ``MAX_SEQUENCE_LENGTH``.

    Args:
        l2_reg: L2 penalty factor on the embedding weights. Only applied when
            ``use_pretrained`` is true.
        use_pretrained: when true the layer starts from ``embedding_matrix``;
            otherwise it is created with Keras' default initialisation.
        is_trainable: whether the pretrained weights are updated while
            training. Ignored when ``use_pretrained`` is false.

    Returns:
        The configured Keras ``Embedding`` layer.
    """
    vocabulary_size = len(word_dict)

    if not use_pretrained:
        return Embedding(vocabulary_size, EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH)

    return Embedding(
        vocabulary_size,
        EMBEDDING_DIM,
        weights=[embedding_matrix],
        input_length=MAX_SEQUENCE_LENGTH,
        trainable=is_trainable,
        embeddings_regularizer=regularizers.l2(l2_reg),
    )
            
    

### Prepare Attention Mechanism

In [12]:
INPUT_DIM = 2  # not referenced by the visible code; kept for backward compatibility
TIME_STEPS = MAX_SEQUENCE_LENGTH


def attentionNew(inputs):
    """Re-weight a 3-D sequence tensor with a learned soft attention.

    Steps: tanh on the input, swap axes 1 and 2, apply a softmax Dense layer
    as wide as the time axis, swap back, multiply element-wise with the
    tanh-ed input and apply tanh once more.

    Args:
        inputs: 3-D Keras tensor. The callers in this notebook pass LSTM
            outputs created with ``return_sequences=True``.

    Returns:
        Keras tensor with the same shape as ``inputs``.
    """
    # Activation('tanh') replaces Lambda(tf.keras.backend.tanh): same
    # element-wise op, but it stays inside the standalone-Keras API used by the
    # rest of the notebook and avoids anonymous Lambda layers.
    squashed = Activation('tanh')(inputs)

    # The softmax width has to equal the time dimension for the element-wise
    # product below to line up. Read it from the tensor itself and fall back
    # to the notebook-wide TIME_STEPS only when it is not statically known.
    n_steps = K.int_shape(squashed)[1]
    if n_steps is None:
        n_steps = TIME_STEPS

    scores = Permute((2, 1))(squashed)                      # axes 1 and 2 swapped
    scores = Dense(n_steps, activation='softmax')(scores)   # softmax over the last axis
    attention_weights = Permute((2, 1))(scores)             # back to the input layout

    weighted = multiply([squashed, attention_weights])
    return Activation('tanh')(weighted)


### Create the Model

In [16]:
# set parameter for metric calculation, 'macro' for multiclass classification
param='macro' 


def build_model(n_classes=3, pos_dim=20, learning_rate=0.0001,
                loss='binary_crossentropy', kernel_sizes=(5, 4, 3)):
    """Build and compile the CNN + attention-LSTM relation classifier.

    Architecture: word embeddings are concatenated with the two position
    embeddings. That sequence feeds (a) one Conv1D branch per kernel size,
    each followed by max-pooling, dropout and global max-pooling, and (b) a
    forward and a backward LSTM whose sequences go through ``attentionNew``
    and are flattened. All branch outputs are concatenated and passed through
    a regularised Dense layer and a softmax output layer.

    Called without arguments it reproduces the original fixed configuration.

    Args:
        n_classes: number of units in the softmax output layer.
        pos_dim: width of each position-embedding input; must match the width
            of the arrays fed for those inputs.
        learning_rate: learning rate for the Adam optimizer.
        loss: loss passed to ``Model.compile``.
            NOTE(review): 'binary_crossentropy' on a multi-way softmax is
            unusual; 'categorical_crossentropy' is the conventional choice for
            one-hot targets. The default is kept so the recorded results can
            be reproduced. With this loss, Keras 2.x presumably resolves the
            'acc' shortcut to binary accuracy - confirm before quoting 'acc'.
        kernel_sizes: Conv1D kernel widths, one convolutional branch each,
            created in the given order.

    Returns:
        A compiled Keras ``Model`` taking
        ``[word_ids, position_embedding_1, position_embedding_2]``.
    """
    # Local import so this cell does not depend on an `import keras` executed
    # in some other cell of the notebook.
    from keras.optimizers import Adam

    tf.set_random_seed(random_seed)

    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedding_layer = create_embedding_layer(use_pretrained=True, is_trainable=False)
    embedded_words = embedding_layer(sequence_input)

    pos_embedd_1 = Input(shape=(MAX_SEQUENCE_LENGTH, pos_dim), dtype='float32')
    pos_embedd_2 = Input(shape=(MAX_SEQUENCE_LENGTH, pos_dim), dtype='float32')

    features = concatenate([embedded_words, pos_embedd_1, pos_embedd_2])

    # One convolutional branch per kernel width (previously three copy-pasted
    # stanzas); layers are created in the same order as before.
    conv_features = []
    for width in kernel_sizes:
        branch = Conv1D(64, width, activation='relu')(features)
        branch = MaxPooling1D(3)(branch)
        branch = Dropout(0.1)(branch)
        conv_features.append(GlobalMaxPooling1D()(branch))

    # Two independent LSTMs reading the sequence in opposite directions.
    # NOTE(review): with go_backwards=True Keras returns the sequence in
    # reversed time order, so the two branches are not step-aligned when
    # concatenated; left as is because everything is flattened afterwards.
    forward = LSTM(100, recurrent_dropout=0.05, return_sequences=True)(features)
    backward = LSTM(100, go_backwards=True, recurrent_dropout=0.05,
                    return_sequences=True)(features)
    lstm_sequence = concatenate([attentionNew(forward), attentionNew(backward)])
    lstm_sequence = Flatten()(lstm_sequence)

    # Named `merged` (not `merge`) so the imported Keras `merge` is not shadowed.
    merged = concatenate(conv_features + [lstm_sequence])
    hidden = Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.1))(merged)
    hidden = Dropout(0.1)(hidden)
    preds = Dense(n_classes, activation='softmax')(hidden)

    model = Model(inputs=[sequence_input, pos_embedd_1, pos_embedd_2], outputs=preds)
    model.compile(loss=loss, optimizer=Adam(lr=learning_rate), metrics=['acc', f1])
    return model
    

### Run the Evaluation using 10-fold Cross-Validation

In [24]:
def train_and_evaluate_model_lstm_position(n_splits=10, epochs=50, batch_size=50,
                                           patience=3, shuffle=False):
    """Train and score the model with stratified k-fold cross-validation.

    For every fold a fresh model is built with ``build_model()``, trained on
    the training split and scored on the held-out split with
    ``precision_recall_fscore_support(average=param)``. Per-fold and averaged
    precision / recall / F-score are printed as percentages.

    Reads the notebook globals ``X_train``, ``Y_train``, ``d1_train_embedded``,
    ``d2_train_embedded``, ``param`` and ``random_seed``.

    Args:
        n_splits: number of cross-validation folds.
        epochs: maximum number of training epochs per fold.
        batch_size: mini-batch size passed to ``Model.fit``.
        patience: EarlyStopping patience, in epochs, on ``val_loss``.
        shuffle: whether to shuffle samples before splitting. The default
            (False) yields the same folds as the original code.

    Returns:
        List with one Keras ``History`` object per fold.

    NOTE(review): the held-out fold is also used as ``validation_data`` for
    early stopping, so the stopping epoch is chosen on the data the scores are
    computed on; the reported numbers are therefore likely optimistic.
    """
    # random_state only has an effect when shuffling, and recent scikit-learn
    # versions raise ValueError if it is set while shuffle=False.
    skf = StratifiedKFold(n_splits=n_splits, shuffle=shuffle,
                          random_state=random_seed if shuffle else None)

    # Integer class index per sample, needed for stratification.
    labels = np.argmax(Y_train, axis=1)

    prec_list, reca_list, fscore_list = [], [], []
    all_histories = []

    for fold, (train_index, test_index) in enumerate(skf.split(X_train, labels), start=1):
        x_train, x_test = X_train[train_index], X_train[test_index]
        pos_train1, pos_test1 = d1_train_embedded[train_index], d1_train_embedded[test_index]
        pos_train2, pos_test2 = d2_train_embedded[train_index], d2_train_embedded[test_index]
        y_train, y_test = Y_train[train_index], Y_train[test_index]

        # NOTE(review): every fold adds a new model to the same backend graph.
        # K.clear_session() would release it but would also invalidate any
        # other live Keras model in the notebook, so it is not called here.
        model = build_model()

        earlystopper = EarlyStopping(monitor='val_loss', patience=patience, verbose=1)
        history = model.fit(
            [x_train, pos_train1, pos_train2], y_train,
            validation_data=([x_test, pos_test1, pos_test2], y_test),
            epochs=epochs, batch_size=batch_size, verbose=1,
            callbacks=[TQDMNotebookCallback(), earlystopper],
        )
        all_histories.append(history)

        predicted = np.argmax(model.predict([x_test, pos_test1, pos_test2]), axis=1)
        y_test_to_label = np.argmax(y_test, axis=1)
        prec, reca, fscore, _ = precision_recall_fscore_support(
            y_test_to_label, predicted, average=param)
        print("Fold {:d}: Precision:{:.2f}% Recall:{:.2f}% Fscore:{:.2f}% ".format(fold,prec*100, reca*100, fscore*100))

        prec_list.append(prec)
        reca_list.append(reca)
        fscore_list.append(fscore)

    # Unweighted mean over folds, expressed as a percentage.
    mean_precision = sum(prec_list) / len(prec_list) * 100
    mean_recall = sum(reca_list) / len(reca_list) * 100
    mean_fscore = sum(fscore_list) / len(fscore_list) * 100
    print("Final: Precision:{:.2f}% Recall:{:.2f}% Fscore:{:.2f}% ".format(mean_precision, mean_recall, mean_fscore))
    return all_histories

In [39]:
# Run the full cross-validation; keeps one Keras History object per fold.
all_histories=train_and_evaluate_model_lstm_position()

Train on 4796 samples, validate on 534 samples


Epoch 1/50


Epoch 2/50


Epoch 3/50


Epoch 4/50


Epoch 5/50


Epoch 6/50


Epoch 7/50


Epoch 8/50


Epoch 9/50


Epoch 10/50


Epoch 11/50


Epoch 12/50


Epoch 13/50


Epoch 14/50


Epoch 15/50


Epoch 16/50


Epoch 17/50


Epoch 18/50


Epoch 19/50


Epoch 20/50


Epoch 00020: early stopping
Fold 1: Precision:73.08% Recall:76.96% Fscore:74.08% 
Train on 4796 samples, validate on 534 samples


Epoch 1/50


Epoch 2/50


Epoch 3/50


Epoch 4/50


Epoch 5/50


Epoch 6/50


Epoch 7/50


Epoch 8/50


Epoch 9/50


Epoch 10/50


Epoch 11/50


Epoch 12/50


Epoch 13/50


Epoch 14/50


Epoch 15/50


Epoch 16/50


Epoch 17/50


Epoch 18/50


Epoch 19/50


Epoch 20/50


Epoch 21/50


Epoch 22/50


Epoch 23/50


Epoch 24/50


Epoch 25/50


Epoch 26/50


Epoch 00026: early stopping
Fold 2: Precision:73.94% Recall:72.32% Fscore:73.05% 
Train on 4796 samples, validate on 534 samples


Epoch 1/50


Epoch 2/50


Epoch 3/50


Epoch 4/50


Epoch 5/50


Epoch 6/50


Epoch 7/50


Epoch 8/50


Epoch 9/50


Epoch 10/50


Epoch 11/50


Epoch 12/50


Epoch 13/50


Epoch 14/50


Epoch 15/50


Epoch 16/50


Epoch 17/50


Epoch 18/50


Epoch 19/50


Epoch 20/50


Epoch 21/50


Epoch 22/50


Epoch 23/50


Epoch 24/50


Epoch 25/50


Epoch 00025: early stopping
Fold 3: Precision:74.29% Recall:75.00% Fscore:74.44% 
Train on 4796 samples, validate on 534 samples


Epoch 1/50


Epoch 2/50


Epoch 3/50


Epoch 4/50


Epoch 5/50


Epoch 6/50


Epoch 7/50


Epoch 8/50


Epoch 9/50


Epoch 10/50


Epoch 11/50


Epoch 12/50


Epoch 13/50


Epoch 14/50


Epoch 15/50


Epoch 16/50


Epoch 17/50


Epoch 18/50


Epoch 19/50


Epoch 20/50


Epoch 21/50


Epoch 00021: early stopping
Fold 4: Precision:69.09% Recall:70.66% Fscore:68.47% 
Train on 4797 samples, validate on 533 samples


Epoch 1/50


Epoch 2/50


Epoch 3/50


Epoch 4/50


Epoch 5/50


Epoch 6/50


Epoch 7/50


Epoch 8/50


Epoch 9/50


Epoch 10/50


Epoch 11/50


Epoch 12/50


Epoch 13/50


Epoch 14/50


Epoch 15/50


Epoch 16/50


Epoch 17/50


Epoch 18/50


Epoch 19/50


Epoch 00019: early stopping
Fold 5: Precision:68.60% Recall:67.62% Fscore:67.85% 
Train on 4797 samples, validate on 533 samples


Epoch 1/50


Epoch 2/50


Epoch 3/50


Epoch 4/50


Epoch 5/50


Epoch 6/50


Epoch 7/50


Epoch 8/50


Epoch 9/50


Epoch 10/50


Epoch 11/50


Epoch 12/50


Epoch 13/50


Epoch 14/50


Epoch 15/50


Epoch 16/50


Epoch 17/50


Epoch 18/50


Epoch 19/50


Epoch 20/50


Epoch 21/50


Epoch 22/50


Epoch 23/50


Epoch 00023: early stopping
Fold 6: Precision:71.73% Recall:75.43% Fscore:73.01% 
Train on 4797 samples, validate on 533 samples


Epoch 1/50


Epoch 2/50


Epoch 3/50


Epoch 4/50


Epoch 5/50


Epoch 6/50


Epoch 7/50


Epoch 8/50


Epoch 9/50


Epoch 10/50


Epoch 11/50


Epoch 12/50


Epoch 13/50


Epoch 14/50


Epoch 15/50


Epoch 16/50


Epoch 17/50


Epoch 18/50


Epoch 19/50


Epoch 20/50


Epoch 21/50


Epoch 22/50


Epoch 00022: early stopping
Fold 7: Precision:74.72% Recall:73.49% Fscore:74.02% 
Train on 4798 samples, validate on 532 samples


Epoch 1/50


Epoch 2/50


Epoch 3/50


Epoch 4/50


Epoch 5/50


Epoch 6/50


Epoch 7/50


Epoch 8/50


Epoch 9/50


Epoch 10/50


Epoch 11/50


Epoch 12/50


Epoch 13/50


Epoch 14/50


Epoch 15/50


Epoch 16/50


Epoch 17/50


Epoch 18/50


Epoch 19/50


Epoch 20/50


Epoch 21/50


Epoch 00021: early stopping
Fold 8: Precision:67.82% Recall:70.06% Fscore:67.82% 
Train on 4798 samples, validate on 532 samples


Epoch 1/50


Epoch 2/50


Epoch 3/50


Epoch 4/50


Epoch 5/50


Epoch 6/50


Epoch 7/50


Epoch 8/50


Epoch 9/50


Epoch 10/50


Epoch 11/50


Epoch 12/50


Epoch 13/50


Epoch 14/50


Epoch 15/50


Epoch 16/50


Epoch 17/50


Epoch 18/50


Epoch 19/50


Epoch 20/50


Epoch 00020: early stopping
Fold 9: Precision:69.19% Recall:71.34% Fscore:70.08% 
Train on 4799 samples, validate on 531 samples


Epoch 1/50


Epoch 2/50


Epoch 3/50


Epoch 4/50


Epoch 5/50


Epoch 6/50


Epoch 7/50


Epoch 8/50


Epoch 9/50


Epoch 10/50


Epoch 11/50


Epoch 12/50


Epoch 13/50


Epoch 14/50


Epoch 15/50


Epoch 16/50


Epoch 17/50


Epoch 18/50


Epoch 19/50


Epoch 20/50


Epoch 21/50


Epoch 22/50


Epoch 23/50


Epoch 24/50


Epoch 25/50


Epoch 26/50


Epoch 27/50


Epoch 28/50


Epoch 00028: early stopping
Fold 10: Precision:73.75% Recall:73.54% Fscore:73.46% 
Final: Precision:71.62% Recall:72.64% Fscore:71.63% 
