In [None]:
!pip install tensorflow_text
import tensorflow_text as text
import pandas as pd
import numpy as np
import keras
import tensorflow as tf
from keras.layers import *
import tensorflow_hub as hub
from keras.models import Model
from sklearn.model_selection import train_test_split

In [3]:
# Load both splits, cast every column to str (so missing values become the
# literal string 'nan'), then discard the two index-like columns
# (presumably leftovers from an earlier CSV export - TODO confirm).
train = pd.read_csv('TRAIN.csv').astype(str).drop(columns=['index', 'Unnamed: 0'])
test = pd.read_csv('TEST.csv').astype(str).drop(columns=['index', 'Unnamed: 0'])

In [4]:
# ---- Training split -------------------------------------------------------
# Upper-case X_* names hold the raw column values; the lower-case x_* names
# hold a fixed-width unicode copy of the same data.
X_train_description = train['Description'].to_numpy()
x_description = X_train_description.astype(str)

X_train_patient_dialogue = train['Patient dialogue'].to_numpy()
x_train_patient_dialogue = X_train_patient_dialogue.astype(str)

# For the doctor dialogue the unicode copy is stored under the X_* name itself.
X_train_doctor_dialogue = train['Doctor dialogue'].to_numpy().astype(str)


# ---- Test split -----------------------------------------------------------
X_test_description = test['Description'].to_numpy()
x_test_description = X_test_description.astype(str)

X_test_patient_dialogue = test['Patient dialogue'].to_numpy()
x_test_patient_dialogue = X_test_patient_dialogue.astype(str)

# Same pattern as the training split: X_* is overwritten with the unicode copy.
X_test_doctor_dialogue = test['Doctor dialogue'].to_numpy().astype(str)

In [5]:
# Class names in sorted order. pd.get_dummies lays out its one-hot columns in
# sorted label order, so sorting here keeps lst[i] aligned with column i of
# the encoded targets (an unsorted unique() list would not be).
lst = sorted(train['Tag 1'].unique())

In [6]:
# One-hot encode the primary tag for both splits using ONE shared label
# vocabulary taken from the training set. Encoding each split independently
# would give y_test a different number/order of columns whenever a class is
# missing from (or only present in) the test set.
tag_dtype = pd.CategoricalDtype(categories=sorted(train['Tag 1'].unique()))

y_train = pd.get_dummies(train['Tag 1'].astype(tag_dtype))
# Test labels never seen in training become NaN and encode as an all-zero row.
y_test = pd.get_dummies(test['Tag 1'].astype(tag_dtype))

# Dense float32 targets in the layout expected by categorical_crossentropy.
y_train_ = np.asarray(y_train.values).astype('float32')
y_test_ = np.asarray(y_test.values).astype('float32')

In [7]:
# Maximum number of word-piece tokens packed per example.
seq_length = 512

# --- Preprocessing: raw strings -> token ids packed into BERT input format ---
preprocessor = hub.load("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
tokenize = hub.KerasLayer(preprocessor.tokenize)
bert_pack_inputs = hub.KerasLayer(
    preprocessor.bert_pack_inputs,
    arguments={"seq_length": seq_length},
)

# A single scalar-string input segment per example.
text_inputs = [tf.keras.layers.Input(shape=(), dtype=tf.string)]
tokenized_inputs = list(map(tokenize, text_inputs))
encoder_inputs = bert_pack_inputs(tokenized_inputs)

# --- Encoder: BERT expert model from TF Hub (PubMed variant) ---
encoder = hub.KerasLayer("https://tfhub.dev/google/experts/bert/pubmed/2")
outputs = encoder(encoder_inputs)

# NOTE: despite the variable name, this selects the encoder's
# "sequence_output" entry, not "pooled_output". Per the TF Hub BERT docs that
# is presumably one vector per token, (batch, seq_length, hidden) - TODO confirm.
pooled_output = outputs["sequence_output"]

# Reusable string -> embedding sub-model shared by all text inputs.
embedding_model = tf.keras.Model(text_inputs, pooled_output)

In [8]:
from keras.layers import Layer
import keras.backend as K

# Attention pooling layer for the deep learning network
class attention(Layer):
    """Learned weighted sum over axis 1 of the input.

    A score is computed for every position along axis 1, the scores are
    normalised with a softmax, and the input is summed along axis 1 using
    those weights.

    Assumes the input is (batch, steps, features) - TODO confirm against callers.
    The bias has one entry per step, so the size of axis 1 must be known
    when the layer is built.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the projection vector W (features x 1) and per-step bias b (steps x 1)."""
        n_steps, n_features = input_shape[1], input_shape[-1]
        self.W = self.add_weight(
            name='attention_weight',
            shape=(n_features, 1),
            initializer='random_normal',
            trainable=True,
        )
        self.b = self.add_weight(
            name='attention_bias',
            shape=(n_steps, 1),
            initializer='zeros',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        """Return the attention-weighted sum of x over axis 1."""
        # tanh-squashed alignment score per position; drop the trailing size-1 axis.
        scores = K.squeeze(K.tanh(K.dot(x, self.W) + self.b), axis=-1)
        # Normalise scores into weights and restore the trailing axis so the
        # weights broadcast against x.
        weights = K.expand_dims(K.softmax(scores), axis=-1)
        # Context vector: weighted sum along axis 1.
        return K.sum(x * weights, axis=1)

In [None]:
# One scalar-string input per free-text column of the dataset.
text_input_description = Input(shape=(), dtype=tf.string, name='Description')
text_input_patient_dialogue = Input(shape=(), dtype=tf.string, name='Patient dialogue')
text_input_doctor_dialogue = Input(shape=(), dtype=tf.string, name='Doctor dialogue')

# The same BERT embedding sub-model (shared weights) encodes every input.
encode_output_description = embedding_model(text_input_description)
encode_output_patient_dialogue = embedding_model(text_input_patient_dialogue)
encoded_output_doctor_dialogue = embedding_model(text_input_doctor_dialogue)

# Join the three encodings along the last (feature) axis.
# (The original line referenced an undefined name for the doctor encoding.)
concat = Concatenate()([encode_output_description,
                        encode_output_patient_dialogue,
                        encoded_output_doctor_dialogue])

# embedding_model emits the encoder's token-level "sequence_output", so the
# token axis has to be pooled away before classification; otherwise Dense
# would emit one class distribution per token and would not match the
# (batch, classes) one-hot targets.
att = attention()(concat)

# Size the softmax from the encoded targets instead of hard-coding 13.
num_classes = y_train_.shape[1]
output = Dense(num_classes, activation='softmax')(att)

model = Model(inputs=[text_input_description,
                      text_input_patient_dialogue,
                      text_input_doctor_dialogue],
              outputs=output)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.summary()

In [None]:
# Render the layer graph of the classifier (top-to-bottom layout) with tensor
# shapes, layer names and activations annotated; nested models stay collapsed.
tf.keras.utils.plot_model(
    model,
    rankdir="TB",
    dpi=96,
    show_shapes=True,
    show_layer_names=True,
    show_layer_activations=True,
    show_dtype=False,
    expand_nested=False,
    layer_range=None,
)

# Training...

In [None]:
# Weights of the best epoch so far are written to this file.
checkpoint_filepath = 'model.h5'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='accuracy',
    mode='max',
    save_best_only=True)

# Stop once the monitored metric has not improved for 5 consecutive epochs.
# NOTE(review): no validation data is passed to fit(), so both callbacks track
# *training* accuracy - consider a validation split and 'val_accuracy'.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='accuracy',
                                                  mode='auto',
                                                  patience=5,
                                                  verbose=1)

# Inputs are passed in the same order as the model's Input layers.
# (The third entry previously used an undefined name, X_doctor_dialogue.)
hist = model.fit([X_train_description, X_train_patient_dialogue, X_train_doctor_dialogue],
                 y_train_,
                 epochs=100,
                 batch_size=4,
                 verbose=1,
                 callbacks=[early_stopping, model_checkpoint_callback])

# Loading trained model

In [None]:
# Restore the weights stored in the checkpoint file.
model.load_weights(filepath='model.h5')

In [None]:
# Loss and accuracy on the held-out split, scored one example at a time.
model.evaluate(
    x=[X_test_description, X_test_patient_dialogue, X_test_doctor_dialogue],
    y=y_test_,
    batch_size=1,
)

In [None]:
# Class probabilities for every test example.
pred = model.predict(
    x=[X_test_description, X_test_patient_dialogue, X_test_doctor_dialogue]
)

In [None]:
# Sorted class names, used to map an argmax index back to its tag.
lst = sorted(train['Tag 1'].unique())

In [None]:
# Build a per-example comparison of true vs. predicted tag.
# Compared with the original loop this
#   * no longer rebinds `text`, which is the alias of the imported
#     tensorflow_text module,
#   * drops accumulator lists that were never used in the result,
#   * maps indices to class names in one vectorised lookup.
class_names = np.asarray(lst)

# Index of the highest-probability class / the hot entry -> class name.
pred_values = class_names[np.argmax(pred, axis=1)].tolist()
y_true = class_names[np.argmax(y_test_, axis=1)].tolist()

output = pd.DataFrame({
    'Description': list(X_test_description),
    'True value': y_true,
    'Predicted value': pred_values,
})

In [None]:
# Display the table of descriptions with their true and predicted tags.
output

In [None]:
from sklearn.metrics import f1_score

# Collapse probability rows / one-hot rows to integer class indices.
y_pred = pred.argmax(axis=1)
y_test = y_test_.argmax(axis=1)

# Macro: unweighted mean over classes; micro: pooled over all examples.
for averaging in ('macro', 'micro'):
    print(f"F1 score({averaging})", f1_score(y_test, y_pred, average=averaging))

# Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Force one row/column per known class. Without `labels`, a class that is
# absent from both y_test and y_pred is dropped from the matrix, which would
# leave the tick labels below misaligned with the cells.
confusion = confusion_matrix(y_test, y_pred, labels=list(range(len(lst))))

# fmt='d' prints raw integer counts (the default format switches to
# scientific notation for larger counts). Tick labels are the class names.
ax = sns.heatmap(confusion, annot=True, fmt='d', cmap='Blues',
                 xticklabels=lst, yticklabels=lst)
ax.set_title('Confusion matrix')
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values ')

plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.show()