In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import random as rn
import math 
from PIL import Image, ImageDraw 
from PIL import ImagePath
from skimage.transform import resize
from numpy import array
from numpy import asarray
from numpy import zeros
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.applications.densenet import preprocess_input
from keras.preprocessing.text import one_hot
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM, Input, Embedding, Conv2D, Concatenate, Flatten, Add, Dropout
from nltk.translate.bleu_score import sentence_bleu
import random
import pickle
import os
from tqdm import tqdm

In [2]:
# Mount Google Drive inside the Colab runtime; the feature pickle, the GloVe
# vectors and the saved weights used by this notebook are all read from
# '/content/drive/My Drive/CS2/'.
from google.colab import drive
drive.mount('/content/drive')
# IPython line magic: make the mount point the current working directory.
%cd /content/drive

Mounted at /content/drive
/content/drive


In [3]:
# Load the pre-computed feature dictionary. Later cells read the keys
# 'y_vals', 'y_test' and 'image_test' from it.
# The context manager guarantees the handle is closed even if unpickling fails.
# NOTE(security): pickle.load can execute arbitrary code - only load this file
# from a trusted source.
with open('/content/drive/My Drive/CS2/features_enc_2048.pickle', 'rb') as file:
    features = pickle.load(file)

**Tokenization**

In [15]:
# Build the word-level vocabulary from the report texts in features['y_vals'].
token = Tokenizer()
token.fit_on_texts(features['y_vals'])

# One extra slot on top of the learned words; presumably index 0, which the
# embedding layer masks via mask_zero=True - TODO confirm.
vocab_size = 1 + len(token.word_index)

# Encode each report as a list of integer word ids.
encoded_findings_train = token.texts_to_sequences(features['y_vals'])

# The longest encoded report fixes the sequence length used for padding.
max_length = np.max(list(map(len, encoded_findings_train)))
print(vocab_size, max_length)

1663 167


In [16]:
# Parse the pre-trained GloVe file into a {word: vector} lookup.
# Each line is "<word> <v1> <v2> ...".
embeddings_index = dict()
# 'with' closes the file even if a line fails to parse; the explicit encoding
# avoids depending on the platform's default text encoding.
with open('/content/drive/My Drive/CS2/glove.6B.100d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        if not values:
            # Skip blank lines instead of failing on values[0].
            continue
        word = values[0]
        coefs = asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

# Weight matrix for the (frozen) embedding layer: row i holds the GloVe vector
# of the word with tokenizer index i. Words missing from GloVe stay all-zero.
embedding_matrix = zeros((vocab_size, 100))
for word, i in token.word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

**Encoder-Decoder**

In [17]:
# NOTE: setting PYTHONHASHSEED from inside a running interpreter does not alter
# that interpreter's hash randomisation; it is only inherited by child processes.
os.environ['PYTHONHASHSEED'] = '0'

tf.keras.backend.clear_session()

# Seed every RNG that can influence weight initialisation / dropout so the
# model can be regenerated. TensorFlow has its own generator, which the NumPy
# and `random` seeds do not cover.
np.random.seed(0)
rn.seed(0)
tf.random.set_seed(0)

# Image branch: 2048-d image feature vector projected to 256-d.
input_image = Input(shape=(2048,), name='Image')
dense_image = Dense(256, kernel_initializer=tf.keras.initializers.glorot_uniform, name='dense_image')(input_image)

# Text branch: partial report as a padded sequence of word ids.
# Sizes are taken from the tokenizer cell (vocab_size, max_length) and from the
# embedding matrix instead of being hard-coded, so they cannot drift apart from
# the Output_layer, which already uses vocab_size.
input_text = Input(shape=(int(max_length),), name='Text')
Embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_matrix.shape[1],
                            input_length=int(max_length), mask_zero=True, trainable=False,
                            weights=[embedding_matrix], name="Embedding_layer")
emb_layer = Embedding_layer(input_text)

# Two stacked LSTMs; the first returns the full sequence, the second only its
# final output.
lstm_layer = LSTM(units=256, activation='tanh', recurrent_activation='sigmoid', use_bias=True,
                  kernel_initializer=tf.keras.initializers.glorot_uniform,
                  recurrent_initializer=tf.keras.initializers.orthogonal,
                  bias_initializer=tf.keras.initializers.zeros(),
                  return_sequences=True, name="lstm_layer")(emb_layer)

lstm_layer_1 = LSTM(units=256, activation='tanh', recurrent_activation='sigmoid', use_bias=True,
                    kernel_initializer=tf.keras.initializers.glorot_uniform,
                    recurrent_initializer=tf.keras.initializers.orthogonal,
                    bias_initializer=tf.keras.initializers.zeros(), name="lstm_layer_1")
lstm_layer_1_output = lstm_layer_1(lstm_layer)

# Dropout on the text representation
dropout_layer = Dropout(0.4, name='dropout_layer')(lstm_layer_1_output)

# Merge image and text representations (both 256-d) by element-wise addition.
add_layer = tf.keras.layers.Add()([dense_image, dropout_layer])

# Fully connected layer
fully_connected = Dense(256, activation='relu', kernel_initializer=tf.keras.initializers.he_normal, name='fully_connected')
fc1_output = fully_connected(add_layer)

# Softmax over the vocabulary: distribution of the next word.
dropout_layer_1 = Dropout(0.3, name='dropout_layer_1')(fc1_output)
output_layer = Dense(vocab_size, activation='softmax', name='Output_layer')
output = output_layer(dropout_layer_1)

# Final model: (image features, partial report) -> next-word probabilities
encoder_decoder = Model(inputs=[input_image, input_text], outputs=output)
encoder_decoder.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Text (InputLayer)               [(None, 167)]        0                                            
__________________________________________________________________________________________________
Embedding_layer (Embedding)     (None, 167, 100)     166300      Text[0][0]                       
__________________________________________________________________________________________________
lstm_layer (LSTM)               (None, 167, 256)     365568      Embedding_layer[0][0]            
__________________________________________________________________________________________________
Image (InputLayer)              [(None, 2048)]       0                                            
______________________________________________________________________________________________

**Loading Model**

In [18]:
# Load the saved weights from the .h5 file on Drive into the encoder_decoder
# model built in the previous cell.
encoder_decoder.load_weights('/content/drive/My Drive/CS2/save_model.h5')

**Greedy Search**

In [19]:
def greedySearch(image_vec):
  """Generate a report for one image by greedy decoding.

  Starting from the 'start' token, the model is asked repeatedly for the next
  word given the image and the words produced so far; the most probable word
  is kept each time. Decoding stops when 'end' is produced, when the model
  predicts an index that has no word (e.g. the padding index 0), or after
  max_length steps.

  Args:
    image_vec: image input passed unchanged to encoder_decoder.predict;
      assumed to be a single 2048-d feature vector with a batch axis - TODO confirm.

  Returns:
    The generated words joined by single spaces, without the 'start'/'end'
    markers.
  """
  # Word ids fed to the decoder so far; the first step only sees 'start'.
  token_ids = [token.word_index['start']]
  words = []
  for _ in range(max_length):
    input_padded = pad_sequences([token_ids], maxlen=max_length)
    prediction = encoder_decoder.predict([image_vec, input_padded], verbose=0)
    # Greedy choice: the single most likely next word.
    next_id = int(np.argmax(prediction))
    word = token.index_word.get(next_id)
    # An unknown index (no word attached) is treated like 'end' rather than
    # raising KeyError.
    if word is None or word == 'end':
      break
    words.append(word)
    token_ids.append(token.word_index[word])

  # Only generated words are collected, so nothing has to be sliced off here.
  # The previous `[1:-1]` slice dropped the last real word whenever the loop
  # ran out of steps without producing 'end'.
  return ' '.join(words)

**Beam Search**

In [20]:
def beamsearch(img_features, beam_width_param):
  """Generate a report for one image with beam search.

  At every step each live hypothesis is extended with its `beam width` most
  probable next words. A hypothesis is scored by the sum of negative log
  probabilities of its words (lower is better), and only the best
  `beam width` extensions survive. A hypothesis that ends with 'end' is moved
  to the finished list with its score divided by its length, and the beam
  width shrinks by one for each finished hypothesis.

  Args:
    img_features: image input passed unchanged to encoder_decoder.predict.
    beam_width_param: number of hypotheses to keep per step; must be >= 1.

  Returns:
    (report, score): the report text without the 'start'/'end' markers and its
    length-normalised negative log-probability.

  Raises:
    ValueError: if beam_width_param is smaller than 1.
  """
  if beam_width_param < 1:
    # A width of 0 would make `[-0:]` select the whole vocabulary and then
    # leave nothing to return.
    raise ValueError('beam_width_param must be a positive integer')

  start_id = token.word_index['start']
  end_id = token.word_index['end']
  # Work on a local copy so the caller-visible argument is never modified.
  beam_width = beam_width_param
  # Each entry is [list of word ids, accumulated score]; 0.0 is the start score.
  sequences = [[[start_id], 0.0]]
  final_sequence = []

  for _ in range(max_length):
    candidate_array = []
    # Expand every live hypothesis with its top `beam_width` next words.
    for seq, score in sequences:
      input_sentence = pad_sequences([seq], max_length, padding='post')
      predictions = encoder_decoder.predict([img_features, input_sentence], verbose=0)
      probs = predictions[0]
      for word in np.argsort(probs)[-beam_width:]:
        # Negative log-probabilities are summed rather than multiplying
        # probabilities, to avoid floating-point underflow.
        candidate_array.append([seq + [int(word)], score - np.log(probs[word])])

    # Keep the `beam_width` candidates with the lowest (best) score.
    best_candidates = sorted(candidate_array, key=lambda val: val[1])[:beam_width]

    # Separate hypotheses that just produced 'end' from those still running.
    seq_array = []
    finished_count = 0
    for seq, score in best_candidates:
      if seq[-1] == end_id:
        final_sequence.append([seq, score / len(seq)])
        finished_count += 1
      else:
        seq_array.append([seq, score])
    beam_width -= finished_count

    # Nothing left to extend: every surviving hypothesis has reached 'end'.
    if not seq_array:
      break
    sequences = seq_array

  if final_sequence:
    # Unchanged behaviour: report the hypothesis that finished last.
    # NOTE(review): this is not necessarily the best-scored finished
    # hypothesis - confirm that this selection is intended.
    best_seq, best_score = final_sequence[-1]
    body = best_seq[1:-1]
  else:
    # No hypothesis produced 'end' within max_length steps (this used to raise
    # IndexError). Fall back to the best unfinished hypothesis, normalised the
    # same way as finished ones; it only carries the leading 'start' marker.
    best_seq, raw_score = min(sequences, key=lambda val: val[1])
    best_score = raw_score / len(best_seq)
    body = best_seq[1:]

  # Ids without a word (e.g. the padding index) are skipped rather than
  # raising KeyError.
  words = [token.index_word[word] for word in body if word in token.index_word]
  return ' '.join(words), best_score


**Prediction**

**Report-67**

In [4]:
# Reference report for test sample 67 (still wrapped in the 'start'/'end' markers).
features['y_test'][67]

'start the heart pulmonary and mediastinum are within normal limits there is no pleural effusion or pneumothora there is no focal air space opacity to suggest a pneumonia there are mild degenerative changes of the spine end'

In [7]:
# Report generated by greedy decoding for test sample 67.
greedySearch(features['image_test'][67])

'the heart pulmonary and mediastinum are within normal limits is no pleural effusion or pneumothora there is no focal air opacity to suggest a pneumonia of the spine'


In [31]:
# Beam-search report and its score for test sample 67, beam width 3.
beamsearch(features['image_test'][67],3)

('the heart pulmonary and mediastinum are within normal limits there no pleural effusion or pneumothora there no focal air space to suggest pneumonia mild changes of spine',
0.1782400974189031)


In [25]:
# Beam-search report and its score for test sample 67, beam width 5.
beamsearch(features['image_test'][67],5)

('the heart pulmonary and mediastinum are within normal limits there no pleural effusion or pneumothora there no focal air opacity to suggest pneumonia mild changes spine is unremarkable',
0.183694479797972)


**Report-85**

In [None]:
# Reference report for test sample 85 (still wrapped in the 'start'/'end' markers).
features['y_test'][85]

'start the cardiomediastinal silhouette is normal in size and contour hyperepanded lungs without focal consolidation pneumothora or large pleural effusion right chest wall surgical clips compatible with prior lumpectomy negative for acute bone abnormality end'

In [27]:
# Report generated by greedy decoding for test sample 85.
greedySearch(features['image_test'][85])

'the cardiomediastinal silhouette is normal size contour hyperepanded lungs focal pneumothora or pleural effusion right chest wall surgical prior lumpectomy'


In [30]:
# Beam-search report and its score for test sample 85, beam width 3.
beamsearch(features['image_test'][85],3)

('the cardiomediastinal silhouette is normal size and contour lungs without focal pneumothora or pleural effusion right chest clips compatible lumpectomy negative acute bone',
0.17056136571959541)


In [None]:
# Beam-search report and its score for test sample 85, beam width 5.
beamsearch(features['image_test'][85],5)

('the cardiomediastinal silhouette is normal size and contour lungs without focal pneumothora or pleural effusion cardio right chest wall clips is lumpectomy negative for without acute abnormality specifically',
0.18776584005461245)


**Report-128**

In [33]:
# Reference report for test sample 128 (still wrapped in the 'start'/'end' markers).
features['y_test'][128]

'start cardiomediastinal silhouette is within normal limits of size and appearance the pulmonary vascularity is unremarkable there are opacities in the left subsegmental atelectasis or scar otherwise the lungs are epanded and clear of airspace disease negative for pneumothora or pleural effusion limited bone evaluation reveals no acute abnormality end'

In [36]:
# Report generated by greedy decoding for test sample 128.
greedySearch(features['image_test'][128])

'cardiomediastinal silhouette is normal size and appearance pulmonary vascularity is unremarkable opacities in left subsegmental atelectasis lungs clear for pneumothora or pleural effusion'


In [39]:
# Beam-search report and its score for test sample 128, beam width 3.
beamsearch(features['image_test'][128],3)

('cardiomediastinal silhouette is normal size and appearance the pulmonary vascularity is unremarkable there opacities in left subsegmental atelectasis the lungs are epanded airspace disease pneumothora or pleural effusion no acute abnormality',
0.1796071334771966)


In [42]:
# Beam-search report and its score for test sample 128, beam width 5.
beamsearch(features['image_test'][128],5)

('cardiomediastinal silhouette is within limits of size and appearance pulmonary vascularity is unremarkable there are opacities in left subsegmental atelectasis otherwise lungs are airspace disease for pneumothora or pleural effusion limited bone acute abnormality',
0.2031660586232448)
