In [1]:
# Mount Google Drive at /content/drive (Colab only); the code, data and
# checkpoints used by the cells below are read from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# %cd /content/drive/MyDrive
# !zip -r VQA_RAD.zip VQA_RAD/

In [None]:
# Extract the dataset archive from Drive onto the local Colab disk (under /content).
# NOTE(review): later cells read from /content/drive/MyDrive/VQA_RAD rather than
# from this local copy -- confirm this cell is still required.
!unzip /content/drive/MyDrive/VQA_RAD.zip -d /content

In [None]:
# !ls /content/data/knowledge_embeddings | wc -l

32761


In [None]:
# Unpack the VQA_RAD_BioBert.zip archive into /content/VQA_RAD on the local disk.
# NOTE(review): the data loader below is pointed at the Drive copy of VQA_RAD,
# not at /content/VQA_RAD -- confirm which location is intended.
!unzip /content/drive/MyDrive/capstone5703-cs23-4/Embedding/VQA_RAD_BioBert.zip -d /content/VQA_RAD

In [None]:
# Unpack the VQA_RAD_Bert.zip archive into /content/VQA_RAD on the local disk.
# NOTE(review): the data loader below is pointed at the Drive copy of VQA_RAD,
# not at /content/VQA_RAD -- confirm which location is intended.
!unzip /content/drive/MyDrive/capstone5703-cs23-4/Embedding/VQA_RAD_Bert.zip -d /content/VQA_RAD

In [2]:
%cd /content/drive/MyDrive/VQACode


import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
import pandas as pd
import time
import pathlib
from utils.load_data_overall import RadDataLoader
from utils.evaluation import AnswerEvaluator
from utils.training_toolkit import CustomSchedule, loss_function
from models.Transformer.transformers import VQATransformer
from models.Transformer.masks import create_masks

/content/drive/MyDrive/VQACode


### Set up Arguments

In [3]:
# Hyper-parameters passed to VQATransformer / CustomSchedule in the model cell.
num_layers=2
d_model=512
num_heads=8
dff=2048
maximum_position_encoding=10000  # NOTE(review): not referenced by any other cell shown in this notebook
# Training-loop settings.
EPOCHS = 50
batch_size = 64
cnn_type = 'resnet'  # one of ['inception', 'resnet', 'resnet_v2', 'dense_net', 'vgg19'] (see the derived-config cell)
embedding = 'bert'  # choose from ['w2v', 'bioelmo', 'biobert', 'bluebert', 'large_biobert', 'elmo', 'bert']
data_augmentation = True  # when True, train_step applies a random flip/rotation to each image batch


In [4]:
####### DO NOT CHANGE VALUES OF THIS BLOCK IF YOU ARE NOT THE DEVELOPER ##########
# Derived configuration: output locations, image padding shape and the
# per-embedding question length / embedding width.

# Checkpoints and result CSVs live on Drive so they survive the Colab session.
check_point_path = '/content/drive/MyDrive/VQA_RAD_model' +'/' + cnn_type + '_' + str(num_layers)
saving_folder = '/content/drive/MyDrive/VQA_RAD_model/results/' + embedding + '/'
save_result_path = saving_folder + cnn_type + '_' + str(num_layers) + '.csv'

emb_size = 1024  # default embedding width; overridden below for some embeddings
pe_output = 22 + 1  # answers are padded to pe_output - 1 tokens when batching
MAX_LENGTH = pe_output  # upper bound on decoding steps in evaluate()

# Image shape depends on the CNN backbone. Fail fast on an unknown backbone
# instead of leaving img_shape / img_padding undefined for later cells.
if cnn_type == 'inception':
    img_shape = [299, 299]
    img_padding = tf.TensorShape([299, 299, 3])
elif cnn_type in ['resnet', 'resnet_v2', 'dense_net', 'vgg19']:
    img_shape = None
    img_padding = tf.TensorShape([224, 224, 3])
else:
    raise ValueError("Wrong cnn type: " + repr(cnn_type))

# Question length (pe_input) and embedding width per embedding type.
# NOTE: the dataset cell below reassigns pe_input before it is used.
if embedding == 'bioelmo':
    pe_input = 38
elif embedding == 'elmo':
    pe_input = 42
elif embedding == 'biobert':
    pe_input = 72
    emb_size = 768
elif embedding == 'bluebert':
    pe_input = 69
elif embedding == 'large_biobert':
    pe_input = 60
elif embedding == 'w2v':
    pe_input = 48
    emb_size = 200
elif embedding == 'bert':
    pe_input = 72
    emb_size = 1024
else:
    raise TypeError("Wrong embedding type")

# Image augmentation pipeline used by train_step; only built when enabled.
if data_augmentation:
    aug = tf.keras.Sequential([tf.keras.layers.experimental.preprocessing.RandomFlip(),
                               tf.keras.layers.experimental.preprocessing.RandomRotation(0.05)])

#### 

### Create Datasets

In [5]:
# create train, val, test dataset
# These lengths replace the per-embedding pe_input chosen in the configuration
# cell; questions and knowledge embeddings are both padded to 74 rows.
pe_input = 74
kn_input = 74

data_loader = RadDataLoader('/content/drive/MyDrive/VQA_RAD', emb_folder=embedding)
full_dataset, tokenizer = data_loader.create_dataset('QA')
vocab_size = len(tokenizer.index_word) + 1

# Sequential 52% / 30% / 18% split taken in dataset order.
# NOTE(review): int() truncation can leave a few trailing examples unused, and
# the split is only stable if create_dataset yields a fixed order -- confirm.
Data_SET_SIZE = len(full_dataset)
train_size = int(0.52 * Data_SET_SIZE)
val_size = int(0.30 * Data_SET_SIZE)
test_size = int(0.18 * Data_SET_SIZE)

train_set = full_dataset.take(train_size)
val_test_ds = full_dataset.skip(train_size)
val_set = val_test_ds.take(val_size)
test_ds = val_test_ds.skip(val_size)
test_set = test_ds.take(test_size)

# Element structure: ((image, question_emb, knowledge_emb), answer_tokens, ques_id).
padded_shapes = (
    (img_padding,
     tf.TensorShape([pe_input, emb_size]),
     tf.TensorShape([kn_input, 1024])),
    tf.TensorShape([pe_output-1]),
    [],
)

# Training uses real mini-batches; validation and test are fed one example at
# a time because evaluate() decodes a single example.
batch_train_set = train_set.padded_batch(batch_size, padded_shapes=padded_shapes, drop_remainder=True)
batch_val_set = val_set.padded_batch(1, padded_shapes=padded_shapes, drop_remainder=True)
batch_test_set = test_set.padded_batch(1, padded_shapes=padded_shapes, drop_remainder=True)

QA: 2248
yes_no: 1193
open_ended 1055
Load: QA


In [None]:
# for i in enumerate(full_dataset.take(5)):
#   (batch, (img_question, tar, q_id)) = i
#   print(batch,img_question[0].shape,img_question[1].shape,img_question[2].shape,tar.shape,q_id.shape)

In [None]:
# maxl = 0
# maxq = 0
# for (img_question, tar, q_id) in full_dataset.as_numpy_iterator():
#   # print(img_question[0].shape,img_question[1].shape,tar.shape,q_id)
#   # print(img_question[2].shape)
#   l = img_question[2].shape[0]
#   q = img_question[1].shape[0]
#   if l > maxl:
#     maxl = l
#   if q > maxq:
#     maxq = q
# print(maxl)
# print(maxq)

In [None]:
# import numpy as np
# # validate saved features and calculate max length of all questions
# ques_id = 48
# emb = np.load('/content/data/ques_embeddings/bioelmo/'+str(ques_id)+'.npy')
# # length = emb.shape[0]
# print(ques_id,'shape is', emb.shape)        


### 

### Define Models and Related Functions 

In [6]:
# Build the VQA transformer on top of the pretrained CNN selected by cnn_type
# (the cell output shows the ResNet50 weights being downloaded on first use).
transformer = VQATransformer(num_layers, d_model, num_heads, dff, vocab_size, pe_input, pe_output,
                          pretrained_cnn_type=cnn_type)

# Adam driven by the project's CustomSchedule, which is parameterised by d_model.
learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

# Running metrics; the training loop resets them at the start of every epoch.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

# Checkpoint both model and optimizer state under check_point_path,
# keeping only the 5 most recent checkpoints.
ckpt = tf.train.Checkpoint(transformer=transformer,
                           optimizer=optimizer)

ckpt_manager = tf.train.CheckpointManager(ckpt, check_point_path, max_to_keep=5)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
@tf.function()
def train_step(img, question, kn, tar):
    """Run one optimisation step on a batch and update the running metrics.

    Args:
        img: batch of images; randomly flipped/rotated when
            ``data_augmentation`` is enabled.
        question: batch of question embeddings (also used to build the masks).
        kn: batch of knowledge embeddings.
        tar: batch of answer token ids. The decoder is fed ``tar[:, :-1]`` and
            trained to predict ``tar[:, 1:]``.

    Side effects:
        Applies gradients to ``transformer.trainable_variables`` through the
        global ``optimizer`` and accumulates into ``train_loss`` and
        ``train_accuracy``.
    """
    if data_augmentation:
        # Request training mode explicitly: Keras random preprocessing layers
        # only augment in training mode, so do not rely on the implicit default
        # inside a custom training loop.
        img = aug(img, training=True)
    tar_inp = tar[:, :-1]
    tar_real = tar[:, 1:]

    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(question, tar_inp)
    with tf.GradientTape() as tape:
        # The fifth positional argument (True) is presumably the training flag;
        # evaluate() passes False in the same position.
        predictions, _ = transformer(question, img, kn, tar_inp,
                                     True,
                                     enc_padding_mask,
                                     combined_mask,
                                     dec_padding_mask)
        loss = loss_function(tar_real, predictions)

    gradients = tape.gradient(loss, transformer.trainable_variables)
    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))

    train_loss(loss)
    train_accuracy(tar_real, predictions)

In [7]:
def evaluate(question, img, kn):
    """Greedily decode an answer for a single (question, image, knowledge) example.

    Decoding starts from the ``<start>`` token and appends the arg-max token of
    the last position at every step, for at most ``MAX_LENGTH`` steps or until
    ``<end>`` is predicted.

    Args:
        question: question embedding with a leading batch dimension of 1.
        img: image tensor with a leading batch dimension of 1.
        kn: knowledge embedding with a leading batch dimension of 1.

    Returns:
        A tuple ``(token_ids, attention_weights)``. ``token_ids`` is the 1-D
        sequence of generated ids, starting with ``<start>`` and excluding
        ``<end>``. ``attention_weights`` is whatever the transformer returned
        on its last call.
    """
    end_token = tf.constant(tokenizer.texts_to_sequences(['<end>']), tf.int32)
    output = tf.expand_dims([tokenizer.word_index['<start>']], 0)
    for _ in range(MAX_LENGTH):
        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            question, output)
        predictions, attention_weights = transformer(question,
                                    img,
                                    kn,
                                    output,
                                    False,
                                    enc_padding_mask,
                                    combined_mask,
                                    dec_padding_mask)

        # Only the distribution for the most recent position matters.
        predictions = predictions[:, -1:, :]
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)
        if predicted_id == end_token:
            return tf.squeeze(output, axis=0), attention_weights
        # Feed the prediction back in as the next decoder input.
        output = tf.concat([output, predicted_id], axis=-1)
    return tf.squeeze(output, axis=0), attention_weights

In [None]:
################ADD#################################################################################
def get_score(batch_data_set, csv_saving_path):
    """Predict an answer for every example, save the results as CSV and score them.

    Args:
        batch_data_set: dataset batched with batch size 1 that yields
            ``((image, question_emb, knowledge_emb), target_tokens, ques_id)``.
        csv_saving_path: where the CSV with the columns ``true answer``,
            ``predicted answer`` and ``ques_id`` is written. Its parent
            directory is created if it does not exist.

    Returns:
        The result of ``AnswerEvaluator(csv_saving_path).evaluate()``.
    """
    true_answers_list = []
    predicted_answers_list = []
    ques_id_list = []
    for img_question, target, ques_id in batch_data_set:
        # Batch size is 1: decode the only row, stopping at the first padding id (0).
        true_tokens = []
        for token_id in target.numpy()[0]:
            if token_id == 0:
                break
            true_tokens.append(tokenizer.index_word[token_id])
        # Drop the leading <start> and trailing <end> markers.
        true_answers_list.append(" ".join(true_tokens[1:-1]))

        prediction, _ = evaluate(img_question[1], img_question[0], img_question[2])
        predicted_tokens = [tokenizer.index_word[token_id] for token_id in prediction.numpy()]
        # evaluate() never emits <end>, so only <start> has to be removed.
        predicted_answers_list.append(" ".join(predicted_tokens[1:]))
        ques_id_list.append(ques_id)

    data = {"true answer": true_answers_list, "predicted answer": predicted_answers_list, "ques_id": ques_id_list}
    df = pd.DataFrame(data)
    # Create the directory the CSV is actually written to, not just the
    # default results folder, so any csv_saving_path works.
    pathlib.Path(csv_saving_path).parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(csv_saving_path)
    return AnswerEvaluator(csv_saving_path).evaluate()
##################ADD#################################################################################

### 

### Train the Model 

In [None]:
ls /content/drive/MyDrive/VQA_RAD/ques_embeddings/bert

ls: cannot access '/content/drive/MyDrive/VQA_RAD/ques_embeddings/bert': No such file or directory


In [8]:
# Resume from the latest checkpoint under check_point_path when one exists;
# the cell output is the path of the checkpoint that was restored.
ckpt_manager.restore_or_initialize()

'/content/drive/MyDrive/VQA_RAD_model/resnet_2/ckpt-1'

In [None]:
for epoch in range(EPOCHS):
    start = time.time()

    # Metrics are running averages, so clear them at the start of every epoch.
    train_loss.reset_states()
    train_accuracy.reset_states()

    for batch, (img_question, tar, _) in enumerate(batch_train_set):
        image_batch, question_batch, knowledge_batch = img_question
        train_step(image_batch, question_batch, knowledge_batch, tar)
        # Optional per-batch logging:
        # if batch % 50 == 0:
        #     print ('Epoch {} Batch {} Loss {:.4f} Accuracy {:.4f}'.format(
        #         epoch + 1, batch, train_loss.result(), train_accuracy.result()))

    epoch_summary = 'Epoch {} Loss {:.4f} Accuracy {:.4f}'.format(
        epoch + 1, train_loss.result(), train_accuracy.result())
    print(epoch_summary)

    # Optional validation every 10 epochs (disabled):
    # if (epoch+1) % 10 == 0:
    #   csv_saving_path = saving_folder + 'val' + str(epoch) + '.csv'
    #   score = get_score(batch_val_set, csv_saving_path)
    #   model_accuracy = score['Accuracy']
    #   # if model_accuracy > accuracy:
    #   print('Validation Accuracy',model_accuracy)
    #   # ckpt_save_path = ckpt_manager.save()
    #   # accuracy = model_accuracy

    # Optional periodic checkpointing (disabled; a checkpoint is saved manually
    # in the next cell instead):
    # if (epoch + 1) % 2 == 0:
    #     ckpt_save_path = ckpt_manager.save()
    #     print ('Saving checkpoint for epoch {} at {}'.format(epoch+1,
    #                                                         ckpt_save_path))

    # print ('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))

Epoch 1 Loss 0.8339 Accuracy 0.0010
Epoch 2 Loss 0.6158 Accuracy 0.0434
Epoch 3 Loss 0.5123 Accuracy 0.0475
Epoch 4 Loss 0.4717 Accuracy 0.0509
Epoch 5 Loss 0.4369 Accuracy 0.0570
Epoch 6 Loss 0.3937 Accuracy 0.0643
Epoch 7 Loss 0.3630 Accuracy 0.0658
Epoch 8 Loss 0.3375 Accuracy 0.0681
Epoch 9 Loss 0.3101 Accuracy 0.0705
Epoch 10 Loss 0.2833 Accuracy 0.0731
Epoch 11 Loss 0.2588 Accuracy 0.0753
Epoch 12 Loss 0.2391 Accuracy 0.0792
Epoch 13 Loss 0.2292 Accuracy 0.0798
Epoch 14 Loss 0.1967 Accuracy 0.0838
Epoch 15 Loss 0.1735 Accuracy 0.0895
Epoch 16 Loss 0.1578 Accuracy 0.0917
Epoch 17 Loss 0.1359 Accuracy 0.0975
Epoch 18 Loss 0.1186 Accuracy 0.1031
Epoch 19 Loss 0.0994 Accuracy 0.1088
Epoch 20 Loss 0.0906 Accuracy 0.1095
Epoch 21 Loss 0.0796 Accuracy 0.1107
Epoch 22 Loss 0.0730 Accuracy 0.1122
Epoch 23 Loss 0.0596 Accuracy 0.1160
Epoch 24 Loss 0.0461 Accuracy 0.1198
Epoch 25 Loss 0.0431 Accuracy 0.1203
Epoch 26 Loss 0.0389 Accuracy 0.1205
Epoch 27 Loss 0.0325 Accuracy 0.1219
Epoch 28 L

In [None]:
# Manually save a checkpoint after training (the periodic save inside the
# training loop is commented out); the returned path is shown in the next cell.
ckpt_save_path = ckpt_manager.save()

In [None]:
ckpt_save_path

'./check_point/transformer/QA /biobert/resnet_2/ckpt-1'

### 

### Predicting and Evaluating 

In [None]:
# Collect ground-truth answers, predicted answers and question ids for the test split.
true_answers_list, predicted_answers_list, ques_id_list = [], [], []
print('Start predicting...')
for batch, (img_question, target, ques_id) in enumerate(batch_test_set):
    # Batch size is 1: decode the single target row up to the first padding id (0).
    true_tokens = []
    for token_id in target.numpy()[0]:
        if token_id == 0:
            break
        true_tokens.append(tokenizer.index_word[token_id])
    # Strip the <start> / <end> markers from the reference answer.
    true_answer = " ".join(true_tokens[1: -1])

    image_in, question_in, knowledge_in = img_question
    prediction, attention = evaluate(question_in, image_in, knowledge_in)
    predicted_tokens = [tokenizer.index_word[token_id] for token_id in prediction.numpy()]
    # The prediction starts with <start> and never contains <end>.
    predict_answer = " ".join(predicted_tokens[1:])

    true_answers_list.append(true_answer)
    predicted_answers_list.append(predict_answer)
    ques_id_list.append(ques_id)
    print("predicted answer: " + str(batch), end='\r', flush=True)

Start predicting...


In [None]:
# Persist the test predictions next to the other results for this embedding.
name = save_result_path
data = {
    "true answer": true_answers_list,
    "predicted answer": predicted_answers_list,
    "ques_id": ques_id_list,
}
df = pd.DataFrame(data)

results_dir = pathlib.Path(saving_folder)
if not results_dir.exists():
    results_dir.mkdir(parents=True, exist_ok=True)

df.to_csv(name)
print("complete writing", name)

complete writing ./QA_results/transformer/biobert/resnet_2.csv


In [None]:
# Score the CSV written by the previous cell; the metrics are printed in the cell output.
scores = AnswerEvaluator(name).evaluate()

{'testlen': 646, 'reflen': 735, 'guess': [646, 242, 125, 49], 'correct': [218, 25, 13, 8]}
ratio: 0.8789115646246546
Accuracy: 38.21
Exact Match: 33.64
F1 Score: 36.57
BLEU-1: 0.29
BLEU-2: 0.16
BLEU-3: 0.13
BLEU-4: 0.14


 **Analysis**

In [12]:
import numpy as np
image_size = [224, 224]
def load_and_preprocess_image(path, size=None):
    """Read a JPEG file and resize it for the CNN backbone.

    Args:
        path: path of the JPEG image.
        size: optional ``[height, width]`` to resize to. Defaults to the
            module-level ``image_size`` (224x224); pass ``[299, 299]`` when the
            model was built with the inception backbone.

    Returns:
        The decoded 3-channel image resized to the requested size.
    """
    target_size = image_size if size is None else size
    image = tf.io.read_file(path) # read image file
    image = tf.image.decode_jpeg(image, channels=3) # decode image
    image = tf.image.resize(image, target_size)
    return image
def load_question_features(path):
    """Load an embedding array that was saved with NumPy (``.npy``) from ``path``."""
    return np.load(path)

In [25]:
# Example to analyse: these values are concatenated into file paths in the next cell.
image_id = 'synpic55245'  # file name (without .jpg) under VQA_RAD/pic/
ques_embd = 'bert/'  # sub-folder of VQA_RAD/ques_embeddings/; the trailing slash is required
question_id = '128'  # file name (without .npy) of the question and knowledge embeddings

In [26]:
# Locate the image and the pre-computed embeddings of the selected example.
image_path = '/content/drive/MyDrive/VQA_RAD/pic/'+str(image_id)+'.jpg'
question_path = '/content/drive/MyDrive/VQA_RAD/ques_embeddings/'+str(ques_embd)+str(question_id)+'.npy'
knowledge_path = '/content/drive/MyDrive/VQA_RAD/knowledge_embeddings/'+str(question_id)+'.npy'

img_input = tf.convert_to_tensor(load_and_preprocess_image(image_path))
ques_input = tf.convert_to_tensor(load_question_features(question_path))
kg_input = tf.convert_to_tensor(load_question_features(knowledge_path))

# Zero-pad the first axis up to the lengths used when batching the datasets.
ques_padding = [[0, pe_input - ques_input.shape[0]], [0, 0]]
kg_padding = [[0, kn_input - kg_input.shape[0]], [0, 0]]
ques_input = tf.pad(ques_input, ques_padding, "CONSTANT")
kg_input = tf.pad(kg_input, kg_padding, "CONSTANT")

# evaluate() expects a leading batch dimension of 1.
img_input = tf.expand_dims(img_input, axis=0)
ques_input = tf.expand_dims(ques_input, axis=0)
kg_input = tf.expand_dims(kg_input, axis=0)

# Answer
prediction, attention_tm = evaluate(ques_input, img_input, kg_input)

p = prediction.numpy()
# Skip the leading <start> token when rendering the answer.
predict_answer = " ".join([tokenizer.index_word[i] for i in p][1:])
print(predict_answer)

yes


# Visualization

In [None]:
# Use the %pip magic explicitly so the package is installed into the running
# kernel's environment even when automagic is off or `pip` is shadowed by a variable.
%pip install tensor2tensor

In [27]:
E_SIZE = 7
D_SIZE = 2
def resize(np_mat, tp):
  """Crop one layer's attention weights and normalise them across heads.

  Args:
    np_mat: attention weights indexed as [batch, head, query, key]; only the
      first batch entry is used.
    tp: 'enc' crops to E_SIZE x E_SIZE, 'dec' crops to D_SIZE x D_SIZE and any
      other value crops to D_SIZE x E_SIZE.

  Returns:
    The cropped weights divided by their sum over the head axis, reshaped to
    (1, heads, rows, cols) as expected by the visualisation code.
  """
  crop_by_kind = {'enc': (E_SIZE, E_SIZE), 'dec': (D_SIZE, D_SIZE)}
  n_query, n_key = crop_by_kind.get(tp, (D_SIZE, E_SIZE))
  cropped = np_mat[0, :, :n_query, :n_key]
  print(cropped.shape)
  # Sum across heads, then normalise so every (query, key) cell sums to 1 over the heads.
  head_totals = np.sum(cropped, axis=0)
  normalised = cropped / head_totals[np.newaxis, :]
  n_heads, n_rows, n_cols = normalised.shape
  # Add extra dim for viz code to work.
  return np.reshape(normalised, (1, n_heads, n_rows, n_cols))

# Per-layer attention maps of the last evaluate() call, cropped and normalised
# for the visualisation: index 0 of attention_tm holds the encoder blocks,
# index 1 the decoder blocks.
enc_att, dec_att, encdec_att = [], [], []
encoder_attention, decoder_attention = attention_tm[0], attention_tm[1]
for layer_no in range(1, num_layers + 1):
  enc_att.append(resize(encoder_attention['encoder_layer{}_block'.format(layer_no)], 'enc'))
  dec_att.append(resize(decoder_attention['decoder_layer{}_block1'.format(layer_no)], 'dec'))
  encdec_att.append(resize(decoder_attention['decoder_layer{}_block2'.format(layer_no)], 'encdec'))

(8, 7, 7)
(8, 2, 2)
(8, 2, 7)
(8, 7, 7)
(8, 2, 2)
(8, 2, 7)


### 878

In [20]:
from tensor2tensor.visualization import attention
from nltk.tokenize import RegexpTokenizer

def tokenize(sentence):
    """Lower-case `sentence` and split it into runs of word characters."""
    word_splitter = RegexpTokenizer(r'\w+')
    lowered = sentence.lower()
    return word_splitter.tokenize(lowered)

def call_html():
  """Emit the require.js configuration (d3, jquery) into the cell output before attention.show is called."""
  import IPython
  requirejs_setup = '''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              "d3": "https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.8/d3.min",
              jquery: '//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min',
            },
          });
        </script>
        '''
  display(IPython.core.display.HTML(requirejs_setup))

# Token labels shown next to the attention maps.
# NOTE(review): enc_att / dec_att / encdec_att come from the most recent
# evaluate() call -- confirm they belong to this question before reading the plot.
inp_text = tokenize('What modality is this ?')
out_text = ['<start>','chest', 'x', 'Ray']

# while len(inp_text) < 38:
#   inp_text.append('<PAD>')

call_html()
attention.show(inp_text, out_text, enc_att, dec_att, encdec_att)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### 1474

In [24]:
from tensor2tensor.visualization import attention
from nltk.tokenize import RegexpTokenizer

# tokenize() and call_html() are defined once in the first visualization cell
# and reused here instead of being redefined verbatim.
# NOTE(review): enc_att / dec_att / encdec_att come from the most recent
# evaluate() call -- re-run the analysis cells for this question first.
inp_text = tokenize('What organ system is visualized?')
out_text = ['<start>','chest']

# while len(inp_text) < 38:
#   inp_text.append('<PAD>')

call_html()
attention.show(inp_text, out_text, enc_att, dec_att, encdec_att)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### 128

In [28]:
from tensor2tensor.visualization import attention
from nltk.tokenize import RegexpTokenizer

# tokenize() and call_html() are defined once in the first visualization cell
# and reused here instead of being redefined verbatim.
# NOTE(review): enc_att / dec_att / encdec_att come from the most recent
# evaluate() call -- re-run the analysis cells for this question first.
inp_text = tokenize('Is the left and right MCA present?')
out_text = ['<start>','yes']

# while len(inp_text) < 38:
#   inp_text.append('<PAD>')

call_html()
attention.show(inp_text, out_text, enc_att, dec_att, encdec_att)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>