In [2]:
%load_ext autoreload
%autoreload 2
from pipelines import build_pipeline, build_coco_pipeline
import tensorflow as tf
import re
import numpy as np
import matplotlib.pyplot as plt
import time
import datetime
import textwrap
import json
import pandas as pd
import os
import pickle
# Tokenizers
from tokenizers import KerasTokenizer
# Model
from models.CaptioningTransformers import CaptioningTransformer
from models.Callbacks import CaptioningCallback
from models.model_utils import load_captioning_model
# Results
from prediction import Predict
from tqdm import tqdm
from evaluation import Evaluate

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Set up the save paths for the model

In [3]:
# Output locations for this run ------------------------------------------------
# A timestamp taken at cell execution doubles as the model / run name.
date_str = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Directory that receives the tokenizer vocabulary and the weight checkpoints.
model_path = f'../models/{date_str}'

# Directory handed to the TensorBoard and captioning callbacks.
log_dir = f'../model_logs/pretraining/captioning/{date_str}'

# Obtain and tokenize data

In [7]:
# Load data. `build_coco_pipeline()` (imported alongside) is the alternative
# loader; keep config['database'] in sync with whichever loader is used.
data = build_pipeline()

# Setup configuration-------------------------------------------------------
# Single source of truth for the transformer width. The feed-forward width is
# derived from it so the two can no longer drift apart when one is tuned.
MODEL_DIM = 50
FFN_EXPANSION = 4  # intermediate_size = FFN_EXPANSION * MODEL_DIM (200 here)

config = {
    'database': 'medpix',
    'model_name': date_str,
    'seq_len': 10,
    # NOTE(review): MODEL_DIM (50) is not divisible by num_heads (3); confirm the
    # attention layers do not require model_dim % num_heads == 0.
    'num_heads': 3,
    'model_dim': MODEL_DIM,
    'dropout': 0.2,
    'img_backbone_trainable': False,
    'img_size': (299, 299, 3),
    'img_backbone': 'efficientnetb0',
    'intermediate_size': FFN_EXPANSION * MODEL_DIM,
    # Placeholder: overwritten with tokenizer.vocab_size once the vocabulary is built.
    'vocab_size': None,
    'capts_per_img': 1,
    'num_encoder_blocks': 3,
    'num_decoder_blocks': 3,
    'normalization': 'pre'
}
# Tokenizer ---------------------------------------------------------------------
# The vocabulary is built from the training captions only.
tokenizer_data = data['captioning']['train_captions']

tokenizer = KerasTokenizer(seq_len=config['seq_len'])
tokenizer.build_vocabulary(tokenizer_data)

# The vocabulary size is only known after build_vocabulary(); record it in the
# config before the config is handed to the model.
config['vocab_size'] = tokenizer.vocab_size

# Store the vocabulary under the model directory so it can be reloaded with the model.
tokenizer.save_vocabulary(model_path=model_path)

# Tokenizing step that is mapped over every split of the pipeline.
def tokenize(ds_row):
    """Attach token ids to one dataset row.

    Args:
        ds_row: mapping with at least the keys 'image' and 'text'.

    Returns:
        A dict with the original 'image' and 'text' entries plus 'tokens',
        the result of running the notebook-level `tokenizer` on the text.
    """
    caption = ds_row['text']
    return {
        'image': ds_row['image'],
        'text': caption,
        'tokens': tokenizer.tokenize(caption),
    }
batch_size = 32


def _tokenized_split(split_name):
    """Return the named captioning split with `tokenize` mapped over every row."""
    return data['captioning'][split_name].map(
        tokenize, num_parallel_calls=tf.data.AUTOTUNE)


# Training and validation splits are batched; the test split is left unbatched.
# NOTE(review): nothing is shuffled here - confirm build_pipeline() already
# shuffles the training split.
train_data = _tokenized_split('train').batch(batch_size)
val_data = _tokenized_split('val').batch(batch_size)
test_data = _tokenized_split('test')




# Set up callbacks

In [5]:
# Define callbacks

# The model is subclassed, so only its weights are checkpointed.
# With save_best_only=True a checkpoint is written only when `val_acc` improves,
# not at the end of every epoch.
# NOTE(review): model.compile() is given no metrics, so `val_acc` has to be
# reported by the model's own train/test step - confirm the name matches.
checkpoint_prefix = f"{model_path}/{model_path.split('/')[-1]}"  # <model dir>/<model name>
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    save_weights_only=True)

 # Callback to log loss and accuracy every epoch
tensorboard_options = dict(
    log_dir=log_dir,
    update_freq='epoch',      # write scalars once per epoch
    profile_batch=0,          # profiling disabled
    # NOTE(review): the documented type is int (0 disables histograms) - confirm
    # that None is accepted by the installed Keras version.
    histogram_freq=None,
    embeddings_freq=1,
    embeddings_metadata=None,
    write_graph=True,
    write_images=False,
    write_steps_per_second=False,
)
tensorboard_callback = tf.keras.callbacks.TensorBoard(**tensorboard_options)

# EarlyStopping argument reference:
#   monitor              - quantity to watch.
#   min_delta            - smallest absolute change that counts as an improvement.
#   patience             - epochs without improvement before training stops.
#   verbose              - 0 is silent, 1 prints a message when the callback acts.
#   mode                 - "min" stops when the quantity stops decreasing, "max" when it
#                          stops increasing, "auto" infers the direction from the name.
#   baseline             - value the monitored quantity must improve on; training stops
#                          if the model shows no improvement over it.
#   restore_best_weights - if True, restore the weights of the best epoch; if False keep
#                          the weights from the last training step. An epoch is restored
#                          regardless of the baseline: when no epoch beats it, training
#                          runs for `patience` epochs and the best of those is restored.
#   start_from_epoch     - warm-up epochs before monitoring begins (not set here).

early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor="val_acc",
    mode="max",
    min_delta=0.001,
    patience=10,
    baseline=None,
    restore_best_weights=True,
    verbose=1)
                    

# Project-specific callback; it receives the run config and the same log
# directory that is given to TensorBoard.
captioning_callback = CaptioningCallback(config=config, log_dir=log_dir)



# Instantiate and train model

In [8]:
# Build, compile and fit the captioning model.
model = CaptioningTransformer(config)

# TODO: pass a learning-rate schedule to Adam once one is defined.
opt = tf.keras.optimizers.Adam()
# from_logits=False: the loss is fed probabilities, not logits.
# reduction='none' leaves the loss unreduced; presumably the model's custom
# train_step masks and reduces it - confirm.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False,
                                                     reduction='none')
model.compile(optimizer=opt, loss=loss)

training_callbacks = [
    checkpoint_callback,
    tensorboard_callback,
    early_stopping_callback,
    captioning_callback,
]

# Smoke run: a single batch of each split for a single epoch.
history = model.fit(
    train_data.take(1),
    validation_data=val_data.take(1),
    epochs=1,
    callbacks=training_callbacks)

2023-02-01 04:29:44.329994: W tensorflow/core/common_runtime/bfc_allocator.cc:462] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.12MiB (rounded to 2220032)requested by op AddV2
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2023-02-01 04:29:44.330075: I tensorflow/core/common_runtime/bfc_allocator.cc:1010] BFCAllocator dump for GPU_0_bfc
2023-02-01 04:29:44.330096: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (256): 	Total Chunks: 154, Chunks in use: 154. 38.5KiB allocated for chunks. 38.5KiB in use in bin. 14.9KiB client-requested in use in bin.
2023-02-01 04:29:44.330107: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (512): 	Total Chunks: 89, Chunks in use: 89. 59.2KiB allocated for chunks. 59.2KiB in use in bin. 47.3KiB client-requested in use in bin.
2023-02-01 04:29:44.330118: I te

ResourceExhaustedError: in user code:

    File "/home/jupyter/MedClip/venv/lib/python3.7/site-packages/keras/engine/training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "/home/jupyter/MedClip/venv/lib/python3.7/site-packages/keras/engine/training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/jupyter/MedClip/venv/lib/python3.7/site-packages/keras/engine/training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "/home/jupyter/MedClip/src/models/CaptioningTransformers.py", line 470, in train_step
        y_pred=self.decoder(pred_tokens,image_inputs,mask) # Decode next word probabilities
    File "/home/jupyter/MedClip/venv/lib/python3.7/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None

    ResourceExhaustedError: Exception encountered when calling layer "pre_norm_full_decoder" (type PreNormFullDecoder).
    
    in user code:
    
        File "/home/jupyter/MedClip/src/models/CaptioningTransformers.py", line 423, in call  *
            probs = self.softmax(x)
        File "/home/jupyter/MedClip/venv/lib/python3.7/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
        File "/home/jupyter/MedClip/venv/lib/python3.7/site-packages/keras/backend.py", line 1922, in random_uniform
            seed=self.make_legacy_seed())
    
        ResourceExhaustedError: failed to allocate memory [Op:AddV2]
    
    
    Call arguments received:
      • text_tokens=tf.Tensor(shape=(None, 9), dtype=int64)
      • image_inputs=tf.Tensor(shape=(None, 100, 50), dtype=float32)
      • mask=tf.Tensor(shape=(None, 9), dtype=bool)


# Make predictions

In [None]:
# Caption the (unbatched) test split with the current model and tokenizer.
# NOTE(review): presumably the predictions are written under model_path - confirm.
Predict.dataset_captions(
    test_data,
    model,
    tokenizer,
    model_path=model_path,
)


# Evaluate

In [None]:
evaluate.scores

In [None]:
evaluate = Evaluate(model_path)
evaluate.evaluate_all()

# Load a model and its tokenizer's vocab

In [3]:
# Reload a previously saved experiment together with its tokenizer vocabulary.
# NOTE: this rebinds `model_path`, `model` and `tokenizer` from the cells above.
model_path = '../models/exp1'
model = load_captioning_model(model_path)

# The tokenizer must use the sequence length the loaded model was configured with.
loaded_seq_len = model.config['seq_len']
tokenizer = KerasTokenizer(seq_len=loaded_seq_len)
tokenizer.load_vocabulary(model_path)

2023-01-29 00:51:29.163184: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-29 00:51:29.175857: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-29 00:51:29.177708: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-29 00:51:29.179781: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

TypeError: '<=' not supported between instances of 'NoneType' and 'int'