In [1]:
from keras.layers import Input, Lambda, Dense, Flatten
from keras.models import Model
from keras.applications.resnet_v2 import preprocess_input
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.optimizers import adam_v2

import numpy as np
import pandas as pd
from glob import glob
import matplotlib.pyplot as plt
import tensorflow.keras as tfk
import tensorflow.keras.layers as tfkl
import random
import os
import tensorflow as tf
from PIL import Image
import logging
from tqdm import tqdm
import cv2

logging.disable(logging.WARNING)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
print(tf.__version__)
# Random seed for reproducibility
seed = 42

random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

2.11.0


In [2]:
concepts_path = 'data/caption_prediction_train.csv'

df_captions = pd.read_csv(concepts_path, sep=';', names=['Id', 'Caption'])
df_captions['Id'] = df_captions['Id'].str[-6:]
captions = df_captions.to_numpy()
np.save('data/captions.npy', captions)
df_captions.head()

Unnamed: 0,Id,Caption
0,1,pericardial tamponade with clear distinction o...
1,2,angiography of the aortic arch show delay visu...
2,3,balloonocclude retrograde transvenous oblitera...
3,4,film after glue embolization show no filling i...
4,5,peripheral in posteroanterior projection angio...


In [87]:

def convert_to_numpy(in_dir, out_dir, filename_transform=None):
    ids = []
    for filename in tqdm(os.listdir(in_dir)):
        if (filename.endswith(".jpg")):
            image = cv2.imread(filename=os.path.join(in_dir, filename), flags=cv2.IMREAD_COLOR)
            if image.shape != (128, 128, 3):
                print('Error')
            filename = filename[:-4]
            if filename_transform:
                filename = filename_transform(filename)
            assert len(filename) == 6
            np.save(os.path.join(out_dir, filename), np.array(image))
            ids.append(filename)
    return ids


transform = lambda x: x.split('_')[-1]

train_dir = 'data/train/'
val_dir = 'data/validation/'
test_dir = 'data/test/'
train_numpy_dir = 'data/numpy/train/'
val_numpy_dir = 'data/numpy/validation/'
test_numpy_dir = 'data/numpy/test/'

In [6]:

train_ids = []
val_ids = []
test_ids = []
if os.path.exists(train_numpy_dir) and len(os.listdir(train_numpy_dir)) == len(os.listdir(train_dir)):
    in_dir = train_numpy_dir
    for filename in tqdm(os.listdir(in_dir)):
        if filename.endswith(".npy"):
            train_ids.append(filename[:-4])
else:
    in_dir = train_numpy_dir
    for f in os.listdir(in_dir):
        if os.path.exists(os.path.join(in_dir, f)):
            os.remove(os.path.join(in_dir, f))
    train_ids = convert_to_numpy(train_dir, train_numpy_dir, transform)

if os.path.exists(val_numpy_dir) and len(os.listdir(val_numpy_dir)) == len(os.listdir(val_dir)):
    in_dir = val_numpy_dir
    for filename in tqdm(os.listdir(in_dir)):
        if filename.endswith(".npy"):
            val_ids.append(filename[:-4])
else:
    in_dir = val_numpy_dir
    for f in os.listdir(in_dir):
        if os.path.exists(os.path.join(in_dir, f)):
            os.remove(os.path.join(in_dir, f))
    val_ids = convert_to_numpy(val_dir, val_numpy_dir, transform)

if os.path.exists(test_numpy_dir) and len(os.listdir(test_numpy_dir)) == len(os.listdir(test_dir)):
    in_dir = test_numpy_dir
    for filename in tqdm(os.listdir(in_dir)):
        if filename.endswith(".npy"):
            test_ids.append(filename[:-4])
else:
    in_dir = test_numpy_dir
    for f in os.listdir(in_dir):
        if os.path.exists(os.path.join(in_dir, f)):
            os.remove(os.path.join(in_dir, f))
    test_ids = convert_to_numpy(test_dir, test_numpy_dir, transform)

100%|██████████| 67842/67842 [00:28<00:00, 2419.30it/s]
100%|██████████| 7983/7983 [00:00<00:00, 2927358.70it/s]
100%|██████████| 7452/7452 [00:02<00:00, 2512.20it/s]


In [36]:

captions_train = captions[:len(train_ids)]
captions_val = captions[len(train_ids):len(train_ids) + len(val_ids)]
captions_test = captions[len(train_ids) + len(val_ids):]
train_ids.sort()
val_ids.sort()
test_ids.sort()

In [37]:
for i in range(len(captions_train)):
    if captions_train[i][0] != train_ids[i]:
        print(captions_train[i][0], train_ids[i])
    assert captions_train[i][0] == train_ids[i]

for i in range(len(captions_val)):
    if captions_val[i][0] != val_ids[i]:
        print(captions_val[i][0], val_ids[i])
    assert captions_val[i][0] == val_ids[i]

for i in range(len(captions_test)):
    if captions_test[i][0] != test_ids[i]:
        print(captions_test[i][0], test_ids[i])
    assert captions_test[i][0] == test_ids[i]

captions_train = np.array(captions_train[:, 1])
captions_val = np.array(captions_val[:, 1])
captions_test = np.array(captions_test[:, 1])


# Model Definition

In [21]:
import numpy as np
import tensorflow as tf
#import tensorflow_hub as hub
import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input, Lambda

tfds.disable_progress_bar()

In [22]:
# Embedding size for each token
embed_dim = 512
# Dimention of the latent space
latent_dim = 1024
# Number of attention heads
num_heads = 4
sequence_length = 512
vocab_size = 20000

In [23]:
def project_embeddings(
    embeddings, num_projection_layers, projection_dims, dropout_rate
):
    projected_embeddings = tfkl.Dense(units=projection_dims)(embeddings)
    for _ in range(num_projection_layers):
        x = tf.nn.gelu(projected_embeddings)
        x = tfkl.Dense(projection_dims)(x)
        x = tfkl.Dropout(dropout_rate)(x)
        x = tfkl.Add()([projected_embeddings, x])
        projected_embeddings = tfkl.LayerNormalization()(x)
    return projected_embeddings

In [24]:
def build_resnet(size_embedding):
    # Load the ResNet50 model with pre-trained weights
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224,224,3))

    # Freeze the layers in the base model
    for layer in base_model.layers:
        layer.trainable = False

    # Add a GlobalAveragePooling2D layer
    inputs = tfkl.Input(shape=(224, 224, 3), name="image_input")
    embeddings = base_model(inputs)
    embeddings = tfkl.GlobalAveragePooling2D()(embeddings)
    output = tfkl.Dense(size_embedding, activation='relu')(embeddings)

    # Build the ResNet50 model
    resnet = Model(inputs=inputs, outputs=output)
    resnet.summary()

    return resnet

In [25]:
class TokenAndPositionEmbedding(tfkl.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Embedding layer for the token
        self.token_emb = tfkl.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        # Embedding layer for the position
        self.pos_emb = tfkl.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        # Find the maximum length of the input
        maxlen = tf.shape(x)[-1]
        # Create a tensor with positions from 0 to maxlen-1
        positions = tf.range(start=0, limit=maxlen, delta=1)
        # Embed the positions
        positions = self.pos_emb(positions)
        # Embed the tokens
        x = self.token_emb(x)
        # Add the token and position embeddings
        return x + positions

In [26]:
class TransformerEncoderBlock(tfkl.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        self.att = tfkl.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tfk.Sequential(
            [
                tfkl.Dense(ff_dim, activation="relu"),
                tfkl.Dense(embed_dim)
            ]
        )
        self.layernorm1 = tfkl.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tfkl.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tfkl.Dropout(rate)
        self.dropout2 = tfkl.Dropout(rate)

    def call(self, inputs, training):
        # Self-attention
        attn_output = self.att(inputs, inputs)
        # Apply dropout to the attention output
        attn_output = self.dropout1(attn_output, training=training)
        # Add the attention output to the input and normalize
        out1 = self.layernorm1(inputs + attn_output)
        # Feed-forward
        ffn_output = self.ffn(out1)
        # Apply dropout to the feed-forward output
        ffn_output = self.dropout2(ffn_output, training=training)
        # Add the feed-forward output to the previous output and normalize
        return self.layernorm2(out1 + ffn_output)

In [27]:
import keras_nlp as nlp
def build_encoder():
    encoder_inputs = tfk.Input(shape=(512), dtype="int64", name="encoder_inputs")
    # Adding token and position embedding layer
    x = nlp.layers.TokenAndPositionEmbedding(vocab_size, sequence_length, embed_dim)(encoder_inputs)
    # Adding transformer encoder block
    encoder_outputs = TransformerEncoderBlock(embed_dim, num_heads, latent_dim)(x)

    # compute the mean of the non-masked embeddings
    pooled_output = tf.reduce_mean(encoder_outputs, axis=1)
    print(pooled_output.shape)


    #output = project_embeddings(pooled_output, 1, embed_dim, 0.1)

    # Defining the encoder model
    encoder = tfk.Model(encoder_inputs, pooled_output)
    # Print the summary of the encoder model
    encoder.summary()
    # Visualize the encoder model
    tfk.utils.plot_model(encoder)

    return encoder

In [28]:
def build_clip(resnet, transformer):
    # Create the input layer
    input_layer_transformer = Input(shape=(None,), dtype="int64", name="encoder_inputs")
    input_layer_resnet = Input(shape=(128, 128, 3))

    # Pass the input through the ResNet50 model
    input_layer_resized = tfkl.Resizing(224, 224)(input_layer_resnet)
    image_features = resnet(input_layer_resized)

    # Pass the input through the Transformer model
    text_features = transformer(input_layer_transformer)

    print(text_features.shape)
    print(image_features.shape)

    joint_features = tf.concat([image_features, text_features], axis=1)


    clip = Model(inputs=[input_layer_resnet, input_layer_transformer], outputs=joint_features)

    return clip

In [29]:
# Build the ResNet50 and Transformer models
resnet = build_resnet(512)

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 image_input (InputLayer)    [(None, 224, 224, 3)]     0         
                                                                 
 resnet50 (Functional)       (None, 7, 7, 2048)        23587712  
                                                                 
 global_average_pooling2d_1   (None, 2048)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
 dense_1 (Dense)             (None, 512)               1049088   
                                                                 
Total params: 24,636,800
Trainable params: 1,049,088
Non-trainable params: 23,587,712
_________________________________________________________________


In [30]:
#Build encoder
encoder = build_encoder()

(None, 512)
Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_inputs (InputLayer)  [(None, 512)]            0         
                                                                 
 token_and_position_embeddin  (None, 512, 512)         10502144  
 g (TokenAndPositionEmbeddin                                     
 g)                                                              
                                                                 
 transformer_encoder_block (  (None, 512, 512)         5253120   
 TransformerEncoderBlock)                                        
                                                                 
 tf.math.reduce_mean (TFOpLa  (None, 512)              0         
 mbda)                                                           
                                                                 
Total params: 15,755,264
Trainable params: 15,7

In [31]:
clip = build_clip(resnet, encoder)

(None, 512)
(None, 512)


In [32]:
clip.summary()
tfk.utils.plot_model(clip)

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 128, 128, 3  0           []                               
                                )]                                                                
                                                                                                  
 resizing_1 (Resizing)          (None, 224, 224, 3)  0           ['input_4[0][0]']                
                                                                                                  
 encoder_inputs (InputLayer)    [(None, None)]       0           []                               
                                                                                                  
 model_1 (Functional)           (None, 512)          24636800    ['resizing_1[0][0]']       

In [52]:
#Loss function finally working!!
from tensorflow.keras.losses import cosine_similarity

def clip_loss_test(y_true, y_pred, temperature=0.1):

    image_embeddings, caption_embeddings = tf.split(y_pred, num_or_size_splits=2, axis=1)

    # logits[i][j] is the dot_similarity(caption_i, image_j).
    logits = (
        tf.matmul(caption_embeddings, image_embeddings, transpose_b=True)
        / temperature
    )
    # images_similarity[i][j] is the dot_similarity(image_i, image_j).
    images_similarity = tf.matmul(
        image_embeddings, image_embeddings, transpose_b=True
    )
    # captions_similarity[i][j] is the dot_similarity(caption_i, caption_j).
    captions_similarity = tf.matmul(
        caption_embeddings, caption_embeddings, transpose_b=True
    )
    # targets[i][j] = avarage dot_similarity(caption_i, caption_j) and dot_similarity(image_i, image_j).
    targets = tfk.activations.softmax(
        (captions_similarity + images_similarity) / (2 * temperature)
    )
    # Compute the loss for the captions using crossentropy
    captions_loss = tfk.losses.categorical_crossentropy(
        y_true=targets, y_pred=logits, from_logits=True
    )
    # Compute the loss for the images using crossentropy
    images_loss = tfk.losses.categorical_crossentropy(
        y_true=tf.transpose(targets), y_pred=tf.transpose(logits), from_logits=True
    )
    # Return the mean of the loss over the batch.
    return (captions_loss + images_loss) / 2

In [53]:
learning_rate = 1e-4
optimizer = tfk.optimizers.Adam(learning_rate)
metrics = []

In [90]:
class ClipDataLoader(tfk.utils.Sequence):
    def __init__(self, list_IDs, captions, batch_size=32, dim=(128, 128, 3),
                 shuffle=True, directory='data/train'):
        """Initialization"""
        self.directory = directory
        self.indexes = None
        self.captions = captions
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.shuffle = shuffle
        self.on_epoch_end()

    def __data_generation(self, list_IDs_temp):
        """Generates data containing batch_size samples"""  # X : (n_samples, *dim)
        # Initialization
        X = np.empty((self.batch_size, *self.dim))
        y = np.zeros(self.batch_size, dtype=int)

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Store sample
            X[i,] = np.load(os.path.join(self.directory + ID + '.npy'))

        return X, y

    def __len__(self):
        """Denotes the number of batches per epoch"""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Generate one batch of data"""
        # Generate indexes of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        captions = self.captions.numpy()[indexes]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_IDs_temp)

        return (X, captions), y

    def on_epoch_end(self):
        """Updates indexes after each epoch"""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

In [0]:
#Preprocessing of the data
vectorization = tfkl.TextVectorization(
    max_tokens=vocab_size, output_mode="int", output_sequence_length=sequence_length,
)
captions_train_vect = captions_train.copy()
captions_val_vect = captions_val.copy()
captions_test_vect = captions_test.copy()

vectorization.adapt(np.append(captions_train_vect, captions_val_vect))
captions_train_vect = vectorization(captions_train_vect)
captions_val_vect = vectorization(captions_val_vect)
captions_test_vect = vectorization(captions_test_vect)

In [91]:

train_generator = ClipDataLoader(train_ids, captions_train_vect, batch_size=64, directory=train_numpy_dir)
val_generator = ClipDataLoader(val_ids, captions_val_vect, batch_size=64, directory=val_numpy_dir)
test_generator = ClipDataLoader(test_ids, captions_train_vect, batch_size=64, directory=test_numpy_dir)

In [92]:
clip.compile(optimizer=optimizer, loss=clip_loss_test, metrics=metrics)

EPOCHS = 50


history = clip.fit(
    x = train_generator,
    validation_data = val_generator,
    epochs=EPOCHS,
)

<class 'tensorflow.python.framework.ops.EagerTensor'>
Epoch 1/50
<class 'tensorflow.python.framework.ops.EagerTensor'>
<class 'tensorflow.python.framework.ops.EagerTensor'>


NotFoundError: Graph execution error:

Detected at node 'StatefulPartitionedCall' defined at (most recent call last):
    File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/traitlets/config/application.py", line 1043, in launch_instance
      app.start()
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 725, in start
      self.io_loop.start()
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 513, in dispatch_queue
      await self.process_one()
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 502, in process_one
      await dispatch(*args)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 409, in dispatch_shell
      await result
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 540, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2961, in run_cell
      result = self._run_cell(
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3016, in _run_cell
      result = runner(coro)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3221, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3400, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3460, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/var/folders/0y/xb7crskj6j96cb2cmd4bsvm00000gn/T/ipykernel_48704/3730436055.py", line 6, in <module>
      history = clip.fit(
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/engine/training.py", line 1650, in fit
      tmp_logs = self.train_function(iterator)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/engine/training.py", line 1249, in train_function
      return step_function(self, iterator)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/engine/training.py", line 1233, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/engine/training.py", line 1222, in run_step
      outputs = model.train_step(data)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/engine/training.py", line 1027, in train_step
      self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 527, in minimize
      self.apply_gradients(grads_and_vars)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1140, in apply_gradients
      return super().apply_gradients(grads_and_vars, name=name)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 634, in apply_gradients
      iteration = self._internal_apply_gradients(grads_and_vars)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1166, in _internal_apply_gradients
      return tf.__internal__.distribute.interim.maybe_merge_call(
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1216, in _distributed_apply_gradients_fn
      distribution.extended.update(
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1211, in apply_grad_to_update_var
      return self._update_step_xla(grad, var, id(self._var_key(var)))
Node: 'StatefulPartitionedCall'
Detected at node 'StatefulPartitionedCall' defined at (most recent call last):
    File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/traitlets/config/application.py", line 1043, in launch_instance
      app.start()
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 725, in start
      self.io_loop.start()
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 513, in dispatch_queue
      await self.process_one()
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 502, in process_one
      await dispatch(*args)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 409, in dispatch_shell
      await result
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 540, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2961, in run_cell
      result = self._run_cell(
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3016, in _run_cell
      result = runner(coro)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3221, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3400, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3460, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/var/folders/0y/xb7crskj6j96cb2cmd4bsvm00000gn/T/ipykernel_48704/3730436055.py", line 6, in <module>
      history = clip.fit(
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/engine/training.py", line 1650, in fit
      tmp_logs = self.train_function(iterator)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/engine/training.py", line 1249, in train_function
      return step_function(self, iterator)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/engine/training.py", line 1233, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/engine/training.py", line 1222, in run_step
      outputs = model.train_step(data)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/engine/training.py", line 1027, in train_step
      self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 527, in minimize
      self.apply_gradients(grads_and_vars)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1140, in apply_gradients
      return super().apply_gradients(grads_and_vars, name=name)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 634, in apply_gradients
      iteration = self._internal_apply_gradients(grads_and_vars)
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1166, in _internal_apply_gradients
      return tf.__internal__.distribute.interim.maybe_merge_call(
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1216, in _distributed_apply_gradients_fn
      distribution.extended.update(
    File "/Users/forna/Documents/1.Politecnico/ATDL/ATDLproject/venv/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1211, in apply_grad_to_update_var
      return self._update_step_xla(grad, var, id(self._var_key(var)))
Node: 'StatefulPartitionedCall'
2 root error(s) found.
  (0) NOT_FOUND:  could not find registered platform with id: 0x1116ff910
	 [[{{node StatefulPartitionedCall}}]]
	 [[gradient_tape/model_3/model_2/transformer_encoder_block/multi_head_attention/attention_output/add/Reshape/_84]]
  (1) NOT_FOUND:  could not find registered platform with id: 0x1116ff910
	 [[{{node StatefulPartitionedCall}}]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_62827]