In [None]:
!pip install --upgrade keras
!pip install -U keras-nlp
!pip install keras-cv
!pip install tensorflow[and-cuda]
!apt-get install wget 

Collecting keras
  Downloading keras-3.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading keras-3.7.0-py3-none-any.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: keras
  Attempting uninstall: keras
    Found existing installation: keras 3.6.0
    Uninstalling keras-3.6.0:
      Successfully uninstalled keras-3.6.0
Successfully installed keras-3.7.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m
Collecting keras-nlp
  Downloading keras_nlp-0.17.0-py3-none-any.whl.metadata (1.2 kB)
Collecting keras-hub==0.17.0 (from keras-nlp)
  Downloading keras_hub-0.17.0-py3-none-any.whl.metadata (7.4 kB)
Collecting regex (from keras-hub==0.17.0->keras-nlp)
  Downloadi


KeyboardInterrupt



In [None]:
# --- Training configuration ---------------------------------------------------
# Images are handled at a square 240 x 240 resolution.
IMG_HEIGHT = IMG_WIDTH = 240
BATCH_SIZE, EPOCHS = 3, 2000
# Filename template handed to ModelCheckpoint; it carries {epoch} and {loss} placeholders.
checkpoint_filepath = "weights-improvement-{epoch:02d}-{loss:.2f}.keras"

In [None]:
# The backend has to be selected BEFORE keras (or anything that imports keras,
# such as keras_cv) is imported: Keras reads KERAS_BACKEND at import time, so
# setting it afterwards has no effect.
import os
os.environ["KERAS_BACKEND"] = "tensorflow"

import tensorflow as tf
from keras import Model, Sequential, ops
from keras.callbacks import ModelCheckpoint
from keras.layers import (
    Add,
    Concatenate,
    Dense,
    Flatten,
    Input,
    Layer,
    LayerNormalization,
    MultiHeadAttention,
    Reshape,
)
from keras.optimizers import Adam
from keras_cv.models import CLIP
from keras_cv.models.feature_extractor import CLIPImageEncoder

In [None]:
# Report how many GPU devices TensorFlow can see.
physical_devices = tf.config.list_physical_devices(device_type='GPU')
print(f"Num GPUs: {len(physical_devices)}")

In [None]:
# Load the pretrained CLIP preset and pull out its image encoder layer.
# The handle is named `clip_model` rather than `model` because `model` is the
# name the trainable network is bound to further down; sharing the name meant
# that re-running this cell later silently pointed `model.fit` at CLIP.
clip_model = CLIP.from_preset("clip-vit-base-patch32")
image_encoder = clip_model.get_layer("image_encoder")
# Freeze the encoder so its weights are excluded from training.
image_encoder.trainable = False

In [None]:
class AttentionDecoder(Layer):
    """Self-attention block followed by a two-layer feed-forward network.

    Both sub-blocks use a residual connection followed by layer normalisation:
    ``norm(x + attention(x, x))`` and then ``norm(y + ffn(y))``.

    The layer is serialisable (it accepts the base ``Layer`` keyword arguments
    and implements ``get_config``), so models containing it can be restored
    from the ``.keras`` checkpoints written during training. When loading,
    pass ``custom_objects={"AttentionDecoder": AttentionDecoder}`` so Keras can
    resolve the class.

    Args:
        num_heads: Number of attention heads.
        embed_dim: Used as ``key_dim`` of the attention layer and as the output
            width of the feed-forward network. It must equal the last dimension
            of the input so the residual additions line up.
        ff_dim: Width of the hidden (GELU) layer of the feed-forward network.
        **kwargs: Standard ``keras.layers.Layer`` arguments (``name``,
            ``dtype``, ``trainable``, ...).
    """

    def __init__(self, num_heads, embed_dim, ff_dim, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can reproduce the constructor call.
        self.num_heads = num_heads
        self.embed_dim = embed_dim
        self.ff_dim = ff_dim

        self.attention = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.norm1 = LayerNormalization(epsilon=1e-6)
        self.add1 = Add()

        self.ffn = Sequential([
            Dense(ff_dim, activation="gelu"),
            Dense(embed_dim),
        ])
        self.norm2 = LayerNormalization(epsilon=1e-6)
        self.add2 = Add()

    def call(self, inputs):
        """Apply self-attention and the feed-forward network to ``inputs``.

        Args:
            inputs: Tensor used as both query and value of the attention layer.

        Returns:
            The result of the second layer normalisation.
        """
        # Self-attention: the same tensor is query and value.
        attn_output = self.attention(inputs, inputs)
        out1 = self.add1([inputs, attn_output])
        out1 = self.norm1(out1)
        ffn_output = self.ffn(out1)
        out2 = self.add2([out1, ffn_output])
        return self.norm2(out2)

    def get_config(self):
        """Return the constructor arguments needed to rebuild this layer."""
        config = super().get_config()
        config.update(
            {
                "num_heads": self.num_heads,
                "embed_dim": self.embed_dim,
                "ff_dim": self.ff_dim,
            }
        )
        return config


In [None]:
# Two image inputs of identical size: the object image and the background image.
object_input = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))
background_input = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))

# Both inputs go through the same encoder layer; the two embeddings are then
# joined along the feature axis.
encoded_image_1 = image_encoder(object_input)
encoded_image_2 = image_encoder(background_input)
concatenated_inputs = Concatenate()([encoded_image_1, encoded_image_2])
# Reshape needs the concatenated vector to hold exactly 16 * 64 = 1024 values
# per sample; 64 matches the decoder's embed_dim below.
reshaped_features = Reshape((16, 64))(concatenated_inputs)

# Number of times the decoder block is applied.
NUM_DECODER_PASSES = 12
decoder = AttentionDecoder(num_heads=8, embed_dim=64, ff_dim=1024)
# NOTE(review): a single layer instance is applied repeatedly, so all passes
# share one set of weights. If independent blocks were intended, create a new
# AttentionDecoder per pass -- that changes the parameter count and makes
# existing checkpoints incompatible, so confirm before switching.
x = reshaped_features
for _ in range(NUM_DECODER_PASSES):
    x = decoder(x)

x = Flatten()(x)
x = Dense(64, activation="gelu")(x)
x = Dense(32, activation="gelu")(x)
# Size the output from the same constants as the inputs so that changing
# IMG_HEIGHT / IMG_WIDTH keeps predictions and targets the same shape.
x = Dense(IMG_HEIGHT * IMG_WIDTH * 3)(x)
output_image = Reshape((IMG_HEIGHT, IMG_WIDTH, 3))(x)

model = Model(inputs=[object_input, background_input], outputs=output_image)
# Only write a checkpoint when the monitored training loss improves.
checkpoint = ModelCheckpoint(checkpoint_filepath, monitor='loss', verbose=1, save_best_only=True)
callbacks_list = [checkpoint]
model.compile(optimizer=Adam(learning_rate=1e-4), loss="mse")
model.summary()

In [None]:
import pickle

# SECURITY: pickle.load can execute arbitrary code while deserialising.
# Only load an images_dict.pkl that you produced yourself or otherwise trust.
with open('images_dict.pkl', 'rb') as file:
    data = pickle.load(file)

# Later cells iterate over data.items(); fail here with a clear message rather
# than with an AttributeError further down.
if not hasattr(data, "items"):
    raise TypeError(
        f"images_dict.pkl should contain a mapping, got {type(data).__name__}"
    )

In [None]:
# Absolute dataset directories, all rooted at the same notebook folder.
_NOTEBOOK_ROOT = '/tf/notebooks'
object_dir = f'{_NOTEBOOK_ROOT}/targets'
background_dir = f'{_NOTEBOOK_ROOT}/backgrounds'
original_image_dir = f'{_NOTEBOOK_ROOT}/awsaf49/coco-2017-dataset/versions/2/coco2017/train2017'

In [None]:
def load_and_preprocess_image(image_path):
    """Read an image file and return it resized and scaled to [0, 1].

    Args:
        image_path: Path of the image file (a string or scalar string tensor).

    Returns:
        A float32 tensor of shape (IMG_HEIGHT, IMG_WIDTH, 3).
    """
    raw_bytes = tf.io.read_file(image_path)
    # expand_animations=False makes an animated GIF decode to its first frame
    # (rank 3) instead of a rank-4 frame stack, so every supported format
    # yields a single H x W x 3 image.
    img = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)
    # decode_image does not provide a static shape when traced inside
    # Dataset.map; pin rank and channel count so resize can be built.
    img.set_shape([None, None, 3])
    # With its default (bilinear) method, resize already returns float32, so
    # no extra cast is needed before scaling.
    img = tf.image.resize(img, [IMG_HEIGHT, IMG_WIDTH])
    return img / 255.0

In [None]:
# Three path lists that are filled in lock-step: entry i of each list belongs
# to the same training sample.
object_paths, background_paths, original_image_paths = [], [], []

In [None]:
# Start from empty lists so that re-running this cell rebuilds the path lists
# instead of appending every sample a second time.
object_paths = []
background_paths = []
original_image_paths = []

# Target/background paths are resolved against the current working directory;
# look it up once rather than on every iteration.
working_dir = os.getcwd()

for key, value in data.items():
    targets, backgrounds = value["targets"], value["backgrounds"]
    # Entries whose targets and backgrounds cannot be paired one-to-one are skipped.
    if len(targets) != len(backgrounds):
        continue

    # One original image per key; it is repeated for each (target, background) pair.
    original_image_path = original_image_dir + "/" + key + ".jpg"
    for object_path, bg_path in zip(targets, backgrounds):
        # Only string entries are usable as file paths; anything else is skipped.
        if not (isinstance(object_path, str) and isinstance(bg_path, str)):
            continue
        object_paths.append(working_dir + "/" + object_path)
        background_paths.append(working_dir + "/" + bg_path)
        original_image_paths.append(original_image_path)

In [None]:
# Sanity check: the three lists must be equally long, otherwise samples would
# be misaligned when the datasets are zipped together.
path_counts = {len(object_paths), len(background_paths), len(original_image_paths)}
if len(path_counts) != 1:
    raise ValueError("All directories must have the same number of images.")

In [None]:
# Decode and resize images in parallel instead of one at a time.
# deterministic=True keeps elements in input order, which is required because
# this dataset is later zipped element-by-element with the other datasets.
object_dataset = tf.data.Dataset.from_tensor_slices(object_paths).map(
    load_and_preprocess_image,
    num_parallel_calls=tf.data.AUTOTUNE,
    deterministic=True,
)

In [None]:
# Decode and resize images in parallel instead of one at a time.
# deterministic=True keeps elements in input order, which is required because
# these datasets are later zipped element-by-element.
background_dataset = tf.data.Dataset.from_tensor_slices(background_paths).map(
    load_and_preprocess_image,
    num_parallel_calls=tf.data.AUTOTUNE,
    deterministic=True,
)
original_image_dataset = tf.data.Dataset.from_tensor_slices(original_image_paths).map(
    load_and_preprocess_image,
    num_parallel_calls=tf.data.AUTOTUNE,
    deterministic=True,
)

In [None]:
# Each element is ((object_image, background_image), original_image): the
# first tuple feeds the model's two inputs, the second item is the target.
sample_structure = ((object_dataset, background_dataset), original_image_dataset)
dataset = (
    tf.data.Dataset.zip(sample_structure)
    .batch(BATCH_SIZE, drop_remainder=True)  # incomplete final batch is dropped
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [None]:
# Train on the zipped dataset for EPOCHS epochs with the configured callbacks.
model.fit(
    x=dataset,
    epochs=EPOCHS,
    verbose=1,
    callbacks=callbacks_list,
)