In [77]:
import json

import numpy as np
import tensorflow as tf

In [2]:
all_samples_product  = json.load(open("data.json","r"))["data"]
list_ds =  tf.data.Dataset.from_tensor_slices(all_samples_product)
list_ds = list_ds.shuffle(len(list_ds), reshuffle_each_iteration=False)

2022-12-18 10:39:40.020796: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:267] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-12-18 10:39:40.020853: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (dimo-81ne): /proc/driver/nvidia/version does not exist
2022-12-18 10:39:40.022840: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
val_size = int(len(list_ds) * 0.2)
train_ds = list_ds.skip(val_size)
val_ds = list_ds.take(val_size)


In [4]:
AUTOTUNE = tf.data.AUTOTUNE
img_dims = [32,32,3]
batch_size = 32
shuffle_buffer_size = 1000

In [5]:

def decode_img(img):
    # Convert the compressed string to a 3D uint8 tensor
    img = tf.io.decode_jpeg(img, channels=img_dims[2])
    img = tf.image.convert_image_dtype(img, tf.float32)
    # Resize the image to the desired size
    return tf.image.resize(img, img_dims[:2])


def process_path(file_path):
    # Load the raw data from the file as a string
    img:tf.Tensor = tf.io.read_file(file_path)
    img = decode_img(img)
    return img

def process_triplets(file_items0):
    return process_path(file_items0[0]),process_path(file_items0[1]),process_path(file_items0[2])

def configure_for_performance(ds):
    ds = ds.cache()
    ds = ds.shuffle(buffer_size=shuffle_buffer_size)
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=AUTOTUNE)
    return ds



In [6]:

train_ds = configure_for_performance(train_ds.map(process_triplets, num_parallel_calls=AUTOTUNE))
val_ds = configure_for_performance(val_ds.map(process_triplets, num_parallel_calls=AUTOTUNE))

In [7]:
k = train_ds.take(1)
print(k)

<TakeDataset element_spec=(TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name=None))>


In [58]:
from keras import layers

class Encoder(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.conv1 = tf.keras.Sequential([
            layers.Conv2D(16, 3, activation='relu',name="Conv16x7"),
            layers.MaxPool2D()
        ],name="conv1")
        self.conv3 = tf.keras.Sequential([
            layers.Conv2D(32, 3,activation='relu',name="Conv32x3_1"),
            layers.MaxPool2D()
        ],name="conv3")
        self.conv4 = tf.keras.Sequential([
            layers.Conv2D(64, 3, activation='relu',name="Conv64x3"),
            layers.MaxPool2D()
        ],name="conv4")
        self.tail = tf.keras.Sequential([
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(512,activation="relu")],name="tail")
    def call(self, imgs):
        imgs = self.conv1(imgs)
        imgs = self.conv3(imgs)
        imgs = self.conv4(imgs)
        return self.tail(imgs)
def image_encode_creator():
    model = Encoder()
    return model


Encoder = image_encode_creator()
Encoder.summary()

ValueError: This model has not yet been built. Build the model first by calling `build()` or by calling the model on a batch of data.

In [59]:
class DistanceLayer(tf.keras.layers.Layer):
    """
    This layer is responsible for computing the distance between the anchor
    embedding and the positive embedding, and the anchor embedding and the
    negative embedding.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, anchor, positive, negative):
        ap_distance = tf.reduce_sum(tf.square(anchor - positive), -1)
        an_distance = tf.reduce_sum(tf.square(anchor - negative), -1)
        return ap_distance, an_distance


In [60]:

anchor_input = tf.keras.layers.Input(shape=(32, 32, 3))
positive_input = tf.keras.layers.Input(shape=(32, 32, 3))
negative_input = tf.keras.layers.Input(shape=(32, 32, 3))

distances = DistanceLayer()(
    Encoder(anchor_input),
    Encoder(positive_input),
    Encoder(negative_input),
)

siamese_network = tf.keras.Model(
    inputs=[anchor_input, positive_input, negative_input], outputs=distances
)


In [61]:
class SiameseModel(tf.keras.Model):
    """The Siamese Network model with a custom training and testing loops.

    Computes the triplet loss using the three embeddings produced by the
    Siamese Network.

    The triplet loss is defined as:
       L(A, P, N) = max(‖f(A) - f(P)‖² - ‖f(A) - f(N)‖² + margin, 0)
    """

    def __init__(self, network, margin=0.2):
        super(SiameseModel, self).__init__()
        self.siamese_network = siamese_network
        self.margin = margin
        self.loss_tracker = tf.keras.metrics.Mean(name="loss")

    def call(self, inputs):
        return self.siamese_network(inputs)

    def train_step(self, data):
        # GradientTape is a context manager that records every operation that
        # you do inside. We are using it here to compute the loss so we can get
        # the gradients and apply them using the optimizer specified in
        # `compile()`.
        with tf.GradientTape() as tape:
            loss = self._compute_loss(data)

        # Storing the gradients of the loss function with respect to the
        # weights/parameters.
        gradients = tape.gradient(loss, self.siamese_network.trainable_weights)

        # Applying the gradients on the model using the specified optimizer
        self.optimizer.apply_gradients(
            zip(gradients, self.siamese_network.trainable_weights)
        )

        # Let's update and return the training loss metric.
        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}

    def test_step(self, data):
        loss = self._compute_loss(data)

        # Let's update and return the loss metric.
        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}

    def _compute_loss(self, data):
        # The output of the network is a tuple containing the distances
        # between the anchor and the positive example, and the anchor and
        # the negative example.
        ap_distance, an_distance = self.siamese_network(data)

        # Computing the Triplet Loss by subtracting both distances and
        # making sure we don't get a negative value.
        loss = ap_distance - an_distance
        loss = tf.maximum(loss + self.margin, 0.0)
        return loss

    @property
    def metrics(self):
        # We need to list our metrics here so the `reset_states()` can be
        # called automatically.
        return [self.loss_tracker]


In [62]:
datasets = []
skiperFrog = train_ds.skip(0)
skiper = 300
while len(skiperFrog) !=0:
    datasets.append(skiperFrog.take(skiper))
    skiperFrog = skiperFrog.skip(skiper)


In [63]:
print("Bazinga")
siamese_model = SiameseModel(siamese_network)
siamese_model.compile(optimizer=tf.keras.optimizers.Adam())



Bazinga


In [87]:
checkpointPath = "training_2"

In [88]:


latest = tf.train.latest_checkpoint(checkpointPath)
siamese_model.load_weights(latest)


<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f3aadcc7b50>

In [89]:
#DVC




valuation1 = val_ds.take(300)

# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpointPath,
                                                 save_weights_only=True,
                                                 verbose=1)

for i in datasets:
    siamese_model.fit(i, epochs=2, validation_data=valuation1,callbacks=[cp_callback])
for i in datasets:
    siamese_model.fit(i, epochs=2, validation_data=valuation1,callbacks=[cp_callback])



2022-12-18 11:22:23.460354: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.



Epoch 2: saving model to training_2


2022-12-18 11:22:27.608732: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 1/2

KeyboardInterrupt: 

In [16]:
Encoder.save("modelEncoderLarge0.02%_Margin_0.3.m5")





INFO:tensorflow:Assets written to: modelEncoder0.01%_Margin_0.2.m5/assets


INFO:tensorflow:Assets written to: modelEncoder0.01%_Margin_0.2.m5/assets
