In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

Environment Setup

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os

os.makedirs('utils/', exist_ok=True)
os.chdir('utils')

! wget https://raw.githubusercontent.com/Ata-Pab/Machine_Learning/master/utils/models.py
! wget https://raw.githubusercontent.com/Ata-Pab/Machine_Learning/master/utils/losses.py
! wget https://raw.githubusercontent.com/Ata-Pab/Machine_Learning/master/utils/vision.py
! wget https://raw.githubusercontent.com/Ata-Pab/Machine_Learning/master/utils/callbacks.py
! wget https://raw.githubusercontent.com/Ata-Pab/Machine_Learning/master/utils/utils.py

os.chdir('/content')
print("Current working directory", os.getcwd())

--2023-11-17 12:07:33--  https://raw.githubusercontent.com/Ata-Pab/Machine_Learning/master/utils/models.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 21013 (21K) [text/plain]
Saving to: ‘models.py’


2023-11-17 12:07:33 (23.7 MB/s) - ‘models.py’ saved [21013/21013]

--2023-11-17 12:07:33--  https://raw.githubusercontent.com/Ata-Pab/Machine_Learning/master/utils/losses.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7280 (7.1K) [text/plain]
Saving to: ‘losses.py’


2023-11-17 12:07:33 (98.3 MB/s) - ‘losses.py’ saved [7280/7

In [4]:
from utils import vision
from utils import utils
from utils import losses

In [7]:
! pip install -q kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download -d ruchi798/totally-looks-like-dataset

mkdir: cannot create directory ‘/root/.kaggle’: File exists
Downloading totally-looks-like-dataset.zip to /content
 94% 187M/199M [00:01<00:00, 167MB/s]
100% 199M/199M [00:01<00:00, 178MB/s]


In [8]:
utils.unzip_data('/content/totally-looks-like-dataset.zip')
! rm /content/totally-looks-like-dataset.zip

In [9]:
ANCHOR_IMG_PATH = '/content/left/left'
POSITIVE_IMG_PATH = '/content/right/right'

Environment Setup End

In [30]:
experiment = {
    'TYPE': 'train',        # Experiment type: 'train', 'test'
    'ACCELERATOR': 'GPU',   # 'CPU', 'GPU' or 'TPU'

    'IMAGE_SIZE': (200, 200),
    'INPUT_SHAPE': (200, 200, 3),
    'VALID_SIZE': 0.2,      # Validation data size: (Valid Data) / (All Data)
    'DATA_AUG': True,       # Apply data augmentation

    'BACKBONE': 'custom',        # 'custom', 'VGG16', 'VGG19' - default 'custom'
    'LAST_TRANIABLE_LAYERS': 5,  # number of last trainable layers of pre-trained backbone models, fine-tuning
    'BATCH_SIZE': 16,            # IF TPU is active set 4, otherwise set anything
    'EPOCHS': 5,
    'OPTIMIZER': 'Adam',  # TODO: Try 'rmsprop' optimizer
    'LEARNING_RATE': 1e-4,

    'RECONS_LOSS': 'PERCEPTUAL',  # Reconstruction loss ('SSIM', 'MSE', 'MAE', 'PERCEPTUAL')
    'PERCEPTUAL_LAYERS': [5,8,13,18],    # [5,8,13,18], None
    'PERCEP_LOSS_MODEL': 'VGG19', # custom', 'VGG16', 'VGG19' - default 'VGG16'
    'PERP_LOSS_LAMBDA': 1,
    'LRELU_SLOPE': 0.2,       # Leaky ReLU activation function slope value
    'MSE_LOSS_LAMBDA': 0.01,  # MSE coeff

    # set the dimensionality of the latent space to a plane for visualization later
    'LATENT_DIM': 500,

    'SAVE_WEIGHTS_PER_EPOCH': 10,
    # Record directory of model's trained weights
    'TRAINING_WEIGHT_DIR': "autoencoder_model/BD67_dataset/training_weights",
    # Record directory of generated images
    'IMGS_DIR': "autoencoder_model/BD67_dataset/images",
}

In [11]:
# We need to make sure both the anchor and positive images are loaded in
# sorted order so we can match them together.
anchor_images = sorted([str(ANCHOR_IMG_PATH + "/" + f) for f in os.listdir(ANCHOR_IMG_PATH)])
positive_images = sorted([str(POSITIVE_IMG_PATH + "/" + f) for f in os.listdir(POSITIVE_IMG_PATH)])

image_count = len(anchor_images)

In [12]:
anchor_images[:5]

['/content/left/left/00000.jpg',
 '/content/left/left/00001.jpg',
 '/content/left/left/00002.jpg',
 '/content/left/left/00003.jpg',
 '/content/left/left/00004.jpg']

In [13]:
anchor_dataset = tf.data.Dataset.from_tensor_slices(anchor_images)
positive_dataset = tf.data.Dataset.from_tensor_slices(positive_images)

In [14]:
# To generate the list of negative images, let's randomize the list of
# available images and concatenate them together.
rng = np.random.RandomState(seed=42)  # We need to apply deterministic randomness for providing to locate the similar images in same indexes
rng.shuffle(anchor_images)
rng.shuffle(positive_images)

In [15]:
negative_images = anchor_images + positive_images
np.random.RandomState(seed=32).shuffle(negative_images)

negative_dataset = tf.data.Dataset.from_tensor_slices(negative_images)
negative_dataset = negative_dataset.shuffle(buffer_size=4096)

In [16]:
# Creates a Dataset by zipping together the given datasets.
# This method has similar semantics to the built-in zip() function in Python,
# with the main difference being that the datasets argument can be a (nested)
# structure of Dataset objects
dataset = tf.data.Dataset.zip((anchor_dataset, positive_dataset, negative_dataset))
dataset = dataset.shuffle(buffer_size=1024)

In [17]:
# Load and preprocess the three images given the corresponding filenames
def preprocess_triplets(anchor, positive, negative):
    # Load the specified file as a JPEG image and preprocess it
    def preprocess_image(filename, size=None):
        image_string = tf.io.read_file(filename)
        image = tf.image.decode_jpeg(image_string, channels=3)  # Decode a JPEG-encoded image to a uint8 tensor
        image = tf.image.convert_image_dtype(image, tf.float32)
        if size != None:
            image = tf.image.resize(image, size)
        return image

    return (
        preprocess_image(anchor, experiment['IMAGE_SIZE']),
        preprocess_image(positive, experiment['IMAGE_SIZE']),
        preprocess_image(negative, experiment['IMAGE_SIZE']),
    )

In [18]:
dataset = dataset.map(preprocess_triplets)   # Dataset has 3 different image arrays (anchor, positive, negative)

In [19]:
print("Total Image Count: ", image_count)

Total Image Count:  6016


In [20]:
train_dataset = dataset.take(round(image_count * (1-experiment['VALID_SIZE']))).batch(experiment['BATCH_SIZE'], drop_remainder=False).prefetch(tf.data.experimental.AUTOTUNE)

In [21]:
valid_dataset = dataset.skip(round(image_count * (1-experiment['VALID_SIZE']))).batch(experiment['BATCH_SIZE'], drop_remainder=False).prefetch(tf.data.experimental.AUTOTUNE)

In [22]:
print("Train dataset size: ", len(train_dataset))
print("Valid dataset size: ", len(valid_dataset))

Train dataset size:  301
Valid dataset size:  76


In [23]:
anchor, positive, negative = [[1,2],[3,4],[5,6]]

In [24]:
anchor

[1, 2]

In [31]:
class SiameseResNet50Model(Model):
    """The Siamese Network model with a custom training and testing loops.

    Computes the triplet loss using the three embeddings produced by the
    Siamese Network.

    The triplet loss is defined as:
       L(A, P, N) = max(‖f(A) - f(P)‖² - ‖f(A) - f(N)‖² + margin, 0)
    """
    def __init__(self, input_shape, margin=0.5):
        super(SiameseResNet50Model, self).__init__()
        self._input_shape = input_shape
        self._margin = margin
        self._siamese_resnet50_network = self.build_resnet50_siam_network()
        self._loss_tracker = tf.keras.metrics.Mean(name="loss")

    def call(self, inputs):
        anchor, positive, negative = inputs
        return [self._siamese_resnet50_network(anchor, training=False),
                self._siamese_resnet50_network(positive, training=False),
                self._siamese_resnet50_network(negative, training=False)
        ]

    def summary(self):
        return self._siamese_resnet50_network.summary()

    def build_resnet50_siam_network(self):
        resnet50_model = tf.keras.applications.resnet.ResNet50(weights='imagenet', input_shape=self._input_shape, include_top=False)
        # input_shape: optional shape tuple, only to be specified if include_top is False
        # (otherwise the input shape has to be (224, 224, 3).  It should have exactly 3
        # inputs channels, and width and height should be no smaller than 32.
        # E.g. (200, 200, 3) would be one valid value.

        trainable = False
        for layer in resnet50_model.layers:
          if layer.name == 'conv5_block1_out':
            trainable = True
          layer.trainable = trainable

        flatten = keras.layers.Flatten()(resnet50_model.output)
        dense1 = keras.layers.Dense(512, activation="relu")(flatten)
        dense1 = keras.layers.BatchNormalization()(dense1)
        dense2 = keras.layers.Dense(256, activation="relu")(dense1)
        dense2 = keras.layers.BatchNormalization()(dense2)
        output = keras.layers.Dense(256)(dense2)

        return tf.keras.Model(resnet50_model.input, output, name="siamese_resnet50_model")

    def train_step(self, data):
        anchor, positive, negative = data

        anchor_input = tf.keras.applications.resnet.preprocess_input(anchor)
        positive_input = tf.keras.applications.resnet.preprocess_input(positive)
        negative_input = tf.keras.applications.resnet.preprocess_input(negative)

        with tf.GradientTape() as tape:
            anchor_features = self._siamese_resnet50_network(anchor_input, training=True)
            positive_features = self._siamese_resnet50_network(positive_input, training=True)
            negative_features = self._siamese_resnet50_network(negative_input, training=True)
            loss = self._compute_loss(anchor_features, positive_features, negative_features)

        # List of variables to be included in backpropagation
        gradients = tape.gradient(loss, self._siamese_resnet50_network.trainable_weights)
        self.optimizer.apply_gradients(zip(gradients, self._siamese_resnet50_network.trainable_weights))

        # Let's update and return the training loss metric.
        self._loss_tracker.update_state(loss)
        return {"loss": self._loss_tracker.result()}

    def test_step(self, data):
        anchor, positive, negative = data
        loss = self._compute_loss(anchor, positive, negative)

        # Let's update and return the loss metric.
        self._loss_tracker.update_state(loss)
        return {"loss": self._loss_tracker.result()}

    # Triplet Loss: L(A, P, N) = max(‖f(A) - f(P)‖² - ‖f(A) - f(N)‖² + margin, 0)
    # In Distance Layer: ap_distance = ‖f(A) - f(P)‖², an_distance = ‖f(A) - f(N)‖²
    def _compute_loss(self, anchor_features, positive_features, negative_features):
        ap_distance = tf.reduce_sum(tf.square(anchor_features - positive_features), -1)
        an_distance = tf.reduce_sum(tf.square(anchor_features - negative_features), -1)

        # Computing the Triplet Loss by subtracting both distances and
        # making sure we don't get a negative value.
        loss = ap_distance - an_distance
        loss = tf.maximum(loss + self._margin, 0.0)
        return loss

    @property
    def metrics(self):
        # We need to list our metrics here so the `reset_states()` can be
        # called automatically.
        return [self._loss_tracker]

In [32]:
siamese_resnet50_model = SiameseResNet50Model(input_shape=experiment['INPUT_SHAPE'])
siamese_resnet50_model.summary()

Model: "siamese_resnet50_model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 200, 200, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 206, 206, 3)          0         ['input_2[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 100, 100, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 100, 100, 64)         256       ['conv1_conv[0][0]']          
 on)                                                                         

In [33]:
siamese_resnet50_model.compile(optimizer=tf.keras.optimizers.Adam(0.0001))
siamese_resnet50_model.fit(train_dataset, epochs=experiment['EPOCHS'], validation_data=valid_dataset)

Epoch 1/5



Epoch 2/5



Epoch 3/5



Epoch 4/5



Epoch 5/5





<keras.src.callbacks.History at 0x7933e87bf190>

In [39]:
valid_dataset_batch = next(iter(valid_dataset))

anchor_embedding, positive_embedding, negative_embedding = siamese_resnet50_model(valid_dataset_batch)

We should expect the similarity between the anchor and positive images to be larger than the similarity between the anchor and the negative images.

In [40]:
cosine_similarity = tf.keras.metrics.CosineSimilarity()

positive_similarity = cosine_similarity(anchor_embedding, positive_embedding)
print("Positive similarity:", positive_similarity.numpy())

negative_similarity = cosine_similarity(anchor_embedding, negative_embedding)
print("Negative similarity", negative_similarity.numpy())

Positive similarity: 0.99880415
Negative similarity 0.99882925
