In [1]:
import random 
import numpy as np
import tensorflow as tf
import pandas as pd
import cv2
import os
import tensorflow_addons as tfa

from matplotlib import pyplot as plt
from tensorflow import keras
from pathlib import Path
from sklearn.metrics import f1_score

from typing import List

In [2]:
# Training hyper-parameters.
epochs = 10      # passed to model.fit as the number of epochs
margin = 1       # passed to Loss(margin=...)
# Mini-batch size (same value the loaders are built with).  Defined here so no
# cell depends on a name left over from earlier kernel state.
batch_size = 16

In [3]:
# Show the devices TensorFlow can see (the recorded output lists one CPU and one GPU).
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

# Dataset

In [4]:
# Load the image-pair table and keep only rows where both image references are present.
df = (
    pd.read_csv("../data/prepared_train_with_missed.csv")
    .dropna(subset=["image_url1", "image_url2"])
)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 90412 entries, 0 to 90637
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   image_url1  90412 non-null  object
 1   image_url2  90412 non-null  object
 2   is_same     90412 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 2.8+ MB


In [5]:
# Names of the entries present in the local training-image folder.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
downloaded_files = os.listdir(r"E:\datasets\csc-2023-lun\train")

In [6]:
# Every image name referenced by either column of the pair table.
all_files = set(df["image_url1"]) | set(df["image_url2"])

In [7]:
# Referenced image names that are not present in the download folder.
missed = all_files - set(downloaded_files)
len(missed)

440

In [8]:
# Remove every pair that references an image missing on disk.
# One vectorised membership mask replaces the per-file loop, which scanned the
# whole frame twice and dropped rows in place for each missing name.  Building
# a new frame from the mask also makes this cell safe to re-run.
references_missing = df["image_url1"].isin(missed) | df["image_url2"].isin(missed)
df = df[~references_missing]

In [9]:
# Re-inspect the frame to see how many pairs remain.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 89925 entries, 0 to 90637
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   image_url1  89925 non-null  object
 1   image_url2  89925 non-null  object
 2   is_same     89925 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 2.7+ MB


In [10]:
class DataLoader(keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches of image pairs with their labels.

    Each row of the wrapped frame names two image files (columns
    ``image_url1`` and ``image_url2``, resolved against ``images_folder``) and
    carries an ``is_same`` label.  Images are read with OpenCV, converted from
    BGR to RGB, resized to ``IMG_W`` x ``IMG_H`` and divided by 255.

    All rows are read from ``self.df`` and batches are sized with
    ``self.batch_size``, so every loader serves exactly the frame it was given
    and the shuffle in ``on_epoch_end`` changes what the next epoch yields.
    """

    IMG_W = 128  # target image width passed to cv2.resize
    IMG_H = 128  # target image height passed to cv2.resize

    def __init__(self, df:pd.DataFrame, images_folder:Path, batch_size:int=32):
        """Store the pair table, the image directory and the batch size.

        Args:
            df: frame with ``image_url1``, ``image_url2`` and ``is_same`` columns.
            images_folder: directory that contains the image files.
            batch_size: number of pairs returned per batch.
        """
        self.df = df
        self.images_folder = Path(images_folder)
        self.batch_size = batch_size

    def _load_image(self, file_name):
        """Read one image and return it as a float32 RGB array scaled by 1/255."""
        image_path = self.images_folder / file_name
        image = cv2.imread(str(image_path))
        # cv2.imread returns None instead of raising when the file cannot be read.
        assert image is not None, f"could not read image: {image_path}"

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (self.IMG_W, self.IMG_H)).astype(np.float32)
        return image / 255.

    def get_single_record(self, index):
        """Return ``(image_1, image_2, target)`` for the row at position ``index``.

        Both images are returned as tensors; ``target`` is the row's
        ``is_same`` value.

        Raises:
            AssertionError: if either image file cannot be read.
        """
        columns = self.df.columns
        image_1 = self._load_image(self.df.iloc[index, columns.get_loc("image_url1")])
        image_2 = self._load_image(self.df.iloc[index, columns.get_loc("image_url2")])
        target = self.df.iloc[index, columns.get_loc("is_same")]

        return tf.convert_to_tensor(image_1), tf.convert_to_tensor(image_2), target

    def __len__(self):
        """Number of full batches; a trailing partial batch is not served."""
        return len(self.df) // self.batch_size

    def __getitem__(self, index):
        """Return batch ``index`` as ``([images_1, images_2], labels)`` float tensors."""
        x_batch_1 = []
        x_batch_2 = []
        labels = []

        first_row = index * self.batch_size
        for row in range(first_row, first_row + self.batch_size):
            img_1, img_2, label = self.get_single_record(row)
            x_batch_1.append(img_1)
            x_batch_2.append(img_2)
            labels.append(label)

        return [tf.convert_to_tensor(x_batch_1, dtype=tf.float32), tf.convert_to_tensor(x_batch_2, dtype=tf.float32)], \
                tf.convert_to_tensor(labels, dtype=tf.float32)

    def on_epoch_end(self):
        """Shuffle the rows so the next epoch sees the pairs in a new order."""
        self.df = self.df.sample(frac=1).reset_index(drop=True)


In [11]:
# (rows, columns) of the pair table that is split into train/validation below.
df.shape

(89925, 3)

In [12]:
# Positional split: the first 80000 pairs train the model, the remainder validate it.
train_df, valid_df = df.iloc[:80000], df.iloc[80000:]

# Both loaders read images from the same folder and use the same batch size.
train_loader, valid_loader = (
    DataLoader(split, r"E:\datasets\csc-2023-lun\train", batch_size=16)
    for split in (train_df, valid_df)
)

In [13]:
# Number of training pairs labelled is_same == 1.
int((train_df["is_same"] == 1).sum())

25603

In [14]:
# Number of training pairs labelled is_same == 0.
int((train_df["is_same"] == 0).sum())

54397

# Model

In [15]:
def euclidean_distance(vects:List):
    """Row-wise Euclidean distance between two batches of vectors.

    Args:
        vects: a pair ``(a, b)`` of equally shaped 2-D tensors.

    Returns:
        Tensor with one distance per row; the reduced axis is kept, so the
        result has a trailing dimension of size 1.  The squared distance is
        floored at the Keras epsilon before the square root is taken.
    """
    left, right = vects
    difference = left - right

    squared_norm = tf.math.reduce_sum(tf.math.square(difference), axis=1, keepdims=True)
    floored = tf.math.maximum(squared_norm, tf.keras.backend.epsilon())
    return tf.math.sqrt(floored)

In [16]:
# Sanity check: the distance between (1, 2, 3) and (4, 5, 6) is sqrt(27) ~ 5.196.
euclidean_distance((
    tf.constant([[1, 2, 3]], dtype=float),
    tf.constant([[4, 5, 6]], dtype=float)
))

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[5.196152]], dtype=float32)>

In [17]:
def build_embedding_model(input_shape, embeddings_dim):
    """Build the convolutional backbone that maps one image to an embedding.

    Three ``Conv2D`` (kernel 5, tanh) + ``AveragePooling2D`` (pool 2) stages
    with 5, 16 and 32 filters are followed by flatten, batch normalisation and
    a tanh ``Dense`` layer of size ``embeddings_dim``.

    Args:
        input_shape: shape of a single input image, without the batch axis.
        embeddings_dim: size of the output embedding.

    Returns:
        A Keras model named ``"embeddings_backbone"``.
    """
    inp = keras.layers.Input(input_shape)

    features = inp
    for n_filters in (5, 16, 32):
        features = keras.layers.Conv2D(filters=n_filters, kernel_size=5, activation="tanh")(features)
        features = keras.layers.AveragePooling2D(pool_size=2)(features)

    flat = keras.layers.Flatten()(features)
    normalised = keras.layers.BatchNormalization()(flat)
    embeddings = keras.layers.Dense(embeddings_dim, activation="tanh")(normalised)

    return keras.models.Model(inp, embeddings, name="embeddings_backbone")

def build_model(input_shape, embedding_dim=128):
    """Build the siamese model: two inputs, one shared backbone, one sigmoid output.

    Args:
        input_shape: shape of a single input image, without the batch axis.
        embedding_dim: embedding size passed to ``build_embedding_model``.

    Returns:
        A Keras model taking ``[input_1, input_2]`` and returning one value per pair.
    """
    backbone = build_embedding_model(input_shape, embedding_dim)

    input_1 = keras.layers.Input(input_shape, name="input_1")
    input_2 = keras.layers.Input(input_shape, name="input_2")
    pair_inputs = [input_1, input_2]

    # The same backbone instance is applied to both inputs, so the towers share weights.
    towers = [backbone(branch) for branch in pair_inputs]

    distance = keras.layers.Lambda(euclidean_distance)(towers)
    # NOTE(review): this is `Normalization`, not `BatchNormalization`, and it is
    # never adapted in this notebook — confirm that is the intended layer.
    normalised = keras.layers.Normalization()(distance)
    out = keras.layers.Dense(1, activation="sigmoid")(normalised)

    return keras.models.Model(inputs=pair_inputs, outputs=out)

In [18]:
# Build a stand-alone backbone to inspect its layers and parameter counts.
embeddings_model = build_embedding_model(input_shape=(128, 128, 3), embeddings_dim=512)
embeddings_model.summary()

Model: "embeddings_backbone"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 128, 128, 3)]     0         
                                                                 
 conv2d (Conv2D)             (None, 124, 124, 5)       380       
                                                                 
 average_pooling2d (AverageP  (None, 62, 62, 5)        0         
 ooling2D)                                                       
                                                                 
 conv2d_1 (Conv2D)           (None, 58, 58, 16)        2016      
                                                                 
 average_pooling2d_1 (Averag  (None, 29, 29, 16)       0         
 ePooling2D)                                                     
                                                                 
 conv2d_2 (Conv2D)           (None, 25, 25, 32)

In [19]:
class Loss(keras.losses.Loss):
    """Contrastive-style loss with a configurable margin.

    Computes ``mean((1 - y_true) * y_pred**2 + y_true * max(margin - y_pred, 0)**2)``.
    """

    def __init__(self, margin:int=1):
        """
        Args:
            margin: value the prediction is pushed towards when ``y_true`` is 1.
        """
        super().__init__()
        self.margin = margin

    def call(self, y_true, y_pred):
        """Return the mean loss for a batch of labels and predictions."""
        pred_squared = tf.math.square(y_pred)
        # Use the instance's margin: reading a module-level `margin` here would
        # silently ignore the value given to the constructor.
        margin_squared = tf.math.square(tf.math.maximum(self.margin - (y_pred), 0))
        return tf.math.reduce_mean(
            (1 - y_true) * pred_squared + (y_true) * margin_squared
        )


In [20]:
model = build_model((128, 128, 3), 512)
model.compile(
    loss=Loss(margin=margin),
    optimizer="Adam",
    # `keras.metrics.Accuracy` counts exact equality between prediction and
    # label, which a sigmoid output essentially never satisfies.
    # `BinaryAccuracy` thresholds the prediction first.  The metric is named
    # "accuracy" so the keys in the training history stay the same.
    metrics=[keras.metrics.BinaryAccuracy(name="accuracy")]
)

In [21]:
# Inspect the full siamese model (two inputs feeding the shared backbone).
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 128, 128, 3  0           []                               
                                )]                                                                
                                                                                                  
 input_2 (InputLayer)           [(None, 128, 128, 3  0           []                               
                                )]                                                                
                                                                                                  
 embeddings_backbone (Functiona  (None, 512)         2393468     ['input_1[0][0]',                
 l)                                                               'input_2[0][0]']            

In [22]:
# Checkpoint callback that only overwrites the file when the monitored quantity improves.
checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath="models/checkpoints/simple-siam-checkpoint-best.h5",
    save_best_only=True,
)

In [None]:
# The loaders are `keras.utils.Sequence` objects that already yield whole
# batches, so `batch_size` must not be passed to `fit`.
history = model.fit(
    train_loader,
    validation_data=valid_loader,
    epochs=epochs,
    callbacks=[checkpoint_callback]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10

In [None]:
# NOTE(review): repeats the device listing from the top of the notebook; remove if no longer needed.
tf.config.list_physical_devices()

In [None]:
# Persist the trained siamese model to disk.
model.save("models/simple-siam-v1")

In [None]:
def plt_metric(history, metric, title, has_valid=True):
    """Plot a metric, and optionally its validation counterpart, against the epoch.

    Args:
        history: either an object exposing a ``history`` mapping (such as the
            value returned by ``model.fit``) or the mapping itself.
        metric: key to plot, e.g. ``"loss"``.
        title: figure title.
        has_valid: when true, also plot ``"val_" + metric`` and add a legend.
    """
    # Accept both a History-like object and a plain mapping.  `getattr` with a
    # default only covers the "no such attribute" case, so unrelated errors are
    # no longer swallowed the way a bare `except` would.
    history = getattr(history, "history", history)

    plt.plot(history[metric])
    if has_valid:
        plt.plot(history["val_" + metric])
        plt.legend(["train", "validation"], loc="upper left")
    plt.title(title)
    plt.ylabel(metric)
    plt.xlabel("epoch")
    plt.show()

# Training vs. validation loss per epoch.
plt_metric(history, metric="loss", title="Contrastive Loss")
