In [20]:
import os
import tensorflow.keras.backend as K
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.layers import MaxPooling2D

from tensorflow.keras import layers, models, Input

from PIL import Image, ImageFont, ImageDraw
import random
from keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split

data_directory = 'sketch_small'
TARGET_SIZE = (64, 64, 1)
EPOCHS = 10
BATCH_SIZE = 32

In [12]:
def create_pairs(directory):
    #directory is the path of the dataset (main folder)
    #folders are classes in dataset
    pairs = []
    labels = []
    
    folders = os.listdir(directory)
    for folder_name in folders:
        folder_path = os.path.join(directory, folder_name)
        
        if os.path.isdir(folder_path):
            images = os.listdir(folder_path)
            
            folder_length = len(images)
            for i in range(folder_length):
                for j in range(folder_length):
                    if i != j:
                        image_path = os.path.join(folder_path, images[i])
                        pairs.append([image_path, os.path.join(folder_path, images[j])])
                        labels.append(1)#positive pairs
                        
                        dif_folder = random.choice([x for x in folders if x != folder_name])
                        dif_folder_path = os.path.join(directory, dif_folder)
                        dif_image_path = os.path.join(dif_folder_path, random.choice(os.listdir(dif_folder_path)))

                        pairs.append([image_path, dif_image_path])
                        labels.append(0)#negative pairs
                        
    return np.array(pairs), np.array(labels) 

In [21]:
def process_images(pairs, target_size):
    images = []
     
    for pair in pairs:
        img1 = load_img(pair[0], target_size=target_size, color_mode='grayscale')
        img2 = load_img(pair[1], target_size=target_size, color_mode='grayscale')

        images.append((img1, img2))
        
    return images

In [22]:
def euclidean_distance(vecs):
    (imgA, imgB) = vecs
    ss = K.sum(K.square(imgA - imgB), axis = 1, keepdims=True)
    return K.sqrt(K.maximum(ss, K.epsilon()))

In [23]:
def siamese_model(input_shape, embeddingDim = 48):
    inputs = Input(input_shape)
    x = Conv2D(128, (2, 2), padding = "same", activation = "relu")(inputs)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.4)(x)

    x = Conv2D(128, (2, 2), padding = "same", activation = "relu")(inputs)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.4)(x)


    pooling = GlobalAveragePooling2D()(x)
    outputs = Dense(embeddingDim)(pooling)
    model = Model(inputs, outputs)


    return model

In [24]:
def contrastiveLoss(y, y_preds, margin=1):
    y = tf.cast(y, y_preds.dtype)
    y_preds_squared = K.square(y_preds)
    margin_squared = K.square(K.maximum(margin - y_preds, 0))
    loss = K.mean(y * y_preds_squared + (1 - y) * margin_squared)
    return loss

In [25]:
X, y = create_pairs(data_directory)
X = process_images(X, TARGET_SIZE)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [18]:
X_train, X_test, y_train, y_test = np.array(X_train), np.array(X_test), np.array(y_train), np.array(y_test)

In [19]:
X_train.shape

(17850, 2, 64, 64, 3)