In [1]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import load_img

# Dataset root folder name; presumably the path handed to the directory-based
# create_pairs(directory) helper — no visible cell passes it anywhere.
data_directory = 'sketch_small'
# Image size; presumably the target_size argument for process_images — TODO confirm.
TARGET_SIZE = (64, 64)
# NOTE(review): EPOCHS and BATCH_SIZE are not referenced by the visible cells;
# the training cell defines and uses lowercase epochs = 70 / batch_size = 64.
EPOCHS = 10
BATCH_SIZE = 32

In [2]:
def create_pairs(directory):
    """Build labelled pairs of image paths from a folder-per-class dataset.

    Every ordered pair of distinct files inside one class folder becomes a
    positive pair (label 1). Each positive pair is immediately followed by a
    negative pair (label 0) that couples the same anchor file with a randomly
    chosen file from a different, non-empty class folder.

    Args:
        directory: Path of the dataset root. Each sub-directory is treated as
            one class; plain files directly under the root are ignored.

    Returns:
        Tuple ``(pairs, labels)``: ``pairs`` is an array of path strings with
        one ``[anchor, partner]`` row per pair and ``labels`` holds the
        matching 1/0 values. Both are empty when no class folder contains at
        least two files.

    Raises:
        IndexError: Propagated from ``random.choice`` when a negative pair is
            needed but no other non-empty class folder exists.

    Note:
        Negatives are drawn with the ``random`` module, so seed it for
        reproducible output. A later cell defines ``create_pairs(images,
        labels)``, which replaces this function in the kernel namespace.
    """
    # Index every class folder once instead of re-listing a folder for each
    # negative sample. Sorting makes the pair order independent of the
    # filesystem's listing order.
    class_images = {}
    for entry in sorted(os.listdir(directory)):
        folder_path = os.path.join(directory, entry)
        if os.path.isdir(folder_path):
            class_images[entry] = [
                os.path.join(folder_path, file_name)
                for file_name in sorted(os.listdir(folder_path))
            ]

    pairs = []
    labels = []
    for class_name, image_paths in class_images.items():
        # Negatives may only come from real class folders that contain files;
        # stray files in the root and empty folders are not valid sources.
        other_classes = [
            name
            for name, paths in class_images.items()
            if name != class_name and paths
        ]

        for i, anchor_path in enumerate(image_paths):
            for j, partner_path in enumerate(image_paths):
                if i == j:
                    continue

                pairs.append([anchor_path, partner_path])
                labels.append(1)  # positive pair

                negative_class = random.choice(other_classes)
                negative_path = random.choice(class_images[negative_class])
                pairs.append([anchor_path, negative_path])
                labels.append(0)  # negative pair

    return np.array(pairs), np.array(labels)

In [3]:
def siamese_model(input_shape, embeddingDim=48):
  """Build the embedding network used by both branches of the siamese model.

  The network stacks two identical Conv2D(128, 2x2, relu, same padding) ->
  MaxPooling2D(2x2) -> Dropout(0.4) blocks, applies global average pooling
  and projects the result with a Dense layer that has no activation.

  Args:
    input_shape: Shape of one input image, passed straight to ``Input``.
    embeddingDim: Number of units of the final Dense layer, i.e. the size of
      the embedding vector.

  Returns:
    A Keras ``Model`` mapping an image of ``input_shape`` to an
    ``embeddingDim``-dimensional embedding.
  """
  inputs = Input(input_shape)

  # First convolutional block.
  x = Conv2D(128, (2, 2), padding="same", activation="relu")(inputs)
  x = MaxPooling2D(pool_size=(2, 2))(x)
  x = Dropout(0.4)(x)

  # Second convolutional block. It must consume the first block's output;
  # feeding `inputs` here again would leave the first block disconnected
  # from the model.
  x = Conv2D(128, (2, 2), padding="same", activation="relu")(x)
  x = MaxPooling2D(pool_size=(2, 2))(x)
  x = Dropout(0.4)(x)

  pooling = GlobalAveragePooling2D()(x)
  outputs = Dense(embeddingDim)(pooling)

  return Model(inputs, outputs)

In [4]:
def process_images(pairs, target_size):
    """Load both images of every path pair as resized grayscale images.

    Args:
        pairs: Iterable of two-element sequences ``[path_a, path_b]``.
        target_size: Size forwarded to ``load_img`` as ``target_size``.

    Returns:
        A list with one ``(img_a, img_b)`` tuple per input pair, in input
        order. The elements are whatever ``load_img`` returns (image objects,
        not NumPy arrays — convert them before feeding a model).
    """
    return [
        tuple(
            load_img(path, target_size=target_size, color_mode='grayscale')
            for path in (pair[0], pair[1])
        )
        for pair in pairs
    ]

In [5]:
def contrastiveLoss(y, y_preds, margin=1):
    """Contrastive loss over predicted pair distances.

    Args:
        y: Pair labels; cast to the dtype of ``y_preds`` before use. In this
            notebook 1 marks a same-class pair and 0 a different-class pair.
        y_preds: Predicted values for each pair (the model's distance output
            in this notebook).
        margin: Distance beyond which a 0-labelled pair contributes nothing.

    Returns:
        Mean over all elements of
        ``y * d**2 + (1 - y) * max(margin - d, 0)**2`` with ``d = y_preds``.
    """
    targets = tf.cast(y, y_preds.dtype)

    # Pairs labelled 1 are penalised by their squared distance.
    pull_term = targets * K.square(y_preds)
    # Pairs labelled 0 are penalised only while they are closer than the margin.
    push_term = (1 - targets) * K.square(K.maximum(margin - y_preds, 0))

    return K.mean(pull_term + push_term)

In [7]:
def create_pairs(images, labels):
  """Build one positive and one negative image pair for every image.

  For each image, in order, two pairs are emitted: first
  ``[image, random image with the same label]`` labelled ``[1]`` (the partner
  may be the image itself), then ``[image, random image with a different
  label]`` labelled ``[0]``.

  Args:
    images: Indexable collection of images; ``images[i]`` belongs to
      ``labels[i]``.
    labels: Class label per image. Any set of label values is accepted (they
      need not be 0..K-1); the input is flattened, so shapes (n,) and (n, 1)
      both work.

  Returns:
    Tuple ``(imagePairs, labelPairs)`` of NumPy arrays with ``2 * len(images)``
    entries each; every ``labelPairs`` entry is ``[1]`` or ``[0]``.

  Raises:
    ValueError: Propagated from ``np.random.choice`` when the data contains a
      single class, so no negative partner exists.

  Note:
    Partners are drawn from NumPy's global random state; seed it for
    reproducible pairs. This definition replaces the earlier
    ``create_pairs(directory)`` in the kernel namespace.
  """
  imagePairs = []
  labelPairs = []

  labels = np.asarray(labels).ravel()

  # Map each label to a dense class id (0..K-1 in sorted label order) so that
  # arbitrary label values can be used to look up index lists.
  _, class_ids = np.unique(labels, return_inverse=True)
  numclasses = int(class_ids.max()) + 1 if class_ids.size else 0
  same_class_idx = [np.where(class_ids == c)[0] for c in range(numclasses)]

  # Indices of images outside a class, computed once per class on first use
  # instead of once per image.
  other_class_idx = {}

  for ind in range(len(images)):
    currImage = images[ind]
    class_id = int(class_ids[ind])

    # Positive pair: random partner from the same class.
    partner = images[np.random.choice(same_class_idx[class_id])]
    imagePairs.append([currImage, partner])
    labelPairs.append([1])

    # Negative pair: random partner from any other class.
    if class_id not in other_class_idx:
      other_class_idx[class_id] = np.where(class_ids != class_id)[0]
    stranger = images[np.random.choice(other_class_idx[class_id])]
    imagePairs.append([currImage, stranger])
    labelPairs.append([0])

  return (np.array(imagePairs), np.array(labelPairs))

In [8]:
def euclidean_distance(vecs):
  """Row-wise Euclidean distance between two batches of vectors.

  Args:
    vecs: Pair ``(a, b)`` of tensors to compare.

  Returns:
    Tensor of distances reduced over axis 1 with that axis kept (size 1).
    Squared distances are floored at ``K.epsilon()`` before the square root,
    so the result is never exactly zero.
  """
  first, second = vecs
  squared_gap = K.square(first - second)
  sum_squared = K.sum(squared_gap, axis=1, keepdims=True)
  # Floor before the square root (presumably to keep sqrt's gradient finite
  # for identical inputs).
  floored = K.maximum(sum_squared, K.epsilon())
  return K.sqrt(floored)

In [19]:
# NOTE(review): these imports sit mid-notebook rather than in the first import
# cell; Lambda is needed by the model-building cell further down.
from tensorflow.keras.layers import Lambda
from tensorflow.keras.datasets import fashion_mnist
# Load the Fashion-MNIST train/test split as (images, labels) tuples.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [21]:
# Shape check of the training images. The recorded output (60000, 28, 28) has no
# channel axis, i.e. it was captured before np.expand_dims was applied.
x_train.shape

(60000, 28, 28)

In [13]:
# Scale pixel values by 1/255 (maps to [0, 1] assuming 8-bit input — TODO confirm dtype).
# NOTE(review): x_train/x_test are rebound in place, so re-running this cell without
# reloading the data rescales them again and appends another trailing axis.
x_train = x_train/255.0
x_test = x_test/255.0

# Append a trailing axis of size 1, matching image_shape = (28, 28, 1) used for the model inputs.
x_train = np.expand_dims(x_train, axis = -1)
x_test = np.expand_dims(x_test, axis=-1)


# Build one positive and one negative pair per image for each split.
# NOTE(review): create_pairs samples via np.random and no seed is set in the
# visible cells, so the pairs differ between runs.
(training_pairs, training_labels) = create_pairs(x_train, y_train)
(test_pairs, test_labels) = create_pairs(x_test, y_test)

In [23]:
# Shape check: two pair labels per training image, each wrapped in a length-1 row.
training_labels.shape

(120000, 1)

In [16]:
# Shape of a single input image after the channel axis was added.
image_shape = (28, 28, 1)
# Training hyper-parameters used by model.fit below.
# NOTE(review): the uppercase BATCH_SIZE / EPOCHS constants from the first cell
# hold different values and are not used here.
batch_size = 64
epochs = 70

# One input per branch of the siamese network.
imageA = Input(shape = image_shape)
imageB = Input(shape = image_shape)


# A single embedding model instance is applied to both inputs, so both branches
# use the same layers (and therefore the same weights).
model_build = siamese_model(image_shape)
modelA = model_build(imageA)
modelB = model_build(imageB)


# The network output is the Euclidean distance between the two embeddings.
distance = Lambda(euclidean_distance)([modelA, modelB])
model = Model(inputs=[imageA, imageB], outputs=distance)




In [None]:
# Optimise the distance output with the contrastive loss defined above.
model.compile(loss = contrastiveLoss, optimizer="adam")
# pairs[:, 0] and pairs[:, 1] split each pair into the inputs of the two branches.
# The pairs built from the test split serve as validation data.
history = model.fit(
    [training_pairs[:, 0], training_pairs[:, 1]], training_labels[:],
    validation_data=([test_pairs[:, 0], test_pairs[:, 1]], test_labels[:]),
    batch_size = batch_size,
    epochs = epochs)