In [24]:
from tensorflow.keras import layers, models, Input

import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageFont, ImageDraw
import random
from keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split

data_directory = 'sketch_small'
TARGET_SIZE = (128, 128, 1)
EPOCHS = 10
BATCH_SIZE = 32

In [2]:
def create_pairs(directory):
    #directory is the path of the dataset (main folder)
    #folders are classes in dataset
    pairs = []
    labels = []
    
    folders = os.listdir(directory)
    for folder_name in folders:
        folder_path = os.path.join(directory, folder_name)
        
        if os.path.isdir(folder_path):
            images = os.listdir(folder_path)
            
            folder_length = len(images)
            for i in range(folder_length):
                for j in range(folder_length):
                    if i != j:
                        image_path = os.path.join(folder_path, images[i])
                        pairs.append([image_path, os.path.join(folder_path, images[j])])
                        labels.append(1)#positive pairs
                        
                        dif_folder = random.choice([x for x in folders if x != folder_name])
                        dif_folder_path = os.path.join(directory, dif_folder)
                        dif_image_path = os.path.join(dif_folder_path, random.choice(os.listdir(dif_folder_path)))

                        pairs.append([image_path, dif_image_path])
                        labels.append(0)#negative pairs
                        
    return np.array(pairs), np.array(labels) 

In [16]:
def process_images(pairs, target_size):
    images = []
     
    for pair in pairs:
      
        img1 = np.array(load_img(pair[0], target_size=target_size))
        img2 = np.array(load_img(pair[1], target_size=target_size))

        images.append((img1, img2))
        
    return images

In [44]:
def prepare_data(directory):
    X, y = create_pairs(directory)
    X = process_images(X, TARGET_SIZE)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    
    return X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test = prepare_data(data_directory)  

In [None]:
def create_siamese(input_shape=TARGET_SIZE):  # Assuming RGB images with height and width of 128 pixels
    # Define the base CNN architecture
    base_model = models.Sequential()
    base_model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    base_model.add(layers.MaxPooling2D((2, 2)))
    base_model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    base_model.add(layers.MaxPooling2D((2, 2)))
    base_model.add(layers.Flatten())
    base_model.add(layers.Dense(128, activation='relu'))

    # Define the left input and right input
    left_input = layers.Input(shape=input_shape)
    right_input = layers.Input(shape=input_shape)

    # Encode each input using the base CNN
    encoded_left = base_model(left_input)
    encoded_right = base_model(right_input)

    # Compute the L1 distance between the encoded outputs
    l1_distance = layers.Lambda(lambda tensors: abs(tensors[0] - tensors[1]))([encoded_left, encoded_right])

    # Add a dense layer with sigmoid activation to output similarity score
    prediction = layers.Dense(1, activation='sigmoid')(l1_distance)

    # Connect the inputs with the prediction
    siamese_model = models.Model(inputs=[left_input, right_input], outputs=prediction)

    return siamese_model


In [16]:
siamese_model = create_siamese()
siamese_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


  super().__init__(


In [20]:
print(X_train)

[<generator object process_images.<locals>.<genexpr> at 0x000001D7621391C0>, <generator object process_images.<locals>.<genexpr> at 0x000001D762201850>, <generator object process_images.<locals>.<genexpr> at 0x000001D762152730>, <generator object process_images.<locals>.<genexpr> at 0x000001D762280740>, <generator object process_images.<locals>.<genexpr> at 0x000001D7622800B0>, <generator object process_images.<locals>.<genexpr> at 0x000001D76069AC00>, <generator object process_images.<locals>.<genexpr> at 0x000001D7601CA420>, <generator object process_images.<locals>.<genexpr> at 0x000001D760567990>, <generator object process_images.<locals>.<genexpr> at 0x000001D760EDE570>, <generator object process_images.<locals>.<genexpr> at 0x000001D7620BD070>, <generator object process_images.<locals>.<genexpr> at 0x000001D760D2CB30>, <generator object process_images.<locals>.<genexpr> at 0x000001D760296340>, <generator object process_images.<locals>.<genexpr> at 0x000001D760CE5F50>, <generator 

In [None]:
siamese_model.fit(X_train, y_train,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS)
#TODO add validation data

In [None]:
img