In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split

In [5]:
url = "https://github.com/brendenlake/omniglot/raw/master/python/images_background.zip"
path_to_zip = tf.keras.utils.get_file("images_background.zip", origin=url, extract=True)
path_to_zip

'C:\\Users\\lariyan.pouya\\.keras\\datasets\\images_background_extracted'

In [6]:
IMG_SIZE = 105  # Image size for resizing
BATCH_SIZE = 32
EPOCHS = 50

In [4]:
def load_image(image_path):
    if not os.path.isfile(image_path):
        print(f"Skipped: {image_path} is a directory.")
        return None  # Skip directories
    
    image = tf.io.read_file(image_path)
    image = tf.image.decode_png(image, channels=3)
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    image = image / 255.0  # Normalize to [0, 1]
    return image

def get_image_paths_and_labels(base_dir):
    image_paths = []
    labels = []
    for root, dirs, files in os.walk(base_dir):
        for file in files:
            if file.endswith('.png') or file.endswith('.jpg'):
                image_paths.append(os.path.join(root, file))
                labels.append(root.split('/')[-2])  # Character as label
    return image_paths, labels

# Update the path to your dataset directory
base_dir = "/home/amin/Desktop/fsl/datasets/omniglot_extracted/images_background/train"
image_paths, labels = get_image_paths_and_labels(base_dir)
print(f"Total images: {len(image_paths)}, Total labels: {len(set(labels))}")


Total images: 0, Total labels: 0


In [5]:
from sklearn.preprocessing import LabelEncoder

def prepare_data(image_paths, labels):
    images = []
    valid_paths = []
    for path in image_paths:
        img = load_image(path)
        if img is not None:
            images.append(img)
            valid_paths.append(path)
    return np.array(images), valid_paths

X, valid_paths = prepare_data(image_paths, labels)

label_encoder = LabelEncoder()
y = label_encoder.fit_transform([path.split('/')[-3] for path in valid_paths])
print(y)

I0000 00:00:1740881052.509260    8704 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-03-02 05:34:12.549197: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


[3 3 3 ... 0 0 0]


In [6]:
def create_pairs(images, labels):
    pairs = []
    pair_labels = []
    num_classes = len(np.unique(labels))
    
    # Create pairs
    for idx in range(len(images)):
        current_img = images[idx]
        current_label = labels[idx]
        
        # Positive pair
        pos_idx = np.random.choice(np.where(labels == current_label)[0])
        pos_img = images[pos_idx]
        pairs.append([current_img, pos_img])
        pair_labels.append(1)
        
        # Negative pair
        neg_label = (current_label + np.random.randint(1, num_classes)) % num_classes
        neg_idx = np.random.choice(np.where(labels == neg_label)[0])
        neg_img = images[neg_idx]
        pairs.append([current_img, neg_img])
        pair_labels.append(0)
    
    return np.array(pairs), np.array(pair_labels)

pairs, pair_labels = create_pairs(X, y)


In [7]:
def build_feature_extractor():
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.GlobalAveragePooling2D()
    ])
    return model


In [8]:
def build_siamese_network():
    feature_extractor = build_feature_extractor()
    
    input_a = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    input_b = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    
    feat_a = feature_extractor(input_a)
    feat_b = feature_extractor(input_b)
    
    # Compute L1 distance between features
    l1_layer = layers.Lambda(lambda tensors: tf.abs(tensors[0] - tensors[1]))
    l1_distance = l1_layer([feat_a, feat_b])
    
    # Fully connected layer for similarity score
    output = layers.Dense(1, activation='sigmoid')(l1_distance)
    
    siamese_network = models.Model(inputs=[input_a, input_b], outputs=output)
    siamese_network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return siamese_network


In [9]:
X_a = pairs[:, 0]
X_b = pairs[:, 1]

X_a = np.stack(X_a, axis=0)
X_b = np.stack(X_b, axis=0)

X_train_a, X_val_a, X_train_b, X_val_b, y_train, y_val = train_test_split(X_a, X_b, pair_labels, test_size=0.2, random_state=42)


In [10]:
siamese_network = build_siamese_network()

history = siamese_network.fit(
    [X_train_a, X_train_b], y_train,
    validation_data=([X_val_a, X_val_b], y_val),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50




[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 288ms/step - accuracy: 0.5029 - loss: 0.6915 - val_accuracy: 0.5550 - val_loss: 0.6685
Epoch 2/50
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 284ms/step - accuracy: 0.5794 - loss: 0.6658 - val_accuracy: 0.6282 - val_loss: 0.6498
Epoch 3/50
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 284ms/step - accuracy: 0.6282 - loss: 0.6387 - val_accuracy: 0.6239 - val_loss: 0.6268
Epoch 4/50
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 287ms/step - accuracy: 0.6320 - loss: 0.6351 - val_accuracy: 0.6853 - val_loss: 0.5955
Epoch 5/50
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 285ms/step - accuracy: 0.6530 - loss: 0.6077 - val_accuracy: 0.6832 - val_loss: 0.5973
Epoch 6/50
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 283ms/step - accuracy: 0.6878 - loss: 0.5887 - val_accuracy: 0.7177 - val_loss: 0.5800
Epoch 7/50
[1m116/11

In [11]:
loss, accuracy = siamese_network.evaluate([X_val_a, X_val_b], y_val)
print(f"Validation Loss: {loss}, Validation Accuracy: {accuracy}")


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 64ms/step - accuracy: 0.7996 - loss: 0.5377
Validation Loss: 0.5191612839698792, Validation Accuracy: 0.8049569129943848


In [12]:
siamese_network.save("siamese_cnn_model.h5")



In [13]:
######### inference ########


import tensorflow as tf


NUM_WAYS = 3     # 3 classes per episode
NUM_SHOTS = 2    # 2 examples per class for support set
NUM_QUERIES = 3  # 1 query example per class (you'll use your 3 new images)


# Load the trained model
siamese_net = siamese_network


# Manually selected paths for the support set (2 images per class, 3 classes)
support_image_paths = [
    # Class 0 images / Balinese
    "/home/amin/Desktop/fsl/datasets/omniglot_extracted/images_background/val/Balinese/character01/0108_01.png",
    "/home/amin/Desktop/fsl/datasets/omniglot_extracted/images_background/val/Balinese/character01/0108_02.png",

    # Class 1 images /  Bengali
    "/home/amin/Desktop/fsl/datasets/omniglot_extracted/images_background/val/Bengali/character08/0139_01.png",
    "/home/amin/Desktop/fsl/datasets/omniglot_extracted/images_background/val/Bengali/character08/0139_12.png",

    # Class 2 images / Blackfoot
    "/home/amin/Desktop/fsl/datasets/omniglot_extracted/images_background/val/Blackfoot_(Canadian_Aboriginal_Syllabics)/character04/0181_04.png",
    "/home/amin/Desktop/fsl/datasets/omniglot_extracted/images_background/val/Blackfoot_(Canadian_Aboriginal_Syllabics)/character06/0183_06.png"
]

# Corresponding labels for the support set
support_labels = [
    0, 0,  # Class 0
    1, 1,  # Class 1
    2, 2   # Class 2
]

In [14]:
def load_and_preprocess(image_path):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_png(image, channels=3)
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    image = image / 255.0  # Normalize to [0, 1]
    return image

def prepare_manual_support_set(support_image_paths, support_labels):
    support_images = [load_and_preprocess(img) for img in support_image_paths]
    support_images = np.stack(support_images)
    support_labels = np.array(support_labels)
    return support_images, support_labels

support_images, support_labels = prepare_manual_support_set(support_image_paths, support_labels)
print(f"Support Images Shape: {support_images.shape}, Labels Shape: {support_labels.shape}")


Support Images Shape: (6, 105, 105, 3), Labels Shape: (6,)


In [17]:
# Manually selected paths for the query set (3 new images to classify)
query_image_paths = [
    
    # Class 2 images / Blackfoot
   "/home/amin/Desktop/fsl/datasets/omniglot_extracted/images_background/val/Blackfoot_(Canadian_Aboriginal_Syllabics)/character04/0181_05.png",

   # Class 1 images / Bengali
   "/home/amin/Desktop/fsl/datasets/omniglot_extracted/images_background/val/Bengali/character08/0139_03.png",

   
   # Class 0 images / Balinese
   "/home/amin/Desktop/fsl/datasets/omniglot_extracted/images_background/val/Balinese/character01/0108_08.png"
   

]

def prepare_query_images(query_image_paths):
    print(query_image_paths)
    query_images = [load_and_preprocess(img) for img in query_image_paths]
    return np.stack(query_images)

query_images = prepare_query_images(query_image_paths)
print(f"Query Images Shape: {query_images.shape}")


['/home/amin/Desktop/fsl/datasets/omniglot_extracted/images_background/val/Blackfoot_(Canadian_Aboriginal_Syllabics)/character04/0181_05.png', '/home/amin/Desktop/fsl/datasets/omniglot_extracted/images_background/val/Bengali/character08/0139_03.png', '/home/amin/Desktop/fsl/datasets/omniglot_extracted/images_background/val/Balinese/character01/0108_08.png']
Query Images Shape: (3, 105, 105, 3)


In [16]:
def classify_images(support_images, support_labels, query_images, model):
    predictions = []
    for query_img in query_images:
        query_img = tf.expand_dims(query_img, axis=0)  # Expand dims for batch compatibility
        # Repeat query image for each support image
        tiled_query = np.tile(query_img, (support_images.shape[0], 1, 1, 1))
        # Predict similarity scores
        scores = model.predict([tiled_query, support_images])
        
        # Aggregate scores per class
        aggregated_scores = np.zeros(NUM_WAYS)
        for i in range(NUM_WAYS):
            aggregated_scores[i] = np.mean(scores[i * NUM_SHOTS:(i + 1) * NUM_SHOTS])
        
        predicted_class = np.argmax(aggregated_scores)
        predictions.append(predicted_class)
    return predictions

predictions = classify_images(support_images, support_labels, query_images, siamese_net)

for i, pred in enumerate(predictions):
    print(f"Image {i + 1}: Predicted Class -> {pred}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Image 1: Predicted Class -> 1
Image 2: Predicted Class -> 0
Image 3: Predicted Class -> 1
