In [2]:
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import numpy as np
import cv2
from tensorflow.keras.applications import VGG16

def load_image(img_path):
    """Read an image file and return it as an RGB float32 array divided by 255.

    Args:
        img_path: Path of the image file, passed straight to ``cv2.imread``.

    Returns:
        The decoded image with its channels reordered from BGR to RGB, cast to
        ``np.float32`` and divided by 255.0.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read the file (it
            returns ``None`` in that case instead of raising).
    """
    img = cv2.imread(img_path)
    if img is None:
        # Fail here with the offending path rather than letting cvtColor
        # raise an opaque error about an empty input.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img.astype(np.float32) / 255.0

# Read the training pairs table and the test candidates table from the dataset folder.
train_df = pd.read_csv("COMP90086_2023_TLLdataset/train.csv")
test_df = pd.read_csv('COMP90086_2023_TLLdataset/test_candidates.csv')


# Decode every training image up front; entry k of each list comes from row k of
# train_df ('left' and 'right' columns respectively).
# NOTE(review): load_image returns float32 arrays, so both lists hold the whole
# training set in memory at once — confirm this fits for the full dataset.
train_left_images = [load_image(f"COMP90086_2023_TLLdataset/train/left/{img_name}.jpg") for img_name in train_df['left']]
train_right_images = [load_image(f"COMP90086_2023_TLLdataset/train/right/{img_name}.jpg") for img_name in train_df['right']]






In [7]:
## This prediction only uses cosine similarity
from numpy import dot
from numpy.linalg import norm

def cosine_similarity(a, b):
    """Return the cosine of the angle between two vectors.

    Args:
        a: First vector (array-like accepted by ``numpy.dot`` / ``numpy.linalg.norm``).
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``, or ``0.0`` when either vector has
        zero norm.
    """
    denom = norm(a) * norm(b)
    if denom == 0:
        # 0/0 would produce NaN, which argsort places last (i.e. as the "best"
        # score) and which poisons max(); treat a zero vector as "no similarity".
        return 0.0
    return dot(a, b) / denom
import cv2
import numpy as np


# Flatten an image to one dimension and divide its values by 255
def preprocess_image(img):
    """Return ``img`` flattened to 1-D with every value divided by 255.0.

    NOTE(review): the callers in this notebook pass the result of
    ``load_image``, which has already divided by 255.0, so the values end up
    scaled twice — confirm whether that is intended.
    """
    flat = img.flatten()
    return flat / 255.0

# Score every test row: cosine similarity between the left image and each of
# its candidate right images. similarities[r][i] is the score of candidate c{i}
# for test row r.
similarities = []

for _, row in test_df.iterrows():
    left_path = f"COMP90086_2023_TLLdataset/test/left/{row['left']}.jpg"
    left_img = preprocess_image(load_image(left_path))
    confidences = []

    for i in range(20):  # Assuming there are 20 candidates for each "left" image
        right_path = f"COMP90086_2023_TLLdataset/test/right/{row[f'c{i}']}.jpg"
        right_img = preprocess_image(load_image(right_path))
        score = cosine_similarity(left_img, right_img)
        confidences.append(score)

    similarities.append(confidences)

def top_2_accuracy_per_row(distances):
    """Flag, per row, whether column 0 holds one of the two largest scores.

    Despite the parameter name, larger values are treated as better.

    Args:
        distances: List of equal-length score lists (one list per row).

    Returns:
        Boolean NumPy array with one entry per row; True where index 0 is
        among the last two positions of the ascending argsort of that row.
    """
    scores = np.array(distances)

    # argsort is ascending, so the final two columns are the two highest scores
    ranking = scores.argsort(axis=1)
    best_two = ranking[:, -2:]

    # Index 0 is taken to be the true match
    return (best_two == 0).any(axis=1)

# Top-2 accuracy of the cosine-similarity scores, treating candidate c0 as the true match.
# NOTE(review): the recorded result (9.35%) is close to 2/20, i.e. chance level for
# 20 candidates — confirm that c0 really is the ground truth in test_candidates.csv.
matches = top_2_accuracy_per_row(similarities)
overall_top_2_accuracy = np.mean(matches)
print(f"Overall Top-2 Accuracy: {overall_top_2_accuracy * 100:.2f}%")

def top_1_accuracy_per_row(distances):
    """Return 1 for each row whose first value equals the row maximum, else 0.

    Despite the parameter name, larger values are treated as better. A tie
    between the first value and the maximum counts as a hit.

    Args:
        distances: Iterable of score sequences (one per row).

    Returns:
        List of 0/1 ints, one per row.
    """
    hits = []
    for scores in distances:
        best = max(scores)
        hits.append(1 if scores[0] == best else 0)
    return hits

# Top-1 accuracy of the cosine-similarity scores, treating candidate c0 as the true match.
# NOTE(review): the recorded result (4.60%) is close to 1/20, i.e. chance level for
# 20 candidates — confirm that c0 really is the ground truth in test_candidates.csv.
matches = top_1_accuracy_per_row(similarities)
overall_top_1_accuracy = np.mean(matches)
print(f"Overall Top-1 Accuracy: {overall_top_1_accuracy * 100:.2f}%")

def top_5_accuracy_per_row(distances):
    """Return 1 for each row whose first value is among its five largest values, else 0.

    Despite the parameter name, larger values are treated as better. Membership
    is tested by value, so a first value equal to any of the five largest
    counts as a hit.

    Args:
        distances: Iterable of score sequences (one per row).

    Returns:
        List of 0/1 ints, one per row.
    """
    hits = []
    for scores in distances:
        leaders = sorted(scores, reverse=True)[:5]
        hits.append(1 if scores[0] in leaders else 0)
    return hits

# Top-5 accuracy of the cosine-similarity scores, treating candidate c0 as the true match.
# NOTE(review): the recorded result (24.25%) is close to 5/20, i.e. chance level for
# 20 candidates — confirm that c0 really is the ground truth in test_candidates.csv.
matches = top_5_accuracy_per_row(similarities)
overall_top_5_accuracy = np.mean(matches)
print(f"Overall Top-5 Accuracy: {overall_top_5_accuracy * 100:.2f}%")

Overall Top-2 Accuracy: 9.35%
Overall Top-1 Accuracy: 4.60%
Overall Top-5 Accuracy: 24.25%


In [5]:
# Flatten an image to one dimension and divide its values by 255
# (same definition as in the cosine-similarity cell)
def preprocess_image(img):
    """Return ``img`` flattened to 1-D with every value divided by 255.0.

    NOTE(review): the loop in this cell passes the result of ``load_image``,
    which has already divided by 255.0, so the values are scaled twice —
    confirm whether that is intended.
    """
    flat = img.flatten()
    return flat / 255.0
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated with ``np.exp``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# Per-row score lists for the L2-based ranking are collected here
distances = []
def l2_distance(a, b):
    """Return the norm of ``a - b`` as computed by ``np.linalg.norm``."""
    difference = a - b
    return np.linalg.norm(difference)

# Score every test row: L2 distance between the left image and each candidate
# right image, mapped through a sigmoid so that a smaller distance gives a
# larger score. distances[r][i] is the score of candidate c{i} for test row r.
for _, row in test_df.iterrows():
    left_path = f"COMP90086_2023_TLLdataset/test/left/{row['left']}.jpg"
    left_img = preprocess_image(load_image(left_path))
    confidences = []

    for i in range(20):  # Assuming there are 20 candidates for each "left" image
        right_path = f"COMP90086_2023_TLLdataset/test/right/{row[f'c{i}']}.jpg"
        right_img = preprocess_image(load_image(right_path))
        distance = l2_distance(left_img, right_img)
        # Negative scale factor (-0.01) inside the sigmoid flips the ordering:
        # distance 0 maps to 0.5 and larger distances map toward 0
        scaled_similarity = sigmoid(-0.01 * distance)
        confidences.append(scaled_similarity)

    distances.append(confidences)


def top_2_accuracy_per_row(distances):
    """Flag, per row, whether column 0 holds one of the two largest scores.

    Despite the parameter name, larger values are treated as better.

    Args:
        distances: List of equal-length score lists (one list per row).

    Returns:
        Boolean NumPy array with one entry per row; True where index 0 is
        among the last two positions of the ascending argsort of that row.
    """
    scores = np.array(distances)

    # argsort is ascending, so the final two columns are the two highest scores
    ranking = scores.argsort(axis=1)
    best_two = ranking[:, -2:]

    # Index 0 is taken to be the true match
    return (best_two == 0).any(axis=1)

# Top-2 accuracy of the sigmoid-scaled L2 scores, treating candidate c0 as the true match.
# NOTE(review): the recorded result (9.90%) is close to 2/20, i.e. chance level for
# 20 candidates — confirm that c0 really is the ground truth in test_candidates.csv.
matches = top_2_accuracy_per_row(distances)
overall_top_2_accuracy = np.mean(matches)
print(f"Overall Top-2 Accuracy: {overall_top_2_accuracy * 100:.2f}%")

def top_1_accuracy_per_row(distances):
    """Return 1 for each row whose first value equals the row maximum, else 0.

    Despite the parameter name, larger values are treated as better. A tie
    between the first value and the maximum counts as a hit.

    Args:
        distances: Iterable of score sequences (one per row).

    Returns:
        List of 0/1 ints, one per row.
    """
    hits = []
    for scores in distances:
        best = max(scores)
        hits.append(1 if scores[0] == best else 0)
    return hits

# Top-1 accuracy of the sigmoid-scaled L2 scores, treating candidate c0 as the true match.
# NOTE(review): the recorded result (4.50%) is close to 1/20, i.e. chance level for
# 20 candidates — confirm that c0 really is the ground truth in test_candidates.csv.
matches = top_1_accuracy_per_row(distances)
overall_top_1_accuracy = np.mean(matches)
print(f"Overall Top-1 Accuracy: {overall_top_1_accuracy * 100:.2f}%")

def top_5_accuracy_per_row(distances):
    """Return 1 for each row whose first value is among its five largest values, else 0.

    Despite the parameter name, larger values are treated as better. Membership
    is tested by value, so a first value equal to any of the five largest
    counts as a hit.

    Args:
        distances: Iterable of score sequences (one per row).

    Returns:
        List of 0/1 ints, one per row.
    """
    hits = []
    for scores in distances:
        leaders = sorted(scores, reverse=True)[:5]
        hits.append(1 if scores[0] in leaders else 0)
    return hits

# Top-5 accuracy of the sigmoid-scaled L2 scores, treating candidate c0 as the true match.
# NOTE(review): the recorded result (24.65%) is close to 5/20, i.e. chance level for
# 20 candidates — confirm that c0 really is the ground truth in test_candidates.csv.
matches = top_5_accuracy_per_row(distances)
overall_top_5_accuracy = np.mean(matches)
print(f"Overall Top-5 Accuracy: {overall_top_5_accuracy * 100:.2f}%")


Overall Top-2 Accuracy: 9.90%
Overall Top-1 Accuracy: 4.50%
Overall Top-5 Accuracy: 24.65%


In [2]:

def create_pairs(left_images, right_images):
    """Build matched and mismatched image pairs with binary labels.

    For each index ``i`` two pairs are emitted, in this order:
    ``[left_images[i], right_images[i]]`` with label 1, then
    ``[left_images[i], right_images[j]]`` with label 0, where ``j != i`` is
    drawn uniformly at random from the other indices using NumPy's global
    random state.

    Args:
        left_images: Sequence of left images.
        right_images: Sequence of right images; entry ``i`` is the match of
            ``left_images[i]``.

    Returns:
        Tuple ``(pairs, labels)`` of NumPy arrays built with ``np.array``;
        both have ``2 * len(left_images)`` entries along their first axis.

    Raises:
        ValueError: If exactly one image is given, because no mismatched
            partner exists (rejection sampling would otherwise never terminate).
    """
    n = len(left_images)
    if n == 1:
        raise ValueError("create_pairs needs at least 2 images to sample a negative pair")

    pairs = []
    labels = []

    for i in range(n):
        # Positive pairing
        pairs.append([left_images[i], right_images[i]])
        labels.append(1)

        # Negative pairing: sample from the n-1 other indices directly by
        # drawing in [0, n-1) and stepping over i, so no retry loop is needed.
        random_idx = np.random.randint(0, n - 1)
        if random_idx >= i:
            random_idx += 1

        pairs.append([left_images[i], right_images[random_idx]])
        labels.append(0)

    return np.array(pairs), np.array(labels)

# Build the labelled pair array from the training images
pairs, labels = create_pairs(train_left_images, train_right_images)

# Split the pair axis into two arrays (left members, right members), one per model input
train_pairs = [pairs[:, side] for side in (0, 1)]




In [6]:
def cosine_similarity(vectors):
    """Cosine similarity of two tensors along their last axis.

    Note: this reuses the name ``cosine_similarity`` that an earlier cell gives
    to a two-argument NumPy function; running this cell replaces that binding.

    Args:
        vectors: A pair ``(x, y)`` of tensors.

    Returns:
        The sum over the last axis of the product of the L2-normalised inputs,
        with that axis kept (``keepdims=True``).
    """
    backend = tf.keras.backend
    first, second = vectors
    first_unit = backend.l2_normalize(first, axis=-1)
    second_unit = backend.l2_normalize(second, axis=-1)
    return backend.sum(first_unit * second_unit, axis=-1, keepdims=True)
def siamese_with_cosine_similarity(input_shape):
    """Build a two-input model that scores an image pair by cosine similarity.

    Both inputs pass through the same ImageNet-pretrained VGG16 (no top, every
    layer frozen); the two feature maps are flattened and compared with the
    ``cosine_similarity`` Lambda layer.

    NOTE(review): the only layers added are Flatten and Lambda, so the model
    appears to have no trainable weights — confirm that fitting it is
    meaningful.
    NOTE(review): ``load_image`` yields RGB values divided by 255; the Keras
    VGG16 weights are documented as expecting ``vgg16.preprocess_input`` —
    confirm the input scaling.

    Args:
        input_shape: Shape of a single image, e.g. ``(height, width, channels)``.

    Returns:
        A Keras ``Model`` taking ``[left, right]`` inputs and returning the
        similarity score.
    """
    # One input per side of the pair, both with the same shape
    branch_inputs = [layers.Input(input_shape), layers.Input(input_shape)]

    # Shared convolutional backbone with all of its layers frozen
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
    for vgg_layer in backbone.layers:
        vgg_layer.trainable = False

    # Encode both sides with the same backbone, then flatten each feature map
    feature_maps = [backbone(tensor) for tensor in branch_inputs]
    embeddings = [layers.Flatten()(fmap) for fmap in feature_maps]

    # The model output is the cosine similarity of the two embeddings
    similarity_score = layers.Lambda(cosine_similarity)(embeddings)

    return Model(inputs=branch_inputs, outputs=similarity_score)

# Build, compile and fit the cosine-similarity siamese model on the training pairs.
# NOTE(review): the recorded run of this cell stopped with
# "TypeError: ('Keyword argument not understood:', 'inputs')", so model_vgg16 was
# not created in that session — re-run on a fresh kernel to confirm it builds.
# NOTE(review): (245, 200, 3) is presumably the (height, width, channels) of the
# dataset images — confirm against the arrays returned by load_image.
input_shape = (245, 200, 3)
model_vgg16 = siamese_with_cosine_similarity(input_shape)
# The model output is a raw cosine similarity that is scored with binary cross-entropy.
model_vgg16.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# The last 10% of the pairs are held out for validation (validation_split=0.1).
model_vgg16.fit(train_pairs, labels, epochs=5, batch_size=16, validation_split=0.1)


TypeError: ('Keyword argument not understood:', 'inputs')

In [None]:

def predict_similarity1(left_img, right_img):
    """Score a single image pair with the module-level ``model_vgg16``.

    Each image is wrapped into a batch of one, the two batches are passed to
    ``model_vgg16.predict``, and element ``[0][0]`` of the result is returned.
    """
    left_batch = np.array([left_img])
    right_batch = np.array([right_img])
    scores = model_vgg16.predict([left_batch, right_batch])
    return scores[0][0]

# predictions[r][i] is the model score of candidate c{i} for test row r
predictions = []

# Iterate through each row in the test dataframe
for _, row in test_df.iterrows():
    left_img = load_image(f"COMP90086_2023_TLLdataset/test/left/{row['left']}.jpg")
    confidences = []

    # Predict the similarity score for each candidate "right" image
    for i in range(20):  # Assuming there are 20 candidates for each "left" image
        right_img = load_image(f"COMP90086_2023_TLLdataset/test/right/{row[f'c{i}']}.jpg")
        # The per-candidate debug print of the growing list was removed here:
        # it flooded the cell output on every iteration.
        confidences.append(predict_similarity1(left_img, right_img))
    predictions.append(confidences)

# Create a DataFrame to store the predictions, one score column per candidate
submission_df = pd.DataFrame(predictions, columns=[f'c{i}' for i in range(20)])

# Attach the left image identifier of each row
submission_df['left'] = test_df['left']

# Rearrange columns to put 'left' at the beginning
cols = ['left'] + [col for col in submission_df if col != 'left']
submission_df = submission_df[cols]
# Write the DataFrame to a CSV file
submission_df.to_csv('kaggle_submission_random_1.csv', index=False)



In [23]:
def l2_distance_tensors(vectors):
    """Euclidean distance between two tensors, reduced over axis 1.

    Args:
        vectors: A pair ``(x, y)`` of tensors.

    Returns:
        ``sqrt(sum((x - y) ** 2))`` taken over axis 1, with that axis kept
        (``keepdims=True``).
    """
    first, second = vectors
    squared_diff = tf.square(first - second)
    summed = tf.reduce_sum(squared_diff, axis=1, keepdims=True)
    return tf.sqrt(summed)
def siamese_with_l2_distance(input_shape):
    """Build a two-input model that scores an image pair from the L2 distance of VGG16 features.

    Both inputs pass through the same ImageNet-pretrained VGG16 (no top, every
    layer frozen); the flattened feature maps are compared with the
    ``l2_distance_tensors`` Lambda layer, and the distance is fed to a
    single-unit Dense layer with sigmoid activation.

    NOTE(review): ``load_image`` yields RGB values divided by 255; the Keras
    VGG16 weights are documented as expecting ``vgg16.preprocess_input`` —
    confirm the input scaling.

    Args:
        input_shape: Shape of a single image, e.g. ``(height, width, channels)``.

    Returns:
        A Keras ``Model`` taking ``[left, right]`` inputs and returning the
        sigmoid output of the Dense layer.
    """
    # One input per side of the pair, both with the same shape
    branch_inputs = [layers.Input(input_shape), layers.Input(input_shape)]

    # Shared convolutional backbone with all of its layers frozen
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
    for vgg_layer in backbone.layers:
        vgg_layer.trainable = False

    # Encode both sides with the same backbone, then flatten each feature map
    feature_maps = [backbone(tensor) for tensor in branch_inputs]
    embeddings = [layers.Flatten()(fmap) for fmap in feature_maps]

    # Distance between the two embeddings
    distance = layers.Lambda(l2_distance_tensors)(embeddings)

    # Dense(1, sigmoid) on the distance: the only layer added on top of the frozen backbone
    prediction = layers.Dense(1, activation='sigmoid')(distance)

    return Model(inputs=branch_inputs, outputs=prediction)

# Build, compile and fit the L2-distance siamese model on the training pairs.
# NOTE(review): the recorded run of this cell was interrupted (KeyboardInterrupt)
# during "Epoch 1/5", so no completed training result is recorded for it.
# NOTE(review): (245, 200, 3) is presumably the (height, width, channels) of the
# dataset images — confirm against the arrays returned by load_image.
input_shape = (245, 200, 3)
model_vgg16_l2 = siamese_with_l2_distance(input_shape)
model_vgg16_l2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# The last 10% of the pairs are held out for validation (validation_split=0.1).
model_vgg16_l2.fit(train_pairs, labels, epochs=5, batch_size=16, validation_split=0.1)


Epoch 1/5

KeyboardInterrupt: 