In [4]:

import tensorflow as tf
from tensorflow.keras import layers, Model
import cv2
import pandas as pd
import numpy as np

def load_image(img_path, target_size=(224, 224)):
    """Read an image from disk as a float32 RGB array scaled to [0, 1].

    Args:
        img_path: Path of the image file, passed to ``cv2.imread``.
        target_size: Size passed to ``cv2.resize`` as ``dsize``; OpenCV
            interprets it as ``(width, height)``.

    Returns:
        The resized image as ``np.float32``, channels reordered from OpenCV's
        BGR to RGB and pixel values divided by 255.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than with an opaque cvtColor error.
    if img is None:
        raise FileNotFoundError(f"Could not read image file: {img_path!r}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, target_size)
    return img.astype(np.float32) / 255.0
# Pair listings: train.csv supplies the 'left'/'right' image names used just
# below; test_candidates.csv is kept in test_df for the inference cell.
train_df = pd.read_csv("COMP90086_2023_TLLdataset/train.csv")
test_df = pd.read_csv('COMP90086_2023_TLLdataset/test_candidates.csv')

# Decode every training image eagerly, in the same row order as train_df, so
# that index i of the two lists refers to the same training pair.
train_left_images = [
    load_image(f"COMP90086_2023_TLLdataset/train/left/{img_name}.jpg")
    for img_name in train_df['left']
]
train_right_images = [
    load_image(f"COMP90086_2023_TLLdataset/train/right/{img_name}.jpg")
    for img_name in train_df['right']
]


def create_pairs(left_images, right_images):
    """Build matched and mismatched (left, right) pairs with binary labels.

    For every index ``i`` two pairs are emitted, in this order:
    ``[left_images[i], right_images[i]]`` labelled 1, then
    ``[left_images[i], right_images[j]]`` labelled 0, where ``j != i`` is
    drawn uniformly at random via ``np.random.randint``.

    Args:
        left_images: Sequence of left images.
        right_images: Sequence of right images; index ``i`` is treated as the
            match of ``left_images[i]``.

    Returns:
        ``(pairs, labels)`` as numpy arrays. With equally shaped images,
        ``pairs`` has shape ``(2 * n, 2, *image_shape)`` and ``labels`` is
        ``[1, 0, 1, 0, ...]`` of length ``2 * n``. Empty input yields two
        empty arrays.

    Raises:
        ValueError: If exactly one image is supplied, because no mismatched
            partner exists.
    """
    n_images = len(left_images)
    if n_images == 1:
        raise ValueError(
            "create_pairs needs at least two images to build a negative pair"
        )

    pairs = []
    labels = []
    for i in range(n_images):
        # Positive pairing
        pairs.append([left_images[i], right_images[i]])
        labels.append(1)

        # Negative pairing: an offset in [1, n_images - 1] wrapped around the
        # list reaches every index except i with equal probability, so no
        # retry loop is needed.
        other_idx = (i + np.random.randint(1, n_images)) % n_images
        pairs.append([left_images[i], right_images[other_idx]])
        labels.append(0)

    return np.array(pairs), np.array(labels)

# Build the labelled pair array from the preloaded training images.
pairs, labels = create_pairs(train_left_images, train_right_images)

# Split axis 1 of `pairs` into two stacks: index 0 holds the left image of
# each pair, index 1 the right image.
train_pairs = [pairs[:, side] for side in (0, 1)]









In [6]:
import tensorflow as tf
from tensorflow.keras import layers, Model

def cosine_similarity_tensors(vectors):
    """Return the cosine similarity of two tensors along their last axis.

    Args:
        vectors: A pair ``(x, y)`` of tensors.

    Returns:
        The sum over the last axis of the element-wise product of the two
        L2-normalised tensors, with that axis kept as size 1.
    """
    backend = tf.keras.backend
    first, second = vectors
    # Unit-normalise each side so the dot product below is the cosine.
    first_unit = backend.l2_normalize(first, axis=-1)
    second_unit = backend.l2_normalize(second, axis=-1)
    elementwise = first_unit * second_unit
    return backend.sum(elementwise, axis=-1, keepdims=True)

# Default location of the DenseNet121 "notop" weight file.
# NOTE(review): machine-specific absolute path, kept only so existing calls
# behave as before; pass `weights_path` explicitly on any other machine.
# Presumably the Keras ImageNet weights, judging by the file name - confirm.
DENSENET121_WEIGHTS_PATH = '/Users/yangyusong/Desktop/CV final project/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5'


def siamese_with_dense121_and_cosine_similarity(input_shape, weights_path=DENSENET121_WEIGHTS_PATH):
    """Build a two-input Siamese model scoring image pairs by cosine similarity.

    Both inputs go through the same frozen DenseNet121 (without its top), the
    feature maps are flattened, and the output is the cosine similarity of
    the two flattened vectors.

    NOTE(review): every DenseNet121 layer is frozen and the Flatten/Lambda
    layers carry no weights, so the returned model has no trainable
    parameters; calling ``fit`` on it cannot change its predictions. Add a
    trainable head or unfreeze layers if learning is intended.

    Args:
        input_shape: Shape of a single input image, e.g. ``(224, 224, 3)``.
        weights_path: Path to the DenseNet121 "notop" ``.h5`` weight file
            loaded into the backbone. Defaults to the original hard-coded
            location.

    Returns:
        A ``Model`` taking ``[left_input, right_input]`` and producing one
        similarity score per pair with shape ``(batch, 1)``.
    """
    left_input = layers.Input(input_shape)
    right_input = layers.Input(input_shape)

    # Use DenseNet121 as the shared backbone; weights are loaded from a local
    # file rather than downloaded.
    base_model = tf.keras.applications.DenseNet121(weights=None, include_top=False, input_shape=input_shape)
    base_model.load_weights(weights_path)
    # Freezing the model freezes every layer inside it.
    base_model.trainable = False

    # The same backbone instance encodes both sides, so weights are shared.
    encoded_left = base_model(left_input)
    encoded_right = base_model(right_input)

    # Flatten the encoded feature maps into one vector per image.
    encoded_left = layers.Flatten()(encoded_left)
    encoded_right = layers.Flatten()(encoded_right)

    # Use cosine similarity to compute the similarity score.
    similarity_score = layers.Lambda(cosine_similarity_tensors)([encoded_left, encoded_right])

    return Model(inputs=[left_input, right_input], outputs=similarity_score)

# 224x224 RGB: the default input shape for DenseNet121.
input_shape = (224, 224, 3)
model_dense121 = siamese_with_dense121_and_cosine_similarity(input_shape)

model_dense121.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# NOTE(review): as built above, every DenseNet121 layer is frozen and the
# Flatten/Lambda layers add no weights, so this fit() call has no parameters
# to update - TODO confirm whether a trainable head was intended.
model_dense121.fit(
    train_pairs,
    labels,
    batch_size=32,
    epochs=10,
    validation_split=0.1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
  1/113 [..............................] - ETA: 2:08 - loss: 0.6580 - accuracy: 0.5938

KeyboardInterrupt: 

In [8]:

def predict_similarity1(left_img, right_img):
    """Score a single (left, right) image pair with ``model_dense121``.

    Each image is wrapped in a batch of size one before being passed to the
    model; the scalar score of that single pair is returned.
    """
    left_batch = np.expand_dims(left_img, axis=0)
    right_batch = np.expand_dims(right_img, axis=0)
    scores = model_dense121.predict([left_batch, right_batch])
    return scores[0][0]

# Number of candidate "right" images listed per test row (columns c0..c19).
NUM_CANDIDATES = 20
candidate_cols = [f'c{i}' for i in range(NUM_CANDIDATES)]

predictions = []

# Score every candidate "right" image for each test "left" image.
for _, row in test_df.iterrows():
    left_img = load_image(f"COMP90086_2023_TLLdataset/test/left/{row['left']}.jpg")
    right_batch = np.stack([
        load_image(f"COMP90086_2023_TLLdataset/test/right/{row[col]}.jpg")
        for col in candidate_cols
    ])

    # One batched forward pass per row instead of one predict() call per
    # candidate: the per-call overhead of predict() dominates when it is fed
    # a single pair at a time. verbose=0 stops a progress bar being printed
    # for every row.
    left_batch = np.repeat(left_img[np.newaxis], len(candidate_cols), axis=0)
    scores = model_dense121.predict([left_batch, right_batch], verbose=0)
    predictions.append(scores[:, 0])

# One row per test "left" image, one column per candidate score.
submission_df = pd.DataFrame(predictions, columns=candidate_cols)

# Put 'left' first; positional insert avoids relying on index alignment
# between submission_df and test_df.
submission_df.insert(0, 'left', test_df['left'].to_numpy())

# Write the DataFrame to a CSV file
submission_df.to_csv('kaggle_submission_cos_den121.csv', index=False)



