In [1]:
import pickle
import os
import numpy as np
import pandas as pd
import random

In [2]:
def load_pkl(file_path):
    with open(file_path, 'rb') as f:
        data = pickle.load(f)
    return data

# Process data

In [3]:
from components.DataProcessor import DataProcessor


file_path = 'COMP90086_2023_TLLdataset/train.csv'
test_file_path = 'COMP90086_2023_TLLdataset/test_candidates.csv'
save_path = 'dictionaries'
dataset_base_path = 'COMP90086_2023_TLLdataset'

processor = DataProcessor(file_path, test_file_path, save_path, dataset_base_path)
processor.load_data()
processor.split_data()
processor.generate_dictionaries()
processor.save_dictionaries()
processor.process_test_candidates()


Dictionaries saved to 'dictionaries/train_dict.pkl' and 'dictionaries/val_dict.pkl'
Dictionaries saved to dictionaries/test_dict.pkl


In [4]:
train_data = load_pkl('dictionaries/train_dict.pkl')
val_data = load_pkl('dictionaries/val_dict.pkl')
test_data = load_pkl('dictionaries/test_dict.pkl')

print('train data size: ', len(train_data))
print('val data size: ', len(val_data))
print('test data size: ', len(test_data))

train data size:  1600
val data size:  400
test data size:  2000


In [5]:
# print('train data sample: ', val_data)

# Extract features

In [6]:
from components.FeatureExtractor import FeatureExtractor

# define the output dir & models you want to train here
base_output_dir = "feat"
models_to_train = ["resnet50", "resnet101", "resnet152", "densenet201", "vgg16", "mobilenet"]

train_data = load_pkl('dictionaries/train_dict.pkl')
val_data = load_pkl('dictionaries/val_dict.pkl')
test_data = load_pkl('dictionaries/test_dict.pkl')

# for model_name in models_to_train:
#     extractor = FeatureExtractor(model_name, base_output_dir)
#     for set_name, data_dict in [("train_data", train_data), ("val_data", val_data)]:
#         extractor.extract_and_save_features(data_dict, set_name)

# model_name = "resnet50"
# extractor = FeatureExtractor(model_name, base_output_dir)
# extractor.extract_and_save_features_for_test(test_data, "test_data")

In [None]:
from components.VectorComparator import VectorComparator



# Create anchor, positive and negative examples

In [7]:
import os
import pickle
import numpy as np

def load_saved_features(model_name, set_name, side, base_dir="feat"):
    file_path = os.path.join(base_dir, model_name, set_name, f"{set_name}_{side}_features.pkl")
    with open(file_path, 'rb') as f:
        features = pickle.load(f)
    return features

def generate_negative_samples(loaded_features, num_negative_samples):
    all_features = np.concatenate(list(loaded_features.values()))
    negative_samples = []
    for _ in range(num_negative_samples):
        random_idx = np.random.choice(len(all_features))
        negative_samples.append(all_features[random_idx])
    return np.array(negative_samples)

def prepare_data(model_name, set_name, base_dir="feat"):
    # Load anchor and positive samples
    anchor_features = load_saved_features(model_name, set_name, "left", base_dir)
    positive_features = load_saved_features(model_name, set_name, "right", base_dir)

    # Generate negative samples
    num_negative_samples = len(anchor_features)
    negative_features = generate_negative_samples(anchor_features, num_negative_samples)

    # Prepare data
    anchors = np.concatenate(list(anchor_features.values()))
    positives = np.concatenate(list(positive_features.values()))
    negatives = negative_features

    return anchors, positives, negatives

base_dir = "feat"
model_name = "resnet50"  

# Prepare training data
train_set_name = "train_data"
train_anchors, train_positives, train_negatives = prepare_data(model_name, train_set_name, base_dir)

# Prepare validation data
val_set_name = "val_data"
val_anchors, val_positives, val_negatives = prepare_data(model_name, val_set_name, base_dir)


# Get predictions

In [14]:
test_left = load_saved_features("resnet50", "test_data", "left", base_dir)
test_right = load_saved_features("resnet50", "test_data", "right", base_dir)

In [15]:
import csv

# Create a dictionary to store similarity scores
similarity_scores = {}

# Iterate through the rows of the CSV file
with open('COMP90086_2023_TLLdataset/test_candidates.csv', 'r') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        left_image = row['left']
        right_images = [row[f'c{i}'] for i in range(20)]  # Include only 20 right images
        
        scores = []  # List to store similarity scores for the current left image
        
        # Iterate through the right images
        for right_image in right_images:
            left_image_path = os.path.join('COMP90086_2023_TLLdataset/test/left', left_image)
            right_image_path = os.path.join('COMP90086_2023_TLLdataset/test/right', right_image)
            
            left_features = test_left[left_image_path + '.jpg']
            right_features = test_right[right_image_path + '.jpg']
            
            # Compute the similarity between the left and right images
            similarity = image_similarity_model.compute_similarity(left_features, right_features)
            print(similarity)
            scores.append(similarity)
        
        # Add the scores to the dictionary with the left image as the key
        similarity_scores[left_image] = scores

# Write the similarity scores to a new CSV file
with open('similarity_scores.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    
    # Write the header row
    header_row = ['left'] + [f'c{i}' for i in range(20)]  # Include only 20 columns
    writer.writerow(header_row)

    # Write the similarity scores for each left image, limiting to 20 columns
    for left_img, scores in similarity_scores.items():
        writer.writerow([left_img] + scores)


0.45152953
0.35092297
0.5570038
0.40399304
0.33299038
0.42943525
0.39064223
0.5064738
0.4621124
0.46725237
0.30685183
0.47569865
0.55192584
0.5241328
0.5647114
0.44035682
0.4171899
0.306356
0.51900816
0.35254118
0.35365087
0.32227093
0.33093914
0.39458174
0.36569712
0.303411
0.29435945
0.39059713
0.32160866
0.4075467
0.289143
0.3803281
0.39550894
0.32911608
0.3440176
0.32836196
0.38865238
0.42117038
0.2962748
0.37149388
0.27685705
0.23628703
0.2790021
0.33828124
0.35735935
0.38929358
0.29658866
0.39179996
0.3021765
0.34899315
0.3183214
0.28181475
0.27071422
0.3602051
0.45443186
0.34699178
0.2816672
0.36576197
0.39831004
0.37655136
0.5493882
0.36268717
0.3683857
0.3323535
0.3723268
0.5136255
0.4363582
0.5273702
0.281476
0.44429147
0.34989566
0.36229005
0.4809027
0.31539273
0.4122915
0.37707826
0.42775482
0.3503406
0.43444017
0.42638445
0.53846776
0.54818135
0.50695086
0.56846464
0.6290516
0.5601408
0.22854973
0.44534004
0.32288146
0.35331875
0.64106005
0.3461853
0.5692627
0.5144149
0.47