In [1]:
import os
import shutil
import numpy as np
from tools.score_calculation import score_calculation

In [2]:
class ARGS:
    folder1 = ""
    folder2 = ""
    model_type = "clip"
    model_id_clip = "openai/clip-vit-base-patch32"
    model_id_dino = "facebook/dino-vits16"  # dinov2-base for v2
    show_sim_detail = False  # Set to True to show detailed similarity matrix
    get_sim_detail = True  # Set to True to get detailed similarity matrix

In [3]:
def find_most_similar_pair(similarity_matrix, name_lists):
    """
    Find the most similar pair of images based on the similarity matrix.
    
    Parameters:
    similarity_matrix (numpy.ndarray): Matrix containing similarity scores
    name_lists (tuple of list): Tuple of List of image names
    
    Returns:
    tuple: (image1, image2, similarity_score)
    """
    # Make a copy of the matrix to avoid modifying the original
    sim_matrix = np.copy(similarity_matrix)
    
    # Replace the diagonal NaN values with -1 to ignore self-comparisons
    np.fill_diagonal(sim_matrix, -1)
    
    # Find the indices of the maximum value
    max_idx = np.unravel_index(np.argmax(sim_matrix), sim_matrix.shape)
    
    # Get the corresponding image names and the similarity score
    image1 = name_lists[0][max_idx[0]]
    image2 = name_lists[1][max_idx[1]]
    similarity_score = sim_matrix[max_idx]
    
    return image1, image2, similarity_score

In [None]:
input_path = "/home/jack/Code/Research/instree_analysis/experiment_data/v3"
output_path = "/home/jack/Code/Research/instree_analysis/experiment_data/v3_sub_clip"

for cpt_name in os.listdir(input_path):
    print(cpt_name)
    ARGS.get_sim_detail = True
    ARGS.folder1 = ARGS.folder2 = os.path.join(input_path, cpt_name, "v0")
    avg_sim, sim_mat, names = score_calculation(ARGS)
    filea, fileb, sim_max = find_most_similar_pair(sim_mat, names)
    print("Max Similarity:", sim_max)

    # copy to output
    output_dir = os.path.join(output_path, cpt_name, "v0")
    os.makedirs(output_dir, exist_ok=True)
    shutil.copy(os.path.join(ARGS.folder1, filea), os.path.join(output_dir, filea))
    shutil.copy(os.path.join(ARGS.folder1, fileb), os.path.join(output_dir, fileb))

    # Check output similarity
    ARGS.get_sim_detail = False
    ARGS.folder1 = ARGS.folder2 = os.path.join(output_path, cpt_name, "v0")
    
    avg_sim = score_calculation(ARGS)
    assert round(sim_max, 4) == round(avg_sim, 4)
    print('=====')

wearable_sunglasses1
Average Similarity: 0.7962
Max Similarity: 0.9519115
Average Similarity: 0.9519
=====
toy_bear
Average Similarity: 0.9466
Max Similarity: 0.9764654
Average Similarity: 0.9765
=====
furniture_chair2
Average Similarity: 0.9276
Max Similarity: 0.9386026
Average Similarity: 0.9386
=====
plushie_pink
Average Similarity: 0.8019
Max Similarity: 0.9192985
Average Similarity: 0.9193
=====
scene_sculpture1
Average Similarity: 0.8964
Max Similarity: 0.9424306
Average Similarity: 0.9424
=====
transport_bike
Average Similarity: 0.9661
Max Similarity: 0.9711714
Average Similarity: 0.9712
=====
scene_barn
Average Similarity: 0.9071
Max Similarity: 0.9474813
Average Similarity: 0.9475
=====
grey_sloth_plushie
Average Similarity: 0.8230
Max Similarity: 0.9070721
Average Similarity: 0.9071
=====
decoritems_lamp1
Average Similarity: 0.8966
Max Similarity: 0.9261743
Average Similarity: 0.9262
=====
backpack_dog
Average Similarity: 0.8121
Max Similarity: 0.8974079
Average Similarity: 0

: 