In [13]:
import os
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array

### 1st Task

#### Rank Normalization

In [19]:
# Directory containing the images
image_folder = "animals"

# Feature extraction model
model = ResNet50(weights="imagenet", include_top=False, pooling="avg")

def feature_extraction(image_path):
    """
    Extract feature vector from an image.
    :param image_path: Path to the image
    :return: Feature vector
    """
    image = load_img(image_path, target_size=(224, 224))  # Resize image to match model input size
    image_array = img_to_array(image)  # Convert to array
    image_array = np.expand_dims(image_array, axis=0)  # Add batch dimension
    image_array = preprocess_input(image_array)  # Preprocess for ResNet50
    feature_vector = model.predict(image_array)[0]  # Extract features
    return feature_vector

def compute_distance(feature_vector_1, feature_vector_2):
    """
    Compute the Euclidean distance between two feature vectors.
    :param feature_vector_1: First feature vector
    :param feature_vector_2: Second feature vector
    :return: Euclidean distance
    """
    return euclidean_distances([feature_vector_1], [feature_vector_2])[0][0]

def compute_similarity(feature_vector_1, feature_vector_2):
    """
    Compute similarity based on the Euclidean distance.
    :param feature_vector_1: First feature vector
    :param feature_vector_2: Second feature vector
    :return: Similarity score (higher means more similar)
    """
    distance = compute_distance(feature_vector_1, feature_vector_2)
    return 1 / (1 + distance)  # Invert distance for similarity score

# Extract features for all images in the folder
image_paths = [os.path.join(image_folder, img) for img in os.listdir(image_folder) if img.endswith('.jpg')]
features = {}

for image_path in image_paths:
    features[image_path] = feature_extraction(image_path)
    
# Compute pairwise similarity
similarity_matrix = {}

#for (img1, img2), similarity in similarity_matrix.items():
#    print(f"Similarity between {img1} and {img2}: {similarity:.4f}")

# Extract image paths from the dictionary keys
image_paths = list(features.keys())

# Initialize a 2D list (numpy array) for the similarity matrix
num_images = len(image_paths)
similarity_2d_list = np.zeros((num_images, num_images))

# Fill the 2D list with similarity scores
for i, path1 in enumerate(image_paths):
    for j, path2 in enumerate(image_paths):
        similarity = similarity_matrix.get((path1, path2), 0)  # Default to 0 if no similarity found
        similarity_2d_list[i][j] = similarity

# Optionally, convert the numpy array to a regular list (if needed)
similarity_2d_list = similarity_2d_list.tolist()

# Print the 2D list (optional)
for row in similarity_2d_list:
    print(row)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[0.0, 0.0, 0.0]
[0.0, 0.0, 0.0]
[0.0, 0.0, 0.0]


In [15]:
def rank_normalization(similarity_matrix, L):
    """
    Perform reciprocal rank normalization on the given similarity matrix.

    Args:
    - similarity_matrix (list or np.array): A square matrix where each element A[i][j] represents the similarity between objects i and j.
    - L (int): The top-L positions to consider in the rank normalization.

    Returns:
    - normalized_matrix (np.array): The updated similarity matrix after rank normalization.
    """
    # Convert the similarity matrix into a numpy array for easier manipulation
    similarity_matrix = np.array(similarity_matrix)
    
    # Initialize the normalized similarity matrix with the same shape
    normalized_matrix = np.zeros_like(similarity_matrix)
    
    num_objects = similarity_matrix.shape[0]
    
    # Iterate through each pair (i, j) in the matrix
    for i in range(num_objects):
        for j in range(i + 1, num_objects):  # Avoid redundant calculations
            # Rank position of j in the list of neighbors of i
            rank_ij = np.argsort(similarity_matrix[i, :])[::-1].tolist().index(j)
            
            # Rank position of i in the list of neighbors of j
            rank_ji = np.argsort(similarity_matrix[j, :])[::-1].tolist().index(i)
            
            # Apply the rank normalization formula
            normalized_value = 2 * L - (rank_ij + rank_ji)
            
            # Set the normalized similarity values
            normalized_matrix[i, j] = normalized_value
            normalized_matrix[j, i] = normalized_value  # Symmetric assignment
    
    # Update the top-L positions using a stable sort (sorted in descending order)
    for i in range(num_objects):
        # Get indices of the top-L neighbors (after rank normalization)
        top_L_indices = np.argsort(normalized_matrix[i, :])[::-1][:L]
        
        # Ensure the stability of sorting by updating only the top-L similarities
        normalized_matrix[i, :] = 0  # Zero out all but top-L neighbors
        normalized_matrix[i, top_L_indices] = similarity_matrix[i, top_L_indices]
    
    return normalized_matrix

L = 2  # Top 2 neighbors
normalized_matrix = rank_normalization(similarity_matrix, L)
print("Normalized Similarity Matrix:")
print(normalized_matrix)

IndexError: tuple index out of range

In [None]:
# Load a pre-trained SqueezeNet model
squeezenet = models.squeezenet1_1(pretrained=True)

In [18]:
# Original 1D list with 9 items
original_list = [1, 2, 3, 4, 5, 6, 7, 8, 9]

# Convert to 2D list with 3 items per row
two_d_list = [original_list[i:i+3] for i in range(0, len(original_list), 3)]

print(two_d_list)


[[1, 2, 3], [4, 5, 6], [7, 8, 9]]
