In [1]:
import math
import random as rnd

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Subset
from torchvision.datasets import ImageFolder

ModuleNotFoundError: No module named 'torchvision'

## Rank Normalization

### **get_ranked_list(similarity_measures)**

This function ranks images based on the similarity measures between the target images and all other images.

- Parameters similarity_measures:  Similarity measures between the target images and all other images.

- Returns: A list of ranked images:  The format is the same as `similarity_measures`:   `[[ (image_index, similarity_measure), ...], ...]`.

### **normalize_ranked_list(T)**

This function normalizes the ranked list T using the formula $\rho_n(i, j) = 2L - (r_{ij} + r_{ji})$, where $L$ is the number of ranked lists in T.

- Parameters T:  A list of ranked images, format: [[(image_index, weight), ...], ...]

- Returns: A list of normalized ranked images, in the format `[[(image_index, normalized_weight), ...], ...]`.


In [11]:
def get_ranked_list(similarity_measures):
    """
    Rank every row of similarity measures from most to least similar
    :param similarity_measures: rows of (image_index, similarity_measure) tuples
    :return: a new list of rows, each ordered by similarity_measure descending;
             entries with equal measures keep their original relative order
    """
    def by_similarity(entry):
        # Second tuple element holds the similarity measure.
        return entry[1]

    return [sorted(row, key=by_similarity, reverse=True) for row in similarity_measures]


def normalize_ranked_list(T):
    """
    Apply reciprocal rank normalization to the ranked lists.

    For the entry of list i that refers to image j, the new weight is
    2 * L - (r_i_j + r_j_i), where L is the number of lists, r_i_j is the
    1-based position of image j in list i and r_j_i is the 1-based position of
    image i in list j. The previous implementation read the stored weights as
    ranks and paired list position j with list j, so the two terms did not
    refer to the same pair of images.

    :param T: A list of ranked images, format: [[(image_index, weight), ...], ...].
              List i is expected to be the ranked list of image i, with image
              indices in range(len(T)).
    :return: A list of normalized ranked images, format:
             [[(image_index, normalized_weight), ...], ...], each list sorted by
             normalized_weight in descending order (stable for ties)
    """
    L = len(T)  # Number of images

    # rank_of[i][j] is the 1-based position of image j in list i
    rank_of = []
    for row in T:
        positions = {}
        for position, (image_index, _) in enumerate(row, start=1):
            # Keep the best (first) position if an image is listed twice
            positions.setdefault(image_index, position)
        rank_of.append(positions)

    normalized_T = []
    for i, row in enumerate(T):
        normalized_row = []
        for r_i_j, (j, _) in enumerate(row, start=1):
            # An image that is absent from the reciprocal list is treated as
            # ranked just past the end of that list
            reciprocal = rank_of[j] if 0 <= j < L else {}
            r_j_i = reciprocal.get(i, L + 1)
            normalized_row.append((j, 2 * L - (r_i_j + r_j_i)))
        # Re-rank by the normalized measure so that a top-k slice of the
        # result is the actual reciprocal neighborhood
        normalized_row.sort(key=lambda entry: entry[1], reverse=True)
        normalized_T.append(normalized_row)
    return normalized_T

## Hypergraph Construction
The function `create_neighborhood_set_matrix(T, k)` creates the neighborhood set matrix N.

- Parameters T: Ranked list of images based on the weights.
- Parameters k: Number of images to be considered in the neighborhood set.
- Returns: A neighborhood set, in the same format as T: `[[(image_index, weight), ...], ...]`.

In [None]:
def create_neighborhood_set_matrix(T, k):
    """
    Build the neighborhood set matrix N by truncating every ranked list
    :param T: ranked lists, format: [[(image_index, weight), ...], ...]
    :param k: number of leading entries to keep from each list
    :return: a new list holding the first k entries of every list in T
    """
    neighborhoods = []
    for ranked in T:
        neighborhoods.append(ranked[:k])
    return neighborhoods

In [None]:
def calculate_continuous_incidence_matrix(N, k):
    """
    Calculate the continuous incidence matrix H

    The membership of image j in hyperedge i is 1 - log_{k+1}(rank), where rank
    is the 1-based position of j inside the neighborhood list N[i]. The previous
    implementation took the logarithm of the weight stored in the tuple, which
    is not a rank: it raised a math domain error for weights <= 0 and produced
    memberships outside (0, 1].

    :param N: the neighborhood set matrix N, format: [[(image_index, weight), ...], ...],
              with every list ordered from most to least similar
    :param k: the number of most similar images (sets the logarithm base k + 1)
    :return: the continuous incidence matrix H of shape (len(N), len(N));
             entries for images outside a neighborhood stay 0
    """
    # NOTE(review): this notebook defines this function a second time in the
    # "Extra" section; on Run All the later definition is the one in effect.
    H = np.zeros((len(N), len(N)))
    for i, neighbors in enumerate(N):
        for rank, (j, _) in enumerate(neighbors, start=1):
            # rank 1 -> membership 1.0; rank k -> smallest positive membership
            H[i][j] = 1 - math.log(rank, k + 1)
    return H

## Cartesian Product

In [None]:
def calculate_hyperedge_weight(q, H):
    """
    Sum the memberships stored in one row of the incidence matrix
    :param q: hyperedge index (row of H)
    :param H: continuous incidence matrix
    :return: the sum of all entries of row q (0 for an empty row)
    """
    total = 0
    for membership in H[q]:
        total += membership
    return total


def calculate_pairwise_similarity_relationship(H, q, i, j):
    """
    Score the relationship of two images with respect to a single hyperedge
    :param H: continuous incidence matrix
    :param q: hyperedge index (row of H)
    :param i: first image index (column of H)
    :param j: second image index (column of H)
    :return: hyperedge weight of q multiplied by the memberships H[q][i] and H[q][j]
    """
    hyperedge = H[q]
    weight = calculate_hyperedge_weight(q, H)
    return weight * hyperedge[i] * hyperedge[j]


def calculate_cartesian_product(H):
    """
    Calculate the Cartesian product C

    C[i][j] is the sum over all hyperedges q of w(q) * H[q][i] * H[q][j], where
    w(q) is the sum of row q of H. This is the same quantity as accumulating
    calculate_pairwise_similarity_relationship(H, q, i, j) over q, but computed
    as the matrix product H^T * diag(w) * H instead of a triple Python loop that
    re-summed the row inside the innermost iteration.

    :param H: continuous incidence matrix (square, one row per hyperedge and
              one column per image)
    :return: Cartesian product C as a float array with one row/column per image
    """
    H = np.asarray(H, dtype=float)
    if H.size == 0:
        # Nothing to accumulate; mirror the zero-initialised result
        return np.zeros((len(H), len(H)))

    hyperedge_weights = H.sum(axis=1)  # w(q) for every hyperedge q
    # Scale each row q by w(q), then contract over q
    return H.T @ (hyperedge_weights[:, np.newaxis] * H)

In [None]:
def calculate_pairwise_similarity_matrix(H):
    """
    Calculate the pairwise similarity matrix S
    :param H: continuous incidence matrix
    :return: S, the element-wise (Hadamard) product of H @ H.T and H.T @ H
    """
    # Row-wise and column-wise similarity terms, combined element by element
    return (H @ H.T) * (H.T @ H)

# ΕΡΩΤΗΜΑ 3

In [None]:
def extract_feature_vectors(subset_dataset, batch_size=64, num_workers=4):
    """
    Extract the feature vectors for all images in the subset dataset using the pre-trained ResNet-50 model
    :param subset_dataset: the subset dataset; iterated in order as (image, label) pairs
    :param batch_size: number of images per forward pass (tune to the available memory)
    :param num_workers: number of DataLoader worker processes (0 loads in the main process)
    :return: a CPU tensor with one feature vector per image, in dataset order
    """
    # Load the pre-trained model
    model = get_pretrained_model()

    # shuffle=False keeps row i of the result aligned with item i of the subset
    subset_loader = DataLoader(dataset=subset_dataset, batch_size=batch_size, shuffle=False,
                               num_workers=num_workers)

    # Move the model to the GPU if available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # NOTE(review): load_dataset only resizes and converts images to tensors;
    # pretrained torchvision weights are normally paired with ImageNet mean/std
    # normalization - confirm whether it should be applied before the forward pass.
    feature_vectors = []

    with torch.no_grad():
        for images, _ in subset_loader:
            # Forward pass on the selected device
            outputs = model(images.to(device))
            # Bring the batch back to the CPU: the similarity step uses NumPy,
            # which cannot read CUDA tensors, and this frees GPU memory per batch
            feature_vectors.append(outputs.cpu())

    # Concatenate the per-batch outputs into a single tensor
    return torch.cat(feature_vectors, dim=0)


def get_pretrained_model():
    """
    Build the ResNet-50 feature extractor
    :return: a ResNet-50 loaded with its default pre-trained weights, with the
             final fully connected layer replaced by an empty Sequential
             module, switched to evaluation mode
    """
    backbone = models.resnet50(weights='ResNet50_Weights.DEFAULT')

    # An empty Sequential passes its input through unchanged, so the network
    # outputs the activations that used to feed the classifier
    backbone.fc = nn.Sequential()

    # eval() returns the module itself
    return backbone.eval()

## Dataset

In [None]:
import random as rnd

import torchvision.transforms as transforms
from torch.utils.data.dataset import Subset
from torchvision.datasets import ImageFolder


def load_dataset(root='images'):
    """
    Load the dataset and return it
    :param root: directory passed to ImageFolder as the dataset root
                 (defaults to 'images', the location used so far)
    :return: the dataset, with every image resized to 224x224 and converted to a tensor
    """
    # Define image transformations
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize images to 224x224 pixels
        transforms.ToTensor()  # Convert images to PyTorch tensors
    ])

    return ImageFolder(root=root, transform=transform)


def get_subset_dataset(dataset, subset_size=300):
    """
    Create a random subset from the dataset and return it
    :param dataset: the dataset to create the subset from (must support len())
    :param subset_size: number of images to sample; capped at the dataset size
                        so small datasets no longer make the sampling fail
    :return: the subset dataset (a Subset over distinct, randomly chosen indices)
    """
    population = len(dataset)
    sample_size = max(0, min(subset_size, population))
    # Sample without replacement so no image appears twice
    subset_indices = rnd.sample(range(population), sample_size)
    return Subset(dataset, subset_indices)


def get_target_indices(subset_dataset, no_of_images=5):
    """
    Randomly select target images from the subset and return their indices
    :param subset_dataset: the subset dataset (only its length is used)
    :param no_of_images: number of target images to pick; capped at the subset
                         size so subsets smaller than this no longer fail
    :return: a list of distinct indices into the subset
    """
    population = len(subset_dataset)
    sample_size = max(0, min(no_of_images, population))
    # Sampling without replacement replaces the manual pop-at-random loop
    return rnd.sample(range(population), sample_size)

## Ερώτημα 5

In [None]:
def calculate_similarity_measures(feature_vectors):
    """
    Calculate the pairwise similarity measures for all images in the subset dataset

    The similarity of two images is 1 / (1 + Euclidean distance of their feature
    vectors), so identical vectors score 1.0 and the score decreases with distance.

    :param feature_vectors: the feature vectors of all images in the subset dataset;
                            a tensor (any device), a NumPy array or nested sequences
    :return: the pairwise similarity measures ρ(o_i, o_j), format: [ [(j, similarity_measure), ...], ...],
    where j is the index of the image in the subset dataset and similarity_measure is the pairwise similarity measure
    """
    # Tensors are detached and copied to host memory first; NumPy cannot read
    # CUDA tensors directly
    if hasattr(feature_vectors, 'detach'):
        feature_vectors = feature_vectors.detach().cpu().numpy()

    features = np.asarray(feature_vectors, dtype=float)
    count = len(features)
    if count == 0:
        return []
    # One flat row per image, so the row-wise norm equals the norm of the difference
    features = features.reshape(count, -1)

    similarity_measures = []
    for i in range(count):
        # Distances from image i to every image in one vectorised call
        distances = np.linalg.norm(features - features[i], axis=1)
        similarities = 1 / (distances + 1)
        similarity_measures.append([(j, float(value)) for j, value in enumerate(similarities)])
    return similarity_measures

## Extra

In [None]:
def calculate_continuous_incidence_matrix(N, k):
    """
    Calculate the continuous incidence matrix H

    The membership of image j in hyperedge i is 1 - log_{k+1}(rank), where rank
    is the 1-based position of j inside the neighborhood list N[i]. The previous
    implementation took the logarithm of the weight stored in the tuple, which
    is not a rank: it raised a math domain error for weights <= 0 and produced
    memberships outside (0, 1].

    :param N: the neighborhood set matrix N, format: [[(image_index, weight), ...], ...],
              with every list ordered from most to least similar
    :param k: the number of most similar images (sets the logarithm base k + 1)
    :return: the continuous incidence matrix H of shape (len(N), len(N));
             entries for images outside a neighborhood stay 0
    """
    # NOTE(review): this is the second definition of this function in the
    # notebook (the first is in the "Hypergraph Construction" section); being
    # the later one, it is the definition in effect after Run All.
    H = np.zeros((len(N), len(N)))
    for i, neighbors in enumerate(N):
        for rank, (j, _) in enumerate(neighbors, start=1):
            # rank 1 -> membership 1.0; rank k -> smallest positive membership
            H[i][j] = 1 - math.log(rank, k + 1)
    return H

## Main

### Log-based Hypergraph of Ranking References (LHRR) Algorithm

This function implements the Log-based Hypergraph of Ranking References (LHRR) algorithm.

#### Parameters:
- **T**:  
  A collection of ranked images represented in the format:  
  `[[ (image_index, weight), ...], ...]`.
  
- **iterations**:  
  Specifies the total number of iterations to execute.

#### Returns:
- **A collection of ranked images**:  
  Formatted as: `[[ (image_index, weight), ...], ...]`.
  
- **The total number of similar images**:  
  Indicates the number of similar images analyzed within the neighborhood set.


In [12]:
def LHRR(T, iterations, k=5):
    """
    Run the Log-based Hypergraph of Ranking References (LHRR) re-ranking loop.

    Every iteration normalizes the ranked lists, builds the top-k neighborhoods,
    derives the incidence matrix H, the pairwise similarity matrix S and the
    Cartesian product C, combines them into the affinity matrix W = C * S
    (element-wise) and re-ranks every list by the new affinities.

    :param T: A list of ranked images, format: [[(image_index, weight), ...], ...].
              List i is expected to belong to image i, with image indices in range(len(T)).
    :param iterations: Number of iterations to execute
    :param k: Number of most similar images considered in the neighborhood set.
              Defined up front so it is also available when iterations is 0.
    :return: (T, k) - the re-ranked lists in the same format as the input, and
             the neighborhood size that was used
    """
    for iteration in range(iterations):
        print("\nStarting iteration " + str(iteration + 1) + "/" + str(iterations) + ":")

        # Rank normalization
        print("\tNormalizing the ranked list T...")
        T = normalize_ranked_list(T)

        # Create the neighborhood set matrix N
        print("\tCreating the neighborhood set matrix N...")
        N = create_neighborhood_set_matrix(T, k)

        # Calculate continuous incidence matrix H
        print("\tCalculating the continuous incidence matrix H...")
        H = calculate_continuous_incidence_matrix(N, k)

        # Calculate the pairwise similarity matrix S
        print("\tCalculating the pairwise similarity matrix S...")
        S = calculate_pairwise_similarity_matrix(H)

        # Calculate the Cartesian product C
        print("\tCalculating the Cartesian product C...")
        C = calculate_cartesian_product(H)

        # Calculate the affinity matrix W
        print("\tCalculating the affinity matrix W...")
        W = C * S

        # Update the ranked list T with the new weights.
        # W is indexed by image index, while the position of an entry inside a
        # ranked list is its rank, so the affinity must be looked up through
        # the image index stored in the entry, not through its position.
        print("\tUpdating the ranked list T with the new weights...")
        T = [[(image_index, W[i][image_index]) for image_index, _ in ranked]
             for i, ranked in enumerate(T)]

        # Sort the ranked list T
        print("\tSorting the ranked list T...")
        T = [sorted(t, key=lambda x: x[1], reverse=True) for t in T]
    return T, k

Hello World


In [None]:
def assign_weights_with_relevance(no_of_images, relevance_scores):
    """
    Weight each relevance score by how early it appears in the ranking
    :param no_of_images: Number of images (positions) to weight
    :param relevance_scores: List of relevance scores, one per position
    :return: List where position i holds relevance_scores[i] * (no_of_images - i)
    """
    weights = []
    for position in range(no_of_images):
        # Earlier positions get the larger multiplier
        multiplier = no_of_images - position
        weights.append(relevance_scores[position] * multiplier)
    return weights


def show_images(target_image, subset_dataset, categories):
    """
    Show a specific target image with the k most similar images and the accuracy of the algorithm

    The first entry of target_image is treated as the target itself: its
    category is the reference every displayed image is compared against.

    :param target_image: The target image with the k most similar images,
                         format: [(image_index, weight), ...] with indices into subset_dataset
    :param subset_dataset: The subset dataset
    :param categories: The categories names
    """
    if not target_image:
        # Nothing to plot and no target entry to read
        return

    no_of_images = len(target_image)
    target_idx = target_image[0][0]

    # squeeze=False always yields a 2-D array of axes, so a single image no
    # longer returns a bare Axes object that cannot be zipped
    fig, axs = plt.subplots(1, no_of_images, figsize=(14, 4), squeeze=False)
    axs = axs[0]
    fig.canvas.manager.set_window_title(f"Target image {target_idx}")

    # Target image category
    target_category_idx = subset_dataset.dataset.samples[subset_dataset.indices[target_idx]][1]

    # 1 for every displayed image that shares the target's category, else 0
    relevance_scores = []

    for ax, (img_idx, _) in zip(axs, target_image):
        # Map the subset index back to the underlying dataset
        dataset_idx = subset_dataset.indices[img_idx]

        # Retrieve the image from the dataset
        image, _ = subset_dataset.dataset[dataset_idx]

        # Reorder the axes from (channel, height, width) to (height, width, channel) for imshow
        image = np.transpose(image.numpy(), (1, 2, 0))

        # Plot the image
        ax.imshow(image)

        # Set the title of the image
        category_idx = subset_dataset.dataset.samples[dataset_idx][1]
        category = categories[category_idx]
        title = f'Category: {category}'
        if img_idx == target_idx:
            title += ' (target image)'
        ax.set_title(title)

        # Remove the axis
        ax.axis('off')

        # Calculate the relevance score
        relevance_scores.append(1 if category_idx == target_category_idx else 0)

    # Assign weights to the images based on their index and relevance scores
    weights = assign_weights_with_relevance(no_of_images, relevance_scores)

    # Rank-weighted share of same-category images (1.0 when every image matches)
    accuracy = sum(weights) / sum(range(1, no_of_images + 1))

    # Show the accuracy on the plot
    fig.suptitle(f'Accuracy: {accuracy:.2f}')

    # Show the images
    plt.show()


def get_target_images(T, target_indices, k):
    """
    Pick the ranked lists of the target images and truncate them
    :param T: A list of ranked images, format: [[(image_index, weight), ...], ...]
    :param target_indices: The indices of the target images in the subset
    :param k: The number of leading entries to keep from each selected list
    :return: One list per target index, holding the first k entries of T[index]
    """
    selected = []
    for target_index in target_indices:
        ranked = T[target_index]
        selected.append(ranked[:k])
    return selected


if __name__ == '__main__':
    # End-to-end driver: load images, sample a subset, extract features,
    # build the initial rankings, refine them with LHRR and plot the results.
    # All names assigned here are module-level, so in a notebook they stay
    # available to later cells.

    # Load the dataset
    print("\nFetching the dataset...")
    dataset = load_dataset()

    # Create a random subset of the dataset
    print("Creating a subset of the dataset...")
    subset_dataset = get_subset_dataset(dataset)

    # Get the indices of the target images in the subset dataset
    # NOTE(review): no random seed is set, so the subset and the targets differ between runs
    print("Selecting the target images...")
    target_indices = get_target_indices(subset_dataset)

    # Extract the feature vectors for all images in the subset dataset
    print("Extracting the feature vectors...")
    feature_vectors = extract_feature_vectors(subset_dataset)

    # Calculate the similarity measures for all pairs of images
    print("Calculating the similarity measures...")
    similarity_measures = calculate_similarity_measures(feature_vectors)

    # Rank the images based on the similarity measures (most similar first)
    print("Ranking the images based on the similarity measures...")
    T = get_ranked_list(similarity_measures)

    # Iterate through the main steps of the algorithm;
    # LHRR returns the re-ranked lists together with the neighborhood size k
    T, k = LHRR(T, iterations=5)

    # Get the first k entries of the ranked list of every target image
    target_images = get_target_images(T, target_indices, k)

    # Get all categories names from the dataset
    categories = subset_dataset.dataset.classes

    # Show the target images and the k most similar images
    # NOTE(review): show_images treats the first entry of each list as the
    # target image itself - confirm the target is still ranked first after LHRR
    for target_image in target_images:
        show_images(target_image, subset_dataset, categories)