In [1]:
import pickle
from PIL import Image
import numpy as np
import pandas as pd
from tqdm import tqdm
from src.descriptors import LBPDescriptor, DCTDescriptor, WaveletDescriptor, GaborDescriptor

from src.data import GT_QSD1_W3_LIST
from src.paths import (
    BBDD_PATH, 
    QSD1_W3_PATH, 
    QSD1_NON_AUGMENTED_W3_PATH, 
    WEEK_3_PATH, 
    WEEK_3_RESULTS_PATH
)
from src.similarities import HistogramIntersection, CosineSimilarity
from src.metrics import MeanAveragePrecisionAtK

In [2]:
def _load_image_list(directory, pattern="*.jpg"):
    """Eagerly load every image matching `pattern` in `directory`, in sorted path order.

    `Image.open` is lazy: it keeps the underlying file open until the pixel
    data is read. Opening a whole directory in a comprehension therefore holds
    one open file handle per image. Copying inside the context manager forces
    the decode and releases the handle right away (the same pattern this
    notebook uses when reloading cached partitions).

    Trade-off: every image is decoded up front, even if later cells end up
    reading cached partitions instead of these images.
    """
    loaded_images = []
    for image_path in sorted(directory.glob(pattern)):
        with Image.open(image_path) as img:
            loaded_images.append(img.copy())
    return loaded_images


database_image_PIL_list = _load_image_list(BBDD_PATH)  # Load once
query_d1_image_PIL_list = _load_image_list(QSD1_W3_PATH)  # Load once

In [None]:
# Texture descriptors evaluated in this run. Commented-out entries are disabled
# alternatives; the trailing remarks are rough runtime notes.
# NOTE: descriptor histograms are cached on disk keyed only by file path (see
# save_load_histograms), so after editing this list the cached .pkl files must be
# removed for the change to take effect.
texture_descriptors = [
    #WaveletDescriptor(wavelet='haar', level=3),  # very fast
    #WaveletDescriptor(wavelet='db1',  level=4),  # very fast
    #LBPDescriptor(num_points=8, radius=1),   # medium runtime
    # LBPDescriptor(num_points=24, radius=3),  # slow
    GaborDescriptor(),
    DCTDescriptor(N=10),                     # fast
    DCTDescriptor(N=21),                     # fast
    # DCTDescriptor(N=36),                   # fast
    
]

In [5]:
# Grid sizes to evaluate: a level N splits every image into N x N blocks
# (level 1 keeps the whole image as a single block).
partition_levels = [5]

To speed up execution, we persist the image partitions to disk so that later runs of the notebook can load them instead of recomputing them.

In [None]:
def partition_image(image: Image.Image, N: int):
    """Cut `image` into an N x N grid of equally sized blocks.

    Block width and height are the integer quotients of the image width and
    height by N, so any remainder pixels on the right and bottom edges are not
    part of any block.

    Args:
        image: Image to split.
        N: Number of blocks per row and per column.

    Returns:
        A list of N * N cropped images in row-major order (left to right,
        then top to bottom).
    """
    width, height = image.size
    block_w = width // N
    block_h = height // N

    blocks = []
    for row in range(N):
        top = row * block_h
        for col in range(N):
            left = col * block_w
            blocks.append(image.crop((left, top, left + block_w, top + block_h)))
    return blocks


def _load_cached_partitions(partition_level_dir, num_images, partition_level):
    """Load the cached blocks of one partition level, or return None if the cache is incomplete."""
    blocks_per_image = partition_level ** 2
    cached = []

    for img_idx in tqdm(range(num_images), desc=f"Loading images at level {partition_level}"):
        block_paths = [
            partition_level_dir / f"img_{img_idx}_block_{block_idx}.jpg"
            for block_idx in range(blocks_per_image)
        ]
        # Check for completeness before opening anything: a missing block means
        # the cache cannot be trusted for this level.
        if not all(block_path.exists() for block_path in block_paths):
            return None

        partitions = []
        for block_path in block_paths:
            with Image.open(block_path) as img:  # copy so the file handle can be released
                partitions.append(img.copy())
        cached.append(partitions)

    return cached


def process_partitioned_images(path, PIL_list, partition_levels, mode='auto'):
    """Partition every image at each requested level, caching the blocks on disk.

    For each level, blocks are stored under a sibling directory of `path` named
    `<stem>_level_<level><suffix>` as `img_<i>_block_<j>.jpg`.

    Args:
        path: Base path used to derive the per-level cache directories.
        PIL_list: Images to partition; their order defines the image index
            used in the cache file names.
        partition_levels: Iterable of grid sizes (each must be >= 1).
        mode: With 'compute' the partitions are always recomputed and the cache
            overwritten; with any other value an existing, complete cache is
            loaded instead.

    Returns:
        Dict mapping each level to a list (one entry per image) of lists of
        block images.

    Raises:
        ValueError: If a partition level is smaller than 1.

    Notes:
        An existing cache directory that lacks any expected block (for example
        after an interrupted run, or when more images are passed than were
        cached) is treated as a cache miss and recomputed, instead of silently
        producing empty or short partition lists.
        Blocks are written with a `.jpg` extension, so blocks loaded from the
        cache have gone through an extra encode/decode step compared with
        freshly computed ones.
    """
    partitioned_images = {}

    for partition_level in partition_levels:
        if partition_level < 1:
            raise ValueError(f"partition level must be >= 1, got {partition_level}")

        partition_level_dir = path.with_name(f"{path.stem}_level_{partition_level}{path.suffix}")

        # Load existing partitions from disk if they exist and mode allows loading
        if mode != 'compute' and partition_level_dir.exists():
            cached = _load_cached_partitions(partition_level_dir, len(PIL_list), partition_level)
            if cached is not None:
                partitioned_images[partition_level] = cached
                continue  # Skip computation for this level
            print(f"Incomplete cache in {partition_level_dir}, recomputing level {partition_level}")

        # If partitions don't exist (or are incomplete), or if mode is 'compute',
        # calculate and store partitions
        partition_level_dir.mkdir(parents=True, exist_ok=True)

        if partition_level == 1:
            print("Partitioning at level 1")
            partitioned_images[partition_level] = [[img] for img in PIL_list]
        else:
            partitioned_images[partition_level] = [
                partition_image(img, partition_level)
                for img in tqdm(PIL_list, desc=f"Partitioning at level {partition_level}")
            ]

        # Save computed partitions to disk
        for img_idx, partitions in tqdm(enumerate(partitioned_images[partition_level]),
                                        total=len(partitioned_images[partition_level]),
                                        desc=f"Saving images at level {partition_level}"):
            for block_idx, block_img in enumerate(partitions):
                block_img.save(partition_level_dir / f"img_{img_idx}_block_{block_idx}.jpg")

    return partitioned_images


# Partition the query and database images (or load their cached partitions).
partitioned_images_query = process_partitioned_images(
    WEEK_3_RESULTS_PATH / "partitioned_query",
    query_d1_image_PIL_list,
    partition_levels,
)
partitioned_images_db = process_partitioned_images(
    WEEK_3_RESULTS_PATH / "partitioned_db",
    database_image_PIL_list,
    partition_levels,
)

In [None]:
def process_partitioned_histograms(descriptors, partition_levels, partitioned_images):
    """Compute one concatenated descriptor vector per image, descriptor and level.

    Each block of an image is converted to a NumPy array and passed to
    `descriptor.compute`; the per-block outputs are concatenated along axis 0.

    Args:
        descriptors: Objects exposing a `name` attribute and a `compute(array)` method.
        partition_levels: Levels to process; each must be a key of `partitioned_images`.
        partitioned_images: Mapping level -> list (per image) of lists of blocks.

    Returns:
        Nested dict `result[descriptor.name][level]`, a list with one
        concatenated array per image, in input order.
    """
    histograms_by_descriptor = {}

    for descriptor in descriptors:
        print("Descriptor: ", descriptor.name)
        histograms_by_level = {}
        histograms_by_descriptor[descriptor.name] = histograms_by_level

        for level in partition_levels:
            image_blocks = tqdm(partitioned_images[level], desc=f"Processing partitions at level {level}")
            histograms_by_level[level] = [
                np.concatenate(
                    [descriptor.compute(np.array(block)) for block in blocks],
                    axis=0,
                )
                for blocks in image_blocks
            ]

    return histograms_by_descriptor

def save_load_histograms(path, compute_func, *args, force_recompute=False):
    """Return histograms cached at `path`, computing and caching them on a miss.

    Args:
        path: `pathlib.Path` of the pickle cache file.
        compute_func: Callable invoked as `compute_func(*args)` on a cache miss.
        *args: Positional arguments forwarded to `compute_func`.
        force_recompute: When True, ignore an existing cache file and
            overwrite it with freshly computed histograms.

    Returns:
        The cached or freshly computed histograms.

    Notes:
        The cache is keyed by `path` only: if the inputs change (for example a
        different descriptor list), delete the file or pass
        `force_recompute=True`, otherwise the stale content is returned.
        The cache is a pickle file, so only load files produced by this
        notebook; unpickling untrusted data can execute arbitrary code.
    """
    if path.exists() and not force_recompute:
        return load_histograms(path)

    histograms = compute_func(*args)

    path.parent.mkdir(parents=True, exist_ok=True)
    # Write to a temporary sibling and rename it into place, so a failure while
    # pickling never leaves a truncated file that later runs would treat as a
    # valid cache.
    tmp_path = path.with_name(path.name + ".tmp")
    try:
        with open(tmp_path, 'wb') as f:
            pickle.dump(histograms, f)
        tmp_path.replace(path)
    except BaseException:
        tmp_path.unlink(missing_ok=True)
        raise

    return histograms

def load_histograms(filename):
    """Read and return the object pickled in the file `filename`.

    Only use this on files written by this notebook: unpickling untrusted
    data can execute arbitrary code.
    """
    stream = open(filename, 'rb')
    try:
        return pickle.load(stream)
    finally:
        stream.close()

# Compute the descriptor histograms of queries and database (or load them from the pickle cache).
partitioned_histograms_query = save_load_histograms(
    WEEK_3_RESULTS_PATH / "partitioned_histograms_query.pkl",
    process_partitioned_histograms,
    texture_descriptors,
    partition_levels,
    partitioned_images_query,
)
partitioned_histograms_db = save_load_histograms(
    WEEK_3_RESULTS_PATH / "partitioned_histograms_db.pkl",
    process_partitioned_histograms,
    texture_descriptors,
    partition_levels,
    partitioned_images_db,
)

In [8]:
# Measures used to compare query descriptors against database descriptors.
# Each instance is applied through `compute(query, db)` and its class name is
# used as the key under which its results are stored.
similarity_classes = [
    HistogramIntersection(),
    CosineSimilarity()
]

In [None]:
# Nested results: similarity name -> descriptor name -> partition level -> output
# of `similarity.compute(query_descriptors, db_descriptors)`.
query_descriptor_distances_to_db_list = {}

for similarity in similarity_classes:
    similarity_name = similarity.__class__.__name__
    distances_by_descriptor = {}
    query_descriptor_distances_to_db_list[similarity_name] = distances_by_descriptor

    for descriptor in texture_descriptors:
        descriptor_name = descriptor.name
        print(f"- {similarity_name} & {descriptor_name}")

        # Compare the stacked query descriptors with the stacked database
        # descriptors once per partition level.
        distances_by_descriptor[descriptor_name] = {
            partition_level: similarity.compute(
                np.array(partitioned_histograms_query[descriptor_name][partition_level]),
                np.array(partitioned_histograms_db[descriptor_name][partition_level]),
            )
            for partition_level in partition_levels
        }


In [10]:
def get_topk_distances(query_distances_to_bbdd: np.ndarray, k: int = 1) -> tuple[list[list], list[list]]:
    """Return, for every query row, the k database entries with the smallest values.

    Rows are sorted in ascending order, so the input is treated as a distance
    matrix (smaller value = better match).
    NOTE(review): the callers pass the output of the similarity classes; confirm
    that those return distance-like values where smaller means closer.

    Args:
        query_distances_to_bbdd: 2-D array (or nested sequence) with one row per
            query and one column per database entry.
        k: Number of entries to keep per query. Values larger than the number
            of columns return all columns.

    Returns:
        A pair `(indices, values)` of nested lists: the column indices of the
        k smallest values of each row, and the corresponding values.

    Raises:
        ValueError: If `k` is negative (a negative slice bound would silently
            drop entries from the end instead of limiting the result).
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    # Accept nested lists as well as arrays; take_along_axis needs an ndarray.
    distances = np.asarray(query_distances_to_bbdd)

    # A stable sort makes the order of tied entries deterministic (lowest index first).
    retrieved_bbdd_indices = np.argsort(distances, axis=1, kind="stable")[:, :k]

    retrieved_bbdd_similarity = np.take_along_axis(distances, retrieved_bbdd_indices, axis=1)

    return retrieved_bbdd_indices.tolist(), retrieved_bbdd_similarity.tolist()

In [None]:
# Define k (number of top results to retrieve)
k = 5

# Nested results: similarity name -> descriptor name -> partition level ->
# {"indexes": top-k database indices, "similarities": the matching values}.
retrieved_db = {}

for similarity_name, descriptors_dict in query_descriptor_distances_to_db_list.items():
    retrieved_by_descriptor = {}
    retrieved_db[similarity_name] = retrieved_by_descriptor

    for descriptor_name, distances_by_level in descriptors_dict.items():
        print(similarity_name, descriptor_name)
        retrieved_by_level = {}
        retrieved_by_descriptor[descriptor_name] = retrieved_by_level

        # BB Top-k retrieval for each partition level
        for partition_level, distances in distances_by_level.items():
            topk_indices_bb, topk_similarities_bb = get_topk_distances(distances, k)
            retrieved_by_level[partition_level] = {
                "indexes": topk_indices_bb,
                "similarities": topk_similarities_bb,
            }
            print(f"Top-{k} for {similarity_name} - {descriptor_name} (BB Level {partition_level}):")
            print(f"Indices: {topk_indices_bb}\n\n")

In [12]:
# Evaluation metrics; each is called as `metric.compute(ground_truth, retrieved_indices, k)`.
metrics = [MeanAveragePrecisionAtK()]
# Cut-offs at which the metrics are evaluated. The retrieval cell keeps only the
# top 5 candidates per query, so values above 5 would be scored on truncated lists.
K = [1,5]

In [13]:
# One record per (cut-off, metric, similarity, descriptor, partition level).
results = []

# The cut-off gets its own loop name so that this cell does not overwrite the
# notebook-level retrieval depth `k` set in the retrieval cell.
for eval_k in K:
    for metric in metrics:
        for similarity in similarity_classes:
            similarity_name = similarity.__class__.__name__
            for descriptor in texture_descriptors:
                descriptor_name = descriptor.name

                # BB
                for partition_level in partition_levels:
                    indexes_retrieved = retrieved_db[similarity_name][descriptor_name][partition_level]["indexes"]
                    map_val = round(metric.compute(GT_QSD1_W3_LIST, indexes_retrieved, eval_k), 2)
                    results.append({
                        "K": eval_k,
                        "Metric": metric.__class__.__name__,
                        "Descriptor": descriptor_name,
                        "Similarity": similarity_name,
                        "Method": f"BB at level {partition_level}",
                        "Result": map_val,
                        "Indices": indexes_retrieved,
                    })


results_df = pd.DataFrame(results)

# Hide the bulky per-query index lists in the displayed table.
# errors='ignore' keeps this working when `results` is empty and the frame has no columns.
results_df_cleaned = results_df.drop(columns=["Indices"], errors='ignore')

results_df_cleaned

Unnamed: 0,K,Metric,Descriptor,Similarity,Method,Result
0,1,MeanAveragePrecisionAtK,"Gabor_wavelengths_(3, 5, 7)_orientations_4",HistogramIntersection,BB at level 5,0.87
1,1,MeanAveragePrecisionAtK,DCT_10,HistogramIntersection,BB at level 5,0.9
2,1,MeanAveragePrecisionAtK,DCT_21,HistogramIntersection,BB at level 5,0.87
3,1,MeanAveragePrecisionAtK,"Gabor_wavelengths_(3, 5, 7)_orientations_4",CosineSimilarity,BB at level 5,0.93
4,1,MeanAveragePrecisionAtK,DCT_10,CosineSimilarity,BB at level 5,0.87
5,1,MeanAveragePrecisionAtK,DCT_21,CosineSimilarity,BB at level 5,0.9
6,5,MeanAveragePrecisionAtK,"Gabor_wavelengths_(3, 5, 7)_orientations_4",HistogramIntersection,BB at level 5,0.9
7,5,MeanAveragePrecisionAtK,DCT_10,HistogramIntersection,BB at level 5,0.91
8,5,MeanAveragePrecisionAtK,DCT_21,HistogramIntersection,BB at level 5,0.9
9,5,MeanAveragePrecisionAtK,"Gabor_wavelengths_(3, 5, 7)_orientations_4",CosineSimilarity,BB at level 5,0.94
