In [1]:
import pickle
from PIL import Image
import numpy as np
import pandas as pd
from tqdm import tqdm
from src.descriptors import LBPDescriptor, DCTDescriptor, WaveletDescriptor, GaborDescriptor

from src.data import GT_QSD1_W3_LIST
from src.paths import (
    BBDD_PATH, 
    QSD1_W3_PATH,
    WEEK_3_RESULTS_PATH
)
from src.similarities import HistogramIntersection, CosineSimilarity
from src.metrics import MeanAveragePrecisionAtK

In [2]:
# Open every database / query JPEG once, in sorted filename order, so that list
# positions are stable across runs.
# NOTE(review): PIL's Image.open is documented as lazy (pixel data is decoded on
# first use and the file stays open until then) - confirm this is acceptable for
# the number of files involved.
database_image_PIL_list = [Image.open(db_img_path) for db_img_path in sorted(BBDD_PATH.glob("*.jpg"))]  # Opened once, reused by later cells
query_d1_image_PIL_list = [Image.open(query_img_path) for query_img_path in sorted(QSD1_W3_PATH.glob("*.jpg"))]  # Opened once, reused by later cells

In [3]:
# Texture descriptors evaluated in this run. Commented-out entries are kept for
# reference; the trailing notes record how long each descriptor takes to compute.
texture_descriptors = [
    #WaveletDescriptor(wavelet='haar', level=3),  # takes very little time
    #WaveletDescriptor(wavelet='db1',  level=4),  # takes very little time
    #LBPDescriptor(num_points=8, radius=1),   # takes a moderate amount of time
    # LBPDescriptor(num_points=24, radius=3),  # takes a long time
    GaborDescriptor(),
    DCTDescriptor(N=10),                     # takes little time
    DCTDescriptor(N=21),                     # takes little time
    # DCTDescriptor(N=36),                   # takes little time
    
]

In [4]:
# Partition levels to evaluate. A level N is forwarded to partition_image, which
# splits each image into an N x N grid of blocks (level 1 keeps the whole image).
partition_levels = [5]

To speed up execution, we persist the image partitions to disk so that subsequent runs of the notebook can load them instead of recomputing them.

In [5]:
def partition_image(image: Image.Image, N: int):
    """Cut ``image`` into an N x N grid of equally sized blocks.

    The block size is the integer quotient of the image size by ``N``, so any
    remainder pixels on the right and bottom edges are not part of any block.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and ``crop(box)``.
        N: Number of blocks per row and per column.

    Returns:
        A list of ``N * N`` crops in row-major order (left to right, then top
        to bottom).
    """
    full_width, full_height = image.size
    block_w = full_width // N
    block_h = full_height // N

    blocks = []
    for row in range(N):
        top = row * block_h
        for col in range(N):
            left = col * block_w
            # Crop box is (left, upper, right, lower).
            blocks.append(image.crop((left, top, left + block_w, top + block_h)))
    return blocks


def _compute_partitions(PIL_list, partition_level):
    """Split every image of ``PIL_list`` into blocks for one level (in memory only)."""
    if partition_level == 1:
        # Level 1 means "no partitioning": each image is its own single block.
        print("Partitioning at level 1")
        return [[img] for img in PIL_list]
    return [
        partition_image(img, partition_level)
        for img in tqdm(PIL_list, desc=f"Partitioning at level {partition_level}")
    ]


def _load_cached_partitions(partition_level_dir, num_images, partition_level):
    """Load cached blocks for one level, or return None if any block file is missing."""
    expected_blocks = partition_level * partition_level
    cached = []
    for img_idx in tqdm(range(num_images), desc=f"Loading images at level {partition_level}"):
        partitions = []
        for block_idx in range(expected_blocks):
            img_path = partition_level_dir / f"img_{img_idx}_block_{block_idx}.jpg"
            if not img_path.exists():
                # Partial cache (e.g. an interrupted earlier run): let the caller recompute.
                return None
            with Image.open(img_path) as img:
                # Copy so the data outlives the file handle closed by the context manager.
                partitions.append(img.copy())
        cached.append(partitions)
    return cached


def process_partitioned_images(path, PIL_list, partition_levels, mode='auto'):
    """Partition images at several levels, optionally caching the blocks on disk.

    For each level a directory named ``<path.stem>_level_<level><path.suffix>``
    (next to ``path``) holds one file per block, named
    ``img_<image index>_block_<block index>.jpg``.

    Args:
        path: Base path used to derive the per-level cache directories. Not
            used when ``mode == 'compute_notsave'``.
        PIL_list: Images to partition.
        partition_levels: Iterable of levels; level N yields N x N blocks per
            image and level 1 keeps each image whole.
        mode: ``'compute_notsave'`` computes in memory and never touches the
            disk; ``'compute'`` always recomputes and (re)writes the cache; any
            other value (default ``'auto'``) loads the cache when it is complete
            and otherwise computes and saves.

    Returns:
        Dict mapping each level to a list (one entry per image) of block lists.

    Note:
        Blocks are written under a ``.jpg`` name, so blocks read back from the
        cache have gone through an extra image encode/decode compared with
        freshly computed ones.
    """
    if mode == 'compute_notsave':
        return {level: _compute_partitions(PIL_list, level) for level in partition_levels}

    partitioned_images = {}

    for partition_level in partition_levels:
        partition_level_dir = path.with_name(f"{path.stem}_level_{partition_level}{path.suffix}")

        # Reuse the cache only when every expected block file is present. The
        # directory alone is not proof of a complete cache, because it is
        # created before any block is written.
        if mode != 'compute' and partition_level_dir.exists():
            cached = _load_cached_partitions(partition_level_dir, len(PIL_list), partition_level)
            if cached is not None:
                partitioned_images[partition_level] = cached
                continue
            print(f"Cached partitions in {partition_level_dir} are incomplete; recomputing.")

        partition_level_dir.mkdir(parents=True, exist_ok=True)

        partitions_per_image = _compute_partitions(PIL_list, partition_level)
        partitioned_images[partition_level] = partitions_per_image

        # Persist the freshly computed blocks for later runs.
        for img_idx, partitions in tqdm(enumerate(partitions_per_image),
                                        total=len(partitions_per_image),
                                        desc=f"Saving images at level {partition_level}"):
            for block_idx, block_img in enumerate(partitions):
                block_img.save(partition_level_dir / f"img_{img_idx}_block_{block_idx}.jpg")

    return partitioned_images

# Query partitions are always recomputed and re-saved (mode='compute').
# Database partitions use the default mode, which loads them from disk when the
# per-level directory already exists.
partitioned_images_query = process_partitioned_images(WEEK_3_RESULTS_PATH/"partitioned_query",query_d1_image_PIL_list, partition_levels, mode='compute')
partitioned_images_db = process_partitioned_images(WEEK_3_RESULTS_PATH/"partitioned_db",database_image_PIL_list, partition_levels)

Partitioning at level 5: 100%|██████████| 30/30 [00:00<00:00, 137.16it/s]
Saving images at level 5: 100%|██████████| 30/30 [00:01<00:00, 15.42it/s]
Loading images at level 5: 100%|██████████| 287/287 [00:08<00:00, 34.79it/s]


In [6]:
def process_partitioned_histograms(descriptors, partition_levels, partitioned_images):
    """Compute one concatenated feature vector per image, descriptor and level.

    Args:
        descriptors: Objects exposing ``name`` and ``compute(array)``.
        partition_levels: Levels to process; each must be a key of
            ``partitioned_images``.
        partitioned_images: Dict mapping level to a list (one entry per image)
            of block lists.

    Returns:
        Nested dict ``{descriptor.name: {level: [vector, ...]}}`` where each
        vector is the ``axis=0`` concatenation of ``descriptor.compute`` applied
        to every block of one image, in block order.
    """
    histograms_by_descriptor = {}

    for descriptor in descriptors:
        print("Descriptor: ", descriptor.name)
        per_level = {}
        histograms_by_descriptor[descriptor.name] = per_level

        for partition_level in partition_levels:
            image_features = []
            per_level[partition_level] = image_features

            progress = tqdm(partitioned_images[partition_level],
                            desc=f"Processing partitions at level {partition_level}")
            for blocks in progress:
                # Describe each block on its own, then join them into one vector.
                block_features = [descriptor.compute(np.array(block)) for block in blocks]
                image_features.append(np.concatenate(block_features, axis=0))

    return histograms_by_descriptor

def save_load_histograms(path, compute_func, *args, load=True):
    """Return histograms from a pickle cache, computing and caching them if needed.

    Args:
        path: ``pathlib.Path`` of the pickle file used as cache.
        compute_func: Callable invoked as ``compute_func(*args)`` when the cache
            is missing or ``load`` is false.
        *args: Positional arguments forwarded to ``compute_func``.
        load: When true (default) and ``path`` exists, the cached object is
            returned and ``compute_func`` is not called.

    Returns:
        The cached or freshly computed object.

    Note:
        The cache is keyed by ``path`` only: a cache hit returns whatever was
        stored, even if ``args`` differ from the ones used to create it.
    """
    if load and path.exists():
        # SECURITY: unpickling can execute arbitrary code; only use cache files
        # produced by this notebook.
        return load_histograms(path)

    histograms = compute_func(*args)

    # Make sure the target directory exists so a first run does not fail.
    path.parent.mkdir(parents=True, exist_ok=True)

    # Write to a sibling temp file and rename it into place, so an interrupted
    # run cannot leave a truncated pickle that later counts as a cache hit.
    tmp_path = path.with_name(path.name + ".tmp")
    with open(tmp_path, 'wb') as f:
        pickle.dump(histograms, f)
    tmp_path.replace(path)

    return histograms

def load_histograms(filename):
    """Read and return the object pickled in ``filename``.

    SECURITY: unpickling can execute arbitrary code, so only open files that
    come from a trusted source.
    """
    with open(filename, mode='rb') as pickle_file:
        histograms = pickle.load(pickle_file)
    return histograms


# Both calls reuse the pickle at the given path when it already exists.
# The cache is looked up by path only, so delete the .pkl files after changing
# texture_descriptors or partition_levels; otherwise stale histograms are loaded.
partitioned_histograms_query = save_load_histograms(WEEK_3_RESULTS_PATH/"partitioned_histograms_query.pkl", process_partitioned_histograms, texture_descriptors, partition_levels, partitioned_images_query, load=True)
partitioned_histograms_db = save_load_histograms(WEEK_3_RESULTS_PATH/"partitioned_histograms_db.pkl", process_partitioned_histograms, texture_descriptors, partition_levels, partitioned_images_db)

Descriptor:  Gabor_wavelengths_(3, 5, 7)_orientations_4


Processing partitions at level 5: 100%|██████████| 30/30 [00:05<00:00,  5.26it/s]


Descriptor:  DCT_10


Processing partitions at level 5: 100%|██████████| 30/30 [00:13<00:00,  2.30it/s]


Descriptor:  DCT_21


Processing partitions at level 5: 100%|██████████| 30/30 [00:13<00:00,  2.29it/s]


In [7]:
def compute_query_descriptor_distances(similarity_classes, texture_descriptors, partition_levels, partitioned_histograms_db, partitioned_histograms_query):
    """Score every query against the database for each measure, descriptor and level.

    Args:
        similarity_classes: Objects exposing ``compute(query_array, db_array)``;
            the class name of each object is used as the outer key.
        texture_descriptors: Objects exposing ``name``, used as the second key.
        partition_levels: Levels to evaluate, used as the innermost key.
        partitioned_histograms_db: ``{descriptor name: {level: [vector, ...]}}``
            for the database images.
        partitioned_histograms_query: Same structure for the query images.

    Returns:
        ``{similarity class name: {descriptor name: {level: result}}}`` where
        ``result`` is whatever ``similarity.compute`` returned.
    """
    def _scores_per_level(similarity, descriptor_name):
        per_level = {}
        for level in partition_levels:
            # Stack the per-image vectors into arrays before comparing them.
            db_matrix = np.array(partitioned_histograms_db[descriptor_name][level])
            query_matrix = np.array(partitioned_histograms_query[descriptor_name][level])
            per_level[level] = similarity.compute(query_matrix, db_matrix)
        return per_level

    results = {}
    for similarity in similarity_classes:
        similarity_name = similarity.__class__.__name__
        per_descriptor = {}
        results[similarity_name] = per_descriptor

        for descriptor in texture_descriptors:
            descriptor_name = descriptor.name
            print(f"- {similarity_name} & {descriptor_name}")
            per_descriptor[descriptor_name] = _scores_per_level(similarity, descriptor_name)

    return results

In [8]:
def get_topk_distances(query_distances_to_bbdd: np.ndarray, k: int = 1) -> tuple[list[list], list[list]]:
    """Return, for every query row, the ``k`` database entries with the smallest values.

    Rows are sorted in ascending order, so the input must be a score where
    smaller means a better match.

    Args:
        query_distances_to_bbdd: 2-D array-like with one row per query and one
            column per database entry. Nested lists are accepted.
        k: Number of entries to keep per query. When ``k`` exceeds the number
            of columns, all columns are returned.

    Returns:
        ``(indices, values)`` as nested lists: the column indices of the ``k``
        smallest values per row, and the corresponding values in the same order.
    """
    scores = np.asarray(query_distances_to_bbdd)

    # A stable sort makes ties deterministic: equal scores keep index order.
    retrieved_bbdd_indices = np.argsort(scores, axis=1, kind="stable")[:, :k]

    retrieved_bbdd_similarity = np.take_along_axis(scores, retrieved_bbdd_indices, axis=1)

    return retrieved_bbdd_indices.tolist(), retrieved_bbdd_similarity.tolist()

In [9]:
# Measures used to compare query and database descriptors. Retrieval keeps the
# smallest values per query (ascending argsort in get_topk_distances).
# NOTE(review): this assumes each compute() returns a distance-like score where
# lower is better - confirm against src.similarities.
similarity_classes = [
    HistogramIntersection(),
    CosineSimilarity()
]

In [10]:
# Score every query image against the database for each
# (similarity, descriptor, partition level) combination configured above.
query_descriptor_distances_to_db_list = compute_query_descriptor_distances(
    similarity_classes          = similarity_classes,
    texture_descriptors         = texture_descriptors,
    partition_levels            = partition_levels,
    partitioned_histograms_db   = partitioned_histograms_db,
    partitioned_histograms_query= partitioned_histograms_query
)

- HistogramIntersection & Gabor_wavelengths_(3, 5, 7)_orientations_4
- HistogramIntersection & DCT_10
- HistogramIntersection & DCT_21
- CosineSimilarity & Gabor_wavelengths_(3, 5, 7)_orientations_4
- CosineSimilarity & DCT_10
- CosineSimilarity & DCT_21


In [11]:
def retrieve_top_k_db_entries(query_descriptor_distances_to_db_list, k, bprint=False):
    """Keep the top-``k`` database entries for every scored combination.

    Args:
        query_descriptor_distances_to_db_list: Nested dict
            ``{similarity name: {descriptor name: {level: scores}}}``.
        k: Number of entries to keep per query, forwarded to
            ``get_topk_distances``.
        bprint: When true, print the retrieved indices of every combination.

    Returns:
        Nested dict with the same three key levels whose leaves are
        ``{"indexes": ..., "similarities": ...}`` as returned by
        ``get_topk_distances``.
    """
    retrieved = {}

    for similarity_name, per_descriptor in query_descriptor_distances_to_db_list.items():
        similarity_bucket = {}
        retrieved[similarity_name] = similarity_bucket

        for descriptor_name, per_level in per_descriptor.items():
            level_bucket = {}
            similarity_bucket[descriptor_name] = level_bucket

            for partition_level, distances in per_level.items():
                best_indices, best_values = get_topk_distances(distances, k)
                level_bucket[partition_level] = {
                    "indexes": best_indices,
                    "similarities": best_values,
                }
                if not bprint:
                    continue
                print(f"{similarity_name} - {descriptor_name} | BB Level {partition_level}:")
                print(f"Top-{k} Indices: {best_indices}\n")

    return retrieved

In [12]:
# Number of database entries retrieved per query image.
k = 5

In [13]:
# Keep the top-k database indices for every combination; bprint=True also
# prints the retrieved indices of each combination.
retrieved_db = retrieve_top_k_db_entries(query_descriptor_distances_to_db_list, k, bprint=True)

HistogramIntersection - Gabor_wavelengths_(3, 5, 7)_orientations_4 | BB Level 5:
Top-5 Indices: [[7, 182, 217, 114, 4], [186, 189, 47, 177, 36], [128, 142, 104, 32, 76], [35, 40, 65, 120, 47], [262, 23, 138, 176, 235], [23, 176, 239, 278, 114], [21, 174, 105, 46, 200], [272, 120, 258, 202, 40], [13, 3, 11, 274, 78], [133, 93, 157, 142, 37], [286, 163, 147, 90, 150], [22, 202, 212, 40, 161], [91, 279, 101, 248, 102], [222, 245, 163, 47, 283], [219, 179, 155, 144, 35], [248, 102, 101, 168, 279], [94, 103, 46, 191, 239], [104, 161, 257, 132, 286], [74, 164, 106, 107, 40], [201, 48, 22, 243, 161], [252, 226, 248, 60, 261], [81, 163, 226, 60, 205], [280, 165, 114, 100, 182], [110, 93, 37, 131, 142], [258, 42, 212, 176, 150], [120, 40, 116, 24, 47], [106, 35, 30, 107, 148], [25, 35, 147, 90, 254], [93, 142, 110, 37, 157], [212, 202, 42, 258, 94]]

HistogramIntersection - DCT_10 | BB Level 5:
Top-5 Indices: [[7, 30, 1, 194, 53], [186, 1, 184, 194, 182], [238, 35, 47, 106, 128], [35, 194, 5, 3

In [14]:
# Evaluation metrics and the cut-offs K at which each metric is computed.
# The retrieved lists hold k entries per query, so values of K above k cannot
# use more than k results.
metrics = [MeanAveragePrecisionAtK()]
K = [1,5]

In [15]:
def compute_results_dataframe(K, metrics, similarity_classes, texture_descriptors, partition_levels, retrieved_db, GT_QSD1_W3_LIST):
    """Evaluate every (K, metric, similarity, descriptor, level) combination.

    Args:
        K: Iterable of cut-offs passed to each metric.
        metrics: Objects exposing ``compute(ground_truth, retrieved, k)``.
        similarity_classes: Objects whose class name keys ``retrieved_db``.
        texture_descriptors: Objects whose ``name`` keys ``retrieved_db``.
        partition_levels: Levels keying the innermost ``retrieved_db`` dicts.
        retrieved_db: ``{similarity name: {descriptor name: {level: {"indexes": ...}}}}``.
        GT_QSD1_W3_LIST: Ground truth forwarded unchanged to the metric.

    Returns:
        ``pandas.DataFrame`` with one row per combination and the columns
        ``K``, ``Metric``, ``Descriptor``, ``Similarity``, ``Method`` and
        ``Result`` (metric value rounded to two decimals).
    """
    def _build_row(k, metric, similarity_name, descriptor_name, partition_level):
        indexes_retrieved = retrieved_db[similarity_name][descriptor_name][partition_level]["indexes"]
        score = metric.compute(GT_QSD1_W3_LIST, indexes_retrieved, k)
        return {
            "K": k,
            "Metric": metric.__class__.__name__,
            "Descriptor": descriptor_name,
            "Similarity": similarity_name,
            "Method": f"BB at level {partition_level}",
            "Result": round(score, 2),
            "Indices": indexes_retrieved,
        }

    rows = [
        _build_row(k, metric, similarity.__class__.__name__, descriptor.name, partition_level)
        for k in K
        for metric in metrics
        for similarity in similarity_classes
        for descriptor in texture_descriptors
        for partition_level in partition_levels
    ]

    # Helper columns are dropped from the final table; missing ones are ignored.
    helper_columns = ["Indices", "Descriptor_id", "Similarity_id"]
    return pd.DataFrame(rows).drop(columns=helper_columns, errors='ignore')

In [16]:
# Build the evaluation table for every configured combination.
results_df = compute_results_dataframe(K, metrics, similarity_classes, texture_descriptors, partition_levels, retrieved_db, GT_QSD1_W3_LIST)

In [17]:
# Display the evaluation table as the cell output.
results_df

Unnamed: 0,K,Metric,Descriptor,Similarity,Method,Result
0,1,MeanAveragePrecisionAtK,"Gabor_wavelengths_(3, 5, 7)_orientations_4",HistogramIntersection,BB at level 5,0.87
1,1,MeanAveragePrecisionAtK,DCT_10,HistogramIntersection,BB at level 5,0.9
2,1,MeanAveragePrecisionAtK,DCT_21,HistogramIntersection,BB at level 5,0.83
3,1,MeanAveragePrecisionAtK,"Gabor_wavelengths_(3, 5, 7)_orientations_4",CosineSimilarity,BB at level 5,0.93
4,1,MeanAveragePrecisionAtK,DCT_10,CosineSimilarity,BB at level 5,0.87
5,1,MeanAveragePrecisionAtK,DCT_21,CosineSimilarity,BB at level 5,0.87
6,5,MeanAveragePrecisionAtK,"Gabor_wavelengths_(3, 5, 7)_orientations_4",HistogramIntersection,BB at level 5,0.9
7,5,MeanAveragePrecisionAtK,DCT_10,HistogramIntersection,BB at level 5,0.91
8,5,MeanAveragePrecisionAtK,DCT_21,HistogramIntersection,BB at level 5,0.88
9,5,MeanAveragePrecisionAtK,"Gabor_wavelengths_(3, 5, 7)_orientations_4",CosineSimilarity,BB at level 5,0.94


Let's put it all together

In [18]:
def task2(image_list, db_image_list, texture_descriptor, k=5, similarity_class=None, partition_level=5):
    """Retrieve the top-``k`` database entries for every painting of every query image.

    Args:
        image_list: List with one sub-list per query image; each sub-list holds
            the images found in that query (it may be empty).
        db_image_list: Database images, in the order their indices refer to.
        texture_descriptor: Descriptor exposing ``name`` and ``compute``.
        k: Number of database entries to retrieve per painting.
        similarity_class: Measure exposing ``compute``; defaults to a new
            ``CosineSimilarity()`` when omitted.
        partition_level: Level N of the N x N block grid used for descriptors.

    Returns:
        A list parallel to ``image_list``; each entry holds one list of ``k``
        database indices per painting of that query image.
    """
    # Flatten the per-query sub-lists and remember which query each image came from.
    image_list_flat = [img for sublist in image_list for img in sublist]
    positions = [i for i, sublist in enumerate(image_list) for _ in sublist]

    list_all_images = [[] for _ in range(len(image_list))]
    if not image_list_flat:
        # Nothing to retrieve: skip the pipeline instead of failing on empty arrays.
        return list_all_images

    if similarity_class is None:
        # Created per call rather than once at function definition time.
        similarity_class = CosineSimilarity()

    # PARTITION
    # In 'compute_notsave' mode the path argument is not used; query blocks stay in memory.
    partitioned_images_query = process_partitioned_images(
        WEEK_3_RESULTS_PATH / "partitioned_histograms_query2",
        image_list_flat, [partition_level], mode="compute_notsave")
    partitioned_images_db = process_partitioned_images(
        WEEK_3_RESULTS_PATH / "partitioned_db", db_image_list, [partition_level])

    # COMPUTE HISTS
    partitioned_histograms_query = save_load_histograms(
        WEEK_3_RESULTS_PATH / "partitioned_histograms_query2.pkl",
        process_partitioned_histograms, [texture_descriptor], [partition_level],
        partitioned_images_query, load=False)

    db_histograms_path = WEEK_3_RESULTS_PATH / "partitioned_histograms_db.pkl"
    partitioned_histograms_db = save_load_histograms(
        db_histograms_path,
        process_partitioned_histograms, [texture_descriptor], [partition_level],
        partitioned_images_db)

    # The cached file is returned whenever it exists, whatever descriptor or
    # level it was built for. Compute any missing entry and merge it into the
    # cache instead of failing with a KeyError further down.
    descriptor_name = texture_descriptor.name
    if partition_level not in partitioned_histograms_db.get(descriptor_name, {}):
        missing = process_partitioned_histograms(
            [texture_descriptor], [partition_level], partitioned_images_db)
        partitioned_histograms_db.setdefault(descriptor_name, {}).update(missing[descriptor_name])
        save_load_histograms(db_histograms_path, lambda: partitioned_histograms_db, load=False)

    query_descriptor_distances_to_db_list = compute_query_descriptor_distances(
        similarity_classes          = [similarity_class],
        texture_descriptors         = [texture_descriptor],
        partition_levels            = [partition_level],
        partitioned_histograms_db   = partitioned_histograms_db,
        partitioned_histograms_query= partitioned_histograms_query
    )

    retrieved_db = retrieve_top_k_db_entries(query_descriptor_distances_to_db_list, k)

    # Only one (similarity, descriptor, level) combination was computed, so
    # address it by key instead of relying on dict ordering.
    similarity_name = similarity_class.__class__.__name__
    list_all_paintings = retrieved_db[similarity_name][descriptor_name][partition_level]["indexes"]

    # Regroup the flat results under the query image they belong to.
    for flat_idx, top_k_indices in enumerate(list_all_paintings):
        list_all_images[positions[flat_idx]].append(top_k_indices)

    return list_all_images