# In [1]:


from tqdm.auto import tqdm
import torch
import os



def _average_precision(matches):
    """Return the average precision of one ranked list of boolean relevance flags."""
    hits_so_far = torch.cumsum(matches, dim=0)
    ranks = torch.arange(1, len(matches) + 1, device=matches.device)
    precision_at_rank = hits_so_far.float() / ranks
    # Only ranks that hold a relevant item contribute to the sum; the clamp
    # avoids a 0/0 when nothing relevant was retrieved (the AP is then 0).
    return (precision_at_rank * matches).sum() / matches.sum().clamp(min=1)


def evaluate_retrieval_Kam_Woh_topK_mAP(sorted_indices, query_labels, gallery_labels, dir_results, features_size, top_k=100, offset=1):
    """Compute mAP, mAP@top_k, Recall@R and Precision@R for ranked retrieval results.

    For every query, the ranked gallery indices (after skipping the first
    ``offset`` entries) are turned into a boolean "same label as the query"
    mask. From that mask the function derives:

    * the average precision over the whole ranking,
    * the average precision over the first ``top_k`` results, normalised by
      the number of relevant items found *within* those ``top_k`` results,
    * Recall@R: 1 if at least one relevant item is among the first R results,
      otherwise 0,
    * Precision@R: relevant items among the first R results, divided by R,

    for R in (1, 5, 10, 20). Per-query values are averaged over all queries,
    written to ``retrieval_results_size_{features_size}_topK_{top_k}.txt``
    inside ``dir_results`` and printed.

    Args:
        sorted_indices: Tensor whose row ``i`` holds gallery indices ranked
            for query ``i``.
        query_labels: Tensor with one label per query.
        gallery_labels: Tensor of gallery labels, indexed by the values stored
            in ``sorted_indices``.
        dir_results: Directory for the result file; created if missing.
        features_size: Value interpolated into the result file name and its
            header line (e.g. a feature-layer width or a tag such as
            ``"vanilla"``).
        top_k: Number of leading results used for mAP@top_k.
        offset: Number of leading ranked entries to skip for every query. The
            default of 1 keeps the previous hard-coded behaviour; pass 0 when
            nothing has to be skipped (e.g. query and gallery sets that do not
            overlap).

    Returns:
        Tuple ``(mean_ap, mean_ap_top_k, mean_recalls, mean_precisions)``. The
        first two are 0-dim tensors, the last two are dicts mapping each R to
        a 0-dim tensor.

    Raises:
        AssertionError: If an argument fails the basic validation below.
    """
    # Basic input validation (kept as asserts so callers see the same exception type)
    assert isinstance(sorted_indices, torch.Tensor), "sorted_indices must be a PyTorch Tensor"
    assert isinstance(query_labels, torch.Tensor), "query_labels must be a PyTorch Tensor"
    assert isinstance(gallery_labels, torch.Tensor), "gallery_labels must be a PyTorch Tensor"
    assert isinstance(top_k, int) and top_k > 0, "top_k must be a positive integer"
    assert isinstance(dir_results, str), "dir_results must be a string"
    assert isinstance(offset, int) and offset >= 0, "offset must be a non-negative integer"

    # Ensure the lengths of the sorted_indices and query_labels match
    assert sorted_indices.shape[0] == len(query_labels), "Length of sorted_indices and query_labels must match"

    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(dir_results, exist_ok=True)
    result_file_path = os.path.join(dir_results, f"retrieval_results_size_{features_size}_topK_{top_k}.txt")

    Rs = [1, 5, 10, 20]
    APs = []
    AP_top_ks = []
    recalls = {R: [] for R in Rs}
    precisions = {R: [] for R in Rs}

    for ranked_indices, query_label in tqdm(zip(sorted_indices, query_labels), total=len(query_labels)):
        retrieved_indices = ranked_indices[offset:]
        matches = torch.eq(gallery_labels[retrieved_indices], query_label)

        APs.append(_average_precision(matches).item())
        # The top_k ranking is simply a prefix of the full ranking.
        AP_top_ks.append(_average_precision(matches[:top_k]).item())

        for R in Rs:
            # Slicing (instead of indexing position R - 1) stays valid when
            # fewer than R results are available for this query.
            top_R = matches[:R]
            recalls[R].append(top_R.any().float().item())
            precisions[R].append((top_R.sum() / R).item())

    APs = torch.tensor(APs)
    AP_top_ks = torch.tensor(AP_top_ks)
    recalls = {R: torch.tensor(recalls[R]) for R in Rs}
    precisions = {R: torch.tensor(precisions[R]) for R in Rs}

    mean_ap = APs.mean()
    mean_ap_top_k = AP_top_ks.mean()
    mean_recalls = {R: recalls[R].mean() for R in Rs}
    mean_precisions = {R: precisions[R].mean() for R in Rs}

    # Saving results to a file
    with open(result_file_path, 'w', encoding='utf-8') as file:
        file.write(f"Retrieval results for feature size {features_size}\n\n")

        file.write(f'Mean Average Precision full:\n')
        file.write(f'{mean_ap.item()}\n\n')

        file.write(f'Mean Average Precision at top {top_k} (mAP@top_{top_k}):\n')
        file.write(f'{mean_ap_top_k.item()}\n\n')

        file.write('Recall:\n')
        for R in Rs:
            file.write(f'R@{R}: {mean_recalls[R].item()}\n')

        file.write('\nPrecision:\n')
        for R in Rs:
            file.write(f'P@{R}: {mean_precisions[R].item()}\n')

    print(f'\nMean Average Precision full')
    print(mean_ap)

    print(f'\nMean Average Precision top_k {top_k}')
    print(mean_ap_top_k)

    print('Recall')
    for R in Rs:
        print(f'R@{R}', mean_recalls[R])

    print('Precision')
    for R in Rs:
        print(f'P@{R}', mean_precisions[R])

    return mean_ap, mean_ap_top_k, mean_recalls, mean_precisions

# [notebook cell output residue, not code] from .autonotebook import tqdm as notebook_tqdm


# In [None]:
from loaders.cub200loader import DataLoaderCUB200
from helpers.load_models import load_resnet50_convV2, load_resnet50_unmodifiedVanilla
from retrieval.run_retrieval_evaluation_baselines import run_retrieval_evaluation_baselines_models, run_retrieval_evaluation_Vanilla


# Evaluate retrieval for the vanilla fine-tuned ResNet-50 on CUB-200 and
# persist the resulting metrics.
#
# NOTE(review): this cell also uses `data_root`, `load_dir_vanilla`, `device`,
# `print_result_dict` and `save_retrieval_results`, none of which are defined
# or imported here — presumably they come from earlier notebook cells; confirm
# before running this cell on its own.

# ---- experiment configuration (must be defined before first use below) ----
dir_retrieval = "/home/alabutaleb/Desktop/confirmation/Retrieval_eval_baselines_experiment_gpu_0/retrieval_results_baselines_plus_vanilla"

dataset_name = "cub200"
dataset_names = dataset_name  # original (unused) spelling, kept in case other cells read it
batch_size = 256
lr = 7e-05
top_k = 1000
# feature_size = "vanilla"

# ---- data: only the second loader returned by get_dataloaders() is used ----
dataloadercub200 = DataLoaderCUB200(data_root, batch_size=batch_size, num_workers=10)
_, testloader_cub200 = dataloadercub200.get_dataloaders()
num_classes_cub200, _, label_to_name_test = dataloadercub200.get_number_of_classes()
_, test_image_ids = dataloadercub200.get_unique_ids()

# ---- vanilla finetuned resnet50 ----
feature_size_unmodifed = 2048
vanilla_model = load_resnet50_unmodifiedVanilla(num_classes_cub200, feature_size_unmodifed, dataset_name, batch_size, lr, load_dir_vanilla)
results_vanilla =  run_retrieval_evaluation_Vanilla(vanilla_model, testloader_cub200, dir_retrieval, batch_size, top_k, device)

print_result_dict(results_vanilla, "vanilla")
save_retrieval_results(results_vanilla, f"{dir_retrieval}/results_dic_vanilla_resnet50.pkl")