In [1]:
import torch
import argparse
import json
import os
from utils import get_embeddings, run_retrieval, evaluate_retrieval, setup_logging
from data.utils import get_dataset
from models import load_model
import torch.nn.functional as F
import logging

# Command-line style configuration for the retrieval evaluation. In the notebook the parser
# is only used to materialise defaults, but it stays CLI-compatible.
parser = argparse.ArgumentParser()
# Author's note on other defaults: model 'ViT-B-16' with pretrained tag 'laion2b_s34b_b88k'.
model_name = "Marqo/marqo-fashionSigLIP"

# Args for datasets
parser.add_argument("--data-dir", type=str, default="./data/", help='Data directory.')
parser.add_argument('--dataset-config', default='./configs/uniqlo_curated.json', help='Dataset config file.')
parser.add_argument("--batch-size", type=int, default=512)
parser.add_argument("--num-workers", type=int, default=4)

# Args for models
# NOTE: the default run name equals the model name and therefore contains a '/'.
parser.add_argument('--model-name', type=str, default=model_name, help='Model name.')
parser.add_argument('--run-name', type=str, default=model_name, help='Run name.')
parser.add_argument("--pretrained", type=str, default=None, help='Pretrained name.')
parser.add_argument('--cache-dir', default="/home/jupyter/cache", help='Cache directory for models and datasets.')
parser.add_argument('--device', default='cuda', help='Device to use for inference.')
parser.add_argument("--query-prefix", type=str, default='', help="Query prefix if required (ex. 'description: ')")

# Args for evaluations
# type=int is required: without it, values supplied on the command line are kept as strings,
# so max(args.Ks) compares lexicographically and yields a string cutoff.
parser.add_argument('--Ks', type=int, default=[1, 10], nargs='+', help='Ks for metrics.')
parser.add_argument("--overwrite-embeddings", action="store_true", default=False)
parser.add_argument("--overwrite-retrieval", action="store_true", default=False)
parser.add_argument("--output-dir", type=str, default='./results')

_StoreAction(option_strings=['--output-dir'], dest='output_dir', nargs=None, const=None, default='./results', type=<class 'str'>, choices=None, help=None, metavar=None)

In [2]:
import sys
# Parse an explicit empty argument list so that every option takes its default.
# This avoids overwriting sys.argv (process-wide state): an empty sys.argv makes any
# later sys.argv[0] lookup raise IndexError, and the kernel's own arguments are never
# handed to argparse this way.
args = parser.parse_args([])

In [3]:
setup_logging()

# This cell rewrites args.output_dir and replaces args.dataset_config (a file path) with the
# parsed config dict. Running it a second time would therefore fail in os.path.basename and
# nest the output directory one level deeper, so the setup is skipped once the config is loaded.
if isinstance(args.dataset_config, dict):
    logging.warning('Dataset config is already loaded; skipping output directory and config setup.')
else:
    # Output directory settings: <output_dir>/<config file name without ".json">/<run_name>
    config_name = os.path.basename(args.dataset_config).replace('.json', '')
    args.output_dir = os.path.join(args.output_dir, config_name, args.run_name)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir, exist_ok=True)
    else:
        logging.warning(f'Output directory {args.output_dir} exists. Ignore this if it is expected.')

    # Snapshot the arguments while they are still plain JSON-serialisable values
    # (dataset_config is still the path string at this point).
    with open(os.path.join(args.output_dir, 'args.json'), 'w') as f:
        json.dump(args.__dict__, f, indent=4)
    args.embeddings_path = os.path.join(args.output_dir, "embeddings.pt")

    # Read dataset config file; from here on args.dataset_config is the parsed config.
    with open(args.dataset_config, 'r') as file:
        args.dataset_config = json.load(file)

logging.info("Dataset: " + args.dataset_config["name"])


2025-01-13,21:10:23 | INFO | Dataset: UniqloCurated


In [4]:
# Load the model; load_model (project helper) returns three objects that are bound here
# to model, preprocess and tokenizer.
model, preprocess, tokenizer = load_model(args)

# Move the model to the configured device (args.device, 'cuda' by default).
model = model.to(args.device)


2025-01-13,21:10:30 | INFO | Created a temporary directory at /var/tmp/tmp94iq_sbe
2025-01-13,21:10:30 | INFO | Writing /var/tmp/tmp94iq_sbe/_remote_module_non_scriptable.py
2025-01-13,21:10:31 | INFO | Loaded hf-hub:Marqo/marqo-fashionSigLIP model config.
2025-01-13,21:10:34 | INFO | Loading pretrained hf-hub:Marqo/marqo-fashionSigLIP weights (/home/jupyter/cache/models--Marqo--marqo-fashionSigLIP/snapshots/e5619578fd528afa0bf88d8fae37748336a57fa2/open_clip_pytorch_model.bin).


In [5]:
# Build the document dataset. get_dataset (project helper) also returns item_ID, which the
# retrieval cells pass to run_retrieval (presumably the document identifiers; confirm in data.utils).
doc_dataset, item_ID = get_dataset(args, tokenizer, preprocess)
logging.info(f"Number of document rows: {len(doc_dataset):,}")


2025-01-13,21:10:36 | INFO | Loading dataset from huggingface.
D2: <dict object at 0x7f5f50364aa0>
T4: <class 'datasets.data_files.DataFilesDict'>
# T4
D2: <dict object at 0x7f5f62d164b0>
T4: <class 'datasets.data_files.DataFilesList'>
# T4
T4: <class 'datasets.data_files.Url'>
# T4
D2: <dict object at 0x7f5f62d1a870>
# D2
D2: <dict object at 0x7f5f62d1a550>
# D2
# D2
# D2
2025-01-13,21:10:37 | INFO | Using custom data configuration AbhishekSureddy--uniqlo_curated_100-72efc4f3c56b5e50
2025-01-13,21:10:37 | INFO | Overwrite dataset info from restored data version if exists.
2025-01-13,21:10:37 | INFO | Loading Dataset info from /home/abhisheksureddy/.cache/huggingface/datasets/AbhishekSureddy___parquet/AbhishekSureddy--uniqlo_curated_100-72efc4f3c56b5e50/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7
2025-01-13,21:10:37 | INFO | Loading Dataset info from /home/abhisheksureddy/.cache/huggingface/datasets/AbhishekSureddy___parquet/AbhishekSureddy--uniqlo_curated_10

  0%|          | 0/2 [00:00<?, ?it/s]

T4: <class 'data.utils.Transform'>
# T4
D2: <dict object at 0x7f5f53044a00>
T4: <class 'open_clip.tokenizer.HFTokenizer'>
# T4
D2: <dict object at 0x7f5f5035bd70>
T4: <class 'transformers.models.t5.tokenization_t5_fast.T5TokenizerFast'>
# T4
D2: <dict object at 0x7f5f50369a00>
T4: <class 'tokenizers.Tokenizer'>
# T4
T4: <class 'tokenizers.models.Model'>
# T4
D2: <dict object at 0x7f5f50369af0>
D2: <dict object at 0x7f5f50369b40>
D2: <dict object at 0x7f5f50369b90>
# D2
D2: <dict object at 0x7f5f50369be0>
# D2
D2: <dict object at 0x7f5f50369cd0>
# D2
D2: <dict object at 0x7f5f50369d20>
# D2
D2: <dict object at 0x7f5f50369d70>
# D2
D2: <dict object at 0x7f5f50369dc0>
# D2
D2: <dict object at 0x7f5f50369eb0>
# D2
D2: <dict object at 0x7f5f50369f00>
# D2
D2: <dict object at 0x7f5f50369f50>
# D2
D2: <dict object at 0x7f5f50369fa0>
# D2
D2: <dict object at 0x7f5f50370050>
# D2
D2: <dict object at 0x7f5f503700a0>
# D2
D2: <dict object at 0x7f5f503700f0>
# D2
D2: <dict object at 0x7f5f50370140

In [6]:
# Reuse the cached document embeddings unless they are missing or a refresh was requested.
use_cached_embeddings = os.path.isfile(args.embeddings_path) and not args.overwrite_embeddings
if use_cached_embeddings:
    logging.info("Loading embeddings of documents")
    # NOTE(review): torch.load is called without map_location; confirm this behaves as
    # intended when the cache was written on a different device than the one in use.
    embeddings = torch.load(args.embeddings_path)
else:
    logging.info("Getting embeddings of documents")
    embeddings = get_embeddings(model, doc_dataset, args)
    # Persist for later runs.
    torch.save(embeddings, args.embeddings_path)

2025-01-13,21:10:37 | INFO | Loading embeddings of documents


### Metrics for Uniqlo

In [11]:
# Run tasks for curated uniqlo
# The metrics structure is slightly different here: each task is evaluated once per
# attribute ("metric way"), and each attribute has its own ground-truth file.
metric_ways = ["Collar", "Color", "Material", "Sleeve", "Texture/Pattern", "Type"]
for task in args.dataset_config["tasks"]:
    task_dir = os.path.join(args.output_dir, task['name'])
    if not os.path.exists(task_dir):
        os.makedirs(task_dir, exist_ok=True)
    logging.info(f'Task: {json.dumps(task, indent=4)}')
    for metric_way in metric_ways:
        # "Texture/Pattern" contains a path separator; flatten it for files written into task_dir.
        metric_tag = metric_way.replace('/', '_')
        for query_col in task["query_col"]:
            doc_col_tag = '+'.join(task['doc_col'])

            # metric_way is used unflattened here, so "Texture/Pattern" resolves to a nested
            # directory under uniqlo_curated.
            gt_dir = os.path.join(args.data_dir, "uniqlo_curated", metric_way)
            # NOTE(review): the ground-truth file name always uses 'image' as the document
            # field, regardless of task['doc_col']; confirm this is intended.
            gt_results_path = os.path.join(gt_dir, f"ground_truth_{query_col}-{'+'.join(['image'])}.json")
            # Logged rather than printed so it stays in order with the other log lines.
            logging.info(f"Ground truth file: {gt_results_path}")
            assert os.path.exists(gt_results_path), f"Ground truth file not found: {gt_results_path}"

            # Ground-truth query-doc
            logging.info("Loading ground truth")
            with open(gt_results_path, "r") as f:
                gt_results = json.load(f)
                test_queries = list(gt_results.keys()) # randomly sampled queries (up to 2000)

            # Running retrieval (cached per metric way / query column / document columns)
            retrieval_path = os.path.join(task_dir, f"{metric_tag}_retrieved_{query_col}-{doc_col_tag}.json")
            if os.path.exists(retrieval_path) and not args.overwrite_retrieval:
                logging.info("Loading retrieval")
                with open(retrieval_path, "r") as f:
                    retrieval_results = json.load(f)
            else:
                logging.info("Running retrieval")
                if len(task['doc_col'])==1:
                    doc_embeddings = embeddings[task['doc_col'][0]].to(args.device)
                else:
                    assert ('doc_weights' in task and len(task['doc_weights'])==len(task['doc_col'])), \
                        "Must provide the same number of weights for multi-field documents as the number of multi-fields."
                    # Weighted sum of the per-field embeddings, re-normalised along the last dimension.
                    doc_embeddings = F.normalize(torch.stack([w*embeddings[c] for c, w in zip(task['doc_col'], task['doc_weights'])], dim=1).sum(1), dim=-1).to(args.device)
                retrieval_results = run_retrieval(test_queries, item_ID, doc_embeddings, tokenizer, model, max(args.Ks), args)
                with open(retrieval_path, "w") as f:
                    json.dump(retrieval_results, f, indent=4)

            # Evaluation Starts
            logging.info("Evaluation Starts")
            output_results = evaluate_retrieval(gt_results, retrieval_results, args)
            # The metric way is part of the file name (as it is for the retrieval file). Without
            # it every metric way wrote to the same result file and only the last one survived.
            output_json = os.path.join(task_dir, f"{metric_tag}_result_{query_col}-{doc_col_tag}.json")
            output_json_dict = json.dumps(output_results, indent=4)
            logging.info(output_json_dict)
            with open(output_json, 'w') as f:
                f.write(output_json_dict)

2025-01-13,21:29:10 | INFO | Task: {
    "name": "image-to-image",
    "query_col": [
        "image"
    ],
    "doc_col": [
        "image"
    ]
}
2025-01-13,21:29:10 | INFO | Loading ground truth
2025-01-13,21:29:10 | INFO | Loading retrieval
2025-01-13,21:29:10 | INFO | Evaluation Starts
2025-01-13,21:29:10 | INFO | For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-01-13,21:29:10 | INFO | 

2025-01-13,21:29:10 | INFO | NDCG@1: 0.0000
2025-01-13,21:29:10 | INFO | NDCG@10: 0.0000
2025-01-13,21:29:10 | INFO | 

2025-01-13,21:29:10 | INFO | MAP@1: 0.0000
2025-01-13,21:29:10 | INFO | MAP@10: 0.0000
2025-01-13,21:29:10 | INFO | 

2025-01-13,21:29:10 | INFO | Recall@1: 0.0000
2025-01-13,21:29:10 | INFO | Recall@10: 0.0000
2025-01-13,21:29:10 | INFO | 

2025-01-13,21:29:10 | INFO | P@1: 0.0000
2025-01-13,21:29:10 | INFO | P@10: 0.0000
2025-01-13,21:29:10 | INFO | {
    "mAP": {
        "MAP@1": 0

./data/uniqlo_curated/Collar/ground_truth_image-image.json
./data/uniqlo_curated/Color/ground_truth_image-image.json
./data/uniqlo_curated/Material/ground_truth_image-image.json
./data/uniqlo_curated/Sleeve/ground_truth_image-image.json
./data/uniqlo_curated/Texture/Pattern/ground_truth_image-image.json


100%|██████████| 5/5 [00:00<00:00, 65.41it/s]
2025-01-13,21:29:10 | INFO | Evaluation Starts
2025-01-13,21:29:10 | INFO | For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-01-13,21:29:10 | INFO | 

2025-01-13,21:29:10 | INFO | NDCG@1: 0.2000
2025-01-13,21:29:10 | INFO | NDCG@10: 0.0772
2025-01-13,21:29:10 | INFO | 

2025-01-13,21:29:10 | INFO | MAP@1: 0.0250
2025-01-13,21:29:10 | INFO | MAP@10: 0.0315
2025-01-13,21:29:10 | INFO | 

2025-01-13,21:29:10 | INFO | Recall@1: 0.0250
2025-01-13,21:29:10 | INFO | Recall@10: 0.0650
2025-01-13,21:29:10 | INFO | 

2025-01-13,21:29:10 | INFO | P@1: 0.2000
2025-01-13,21:29:10 | INFO | P@10: 0.0600
2025-01-13,21:29:10 | INFO | {
    "mAP": {
        "MAP@1": 0.025,
        "MAP@10": 0.0315
    },
    "ndcg": {
        "NDCG@1": 0.2,
        "NDCG@10": 0.0772
    },
    "precision": {
        "P@1": 0.2,
        "P@10": 0.06
    },
    "recall": {
        "R

./data/uniqlo_curated/Type/ground_truth_image-image.json


100%|██████████| 5/5 [00:00<00:00, 66.50it/s]
2025-01-13,21:29:10 | INFO | Evaluation Starts
2025-01-13,21:29:10 | INFO | For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-01-13,21:29:10 | INFO | 

2025-01-13,21:29:10 | INFO | NDCG@1: 0.2000
2025-01-13,21:29:10 | INFO | NDCG@10: 0.0440
2025-01-13,21:29:10 | INFO | 

2025-01-13,21:29:10 | INFO | MAP@1: 0.0143
2025-01-13,21:29:10 | INFO | MAP@10: 0.0143
2025-01-13,21:29:10 | INFO | 

2025-01-13,21:29:10 | INFO | Recall@1: 0.0143
2025-01-13,21:29:10 | INFO | Recall@10: 0.0143
2025-01-13,21:29:10 | INFO | 

2025-01-13,21:29:10 | INFO | P@1: 0.2000
2025-01-13,21:29:10 | INFO | P@10: 0.0200
2025-01-13,21:29:10 | INFO | {
    "mAP": {
        "MAP@1": 0.01429,
        "MAP@10": 0.01429
    },
    "ndcg": {
        "NDCG@1": 0.2,
        "NDCG@10": 0.04402
    },
    "precision": {
        "P@1": 0.2,
        "P@10": 0.02
    },
    "recall": {
      

./data/uniqlo_curated/Collar/ground_truth_image-image.json


100%|██████████| 5/5 [00:00<00:00, 68.23it/s]
2025-01-13,21:29:11 | INFO | Evaluation Starts
2025-01-13,21:29:11 | INFO | For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | NDCG@1: 0.0000
2025-01-13,21:29:11 | INFO | NDCG@10: 0.0000
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | MAP@1: 0.0000
2025-01-13,21:29:11 | INFO | MAP@10: 0.0000
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | Recall@1: 0.0000
2025-01-13,21:29:11 | INFO | Recall@10: 0.0000
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | P@1: 0.0000
2025-01-13,21:29:11 | INFO | P@10: 0.0000
2025-01-13,21:29:11 | INFO | {
    "mAP": {
        "MAP@1": 0.0,
        "MAP@10": 0.0
    },
    "ndcg": {
        "NDCG@1": 0.0,
        "NDCG@10": 0.0
    },
    "precision": {
        "P@1": 0.0,
        "P@10": 0.0
    },
    "recall": {
        "Recall@1":

./data/uniqlo_curated/Color/ground_truth_image-image.json


100%|██████████| 5/5 [00:00<00:00, 70.28it/s]
2025-01-13,21:29:11 | INFO | Evaluation Starts
2025-01-13,21:29:11 | INFO | For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | NDCG@1: 0.0000
2025-01-13,21:29:11 | INFO | NDCG@10: 0.0655
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | MAP@1: 0.0000
2025-01-13,21:29:11 | INFO | MAP@10: 0.0264
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | Recall@1: 0.0000
2025-01-13,21:29:11 | INFO | Recall@10: 0.0582
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | P@1: 0.0000
2025-01-13,21:29:11 | INFO | P@10: 0.0600
2025-01-13,21:29:11 | INFO | {
    "mAP": {
        "MAP@1": 0.0,
        "MAP@10": 0.02636
    },
    "ndcg": {
        "NDCG@1": 0.0,
        "NDCG@10": 0.06546
    },
    "precision": {
        "P@1": 0.0,
        "P@10": 0.06
    },
    "recall": {
        "R

./data/uniqlo_curated/Material/ground_truth_image-image.json


100%|██████████| 5/5 [00:00<00:00, 71.16it/s]
2025-01-13,21:29:11 | INFO | Evaluation Starts
2025-01-13,21:29:11 | INFO | For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | NDCG@1: 0.0000
2025-01-13,21:29:11 | INFO | NDCG@10: 0.0795
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | MAP@1: 0.0000
2025-01-13,21:29:11 | INFO | MAP@10: 0.0227
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | Recall@1: 0.0000
2025-01-13,21:29:11 | INFO | Recall@10: 0.0630
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | P@1: 0.0000
2025-01-13,21:29:11 | INFO | P@10: 0.0800
2025-01-13,21:29:11 | INFO | {
    "mAP": {
        "MAP@1": 0.0,
        "MAP@10": 0.02267
    },
    "ndcg": {
        "NDCG@1": 0.0,
        "NDCG@10": 0.07953
    },
    "precision": {
        "P@1": 0.0,
        "P@10": 0.08
    },
    "recall": {
        "R

./data/uniqlo_curated/Sleeve/ground_truth_image-image.json


100%|██████████| 5/5 [00:00<00:00, 71.26it/s]
2025-01-13,21:29:11 | INFO | Evaluation Starts
2025-01-13,21:29:11 | INFO | For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | NDCG@1: 0.0000
2025-01-13,21:29:11 | INFO | NDCG@10: 0.0000
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | MAP@1: 0.0000
2025-01-13,21:29:11 | INFO | MAP@10: 0.0000
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | Recall@1: 0.0000
2025-01-13,21:29:11 | INFO | Recall@10: 0.0000
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | P@1: 0.0000
2025-01-13,21:29:11 | INFO | P@10: 0.0000
2025-01-13,21:29:11 | INFO | {
    "mAP": {
        "MAP@1": 0.0,
        "MAP@10": 0.0
    },
    "ndcg": {
        "NDCG@1": 0.0,
        "NDCG@10": 0.0
    },
    "precision": {
        "P@1": 0.0,
        "P@10": 0.0
    },
    "recall": {
        "Recall@1":

./data/uniqlo_curated/Texture/Pattern/ground_truth_image-image.json


100%|██████████| 5/5 [00:00<00:00, 71.81it/s]
2025-01-13,21:29:11 | INFO | Evaluation Starts
2025-01-13,21:29:11 | INFO | For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | NDCG@1: 0.0000
2025-01-13,21:29:11 | INFO | NDCG@10: 0.0856
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | MAP@1: 0.0000
2025-01-13,21:29:11 | INFO | MAP@10: 0.0322
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | Recall@1: 0.0000
2025-01-13,21:29:11 | INFO | Recall@10: 0.0900
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | P@1: 0.0000
2025-01-13,21:29:11 | INFO | P@10: 0.0800
2025-01-13,21:29:11 | INFO | {
    "mAP": {
        "MAP@1": 0.0,
        "MAP@10": 0.03217
    },
    "ndcg": {
        "NDCG@1": 0.0,
        "NDCG@10": 0.08558
    },
    "precision": {
        "P@1": 0.0,
        "P@10": 0.08
    },
    "recall": {
        "R

./data/uniqlo_curated/Type/ground_truth_image-image.json


100%|██████████| 5/5 [00:00<00:00, 72.47it/s]
2025-01-13,21:29:11 | INFO | Evaluation Starts
2025-01-13,21:29:11 | INFO | For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | NDCG@1: 0.0000
2025-01-13,21:29:11 | INFO | NDCG@10: 0.0405
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | MAP@1: 0.0000
2025-01-13,21:29:11 | INFO | MAP@10: 0.0100
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | Recall@1: 0.0000
2025-01-13,21:29:11 | INFO | Recall@10: 0.0286
2025-01-13,21:29:11 | INFO | 

2025-01-13,21:29:11 | INFO | P@1: 0.0000
2025-01-13,21:29:11 | INFO | P@10: 0.0400
2025-01-13,21:29:11 | INFO | {
    "mAP": {
        "MAP@1": 0.0,
        "MAP@10": 0.01
    },
    "ndcg": {
        "NDCG@1": 0.0,
        "NDCG@10": 0.0405
    },
    "precision": {
        "P@1": 0.0,
        "P@10": 0.04
    },
    "recall": {
        "Recal

### Metrics for other datasets

In [8]:
# Run tasks
for task in args.dataset_config["tasks"]:
    # Each task writes its retrieval and result files into its own sub-directory.
    task_dir = os.path.join(args.output_dir, task['name'])
    if not os.path.exists(task_dir):
        os.makedirs(task_dir, exist_ok=True)
    logging.info(f'Task: {json.dumps(task, indent=4)}')

    for query_col in task["query_col"]:
        # "<query>-<doc1>+<doc2>..." tag shared by the ground-truth, retrieval and result files.
        doc_cols = task['doc_col']
        pair_tag = f"{query_col}-{'+'.join(doc_cols)}"

        gt_dir = os.path.join(args.data_dir, args.dataset_config["name"], 'gt_query_doc')
        gt_results_path = os.path.join(gt_dir, f"ground_truth_{pair_tag}.json")
        assert os.path.exists(gt_results_path)

        # Ground truth: the keys of the JSON object are the queries to evaluate
        # (randomly sampled, up to 2000, per the original author's note).
        logging.info("Loading ground truth")
        with open(gt_results_path, "r") as f:
            gt_results = json.load(f)
        test_queries = list(gt_results.keys())

        # Retrieval: recompute when no cached file exists or an overwrite was requested.
        retrieval_path = os.path.join(task_dir, f"retrieved_{pair_tag}.json")
        if args.overwrite_retrieval or not os.path.exists(retrieval_path):
            logging.info("Running retrieval")
            if len(doc_cols) == 1:
                doc_embeddings = embeddings[doc_cols[0]].to(args.device)
            else:
                assert ('doc_weights' in task and len(task['doc_weights'])==len(task['doc_col'])), \
                    "Must provide the same number of weights for multi-field documents as the number of multi-fields."
                # Weighted sum over the document fields, then re-normalise along the last dimension.
                weighted_fields = [w * embeddings[c] for c, w in zip(doc_cols, task['doc_weights'])]
                fused = torch.stack(weighted_fields, dim=1).sum(1)
                doc_embeddings = F.normalize(fused, dim=-1).to(args.device)
            retrieval_results = run_retrieval(test_queries, item_ID, doc_embeddings, tokenizer, model, max(args.Ks), args)
            with open(retrieval_path, "w") as f:
                json.dump(retrieval_results, f, indent=4)
        else:
            logging.info("Loading retrieval")
            with open(retrieval_path, "r") as f:
                retrieval_results = json.load(f)

        # Evaluation: log the metrics and store them next to the retrieval file.
        logging.info("Evaluation Starts")
        output_results = evaluate_retrieval(gt_results, retrieval_results, args)
        output_json = os.path.join(task_dir, f"result_{pair_tag}.json")
        output_json_dict = json.dumps(output_results, indent=4)
        logging.info(output_json_dict)
        with open(output_json, 'w') as f:
            f.write(output_json_dict)

2025-01-07,00:07:58 | INFO | Task: {
    "name": "text-to-image",
    "query_col": [
        "text"
    ],
    "doc_col": [
        "image"
    ]
}
2025-01-07,00:07:58 | INFO | Loading ground truth
2025-01-07,00:07:58 | INFO | Running retrieval
100%|██████████| 2000/2000 [01:32<00:00, 21.56it/s]
2025-01-07,00:09:31 | INFO | Evaluation Starts
2025-01-07,00:09:31 | INFO | For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2025-01-07,00:09:31 | INFO | 

2025-01-07,00:09:31 | INFO | NDCG@1: 0.0630
2025-01-07,00:09:31 | INFO | NDCG@10: 0.1365
2025-01-07,00:09:31 | INFO | 

2025-01-07,00:09:31 | INFO | MAP@1: 0.0624
2025-01-07,00:09:31 | INFO | MAP@10: 0.1075
2025-01-07,00:09:31 | INFO | 

2025-01-07,00:09:31 | INFO | Recall@1: 0.0624
2025-01-07,00:09:31 | INFO | Recall@10: 0.2297
2025-01-07,00:09:31 | INFO | 

2025-01-07,00:09:31 | INFO | P@1: 0.0630
2025-01-07,00:09:31 | INFO | P@10: 0.0238
2025-01-07,0