In [1]:
import json
import os
import subprocess

# Function to load and display JSON results
def load_and_print_results(file_path):
    """Print every JSON record stored in a JSON Lines results file.

    The file is read line by line and each non-blank line is decoded as an
    independent JSON document and printed. A line that fails to decode is
    reported and skipped, so one bad record does not hide the others.

    Args:
        file_path: Path of the results file to display.

    Returns:
        None. Everything is written to standard output.
    """
    # Guard clause: a missing file is reported, not raised, so the notebook
    # keeps running when a benchmark has not been executed yet.
    if not os.path.exists(file_path):
        print(f"No results found at {file_path}")
        return

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(file_path, encoding="utf-8") as f:
        for line in f:
            # Blank lines (typically a trailing newline at the end of the
            # file) are not records and must not be reported as errors.
            if not line.strip():
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON: {e}")
            else:
                print(data)

# Function to run a command and print output
def run_command(command):
    """Run ``command`` through the shell and print what it produced.

    The command's standard output is always printed; standard error is
    printed when it is non-empty. Output is captured, so nothing is shown
    until the command has finished. A non-zero exit status is reported on
    its own line, because stderr alone cannot distinguish failure from
    ordinary progress output.

    Args:
        command: Complete shell command line as a single string. It is
            executed with ``shell=True``, so pass only trusted strings.

    Returns:
        None. Everything is written to standard output.
    """
    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    print(result.stdout)
    if result.stderr:
        print(result.stderr)
    # Report failures explicitly instead of raising, so the remaining
    # notebook cells can still be executed.
    if result.returncode != 0:
        print(f"Command exited with non-zero status {result.returncode}")


# Zero-shot cross-modal retrieval

In [2]:
# Assemble the zero-shot retrieval command from its individual options;
# each piece carries its own trailing space, so a plain join is enough.
retrieval_command_parts = [
    "python3 -m clip_benchmark.cli eval --pretrained_model models.txt ",
    "--dataset 'bookset_retrieval' 'pubmed_retrieval' ",
    "--task 'zeroshot_retrieval' ",
    "--batch_size 512 ",
    "--num_workers 16 ",
    "--seed 42 ",
    "--recall_k 1 10 50 ",
    "--dataset_root '' ",
    "--output './results/benchmark_mm_retrieval.json'",
]
command = "".join(retrieval_command_parts)
run_command(command)

Models: [['musk_large_patch16_384', '../musk/models/musk.pth']]
Datasets: ['bookset_retrieval', 'pubmed_retrieval']
Languages: ['en']
Running 'zeroshot_retrieval' on 'bookset_retrieval' with the model '../musk/models/musk.pth' on language 'en'
Load ckpt from ../musk/models/musk.pth
Weights from pretrained model not used in MUSK: ['logit_scale_moco']
Load 4265 image-text pairs from /mnt/sdd/data/downstreams/arch/books_set/books_set_retrieval.test.jsonl. 
Dataset size: 4265
Dataset split: test
Dump results to: ./results/benchmark_mm_retrieval.json
Running 'zeroshot_retrieval' on 'pubmed_retrieval' with the model '../musk/models/musk.pth' on language 'en'
Load ckpt from ../musk/models/musk.pth
Weights from pretrained model not used in MUSK: ['logit_scale_moco']
Load 3308 image-text pairs from /mnt/sdd/data/downstreams/arch/pubmed_set/pubmed_set_retrieval.test.jsonl. 
Dataset size: 3308
Dataset split: test
Dump results to: ./results/benchmark_mm_retrieval.json


0it [00:00, ?it/s]
1it [00:

In [3]:
# Display the records written by the zero-shot retrieval run.
retrieval_results_path = './results/benchmark_mm_retrieval.json'
load_and_print_results(retrieval_results_path)

{'dataset': 'bookset_retrieval', 'model': 'musk_large_patch16_384', 'pretrained': '../musk/models/musk.pth', 'task': 'zeroshot_retrieval', 'metrics': {'image_retrieval_recall@1': 0.09964829683303833, 'text_retrieval_recall@1': 0.09425556659698486, 'image_retrieval_recall@10': 0.3355216979980469, 'text_retrieval_recall@10': 0.31254395842552185, 'image_retrieval_recall@50': 0.5767878293991089, 'text_retrieval_recall@50': 0.5432590842247009}, 'language': 'en'}
{'dataset': 'pubmed_retrieval', 'model': 'musk_large_patch16_384', 'pretrained': '../musk/models/musk.pth', 'task': 'zeroshot_retrieval', 'metrics': {'image_retrieval_recall@1': 0.17593711614608765, 'text_retrieval_recall@1': 0.19588875770568848, 'image_retrieval_recall@10': 0.40477630496025085, 'text_retrieval_recall@10': 0.42019346356391907, 'image_retrieval_recall@50': 0.6109431385993958, 'text_retrieval_recall@50': 0.6281741261482239}, 'language': 'en'}


# Few-shot classification

In [None]:
# Few-shot linear probe: settings that are interpolated into the command
# and into the name of the output file.
k_shot = 10
seed = 42

# Each piece carries its own trailing space, so a plain join is enough.
fewshot_command_parts = [
    "python3 -m clip_benchmark.cli eval --pretrained_model models.txt ",
    "--dataset 'nct_crc' 'pcam' 'skin' 'sicap' 'pannuke' 'unitopatho' 'wsss4luad' 'osteo' 'lc25' 'renal_cell' 'bracs6cls' 'bracs3cls' ",
    "--task 'linear_probe' ",
    "--batch_size 512 ",
    "--num_workers 16 ",
    f"--fewshot_k {k_shot} ",
    f"--seed {seed} ",
    "--dataset_root '' ",
    f"--output './results/benchmark_fs_{k_shot}shot_seed{seed}.json'",
]
command = "".join(fewshot_command_parts)

run_command(command)


In [None]:
# Display the few-shot records; the path is derived from the `k_shot` and
# `seed` values currently defined in the notebook.
fewshot_results_path = f'./results/benchmark_fs_{k_shot}shot_seed{seed}.json'
load_and_print_results(fewshot_results_path)

# Linear probe classification

In [None]:
# Linear probe: the seed is interpolated into the command and into the
# name of the output file.
seed = 123

# Each piece carries its own trailing space, so a plain join is enough.
linear_probe_command_parts = [
    "python3 -m clip_benchmark.cli eval --pretrained_model models.txt ",
    "--dataset 'nct_crc' 'pcam' 'skin' 'sicap' 'pannuke' 'unitopatho' 'wsss4luad' 'osteo' 'lc25' 'renal_cell' 'bracs6cls' 'bracs3cls' ",
    "--task 'linear_probe' ",
    "--batch_size 1024 ",
    "--num_workers 8 ",
    "--fewshot_k -1 ",
    f"--seed {seed} ",
    "--dataset_root '' ",
    f"--output './results/linear_probe_{seed}.json'",
]
command = "".join(linear_probe_command_parts)

run_command(command)


In [None]:
# Display the linear-probe records; the path is derived from the `seed`
# value currently defined in the notebook.
linear_probe_results_path = f'./results/linear_probe_{seed}.json'
load_and_print_results(linear_probe_results_path)

# Image-image retrieval

In [None]:
# Assemble the image-to-image retrieval command from its individual
# options; each piece carries its own trailing space.
# NOTE(review): this cell uses seed 41 while the zero-shot retrieval cell
# uses 42 — confirm the difference is intentional.
image_retrieval_command_parts = [
    "python3 -m clip_benchmark.cli eval --pretrained_model models.txt ",
    "--dataset 'unitopatho_retrieval' 'bracs_retrieval' ",
    "--task 'image_retrieval' ",
    "--batch_size 512 ",
    "--num_workers 16 ",
    "--seed 41 ",
    "--dataset_root '' ",
    "--output './results/benchmark_image_retrieval.json'",
]
command = "".join(image_retrieval_command_parts)
run_command(command)


In [None]:
# Display the records written by the image-to-image retrieval run.
image_retrieval_results_path = './results/benchmark_image_retrieval.json'
load_and_print_results(image_retrieval_results_path)

# Zero-shot classification

In [None]:
# Assemble the zero-shot classification command from its individual
# options; each piece carries its own trailing space.
zeroshot_cls_command_parts = [
    "python3 -m clip_benchmark.cli eval --pretrained_model models.txt ",
    "--dataset 'nct_crc' 'pcam' 'skin' 'sicap' 'pannuke' 'unitopatho' 'wsss4luad' 'osteo' 'lc25' 'renal_cell' 'bracs6cls' 'bracs3cls' ",
    "--task 'zeroshot_classification' ",
    "--batch_size 512 ",
    "--dataset_root '' ",
    "--output './results/benchmark_zeroshot_cls.json'",
]
command = "".join(zeroshot_cls_command_parts)
run_command(command)


In [None]:
# Display the records written by the zero-shot classification run.
zeroshot_cls_results_path = './results/benchmark_zeroshot_cls.json'
load_and_print_results(zeroshot_cls_results_path)