In [5]:
# Run options: shared settings read by the inference and evaluation cells below.
num_images = 10  # forwarded to main.py as the -n argument
device = "cuda"  # forwarded to main.py as the -d argument
quantizations = [
    "none",
    "skip_vision_tower",
    "full",
]  # one main.py run (-q) and one evaluation pass per entry, in this order


In [6]:
# Run main.py once per quantization mode through `uv run`, forwarding the run
# options (-q mode, -n image count, -d device) together with --save-captions.
# NOTE(review): the `!` shell escape does not raise on a non-zero exit status,
# so a failed run will not stop this loop — inspect the log of every mode.
for quantization in quantizations:
    print(f"Running Quantization-Mode: {quantization}")
    # IPython substitutes {name} with the Python variable's value before the shell sees the command.
    !uv run main.py -q {quantization} -n {num_images} -d {device} --save-captions

Running Quantization-Mode: none
[32mINFO[0m: Starting inference - Quantization: none, Device: cuda, Images: 10[0m
[32mINFO[0m: Using device: cuda, dtype: torch.bfloat16[0m
[32mINFO[0m: Loading model...[0m
[32mINFO[0m: Model weights loaded in 2.14s[0m
[32mINFO[0m: Model ready (total: 2.75s)[0m
[32mINFO[0m: Processing 10 images...[0m
[32mINFO[0m: Completed 10 images in 0.00s[0m
Running Quantization-Mode: skip_vision_tower
[32mINFO[0m: Starting inference - Quantization: skip_vision_tower, Device: cuda, Images: 10[0m
[32mINFO[0m: Using device: cuda, dtype: torch.float16[0m
[32mINFO[0m: Loading model with skip_vision_tower quantization...[0m
[32mINFO[0m: Model weights loaded in 8.41s[0m
[32mINFO[0m: Model ready (total: 9.04s)[0m
[32mINFO[0m: Processing 10 images...[0m
[32mINFO[0m: Completed 10 images in 0.00s[0m
Running Quantization-Mode: full
[32mINFO[0m: Starting inference - Quantization: full, Device: cuda, Images: 10[0m
[32mINFO[0m: Using d

In [7]:
from evaluation import calculate_cider_score, calculate_bleu_score, calculate_spice_score, import_data, load_metrics


# Metric label -> scoring function, listed in the order the scores are computed
# and stored for every mode.
metric_fns = (
    ("CIDEr", calculate_cider_score),
    ("BLEU-4", calculate_bleu_score),
    ("SPICE", calculate_spice_score),
)

# results[mode][label] holds the first value returned by each scorer.
results = {}

for mode in quantizations:
    print(f"Evaluating mode: {mode}")
    actual_captions, predicted_captions = import_data(mode)

    mode_scores = {}
    for metric_label, score_fn in metric_fns:
        # Each scorer returns a pair; only the first item is kept.
        # (The second is presumably the per-caption scores — TODO confirm.)
        mean_score, _ = score_fn(actual_captions, predicted_captions)
        mode_scores[metric_label] = mean_score
    results[mode] = mode_scores

print(results)


Evaluating mode: none
{'testlen': 457, 'reflen': 130, 'guess': [457, 447, 437, 427], 'correct': [115, 43, 11, 2]}
ratio: 3.5153846153575743
SPICE could not be executed: [Errno 2] No such file or directory: 'java'
Evaluating mode: skip_vision_tower
{'testlen': 145, 'reflen': 128, 'guess': [145, 135, 125, 115], 'correct': [96, 44, 15, 6]}
ratio: 1.13281249999115
SPICE could not be executed: [Errno 2] No such file or directory: 'java'
Evaluating mode: full
{'testlen': 146, 'reflen': 125, 'guess': [146, 136, 126, 116], 'correct': [89, 39, 12, 3]}
ratio: 1.167999999990656
SPICE could not be executed: [Errno 2] No such file or directory: 'java'
{'none': {'CIDEr': 0.4686390989279722, 'BLEU-4': 0.04110209752517103, 'SPICE': None}, 'skip_vision_tower': {'CIDEr': 0.6186018373313188, 'BLEU-4': 0.19171857782265467, 'SPICE': None}, 'full': {'CIDEr': 0.6000251567565085, 'BLEU-4': 0.14404865380830967, 'SPICE': None}}


In [8]:
# Print the metrics that load_metrics returns for each quantization mode, one line per mode.
for mode in quantizations:
    mode_metrics = load_metrics(mode)
    print(f"{mode}: {mode_metrics}")



none: {'peak_VRAM': 6054.29541015625, 'latency_per_image': 7.9536212682724, 'througput': 0.1256290401755195, 'model_size': 4284.425506591797}
skip_vision_tower: {'peak_VRAM': 3109.45703125, 'latency_per_image': 3.505574083328247, 'througput': 0.28478013721985734, 'model_size': 4284.425506591797}
full: {'peak_VRAM': 3203.17626953125, 'latency_per_image': 3.621748661994934, 'througput': 0.27570922470663617, 'model_size': 4284.425506591797}
