In [1]:
# !pip install httpx==0.24.0 transformers==4.37.2

In [2]:
# ! pip install aac_metrics

In [23]:
import json
import pandas as pd

from tqdm import tqdm
import sys
sys.path.append("/home/asureddy_umass_edu/cs682/VILA_codes/llava/eval")
from eval_datasets import CaptionDataset, FlickrCaptioningDataset
from aac_metrics.functional import cider_d, bleu_4, rouge_l
from aac_metrics.utils.tokenization import preprocess_mono_sents, preprocess_mult_sents


def get_captioning_outputs(res_file):
    """Read a captioning results file and return its model outputs as a list.

    Args:
        res_file: Path to a JSON file whose top-level object has an
            ``"outputs"`` entry.

    Returns:
        A new list holding the items obtained by iterating over
        ``data["outputs"]``, in iteration order.
    """
    with open(res_file) as handle:
        payload = json.load(handle)

    # list() copies element by element, so the caller receives a fresh list
    # rather than a reference into the parsed JSON structure.
    return list(payload["outputs"])

def get_metrics_captioning(hyps, refs):
    """Compute corpus-level CIDEr-D, BLEU-4 and ROUGE-L for a set of captions.

    Args:
        hyps: Iterable of hypothesis strings, one per sample.
        refs: Iterable of iterables of reference strings, one inner
            collection per sample.

    Returns:
        A dict with the keys ``'cider_d'``, ``'bleu_4'`` and ``'rouge_l'``,
        each mapped to the value obtained by calling ``.item()`` on the
        corresponding corpus score.
    """
    # Newlines are flattened to spaces before the aac_metrics preprocessing.
    flat_hyps = [hyp.replace('\n', ' ') for hyp in hyps]
    flat_refs = [[ref.replace('\n', ' ') for ref in ref_group] for ref_group in refs]

    candidates = preprocess_mono_sents(flat_hyps)
    mult_references = preprocess_mult_sents(flat_refs)

    # Each metric function returns a pair; only the first element is used,
    # indexed by the metric's own name.
    metric_fns = (('cider_d', cider_d), ('bleu_4', bleu_4), ('rouge_l', rouge_l))
    scores = {}
    for metric_name, metric_fn in metric_fns:
        corpus_scores, _ = metric_fn(candidates, mult_references)
        scores[metric_name] = corpus_scores[metric_name].item()
    return scores

### For COCO, Flickr

In [9]:
# COCO val2017: captions annotation JSON plus the image directory.
annotations_path = "./dataset/annotations/captions_val2017.json"
image_dir_path = "/scratch/workspace/asureddy_umass_edu-llm_alignment/dataset/val2017"
coco_dataset = CaptionDataset(image_dir_path, annotations_path)

# Flickr8k: annotations_path is rebound here, so after this cell it points at
# the Flickr captions file rather than the COCO annotations.
img_dir = "/scratch/workspace/asureddy_umass_edu-llm_alignment/dataset/captioning/flickr8k/Images"
annotations_path = "/scratch/workspace/asureddy_umass_edu-llm_alignment/dataset/captioning/flickr8k/captions.txt"
# NOTE(review): istrain=False presumably selects the evaluation split — confirm in eval_datasets.
flickr_dataset = FlickrCaptioningDataset(img_dir, annotations_path, istrain=False)


In [12]:
# Inspect the first COCO record to see which fields an item carries.
coco_dataset.get_item_with_idx(0)

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x427>,
 'captions': ['A man is in a kitchen making pizzas.',
  'Man in apron standing on front of oven with pans and bakeware',
  'A baker is working in the kitchen rolling dough.',
  'A person standing by a stove in a kitchen.',
  'A table with pies being made and a person standing near a wall with pots and pans hanging on the wall.'],
 'image_id': 397133}

In [10]:
# Inspect the first Flickr8k record (accessed by indexing, unlike the COCO dataset above).
flickr_dataset[0]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=500x333>,
 'caption': 'A black-and-white dog bounds off the ground , all feet in the air , of a yellow field .',
 'captions': ['A black-and-white dog bounds off the ground , all feet in the air , of a yellow field .',
  'A black and white dog is jumping over high yellow grass .',
  'A black and white dog is jumping through a field of brown grass .',
  'a dog runs through the dry grass .',
  'The black and white dog runs through the field .'],
 'image_id': '3139895886_5a6d495b13.jpg'}

In [13]:
# Collect the list of reference captions for every item, in dataset index order.
# The hypotheses read from a results file are later scored against these lists,
# so both presumably need to follow the same ordering — TODO confirm.
# NOTE(review): each record also carries an 'image' entry (see the sample records
# printed earlier), so this may open every image just to read its captions — confirm.
coco_refs = [coco_dataset.get_item_with_idx(i)['captions'] for i in range(len(coco_dataset))]
flickr_refs = [flickr_dataset[i]['captions'] for i in range(len(flickr_dataset))]

In [24]:
# Spot check: score the VILA1.5-3b 0-shot outputs against the COCO references.
res_file = "/home/asureddy_umass_edu/cs682/VILA_codes/results/captioning/VILA1.5-3b_0-shot.json"
hyps = get_captioning_outputs(res_file)
get_metrics_captioning(hyps, coco_refs)

{'cider_d': 0.27376845031139796,
 'bleu_4': 0.10652897272907658,
 'rouge_l': 0.3583564963409409}

In [25]:
def generate_captioning_metrics_csv(res_file_template, n_shots, coco_refs, output_csv_path):
    """Score one results file per shot count and write the metrics to a CSV.

    Args:
        res_file_template: Format string containing an ``{n_shots}``
            placeholder; it is formatted once per shot count to obtain the
            results file path.
        n_shots: Iterable of shot counts to evaluate.
        coco_refs: Reference captions passed unchanged to
            ``get_metrics_captioning`` for every shot count.
        output_csv_path: Destination path of the CSV file.

    Returns:
        None. The CSV is written without the index column and a confirmation
        line is printed.
    """
    rows = []
    for shot_count in n_shots:
        predictions = get_captioning_outputs(res_file_template.format(n_shots=shot_count))
        row = get_metrics_captioning(predictions, coco_refs)
        # Tag the metrics with the shot count so each CSV row is self-describing.
        row['n_shots'] = shot_count
        rows.append(row)

    pd.DataFrame(rows).to_csv(output_csv_path, index=False)

    print(f"Results saved to {output_csv_path}")


### Saving results to CSV

In [30]:
# fixed params
# Shot counts, model names and demonstration-selection strategies to sweep over.
n_shots = [0, 2, 4, 8]
model_names = ["VILA1.5-3b", "VILA1.5-13b"]
strategy_names = ["random", "rice"]
# Only Flickr8k is listed, so the "coco" branch of the sweep cell is not exercised with this setting.
dataset_names = ["flickr8k"]

In [32]:
# Compute and save one metrics CSV per (dataset, model, strategy) combination.
for dataset_name in dataset_names:
    # Resolve the results directory and the references once per dataset.
    # An unknown name fails loudly here; previously it either raised a
    # NameError or silently reused the directory and references left over
    # from the previous dataset.
    if dataset_name == "coco":
        dataset_str = "/home/asureddy_umass_edu/cs682/VILA_codes/results/captioning/"
        refs = coco_refs
    elif dataset_name == "flickr8k":
        dataset_str = "/home/asureddy_umass_edu/cs682/VILA_codes/results/flickr8k_captioning/"
        refs = flickr_refs
    else:
        raise ValueError(f"Unsupported dataset name: {dataset_name!r}")

    for model_name in model_names:
        for strategy in strategy_names:
            # Result files of the "random" strategy carry a filename suffix;
            # every other strategy uses the bare "<model>_<n>-shot.json" name.
            strategy_str = "_random-examples" if strategy == "random" else ""
            # "{n_shots}" is left as a placeholder for generate_captioning_metrics_csv to fill in.
            res_file_template = dataset_str + str(model_name) + "_{n_shots}-shot" + strategy_str + ".json"
            op_path = f"/home/asureddy_umass_edu/cs682/metrics_results/captioning/{dataset_name}_{model_name}_{strategy}.csv"
            generate_captioning_metrics_csv(res_file_template, n_shots, refs, op_path)


Results saved to /home/asureddy_umass_edu/cs682/metrics_results/captioning/flickr8k_VILA1.5-3b_random.csv
Results saved to /home/asureddy_umass_edu/cs682/metrics_results/captioning/flickr8k_VILA1.5-3b_rice.csv
Results saved to /home/asureddy_umass_edu/cs682/metrics_results/captioning/flickr8k_VILA1.5-13b_random.csv
Results saved to /home/asureddy_umass_edu/cs682/metrics_results/captioning/flickr8k_VILA1.5-13b_rice.csv


### Random In-context demonstrations

In [3]:
# OpenFlamingo-3B, 0-shot, random-examples result file on COCO val2017.
# NOTE(review): this call passes (res_file, annotations_path, image_dir_path), but the
# get_metrics_captioning defined earlier in this notebook takes (hyps, refs) and has no
# print calls, so the recorded "Corpus ... Score" output presumably comes from an older
# definition — confirm before re-running.
res_file = "/home/asureddy_umass_edu/cs682/flamingo/results/captioning/OpenFlamingo-3B-vitl-mpt1b_0-shot_random-examples.json"
annotations_path = "../dataset/annotations/captions_val2017.json"
image_dir_path = "/scratch/workspace/asureddy_umass_edu-llm_alignment/dataset/val2017"
scores = get_metrics_captioning(res_file, annotations_path, image_dir_path)

Corpus BLEU Score: {'bleu_4': tensor(0.0324, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.2225, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(0.1288, dtype=torch.float64)}


In [4]:
# Display the metrics dict bound to `scores` by the preceding cell.
scores

{'cider_d': 0.1288488658011192,
 'bleu_4': 0.03241325450792217,
 'rouge_l': 0.2224781488994907}

In [3]:
# OpenFlamingo-3B, 1-shot, random-examples result file.
# NOTE(review): three path arguments are passed, whereas the get_metrics_captioning
# defined earlier in this notebook takes (hyps, refs) — presumably run against an
# older definition; confirm before re-running.
res_file = "/home/asureddy_umass_edu/cs682/flamingo/results/captioning/OpenFlamingo-3B-vitl-mpt1b_1-shot_random-examples.json"
annotations_path = "../dataset/annotations/captions_val2017.json"
image_dir_path = "/scratch/workspace/asureddy_umass_edu-llm_alignment/dataset/val2017"
get_metrics_captioning(res_file, annotations_path, image_dir_path)

Corpus BLEU Score: {'bleu_4': tensor(0.0945, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.3238, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(0.2331, dtype=torch.float64)}


In [11]:
# OpenFlamingo-3B, 2-shot, random-examples result file.
# NOTE(review): three path arguments are passed, whereas the get_metrics_captioning
# defined earlier in this notebook takes (hyps, refs) — presumably run against an
# older definition; confirm before re-running.
res_file = "/home/asureddy_umass_edu/cs682/flamingo/results/captioning/OpenFlamingo-3B-vitl-mpt1b_2-shot_random-examples.json"
annotations_path = "../dataset/annotations/captions_val2017.json"
image_dir_path = "/scratch/workspace/asureddy_umass_edu-llm_alignment/dataset/val2017"
get_metrics_captioning(res_file, annotations_path, image_dir_path)

Corpus BLEU Score: {'bleu_4': tensor(0.1307, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.3844, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(0.3318, dtype=torch.float64)}


In [9]:
# OpenFlamingo-3B, 4-shot, random-examples result file.
# NOTE(review): three path arguments are passed, whereas the get_metrics_captioning
# defined earlier in this notebook takes (hyps, refs) — presumably run against an
# older definition; confirm before re-running.
res_file = "/home/asureddy_umass_edu/cs682/flamingo/results/captioning/OpenFlamingo-3B-vitl-mpt1b_4-shot_random-examples.json"
annotations_path = "../dataset/annotations/captions_val2017.json"
image_dir_path = "/scratch/workspace/asureddy_umass_edu-llm_alignment/dataset/val2017"
get_metrics_captioning(res_file, annotations_path, image_dir_path)

Corpus BLEU Score: {'bleu_4': tensor(0.1440, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.3961, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(0.3558, dtype=torch.float64)}


In [12]:
# OpenFlamingo-3B, 8-shot, random-examples result file.
# NOTE(review): three path arguments are passed, whereas the get_metrics_captioning
# defined earlier in this notebook takes (hyps, refs) — presumably run against an
# older definition; confirm before re-running.
res_file = "/home/asureddy_umass_edu/cs682/flamingo/results/captioning/OpenFlamingo-3B-vitl-mpt1b_8-shot_random-examples.json"
annotations_path = "../dataset/annotations/captions_val2017.json"
image_dir_path = "/scratch/workspace/asureddy_umass_edu-llm_alignment/dataset/val2017"
get_metrics_captioning(res_file, annotations_path, image_dir_path)

Corpus BLEU Score: {'bleu_4': tensor(0.1539, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.4065, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(0.3749, dtype=torch.float64)}


In [5]:
# random examples
# annotations_path and image_dir_path are read as globals inside
# make_captioning_results_csv; res_prefix is the common start of the result file paths.
annotations_path = "../dataset/annotations/captions_val2017.json"
image_dir_path = "/scratch/workspace/asureddy_umass_edu-llm_alignment/dataset/val2017"
res_prefix = "/home/asureddy_umass_edu/cs682/flamingo/results/captioning/OpenFlamingo-3B-vitl-mpt1b_"
def make_captioning_results_csv(res_prefix, shots = [0,2,4,8], prefix="shot_random-examples.json"):
    """Collect captioning metrics for a series of shot counts.

    Despite its name, this function does not write a CSV; it returns the
    collected rows.

    Args:
        res_prefix: Leading part of every results file path.
        shots: Shot counts to evaluate. The default list is only iterated,
            never modified.
        prefix: Trailing part of the file name, appended after
            ``"<shot>-"``.

    Returns:
        A list with one metrics dict per shot count, each extended with a
        ``"shots"`` key.

    Notes:
        Reads the globals ``annotations_path`` and ``image_dir_path``.
        NOTE(review): ``get_metrics_captioning`` is called with three path
        arguments, while the definition earlier in this notebook takes
        ``(hyps, refs)`` — presumably this was written against an older
        definition; confirm before re-running.
    """
    collected = []
    for n_shot in tqdm(shots):
        path = f"{res_prefix}{n_shot}-{prefix}"
        print(path)
        entry = get_metrics_captioning(path, annotations_path, image_dir_path)
        entry["shots"] = n_shot
        collected.append(entry)
    return collected

In [20]:
# NOTE(review): `results` is not assigned at top level in any visible cell (the visible
# calls to make_captioning_results_csv store into results_vila / results_vila_rice) —
# presumably it was bound by a cell that was later removed or edited; confirm.
results

[{'cider_d': 0.1288488658011192,
  'bleu_4': 0.03241325450792217,
  'rouge_l': 0.2224781488994907,
  'shots': 0},
 {'cider_d': 0.3318401536013735,
  'bleu_4': 0.13072228143017525,
  'rouge_l': 0.3844033588177514,
  'shots': 2},
 {'cider_d': 0.35578189093092394,
  'bleu_4': 0.14402223225927346,
  'rouge_l': 0.39609784709364887,
  'shots': 4},
 {'cider_d': 0.3749366642092328,
  'bleu_4': 0.15393822428375473,
  'rouge_l': 0.40647422579440007,
  'shots': 8}]

In [22]:
# One row per entry of `results`; the dict keys (metric names plus 'shots') become columns.
res_df = pd.DataFrame(results)

In [25]:
# The CSV path is the current res_prefix with "random.csv" appended; the index column is omitted.
res_df.to_csv(res_prefix+"random.csv",index=False)

In [8]:
# VILA1.5-13b result files with the "_random-examples" suffix under captioning_detailed/.
res_prefix = "/home/asureddy_umass_edu/cs682/VILA/results/captioning_detailed/VILA1.5-13b_"
results_vila = make_captioning_results_csv(res_prefix, shots = [0,2,4,8], prefix="shot_random-examples.json")

  0%|                                                                                                                                                                                 | 0/4 [00:00<?, ?it/s]

/home/asureddy_umass_edu/cs682/VILA/results/captioning_detailed/VILA1.5-13b_0-shot_random-examples.json


 25%|██████████████████████████████████████████▎                                                                                                                              | 1/4 [00:01<00:03,  1.28s/it]

Corpus BLEU Score: {'bleu_4': tensor(0.2424, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.5209, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(0.9726, dtype=torch.float64)}
/home/asureddy_umass_edu/cs682/VILA/results/captioning_detailed/VILA1.5-13b_2-shot_random-examples.json


 50%|████████████████████████████████████████████████████████████████████████████████████▌                                                                                    | 2/4 [00:02<00:02,  1.12s/it]

Corpus BLEU Score: {'bleu_4': tensor(0.2714, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.5483, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(1.1383, dtype=torch.float64)}
/home/asureddy_umass_edu/cs682/VILA/results/captioning_detailed/VILA1.5-13b_4-shot_random-examples.json


 75%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                          | 3/4 [00:03<00:01,  1.06s/it]

Corpus BLEU Score: {'bleu_4': tensor(0.3424, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.5656, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(1.2858, dtype=torch.float64)}
/home/asureddy_umass_edu/cs682/VILA/results/captioning_detailed/VILA1.5-13b_8-shot_random-examples.json


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:04<00:00,  1.07s/it]

Corpus BLEU Score: {'bleu_4': tensor(0.3146, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.5678, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(1.2519, dtype=torch.float64)}





In [7]:
# VILA1.5-13b result files without the "_random-examples" suffix, stored as results_vila_rice.
# NOTE(review): the recorded output lists files under results/captioning/, not
# results/captioning_detailed/ as set here, so that output appears to come from a run
# with a different res_prefix — re-run to confirm.
res_prefix = "/home/asureddy_umass_edu/cs682/VILA/results/captioning_detailed/VILA1.5-13b_"
results_vila_rice = make_captioning_results_csv(res_prefix, shots = [0,2,4,8], prefix="shot.json")

  0%|                                                                                                                                                                                 | 0/4 [00:00<?, ?it/s]

/home/asureddy_umass_edu/cs682/VILA/results/captioning/VILA1.5-13b_0-shot.json


 25%|██████████████████████████████████████████▎                                                                                                                              | 1/4 [00:01<00:03,  1.21s/it]

Corpus BLEU Score: {'bleu_4': tensor(0.2422, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.5221, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(0.9784, dtype=torch.float64)}
/home/asureddy_umass_edu/cs682/VILA/results/captioning/VILA1.5-13b_2-shot.json


 50%|████████████████████████████████████████████████████████████████████████████████████▌                                                                                    | 2/4 [00:02<00:02,  1.07s/it]

Corpus BLEU Score: {'bleu_4': tensor(0.2905, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.5438, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(1.1470, dtype=torch.float64)}
/home/asureddy_umass_edu/cs682/VILA/results/captioning/VILA1.5-13b_4-shot.json


 75%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                          | 3/4 [00:03<00:01,  1.05s/it]

Corpus BLEU Score: {'bleu_4': tensor(0.2852, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.5404, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(1.2289, dtype=torch.float64)}
/home/asureddy_umass_edu/cs682/VILA/results/captioning/VILA1.5-13b_8-shot.json


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:04<00:00,  1.04s/it]

Corpus BLEU Score: {'bleu_4': tensor(0.2850, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.5380, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(1.2223, dtype=torch.float64)}





In [10]:
# Same random-examples sweep, but against results/captioning/.
# This rebinds results_vila, replacing whatever an earlier cell stored under that name.
res_prefix = "/home/asureddy_umass_edu/cs682/VILA/results/captioning/VILA1.5-13b_"
results_vila = make_captioning_results_csv(res_prefix, shots = [0,2,4,8], prefix="shot_random-examples.json")

  0%|                                                                                                                                                                                 | 0/4 [00:00<?, ?it/s]

/home/asureddy_umass_edu/cs682/VILA/results/captioning/VILA1.5-13b_0-shot_random-examples.json


 25%|██████████████████████████████████████████▎                                                                                                                              | 1/4 [00:01<00:03,  1.12s/it]

Corpus BLEU Score: {'bleu_4': tensor(0.2397, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.5217, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(0.9916, dtype=torch.float64)}
/home/asureddy_umass_edu/cs682/VILA/results/captioning/VILA1.5-13b_2-shot_random-examples.json


 50%|████████████████████████████████████████████████████████████████████████████████████▌                                                                                    | 2/4 [00:02<00:02,  1.07s/it]

Corpus BLEU Score: {'bleu_4': tensor(0.3005, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.5503, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(1.2073, dtype=torch.float64)}
/home/asureddy_umass_edu/cs682/VILA/results/captioning/VILA1.5-13b_4-shot_random-examples.json


 75%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                          | 3/4 [00:03<00:01,  1.14s/it]

Corpus BLEU Score: {'bleu_4': tensor(0.3275, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.5593, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(1.2553, dtype=torch.float64)}
/home/asureddy_umass_edu/cs682/VILA/results/captioning/VILA1.5-13b_8-shot_random-examples.json


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:04<00:00,  1.10s/it]

Corpus BLEU Score: {'bleu_4': tensor(0.3006, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.5565, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(1.1904, dtype=torch.float64)}





In [9]:
# Sweep over the result files without the "_random-examples" suffix under results/captioning/.
# This rebinds results_vila_rice, replacing whatever an earlier cell stored under that name.
res_prefix = "/home/asureddy_umass_edu/cs682/VILA/results/captioning/VILA1.5-13b_"
results_vila_rice = make_captioning_results_csv(res_prefix, shots = [0,2,4,8], prefix="shot.json")

  0%|                                                                                                                                                                                 | 0/4 [00:00<?, ?it/s]

/home/asureddy_umass_edu/cs682/VILA/results/captioning/VILA1.5-13b_0-shot.json


 25%|██████████████████████████████████████████▎                                                                                                                              | 1/4 [00:01<00:05,  1.80s/it]

Corpus BLEU Score: {'bleu_4': tensor(0.2366, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.5180, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(0.9706, dtype=torch.float64)}
/home/asureddy_umass_edu/cs682/VILA/results/captioning/VILA1.5-13b_2-shot.json


 50%|████████████████████████████████████████████████████████████████████████████████████▌                                                                                    | 2/4 [00:03<00:03,  1.78s/it]

Corpus BLEU Score: {'bleu_4': tensor(0.2358, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.5005, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(1.0061, dtype=torch.float64)}
/home/asureddy_umass_edu/cs682/VILA/results/captioning/VILA1.5-13b_4-shot.json


 75%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                          | 3/4 [00:05<00:01,  1.76s/it]

Corpus BLEU Score: {'bleu_4': tensor(0.2883, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.5421, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(1.1977, dtype=torch.float64)}
/home/asureddy_umass_edu/cs682/VILA/results/captioning/VILA1.5-13b_8-shot.json


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:07<00:00,  1.75s/it]

Corpus BLEU Score: {'bleu_4': tensor(0.2762, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.5285, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(1.1604, dtype=torch.float64)}





### RICE-based in-context examples

In [13]:
# COCO val2017 annotation file and image directory, rebound for the cells that follow.
annotations_path = "../dataset/annotations/captions_val2017.json"
image_dir_path = "/scratch/workspace/asureddy_umass_edu-llm_alignment/dataset/val2017"

In [14]:
# Score the OpenFlamingo-3B result files without the "_random-examples" suffix
# for 1, 2, 4 and 8 shots, printing a header before each.
# NOTE(review): get_metrics_captioning is called with three path arguments, whereas the
# definition earlier in this notebook takes (hyps, refs) — presumably run against an
# older definition; confirm before re-running.
for i in [1,2,4,8]:
    res_file = f"/home/asureddy_umass_edu/cs682/flamingo/results/captioning/OpenFlamingo-3B-vitl-mpt1b_{i}-shot.json"
    print(f"\n{i}-shots ICL with RICE\n")
    get_metrics_captioning(res_file, annotations_path, image_dir_path)


1-shots ICL with RICE

Corpus BLEU Score: {'bleu_4': tensor(0.1042, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.3500, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(0.2251, dtype=torch.float64)}

2-shots ICL with RICE

Corpus BLEU Score: {'bleu_4': tensor(0.1437, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.4019, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(0.3358, dtype=torch.float64)}

4-shots ICL with RICE

Corpus BLEU Score: {'bleu_4': tensor(0.1713, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.4275, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(0.4084, dtype=torch.float64)}

8-shots ICL with RICE

Corpus BLEU Score: {'bleu_4': tensor(0.1892, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.4402, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(0.4414, dtype=torch.float64)}


In [None]:
# rice based
# n-shots,bleu,rouge,cider
# 0,0.0324,0.2225,0.1288
# 1,0.1042,0.3500,0.2251
# 2,0.1437,0.4019,0.3358
# 4,0.1713,0.4275,0.4084
# 8,0.1892,0.4402,0.4414


In [None]:
# random

In [44]:
# Legacy one-shot COCO result file.
# NOTE(review): get_metrics is not defined in any cell visible here — presumably it
# came from a cell that was later removed or renamed; confirm before re-running.
res_file = "/home/asureddy_umass_edu/cs682/flamingo/flamingo-3b-coco-one-shot.json"
annotations_path = "../dataset/annotations/captions_val2017.json"
image_dir_path = "/scratch/workspace/asureddy_umass_edu-llm_alignment/dataset/val2017"
get_metrics(res_file, annotations_path, image_dir_path)

Corpus BLEU Score: {'bleu_4': tensor(0.1012, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.3440, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(0.4058, dtype=torch.float64)}


In [45]:
# Legacy zero-shot ("ZS") COCO result file.
# NOTE(review): get_metrics is not defined in any cell visible here — presumably it
# came from a cell that was later removed or renamed; confirm before re-running.
res_file = "/home/asureddy_umass_edu/cs682/flamingo/flamingo-3b-coco-ZS.json"
annotations_path = "../dataset/annotations/captions_val2017.json"
image_dir_path = "/scratch/workspace/asureddy_umass_edu-llm_alignment/dataset/val2017"
get_metrics(res_file, annotations_path, image_dir_path)

Corpus BLEU Score: {'bleu_4': tensor(0.0532, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.2362, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(0.2325, dtype=torch.float64)}


In [46]:
# Legacy two-shot COCO result file.
# NOTE(review): get_metrics is not defined in any cell visible here — presumably it
# came from a cell that was later removed or renamed; confirm before re-running.
res_file = "/home/asureddy_umass_edu/cs682/flamingo/flamingo-3b-coco-two-shot.json"
annotations_path = "../dataset/annotations/captions_val2017.json"
image_dir_path = "/scratch/workspace/asureddy_umass_edu-llm_alignment/dataset/val2017"
get_metrics(res_file, annotations_path, image_dir_path)

Corpus BLEU Score: {'bleu_4': tensor(0.0899, dtype=torch.float64)}
Corpus ROUGE Scores: {'rouge_l': tensor(0.3341, dtype=torch.float64)}
Corpus Cider Scores: {'cider_d': tensor(0.4238, dtype=torch.float64)}


In [24]:
# old
# NOTE(review): the recorded output reports a float BLEU and rouge1/rouge2/rougeL keys,
# unlike the bleu_4/rouge_l/cider_d tensors printed for the same file in another cell, so
# it presumably comes from a different get_metrics implementation — confirm.
# get_metrics itself is not defined in any cell visible here.
res_file = "/home/asureddy_umass_edu/cs682/flamingo/flamingo-3b-coco-ZS.json"
annotations_path = "../dataset/annotations/captions_val2017.json"
image_dir_path = "/scratch/workspace/asureddy_umass_edu-llm_alignment/dataset/val2017"
get_metrics(res_file, annotations_path, image_dir_path)

Corpus BLEU Score: 0.059729001580259705
Corpus ROUGE Scores: {'rouge1': 0.010141944522348787, 'rouge2': 0.0002675585284280936, 'rougeL': 0.010141944522348787}


In [25]:
# Two-shot COCO result file; the recorded output uses rouge1/rouge2/rougeL keys.
# NOTE(review): get_metrics is not defined in any cell visible here — presumably it
# came from a cell that was later removed or renamed; confirm before re-running.
res_file = "/home/asureddy_umass_edu/cs682/flamingo/flamingo-3b-coco-two-shot.json"
annotations_path = "../dataset/annotations/captions_val2017.json"
image_dir_path = "/scratch/workspace/asureddy_umass_edu-llm_alignment/dataset/val2017"
get_metrics(res_file, annotations_path, image_dir_path)

Corpus BLEU Score: 0.08197111291929228
Corpus ROUGE Scores: {'rouge1': 0.014435707489431725, 'rouge2': 9.020386072523904e-05, 'rougeL': 0.014435707489431725}


In [27]:
# Uses res_file, image_dir_path and annotations_path as left by earlier cells.
with open(res_file) as f:
    data = json.load(f)

# Here data["outputs"] is iterated with .items(), i.e. it is treated as a mapping.
# The first len(data["query"]) characters of every output are dropped —
# presumably the prompt echoed at the start of each generation; confirm.
model_outputs = {}
for k,item in data["outputs"].items():
    model_outputs[k] = item[len(data["query"]):]

dataset = CaptionDataset(image_dir_path, annotations_path)
hyps, refs = [], []
# NOTE(review): the hypothesis is the FIRST REFERENCE caption of each item, not the
# model output `v` (which is unused). The recorded BLEU of 1.0 is consistent with
# that, so this looks like a sanity check of the metric helpers rather than a model
# evaluation — confirm the intent before reusing these numbers.
for k,v in model_outputs.items():
    hyps.append(dataset.data_dict[int(k)]["captions"][0])
    refs.append(dataset.data_dict[int(k)]["captions"])

# NOTE(review): compute_corpus_bleu / compute_corpus_rouge are not defined in any
# cell visible here — presumably from a since-removed cell; confirm.
bleu_score = compute_corpus_bleu(refs, hyps)
rouge_scores = compute_corpus_rouge(refs, hyps)

print("Corpus BLEU Score:", bleu_score)
print("Corpus ROUGE Scores:", rouge_scores)

Corpus BLEU Score: 1.0
Corpus ROUGE Scores: {'rouge1': 0.014656737642366828, 'rouge2': 0.0, 'rougeL': 0.014656737642366828}
