In [12]:
# 1.  Firstly, convert the initial json file to standard MS-COCO format 
#  dataset_flickr8k.json   ---->    flickr8k_test.json / flickr8k_val.json  
 
import json
import os

def read_input_file(file_path):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load``.
    """
    # Pin the encoding: without it open() falls back to the locale default,
    # so non-ASCII captions could decode differently from machine to machine.
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)
 
def write_output_file(file_path, data):
    """Serialize ``data`` as indented JSON to ``file_path``.

    Missing parent directories of ``file_path`` are created first.

    Args:
        file_path: Destination path; may be a bare file name.
        data: JSON-serializable object to write.
    """
    output_dir = os.path.dirname(file_path)
    # dirname() is "" for a bare file name and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)

def convert_json(input_data, split, dataset_name):
    """Build an MS-COCO style caption dict for one split of the dataset.

    Args:
        input_data: Parsed source JSON. It must have an "images" list whose
            entries carry "split", "filename", "imgid" and a "sentences"
            list with "sentid" and "raw" per sentence.
        split: Only images whose "split" equals this value are kept.
        dataset_name: Used only in the "info" description string.

    Returns:
        A dict with the keys "licenses", "info", "images", "annotations"
        and "type".
    """
    output_data = {
        "licenses": [],
        "info": {
            "url": "",
            "date_created": " ",
            "version": "0.1",
            "description": f"{dataset_name} {split} dataset",
            "contributor": "",
            "year": ""
        },
        "images": [],
        "annotations": [],
        "type": "captions"
    }

    for image in input_data["images"]:
        if image["split"] != split:
            continue

        # The id is the file name without its extension. splitext() removes
        # only the final suffix, so names containing extra dots stay intact.
        flickr_id = os.path.splitext(image["filename"])[0]

        output_data["images"].append({
            "file_name": image["filename"],
            "flickr_id": flickr_id,
            # Legacy misspelling of "flickr_id", kept so that anything already
            # reading the old key keeps working.
            "fkickr_id": flickr_id,
            "id": image["imgid"],
        })

        for sentence in image["sentences"]:
            output_data["annotations"].append({
                "image_id": image["imgid"],
                "id": sentence["sentid"],
                "caption": sentence["raw"]
            })
    return output_data


def main(input_file_path, output_file_path, split, dataset_name):
    """Convert one split of the source JSON and save the result.

    Reads ``input_file_path``, converts the images belonging to ``split``
    with ``convert_json``, writes the result to ``output_file_path`` and
    prints a confirmation message.
    """
    converted = convert_json(
        read_input_file(input_file_path), split, dataset_name
    )
    write_output_file(output_file_path, converted)
    print(f" json file is saved in {output_file_path} .")


if __name__ == "__main__":
    # Dataset and split to convert.
    dataset_name = "flickr8k"  # flickr30k   flickr8k
    split = "test"  # "train" "test" "val"

    # Source JSON and destination of the converted file.
    input_file_path = f"/gemini/data-2/caption_datasets/dataset_{dataset_name}.json"
    output_file_path = f"/gemini/output/{dataset_name}_{split}.json"

    main(
        input_file_path=input_file_path,
        output_file_path=output_file_path,
        split=split,
        dataset_name=dataset_name,
    )

 json file is saved in /gemini/output/flickr8k_test.json .


In [None]:
#2.   run the eval_flickr8k.py script to obtain the flickr8k_test_result.json.

# Run the following in a bash shell (these are shell commands, not Python).
# Every expansion is quoted so that a path containing spaces or glob
# characters is passed through as a single word.
# NOTE(review): these variables are presumably read by eval_flickr8k.py or
# its configuration; MODEL_ROOT1 is not referenced in the command below.
export DATA_ROOT="${GEMINI_DATA_IN2}/flickr8k_images"
export OUTPUT_ROOT="${GEMINI_DATA_OUT}"
export MODEL_ROOT1="${GEMINI_PRETRAIN}"
export MODEL_ROOT2="${GEMINI_PRETRAIN2}"
# key=value overrides passed on the command line to eval_flickr8k.py.
python eval_flickr8k.py ++model.cap_generator.decoder_name=Parallel \
    exp.checkpoint="${MODEL_ROOT2}/checkpoint_best_test.pth" \
    exp.name=flickr8k_evaluation_Parallel \
    optimizer.num_workers=2 \
    exp.ngpus_per_node=2 \
    exp.world_size=2


In [21]:
#3.  Evaluate flickr8k_test_result.json with eval_json_by_pycocoevalcap.py


from pycocotools.coco import COCO
from pycocoevalcap.eval import COCOEvalCap


def main(annotation_file=None, results_file=None):
    """Score a captions result file against reference annotations.

    Builds a ``COCO`` object from ``annotation_file``, loads ``results_file``
    through its ``loadRes`` method, runs ``COCOEvalCap`` on the pair and
    prints every metric with three decimals.

    Args:
        annotation_file: Path passed to ``COCO(...)`` as the references.
        results_file: Path passed to ``loadRes`` as the captions to score.
    """
    # Prerequisite noted by the author: download stanford-corenlp-3.6.0 and
    # move it to
    # /root/miniconda3/lib/python3.9/site-packages/pycocoevalcap/spice/lib

    ground_truth = COCO(annotation_file)
    predictions = ground_truth.loadRes(results_file)

    evaluator = COCOEvalCap(ground_truth, predictions)
    # Use the image ids reported by the results object as the set to evaluate.
    evaluator.params['image_id'] = predictions.getImgIds()

    # Author's note: SPICE takes a few minutes the first time, then speeds up
    # thanks to caching.
    evaluator.evaluate()

    # Report every metric collected in evaluator.eval.
    for name, value in evaluator.eval.items():
        print(f'{name}: {value:.3f}')


if __name__ == "__main__":
    # Reference annotations and the generated captions to score against them.
    annotation_file = "/gemini/output/flickr8k_test.json"
    results_file = "/gemini/output/flickr8k_evaluation_Parallel/flickr8k_test_result.json"

    main(annotation_file=annotation_file, results_file=results_file)

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
tokenization...


PTBTokenizer tokenized 64178 tokens at 267971.51 tokens per second.
PTBTokenizer tokenized 10264 tokens at 128472.14 tokens per second.


setting up scorers...
computing Bleu score...
{'testlen': 9265, 'reflen': 9256, 'guess': [9265, 8265, 7265, 6265], 'correct': [6790, 3432, 1453, 579]}
ratio: 1.0009723422643688
Bleu_1: 0.733
Bleu_2: 0.552
Bleu_3: 0.393
Bleu_4: 0.274
computing METEOR score...
METEOR: 0.251
computing Rouge score...
ROUGE_L: 0.536
computing CIDEr score...
CIDEr: 0.785
computing SPICE score...


Parsing reference captions
Initiating Stanford parsing pipeline
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator tokenize
[main] INFO edu.stanford.nlp.pipeline.TokenizerAnnotator - TokenizerAnnotator: No tokenizer type provided. Defaulting to PTBTokenizer.
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ssplit
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator parse
[main] INFO edu.stanford.nlp.parser.common.ParserGrammar - Loading parser from serialized file edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz ... 
done [0.4 sec].
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator lemma
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ner
Loading classifier from edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz ... done [1.1 sec].
Loading classifier from edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz ... done [0.5 sec].
Loading classif

SPICE evaluation took: 35.92 s
SPICE: 0.190
Bleu_1: 0.733
Bleu_2: 0.552
Bleu_3: 0.393
Bleu_4: 0.274
METEOR: 0.251
ROUGE_L: 0.536
CIDEr: 0.785
SPICE: 0.190
