In [16]:
import json
import os

import pandas as pd

In [2]:
def exact_match_accuracy(predictions, references):
    """
    Calculate exact match accuracy.

    A prediction counts as a match only when it compares equal (``==``) to the
    reference at the same position; no normalisation is applied to either side.

    :param predictions: List of predicted values.
    :param references: List of true/reference values.
    :return: Exact match accuracy as a float (matches divided by total).
    :raises ValueError: If the two inputs differ in length, or if they are empty.
    """
    total = len(references)
    if len(predictions) != total:
        raise ValueError("Length of predictions and references must be the same.")
    if total == 0:
        # Accuracy over zero examples is undefined. Fail with an explicit
        # message instead of the ZeroDivisionError the division would raise.
        raise ValueError("Cannot compute accuracy for empty predictions and references.")

    matches = sum(p == r for p, r in zip(predictions, references))
    return matches / total

# Example usage
references = ["cat", "dog", "fish", "fish"]
predictions = ["cat", "dog", "bird", "fish"]

# Three of the four positions agree, so the printed score is 75.00%.
em_accuracy = exact_match_accuracy(predictions=predictions, references=references)
print(f"Exact Match Accuracy: {em_accuracy:.2%}")


Exact Match Accuracy: 75.00%


In [3]:
def calculate_em_accuracy(res_file):
    """
    Compute exact match accuracy for one JSON results file.

    The parsed JSON is iterated record by record; each record's ``'output'``
    is compared against its ``'ground_truth'`` via ``exact_match_accuracy``.

    :param res_file: Path to the JSON results file.
    :return: Exact match accuracy as returned by ``exact_match_accuracy``.
    """
    # The context manager closes the handle as soon as parsing finishes; the
    # bare ``json.load(open(...))`` form left the file open until garbage
    # collection. JSON text is UTF-8, so decode it explicitly rather than
    # relying on the platform's default encoding.
    with open(res_file, encoding="utf-8") as fh:
        data = json.load(fh)

    refs = [x['ground_truth'] for x in data]
    preds = [x['output'] for x in data]
    return exact_match_accuracy(preds, refs)

In [6]:
res_file = "/home/asureddy_umass_edu/cs682/VILA_codes/results/keypoint_detection/mcq_style/VILA1.5-3b_random_0_shots.json"
calculate_em_accuracy(res_file)

Accuracy: 13.27%


0.13271604938271606

### Generating CSVs

In [18]:
def generate_em_accuracy_csv(models, n_shots, strategies, styles, file_template, output_dir_template):
    """
    Write one CSV of exact match accuracy per (style, strategy, model) combination.

    For each combination, ``file_template`` is formatted once per shot count to
    locate a results file, which is scored with ``calculate_em_accuracy``. A
    file that cannot be scored is reported on stdout and recorded with an empty
    accuracy, so every CSV still has one row per shot count.

    :param models: Model names substituted for ``{model}``.
    :param n_shots: Shot counts substituted for ``{n_shot}``; one CSV row each.
    :param strategies: Strategy names substituted for ``{strategy}``.
    :param styles: Style names substituted for ``{style}``.
    :param file_template: ``str.format`` template for result files; receives
        ``model``, ``n_shot``, ``strategy`` and ``style``.
    :param output_dir_template: ``str.format`` template for the output
        directory; receives ``style``. The directory is created if missing.
    :return: None. Each CSV is written to
        ``<output_dir>/coco_<model>_<strategy>.csv`` and its path is printed.
    """
    for style in styles:
        # The output directory depends only on the style, so resolve it once
        # per style and make sure it exists before any CSV is written there.
        output_dir = output_dir_template.format(style=style)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        for strategy in strategies:
            for model in models:
                results = []
                for shot in n_shots:
                    res_file = file_template.format(model=model, n_shot=shot, strategy=strategy, style=style)
                    try:
                        accuracy = calculate_em_accuracy(res_file)
                    except Exception as e:
                        # Deliberate best-effort: a missing or malformed results
                        # file must not abort the sweep, so log it and keep a
                        # placeholder row.
                        print(f"Error processing file {res_file}: {e}")
                        accuracy = None
                    results.append({"n_shot": shot, "accuracy": accuracy})

                # Fixing the columns keeps the header present even when
                # ``n_shots`` is empty and there are no rows.
                df = pd.DataFrame(results, columns=["n_shot", "accuracy"])
                output_csv_path = f"{output_dir}/coco_{model}_{strategy}.csv"
                df.to_csv(output_csv_path, index=False)
                print(f"{output_csv_path}")


In [21]:
# Example usage: sweep every style/strategy/model combination listed below
# and write one accuracy CSV per combination.
models = ["VILA1.5-3b", "VILA1.5-13b"]
strategies = ["random", "rice"]
styles = ["vqa_style", "mcq_style"]
n_shots = [0, 1, 2, 4]
file_template = "/home/asureddy_umass_edu/cs682/VILA_codes/results/keypoint_detection/{style}/{model}_{strategy}_{n_shot}_shots.json"
output_dir = "/home/asureddy_umass_edu/cs682/metrics_results/keypoint_detection/{style}"

generate_em_accuracy_csv(
    models=models,
    n_shots=n_shots,
    strategies=strategies,
    styles=styles,
    file_template=file_template,
    output_dir_template=output_dir,
)

/home/asureddy_umass_edu/cs682/metrics_results/keypoint_detection/vqa_style/coco_VILA1.5-3b_random.csv
/home/asureddy_umass_edu/cs682/metrics_results/keypoint_detection/vqa_style/coco_VILA1.5-13b_random.csv
/home/asureddy_umass_edu/cs682/metrics_results/keypoint_detection/vqa_style/coco_VILA1.5-3b_rice.csv
/home/asureddy_umass_edu/cs682/metrics_results/keypoint_detection/vqa_style/coco_VILA1.5-13b_rice.csv
/home/asureddy_umass_edu/cs682/metrics_results/keypoint_detection/mcq_style/coco_VILA1.5-3b_random.csv
/home/asureddy_umass_edu/cs682/metrics_results/keypoint_detection/mcq_style/coco_VILA1.5-13b_random.csv
/home/asureddy_umass_edu/cs682/metrics_results/keypoint_detection/mcq_style/coco_VILA1.5-3b_rice.csv
/home/asureddy_umass_edu/cs682/metrics_results/keypoint_detection/mcq_style/coco_VILA1.5-13b_rice.csv


### Random in-context examples

In [10]:
# Per-shot exact match accuracy for the random / mcq_style / VILA1.5-3b results.
strategy, style, model_name = "random", "mcq_style", "VILA1.5-3b"
n_shots = [0, 1, 2, 4, 8]

print(f"{strategy}-{style}-{model_name}")
for n_shot in n_shots:
    res_file = f"/home/asureddy_umass_edu/cs682/VILA_codes/results/keypoint_detection/{style}/{model_name}_{strategy}_{n_shot}_shots.json"
    em_accuracy = calculate_em_accuracy(res_file)
    print(f"n_shot: {n_shot} Accuracy: {em_accuracy:.2%}")

random-mcq_style-VILA1.5-3b
n_shot: 0 Accuracy: 13.27%
n_shot: 1 Accuracy: 19.07%
n_shot: 2 Accuracy: 18.95%
n_shot: 4 Accuracy: 20.31%
n_shot: 8 Accuracy: 20.37%


In [11]:
# Per-shot exact match accuracy for the random / mcq_style / VILA1.5-13b results.
strategy, style, model_name = "random", "mcq_style", "VILA1.5-13b"
n_shots = [0, 1, 2, 4, 8]

print(f"{strategy}-{style}-{model_name}")
for n_shot in n_shots:
    res_file = f"/home/asureddy_umass_edu/cs682/VILA_codes/results/keypoint_detection/{style}/{model_name}_{strategy}_{n_shot}_shots.json"
    em_accuracy = calculate_em_accuracy(res_file)
    print(f"n_shot: {n_shot} Accuracy: {em_accuracy:.2%}")

random-mcq_style-VILA1.5-13b
n_shot: 0 Accuracy: 5.93%
n_shot: 1 Accuracy: 17.84%
n_shot: 2 Accuracy: 20.00%
n_shot: 4 Accuracy: 19.88%
n_shot: 8 Accuracy: 18.21%


In [12]:
# Per-shot exact match accuracy for the random / vqa_style / VILA1.5-3b results.
strategy, style, model_name = "random", "vqa_style", "VILA1.5-3b"
n_shots = [0, 1, 2, 4, 8]

print(f"{strategy}-{style}-{model_name}")
for n_shot in n_shots:
    res_file = f"/home/asureddy_umass_edu/cs682/VILA_codes/results/keypoint_detection/{style}/{model_name}_{strategy}_{n_shot}_shots.json"
    em_accuracy = calculate_em_accuracy(res_file)
    print(f"n_shot: {n_shot} Accuracy: {em_accuracy:.2%}")

random-vqa_style-VILA1.5-3b
n_shot: 0 Accuracy: 0.00%
n_shot: 1 Accuracy: 16.91%
n_shot: 2 Accuracy: 16.73%
n_shot: 4 Accuracy: 19.51%
n_shot: 8 Accuracy: 20.37%


In [13]:
# Per-shot exact match accuracy for the random / vqa_style / VILA1.5-13b results.
strategy, style, model_name = "random", "vqa_style", "VILA1.5-13b"
n_shots = [0, 1, 2, 4, 8]

print(f"{strategy}-{style}-{model_name}")
for n_shot in n_shots:
    res_file = f"/home/asureddy_umass_edu/cs682/VILA_codes/results/keypoint_detection/{style}/{model_name}_{strategy}_{n_shot}_shots.json"
    em_accuracy = calculate_em_accuracy(res_file)
    print(f"n_shot: {n_shot} Accuracy: {em_accuracy:.2%}")

random-vqa_style-VILA1.5-13b
n_shot: 0 Accuracy: 0.12%
n_shot: 1 Accuracy: 6.36%
n_shot: 2 Accuracy: 14.94%
n_shot: 4 Accuracy: 19.69%
n_shot: 8 Accuracy: 21.91%


### RICE in-context examples

In [5]:
# Per-shot exact match accuracy for the rice / mcq_style / VILA1.5-3b results.
strategy, style, model_name = "rice", "mcq_style", "VILA1.5-3b"
n_shots = [0, 1, 2, 4]

print(f"{strategy}-{style}-{model_name}")
for n_shot in n_shots:
    res_file = f"/home/asureddy_umass_edu/cs682/VILA_codes/results/keypoint_detection/{style}/{model_name}_{strategy}_{n_shot}_shots.json"
    em_accuracy = calculate_em_accuracy(res_file)
    print(f"n_shot: {n_shot} Accuracy: {em_accuracy:.2%}")

rice-mcq_style-VILA1.5-3b
n_shot: 0 Accuracy: 12.96%
n_shot: 1 Accuracy: 20.12%
n_shot: 2 Accuracy: 21.60%
n_shot: 4 Accuracy: 22.10%


In [7]:
# Per-shot exact match accuracy for the rice / mcq_style / VILA1.5-13b results.
strategy, style, model_name = "rice", "mcq_style", "VILA1.5-13b"
n_shots = [0, 1, 2, 4]

print(f"{strategy}-{style}-{model_name}")
for n_shot in n_shots:
    res_file = f"/home/asureddy_umass_edu/cs682/VILA_codes/results/keypoint_detection/{style}/{model_name}_{strategy}_{n_shot}_shots.json"
    em_accuracy = calculate_em_accuracy(res_file)
    print(f"n_shot: {n_shot} Accuracy: {em_accuracy:.2%}")

rice-mcq_style-VILA1.5-13b
n_shot: 0 Accuracy: 5.49%
n_shot: 1 Accuracy: 20.68%
n_shot: 2 Accuracy: 20.62%
n_shot: 4 Accuracy: 21.30%


In [8]:
# Per-shot exact match accuracy for the rice / vqa_style / VILA1.5-3b results.
strategy, style, model_name = "rice", "vqa_style", "VILA1.5-3b"
n_shots = [0, 1, 2, 4]

print(f"{strategy}-{style}-{model_name}")
for n_shot in n_shots:
    res_file = f"/home/asureddy_umass_edu/cs682/VILA_codes/results/keypoint_detection/{style}/{model_name}_{strategy}_{n_shot}_shots.json"
    em_accuracy = calculate_em_accuracy(res_file)
    print(f"n_shot: {n_shot} Accuracy: {em_accuracy:.2%}")

rice-vqa_style-VILA1.5-3b
n_shot: 0 Accuracy: 0.00%
n_shot: 1 Accuracy: 18.21%
n_shot: 2 Accuracy: 17.72%
n_shot: 4 Accuracy: 18.64%


In [9]:
# Per-shot exact match accuracy for the rice / vqa_style / VILA1.5-13b results.
strategy, style, model_name = "rice", "vqa_style", "VILA1.5-13b"
n_shots = [0, 1, 2, 4]

print(f"{strategy}-{style}-{model_name}")
for n_shot in n_shots:
    res_file = f"/home/asureddy_umass_edu/cs682/VILA_codes/results/keypoint_detection/{style}/{model_name}_{strategy}_{n_shot}_shots.json"
    em_accuracy = calculate_em_accuracy(res_file)
    print(f"n_shot: {n_shot} Accuracy: {em_accuracy:.2%}")

rice-vqa_style-VILA1.5-13b
n_shot: 0 Accuracy: 0.00%
n_shot: 1 Accuracy: 12.10%
n_shot: 2 Accuracy: 17.28%
n_shot: 4 Accuracy: 18.52%
