In [19]:
import numpy as np
import os
import json
import csv
import pandas as pd

base_score_dir = '/home/ubuntu/MMSci/mmsci-exps/eval/eval_scores/image_caption_generation/textgen'
target_score_dir = '/home/ubuntu/MMSci/mmsci-exps/eval/eval_scores/image_caption_generation'
csv_dir = os.path.join(target_score_dir, 'csv')

if not os.path.exists(csv_dir):
    os.makedirs(csv_dir, exist_ok=True)

In [20]:
model_list = ['blip2', 'kosmos2', 'qwen', 'llava', 'llava-next', 'llava-next-mistral', 'gpt-4-turbo', 'gpt-4o', 'llava-next-mmsci']
metric_list = ['BLEU-1', 'BLEU-2', 'BLEU-3', 'BLEU-4', 'METEOR', 'rougeL', 'BERTSCORE', 'CLIPScore', 'RefCLIPScore']
fields = ['model']
for metric in metric_list:
    fields.extend([f'{metric} [mean]', f'{metric} [std]'])

for w_abs in [False, True]:
    for w_ctx in [False, True]:
        if w_abs and w_ctx:
            continue
        tag = f'abs{w_abs}_ctx{w_ctx}'

        with open(os.path.join(csv_dir, f'{tag}.csv'), 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fields)
            writer.writeheader()
            score_dict = []

            for model in model_list:
                filename = os.path.join(base_score_dir, tag, f'{model}.json')
                if not os.path.exists(filename):
                    continue
                all_scores = json.load(open(filename))
                info = {'model': model}
                for metric in metric_list:
                    if metric in all_scores:
                        scores = all_scores[metric]
                        mean = np.mean(scores)
                        std = np.std(scores)
                        info[f'{metric} [mean]'] = mean
                        info[f'{metric} [std]'] = std
                    else:
                        info[f'{metric} [mean]'] = 0.0
                        info[f'{metric} [std]'] = 0.0
                score_dict.append(info)

            writer.writerows(score_dict)



In [21]:
w_abs = True
w_ctx = False

tag = f'abs{w_abs}_ctx{w_ctx}'

# Load the CSV file
cap_score_path = os.path.join(target_score_dir, "csv", f'{tag}.csv')
df = pd.read_csv(cap_score_path)
print(df.columns)

Index(['model', 'BLEU-1 [mean]', 'BLEU-1 [std]', 'BLEU-2 [mean]',
       'BLEU-2 [std]', 'BLEU-3 [mean]', 'BLEU-3 [std]', 'BLEU-4 [mean]',
       'BLEU-4 [std]', 'METEOR [mean]', 'METEOR [std]', 'rougeL [mean]',
       'rougeL [std]', 'BERTSCORE [mean]', 'BERTSCORE [std]',
       'CLIPScore [mean]', 'CLIPScore [std]', 'RefCLIPScore [mean]',
       'RefCLIPScore [std]'],
      dtype='object')


In [24]:
# Filter columns that contain "[mean]"
# filtered_columns = df.columns
filtered_columns = [col for col in df.columns if "[mean]" in col or "model" in col]

# Check if the string is in the 'Name' column (case-sensitive)
filtered_df = df[filtered_columns]

numeric_cols = filtered_df.select_dtypes(include='number').columns
filtered_df[numeric_cols] = filtered_df[numeric_cols].applymap(lambda x: x * 100)
filtered_df[numeric_cols] = filtered_df[numeric_cols].round(2)

# Print the filtered DataFrame
print(filtered_df)

                model  BLEU-1 [mean]  BLEU-2 [mean]  BLEU-3 [mean]  \
0               blip2          32.88           4.18           0.45   
1             kosmos2          22.28           2.91           0.61   
2                qwen          38.27           8.75           2.22   
3               llava          30.78           4.50           0.66   
4          llava-next          19.79           3.70           0.68   
5  llava-next-mistral          19.50           3.95           0.76   
6         gpt-4-turbo          22.95           5.63           1.56   
7              gpt-4o          21.06           5.58           1.76   
8    llava-next-mmsci          45.89          16.96           8.12   

   BLEU-4 [mean]  METEOR [mean]  rougeL [mean]  BERTSCORE [mean]  \
0           0.09           7.32           9.14             79.72   
1           0.20          19.50          11.81             79.09   
2           0.70          16.02          15.38             81.87   
3           0.18          1

  filtered_df[numeric_cols] = filtered_df[numeric_cols].applymap(lambda x: x * 100)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df[numeric_cols] = filtered_df[numeric_cols].applymap(lambda x: x * 100)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df[numeric_cols] = filtered_df[numeric_cols].round(2)
