In [6]:
import json
from transformers import pipeline
import pandas as pd

# Helper function to generate result path dictionaries concisely
def make_result_path_dict(model, task_dict):
    """Build a ``{result name: result.jsonl path}`` mapping for one model.

    The mapping always starts with a ``"baseline1"`` entry. After that, every
    ``name -> subpath`` pair found in the inner dictionaries of ``task_dict``
    is added in iteration order. All paths share the
    ``output/<model>/n/`` prefix and end in ``/result.jsonl``.

    Args:
        model: Name inserted as the model directory of every path.
        task_dict: Mapping whose values are ``{name: subpath}`` dictionaries;
            its keys are not used.

    Returns:
        dict: Result name mapped to the path string of its result file.
    """
    root = f"output/{model}/n"
    paths = {"baseline1": f"{root}/baseline_1/result.jsonl"}
    for variants in task_dict.values():
        paths.update(
            (name, f"{root}/{subpath}/result.jsonl")
            for name, subpath in variants.items()
        )
    return paths

# Per-task tables mapping a result name to its directory below
# output/<model>/n/. The directory layout is the same for every model, so each
# table is written once and passed to make_result_path_dict per model.
_sentiment_variants = {"Sentiment": {
    "Negative_baseline2": "Sentiment/Negative/baseline_2",
    "Positive_baseline2": "Sentiment/Positive/baseline_2",
    "Neutral_baseline2": "Sentiment/Neutral/baseline_2",
    "Negative_our": "Sentiment/Negative/constrained_eps_32_batch_8",
    "Positive_our": "Sentiment/Positive/constrained_eps_32_batch_8",
    "Neutral_our": "Sentiment/Neutral/constrained_eps_32_batch_8",
}}

_formality_variants = {"Formality": {
    "formal_baseline2": "Formality/Formal/baseline_2",
    "informal_baseline2": "Formality/Informal/baseline_2",
    "formal_our": "Formality/Formal/constrained_eps_32_batch_8",
    "informal_our": "Formality/Informal/constrained_eps_32_batch_8",
}}

_politics_variants = {"Politics": {
    "left_baseline2": "Politics/Left/baseline_2",
    "right_baseline2": "Politics/Right/baseline_2",
    "left_our": "Politics/Left/constrained_eps_32_batch_8",
    "right_our": "Politics/Right/constrained_eps_32_batch_8",
}}

_language_variants = {"Language": {
    "English_baseline2": "Language/English/baseline_2",
    "French_baseline2": "Language/French/baseline_2",
    "Spanish_baseline2": "Language/Spanish/baseline_2",
    "English_our": "Language/English/constrained_eps_32_batch_8",
    "French_our": "Language/French/constrained_eps_32_batch_8",
    "Spanish_our": "Language/Spanish/constrained_eps_32_batch_8",
}}

# llava dictionaries
llava_sentiment_result_path_dictionary = make_result_path_dict("llava", _sentiment_variants)
llava_formality_result_path_dictionary = make_result_path_dict("llava", _formality_variants)
llava_politics_result_path_dictionary = make_result_path_dict("llava", _politics_variants)
llava_language_result_path_dictionary = make_result_path_dict("llava", _language_variants)

# minigpt4 dictionaries
minigpt4_sentiment_result_path_dictionary = make_result_path_dict("minigpt4", _sentiment_variants)
minigpt4_formality_result_path_dictionary = make_result_path_dict("minigpt4", _formality_variants)
minigpt4_politics_result_path_dictionary = make_result_path_dict("minigpt4", _politics_variants)
minigpt4_language_result_path_dictionary = make_result_path_dict("minigpt4", _language_variants)

In [3]:
from transformers import pipeline
import json

def evaluate_result(result_path, labels, classifier=None):
    """Classify the generated outputs of a JSONL result file and summarise them.

    Each non-blank line of ``result_path`` is parsed as JSON, and every record
    that has an ``"output"`` field contributes that text. Each text is passed
    to the classifier. For every entry of ``labels`` the function reports how
    often that label had the highest score among the tracked labels, and the
    label's mean score.

    Args:
        result_path: Path of the JSONL file to read (decoded as UTF-8).
        labels: Classifier label names to track. Labels returned by the
            classifier that are not listed here are ignored.
        classifier: Optional callable. ``classifier(text)[0]`` must be an
            iterable of mappings with ``"label"`` and ``"score"`` keys. When
            omitted, the notebook-global ``pipeline`` object is used, which
            is what this function always did before the parameter existed.

    Returns:
        dict: ``{label: {"label_num": float, "score": float}}``, both values
        rounded to 3 decimals. ``label_num`` is the fraction of outputs won by
        the label and ``score`` is the label's mean score. A value whose
        denominator is empty (no outputs at all, or a label that was never
        scored) is ``nan`` instead of raising ``ZeroDivisionError``.

    Raises:
        OSError: If ``result_path`` cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if classifier is None:
        # Backward-compatible default: the classifier the notebook stores
        # under the global name ``pipeline``.
        classifier = pipeline

    outputs = []
    # Explicit UTF-8 so non-ASCII generations decode the same on every platform.
    with open(result_path, 'r', encoding='utf-8') as file:
        for line in file:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) are not records.
                continue
            record = json.loads(line)
            if 'output' in record:
                outputs.append(record['output'])

    scores = {label: [] for label in labels}
    label_count = dict.fromkeys(scores, 0)

    for text in outputs:
        highest_score = 0
        highest_label = None
        for item in classifier(text)[0]:
            label = item['label']
            if label not in scores:
                continue
            scores[label].append(item['score'])
            # ">=" keeps the original tie-break: the later item wins.
            if item['score'] >= highest_score:
                highest_score = item['score']
                highest_label = label
        # An output for which no tracked label was returned counts towards
        # the total but is not attributed to any label.
        if highest_label is not None:
            label_count[highest_label] += 1

    total = len(outputs)
    undefined = float('nan')
    results = {}
    for label, label_scores in scores.items():
        results[label] = {
            "label_num": round(label_count[label] / total, 3) if total else undefined,
            "score": (
                round(sum(label_scores) / len(label_scores), 3)
                if label_scores else undefined
            ),
        }
    return results

# Classifier registry, keyed by task name. Each entry holds the model
# identifier handed to the transformers pipeline, the number of labels, and
# the label names that are tracked when results are evaluated.
models = {
    "Sentiment": {
        "model": "cardiffnlp/twitter-roberta-base-sentiment-latest",
        "label_len": 3,
        "labels": ["negative", "neutral", "positive"],
    },
    "Formality": {
        "model": "s-nlp/roberta-base-formality-ranker",
        "label_len": 2,
        "labels": ["formal", "informal"],
    },
    "Language": {
        "model": "papluca/xlm-roberta-base-language-detection",
        "label_len": 3,
        "labels": ["en", "es", "fr"],
    },
    "Politics": {
        "model": "m-newhauser/distilbert-political-tweets",
        "label_len": 2,
        "labels": ["Republican", "Democrat"],
    },
}

In [None]:
# Re-import the factory on every run of this cell: the name `pipeline` is
# rebound to the classifier instance at the bottom of this block, so without
# this line a second run would call the instance instead of the factory.
from transformers import pipeline
# Define the task name and the result path dictionary
# Note: Change the task_name and result_dictionary to evaluate the results for different tasks
task_name = "Sentiment"
result_dictionary = llava_sentiment_result_path_dictionary

full_result_path_dictionary = {}

# Look the task up once; an unknown task yields an empty configuration.
task_config = models.get(task_name, {})
model = task_config.get("model")
label_len = task_config.get("label_len")
labels = task_config.get("labels")

# Fail fast on an unknown task. Merely printing a message and carrying on
# would only postpone the failure to the evaluation step, where `labels` is
# None and the error no longer mentions the task name.
if model is None:
    raise ValueError(f"No model found for the task: {task_name}")

# Initialize the pipeline with the selected model.
# The instance is deliberately stored under the name `pipeline`, because
# evaluate_result looks the classifier up under that global name.
pipeline = pipeline(model=model, top_k=None)

# Expand every template path once per image: the '/n/' placeholder segment
# becomes '/coco_<index>/' for the image indices 1 through 9.
for image_index in range(1, 10):
    full_result_path_dictionary[image_index] = {
        name: template.replace('/n/', f'/coco_{image_index}/')
        for name, template in result_dictionary.items()
    }

print(full_result_path_dictionary)
result_dict = {}

# Evaluate every result file, grouped by image index:
# result_dict[image index][result name] -> output of evaluate_result.
for image_index, paths_for_image in full_result_path_dictionary.items():
    print(f"> Evaluating the results for the image {image_index}")
    image_results = {}
    # Register the per-image dict before evaluating, so results gathered so
    # far stay reachable through result_dict if a later evaluation raises.
    result_dict[image_index] = image_results
    for result_name, result_path in paths_for_image.items():
        print(f">> Evaluating the result: {result_name}")
        image_results[result_name] = evaluate_result(result_path, labels)
        print("\n")

import pandas as pd

# Flatten result_dict into one row per (image index, condition), with one
# column per classifier label holding that label's "label_num" value.
# Row values and column names are both derived from `labels`. They therefore
# stay aligned (the earlier hand-written lists put the positive values under
# the 'negative' column and vice versa), and the cell works for every task
# rather than only for the sentiment labels.
label_columns = list(labels)
flattened_data = [
    [i, condition] + [label_results[label]['label_num'] for label in label_columns]
    for i, result in result_dict.items()
    for condition, label_results in result.items()
]

df = pd.DataFrame(flattened_data, columns=['i', 'Condition'] + label_columns)
# Average over the images for every condition.
mean_df = df.groupby('Condition').mean().reset_index()
print(mean_df)