In [1]:
# Load environment variables from a local .env file before anything else runs.
# NOTE(review): presumably this supplies the API credentials used by the LLM
# wrappers created in later cells — confirm against the llm package.
from dotenv import load_dotenv
load_dotenv()

True

In [None]:
from datasets import load_dataset
from mcq import mcq_utils

# Fetch the MCQ benchmark CSV from the ClimaQA dataset repository. A file passed
# through `data_files` is exposed under the 'train' split, so request that
# split directly instead of indexing the returned mapping afterwards.
dataset = load_dataset(
    'UCSD-GENIE/ClimaQA',
    data_files="climaqa_gold/mcq/mcq_benchmark.csv",
    split='train',
)

# Keep only the rows whose 'Validation' column equals True. The explicit
# comparison is kept on purpose: the column dtype is not guaranteed to be bool.
mcq_df = dataset.to_pandas().loc[lambda frame: frame['Validation'] == True]

# Convert the filtered frame into the MCQ objects consumed by the agents.
mcq_list = mcq_utils.pandas_to_mcq(mcq_df)

In [6]:
from mcq.mcq_agent import MCQAgent
from mcq.mcq_fewshot_agent import MCQFewShotAgent
from mcq.mcq_rag_agent import MCQRagAgent
from llm.openai_llm import OpenAIILLM
from llm.together_ai_llm import TogetherAILLM

# Prompting strategy used to build every agent. Change this single value to
# switch strategies instead of (un)commenting whole agent lists:
#   'default' -> MCQAgent         (default LLM agents)
#   'fewshot' -> MCQFewShotAgent  (answer with few-shot prompting)
#   'rag'     -> MCQRagAgent      (answer with RAG prompting; a chroma vectordb
#                                  must be created first for these to work)
AGENT_STRATEGY = 'default'

# Maps each strategy name to the agent class that implements it.
AGENT_CLASSES = {
    'default': MCQAgent,
    'fewshot': MCQFewShotAgent,
    'rag': MCQRagAgent,
}

# (LLM wrapper class, model name) pairs to evaluate, in reporting order.
LLM_SPECS = [
    (OpenAIILLM, 'gpt-3.5-turbo'),
    (OpenAIILLM, 'gpt-4o'),
    (TogetherAILLM, 'llama3-70b'),
    (TogetherAILLM, 'mixtral-8x22b'),
    (TogetherAILLM, 'gemma-27b'),
]

# An unknown AGENT_STRATEGY raises KeyError here, before any LLM is built.
agent_class = AGENT_CLASSES[AGENT_STRATEGY]

# One agent per model; each LLM wrapper is constructed immediately before the
# agent that wraps it, in the order listed in LLM_SPECS.
mcq_agents = [
    agent_class(llm_class(model_name))
    for llm_class, model_name in LLM_SPECS
]

In [26]:
import random

# Spot check: pick one question at random from mcq_list and pass it, together
# with all configured agents, to get_results_for_question. No seed is set, so
# a different question is drawn on every run.
mcq = random.choice(mcq_list)
mcq_utils.get_results_for_question(mcq_agents, mcq)

How does the knowledge of the horizontal distribution of the absolute momentum M allow us to characterize the stability of the atmosphere?

a) By evaluating the static stability
b) By analyzing the potential instability
c) By determining the convective instability
d) By assessing the inertial stability

Correct Answer: d

gpt-3.5-turbo: a
gpt-4o: d
llama3-70b: d
mixtral-8x22b: a
gemma-27b: d


In [None]:
import os

# Root folder for the benchmark run; each trial gets its own
# 'trial_<n>' sub-folder underneath it.
result_dir = 'results/mcq'

# Number of times the full benchmark is repeated.
num_trials = 3

# Trials are numbered from 1 in both the console output and the folder names.
for trial_number in range(1, num_trials + 1):
    print(f'\nRunning trial {trial_number}\n')
    trial_dir = os.path.join(result_dir, f'trial_{trial_number}')

    # Every agent handles the same validated question frame for this trial.
    for agent in mcq_agents:
        print(f'Answering with {agent.name}')
        mcq_utils.generate_and_save_answer(agent, mcq_df, trial_dir)

In [None]:
# Repeats the mcq_utils import from the dataset-loading cell; it is harmless.
from mcq import mcq_utils

# Evaluate the saved answers. `result_dir` is the root folder under which the
# trial loop placed its per-trial 'trial_<n>' directories.
# NOTE(review): what evaluate_result returns or prints is not visible from
# this notebook — confirm in mcq_utils.
mcq_utils.evaluate_result(result_dir)