In [1]:
import os

# TensorFlow's native (C++) logger reads TF_CPP_MIN_LOG_LEVEL when the library
# is first loaded, so the variable has to be in the environment *before*
# `import tensorflow` -- setting it afterwards does not silence import-time logs.
# Levels: 0 = show everything, 1 = hide INFO, 2 = also hide WARNING,
# 3 = also hide ERROR.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# By default load_dotenv() does not override variables that are already set,
# so the log level chosen above is kept.
load_dotenv()

# Restrict TensorFlow's Python-side logger to ERROR and above
# (e.g. hides the "TensorRT not found" warning).
tf.get_logger().setLevel('ERROR')

In [2]:
from datasets import load_dataset

# Fetch the single cloze benchmark CSV from the ClimaQA dataset repository.
# load_dataset() returns a mapping of splits; the rows are taken from its
# 'train' entry, and that split is what `dataset` refers to from here on.
dataset = load_dataset(
    'UCSD-GENIE/ClimaQA',
    data_files="climaqa_gold/cloze/cloze_benchmark.csv",
)['train']

# pandas view of the benchmark, used by the cells below.
df = dataset.to_pandas()

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from cloze import cloze_utils
from cloze.cloze_agent import ClozeAgent
from cloze.cloze_fewshot_agent import ClozeFewShotAgent
from cloze.cloze_rag_agent import ClozeRagAgent
from llm.openai_llm import OpenAIILLM
from llm.together_ai_llm import TogetherAILLM
import random

# Seed used to pick the demo question. A fixed value makes
# "Restart Kernel -> Run All" show the same question every time;
# set it to None to draw a different question on each run.
QUESTION_SEED = 42

# Prompting strategy shared by every agent built below. Pick one of:
#   ClozeAgent        - default LLM agents
#   ClozeFewShotAgent - LLM agents that answer with few-shot prompting
#   ClozeRagAgent     - LLM agents that answer with RAG prompting
#                       (need to create a chroma vectordb for these to work)
AGENT_CLASS = ClozeAgent

# (LLM wrapper class, model name) for every model under comparison.
MODEL_SPECS = [
    (OpenAIILLM, 'gpt-3.5-turbo'),
    (OpenAIILLM, 'gpt-4o'),
    (TogetherAILLM, 'llama3-70b'),
    (TogetherAILLM, 'mixtral-8x22b'),
    (TogetherAILLM, 'gemma-27b'),
]

# A private Random instance keeps the draw reproducible without touching the
# global random state. randrange(len(df)) covers the same 0..len(df)-1 range
# as the usual randint form and raises ValueError when df is empty.
ind = random.Random(QUESTION_SEED).randrange(len(df))

# `ind` is a row position, so index positionally with .iloc instead of relying
# on the DataFrame's index labels happening to be 0..n-1.
question = df['Question'].iloc[ind]
reference_answer = df['Answer'].iloc[ind]

# One agent per model, all using the selected prompting strategy.
cloze_agents = [
    AGENT_CLASS(llm_class(model_name))
    for llm_class, model_name in MODEL_SPECS
]

cloze_utils.get_results_for_question(cloze_agents, question, reference_answer)

Acid digestion of filter or impactor samples followed by inductively coupled <MASK> analysis can provide concentrations of metals in dust particles for further analysis.
Reference Answer: plasma

gpt-3.5-turbo: plasma
gpt-4o: plasma
llama3-70b: plasma
mixtral-8x22b: plasma
gemma-27b: plasma


In [None]:
import os

# Root folder for the outputs; each trial gets its own sub-folder below it.
result_dir = 'results/cloze'

# Number of full passes over the benchmark.
num_trials = 3

for i in range(num_trials):
    # Trials are numbered from 1 in both the folder name and the log line.
    trial_folder = f'trial_{i+1}'
    trial_dir = os.path.join(result_dir, trial_folder)
    print(f'\nRunning trial {i+1}\n')

    # Every agent answers the whole DataFrame once per trial.
    for agent in cloze_agents:
        print(f'Answering with {agent.name}')
        cloze_utils.generate_and_save_answer(agent, df, trial_dir)

In [None]:
from cloze import cloze_utils

# Evaluate whatever is stored under `result_dir` (set in the trial-generation
# cell above, which must have been run first).
# NOTE(review): presumably this scores the per-trial answers saved by
# generate_and_save_answer -- confirm against cloze_utils.evaluate_result.
cloze_utils.evaluate_result(result_dir)