## Building a BATTLE Eval

In [421]:
# Run configuration.

# True starts a fresh battle history; False resumes from the saved dataset.
new = True

# Seed conversation used for the very first battle of a fresh run.
initial_message = [dict(role="user", content="")]

# How the next round of candidates is built. Methods: dense, only-new
battles_generation = "dense"

# Which roles candidates are sent as. Methods: system-user, system, user
roles = "system-user"

# git-lfs setup commands, left disabled:
#!cd evals
#!git lfs fetch --all
#!git lfs pull

In [422]:
import os
from openai import OpenAI
import pandas as pd
import yaml
#import datetime
#from dotenv import load_dotenv

#load_dotenv()  # Load environment variables from .env file

#api_key = os.environ.get("OPENAI_API_KEY")
# Shared API client; used further down to generate the first champion response.
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment — confirm.
client = OpenAI()

# Install Evals if you haven't already
# %pip install -e ../.
# pip install --upgrade openai
# %pip install pandas


In [423]:
# Paths. Assuming this notebook is in examples/
notebook_dir = os.getcwd()
evals_path = os.path.join(notebook_dir, "..", "evals")

# Registry entry describing the battles eval.
registry_path = os.path.join(evals_path, "registry", "evals", "battles.yaml")

# Samples file consumed by the eval (its directory is created on demand).
battles_data_dir = os.path.join(evals_path, "registry", "data", "battles")
os.makedirs(battles_data_dir, exist_ok=True)
data_path = os.path.join(battles_data_dir, "samples.jsonl")

# Record file written by each eval run: <cwd>/logs/logs
logs_dir = os.path.join(notebook_dir, "logs")
os.makedirs(logs_dir, exist_ok=True)
json_logs_path = os.path.join(logs_dir, "logs")

# Accumulated battle history.
df_path = os.path.join(evals_path, "evallogs", "df")
os.makedirs(df_path, exist_ok=True)
dataset_path = os.path.join(df_path, "dataset")

In [424]:
# Registry yaml
# Two entries are written: the "battles" alias, which points at a concrete
# eval id, and the definition of that eval itself.

registry_yaml = {
    "battles": {
        "id": "battles.test.v1",
        "metrics": ["accuracy"],
    },
    "battles.test.v1": {
        "class": "evals.elsuite.modelgraded.classify:ModelBasedClassify",
        "args": {
            "samples_jsonl": "battles/samples.jsonl",
            "eval_type": "cot_classify",
            "modelgraded_spec": "battle",
        },
    },
}

# Overwrites any existing battles.yaml at registry_path.
with open(registry_path, "w") as f:
    yaml.dump(registry_yaml, f)

In [425]:
# Initial data
# Either start a fresh battle history or resume from the saved one, and
# establish the current champion (best_message / best_response).
if new:
    dataset = pd.DataFrame(
        (),
        columns=['Instruction1', 'Response1', 'Instruction2', 'Response2', 'Sampled',
                 'Choice', 'Data'],
    )
    best_message = initial_message
    # The first champion response is the model's answer to the seed message.
    best_response = client.chat.completions.create(
        messages=initial_message,
        model="gpt-3.5-turbo",
    ).choices[0].message.content
else:
    with open(dataset_path, "r") as f:
        dataset = pd.read_json(f, lines=True)
    last_battle = dataset.iloc[-1]
    # Mirror the battle loop: the candidate (side 1) takes over only on an
    # explicit "Yes". Any other verdict, including an answer that is neither
    # "Yes" nor "No", leaves side 2 as the champion.
    if last_battle["Choice"] == "Yes":
        best_content = last_battle["Instruction1"]
        best_response = last_battle["Response1"]
    else:
        best_content = last_battle["Instruction2"]
        best_response = last_battle["Response2"]
    # The saved rows carry no role column, so a resumed champion is always
    # sent as a user message.
    best_message = [{"role": "user", "content": best_content}]

# Queue state for the battle loop.
candidate_messages = [best_message]
next_candidate_messages = []
generation_distance = 0


In [426]:
def battle(best_message, best_response, candidate_message):
    """Run one model-graded battle: candidate message vs. the current best.

    Writes a one-sample JSONL file to ``data_path``, runs the ``battles`` eval
    through the ``oaieval`` CLI, then parses the recorded log to recover both
    responses, the sampled grader output and the final choice.

    Args:
        best_message: Current champion, a list whose first item is a dict with
            a "content" key.
        best_response: The champion's already-known response; passed to the
            eval as ``completion2``.
        candidate_message: Challenger, same structure as ``best_message``.

    Returns:
        A one-row DataFrame with columns Instruction1, Response1,
        Instruction2, Response2, Sampled, Choice and Data, where Data holds
        the whole parsed log as a dict.
    """

    # Only side 2 comes with a completion.
    # NOTE(review): presumably the eval samples the completion for input1 itself — confirm.
    dataset = [{"input1": candidate_message, "input2": best_message, "completion2":best_response}]

    df = pd.DataFrame(dataset)
    df.to_json(data_path, orient="records", lines=True)

    # IPython shell escape (not plain Python). The record path is relative to
    # the shell's working directory, while json_logs_path below was built from
    # os.getcwd(); the two match only while the working directory is unchanged.
    !oaieval gpt-3.5-turbo battles --record_path logs/logs
    
    # From here on `df` is the eval log, one JSON record per line.
    with open(json_logs_path, "r") as f:
        df = pd.read_json(f, lines=True)

    #current_time = datetime.datetime.now()
    #formatted_time = current_time.strftime("%Y%m%d_%H%M%S")
    #df.to_json(os.path.join(df_path, formatted_time), lines=True, orient="records")

    instruction1 = candidate_message[0]["content"]
    instruction2 = best_message[0]["content"]

    # NOTE(review): assumes the second-to-last log record is the grader's
    # sampling event and the last one carries the final choice — confirm
    # against the evals log format.
    battle_prompt_content = df["data"].iloc[-2]["prompt"][0]["content"]
    # Cut each response out of the grader prompt using the template's section
    # markers, then undo the escaped quotes and newlines.
    response1 = battle_prompt_content.split("\n[Response 1]\n",)[1].split("\n\n[Instruction 2]\n")[0]
    response1 = response1.replace("\\'", "'").replace("\\n", "\n")
    response2 = battle_prompt_content.split("\n[Response 2]\n",)[1].split("\n\n\nIs the first response better than the second?")[0]
    response2 = response2.replace("\\'", "'").replace("\\n", "\n")
    print(f"Response1: {response1}")
    #print(f"response2: {response2}")

    # First sampled output from that same record (presumably the grader's reasoning text).
    sampled = df["data"].iloc[-2]["sampled"][0]

    choice = df["data"].iloc[-1]["choice"]

    # 'Data' starts as an empty placeholder and is filled through .at so the
    # whole log dict ends up in a single cell.
    data = {'Instruction1': instruction1, 'Response1': response1, 'Instruction2': instruction2, 'Response2': response2, 'Sampled': sampled, 'Choice': choice, 'Data': {}}
    data = pd.DataFrame([data])
    data.at[0, "Data"] = df.to_dict()
    return data

In [427]:
def switch_system_and_user(message):
    """Return a copy of a single-turn message with its role flipped.

    A "system" message becomes a "user" message; any other role becomes
    "system". The content is carried over unchanged and the input is not
    modified.

    Args:
        message: A list whose first item is a dict with "role" and "content".

    Returns:
        A new message in the same ``[{"role": ..., "content": ...}]`` form,
        so it can be queued as a candidate like any other message. (The
        previous version wrapped the result in an extra list, which broke
        ``message[0]["content"]`` lookups on the returned value.)
    """
    first_turn = message[0]
    flipped_role = "user" if first_turn["role"] == "system" else "system"
    return [{"role": flipped_role, "content": first_turn["content"]}]

In [437]:
def find_parents(content, dataset):
    """Return the distinct instructions whose recorded Response1 equals ``content``.

    ``dataset`` is anything ``pd.DataFrame`` accepts that yields the columns
    "Instruction1" and "Response1". The result keeps first-appearance order.
    """
    battles = pd.DataFrame(dataset)
    produced_content = battles["Response1"] == content
    return battles.loc[produced_content, "Instruction1"].unique()

In [438]:
def find_children(content, dataset):
    """Return the distinct responses recorded for the instruction ``content``.

    ``dataset`` is anything ``pd.DataFrame`` accepts that yields the columns
    "Instruction1" and "Response1". The result keeps first-appearance order.
    """
    battles = pd.DataFrame(dataset)
    asked_content = battles["Instruction1"] == content
    return battles.loc[asked_content, "Response1"].unique()

In [430]:
def list_candidate_messages(dataset, best_message, generation_distance, role_mode=None):
    """List candidate messages within ``generation_distance`` steps of the best message.

    Starting from the best message's content, each step adds the parents
    (instructions that produced a content) and the children (responses
    generated from it) of every content found in the previous step. Each
    distinct content is then wrapped as a message for the selected role(s).

    Args:
        dataset: Battle history passed through to find_parents/find_children.
        best_message: Current champion; only ``best_message[0]["content"]``
            is read.
        generation_distance: Number of expansion steps; 0 yields only the
            best message's own content.
        role_mode: "system-user", "system" or "user". Defaults to the
            notebook-level ``roles`` setting when omitted.

    Returns:
        A list of single-turn messages, ordered by first discovery; for
        "system-user" the user variant of each content precedes the system one.

    Raises:
        ValueError: If the role mode is not one of the three known values.
    """
    mode = roles if role_mode is None else role_mode
    if mode not in ("system-user", "system", "user"):
        raise ValueError(f"unknown roles mode: {mode!r}")

    best_content = best_message[0]["content"]
    # Dict keys double as an insertion-ordered set of every content seen so far.
    seen_contents = {best_content: None}
    frontier = [best_content]

    for i in range(generation_distance):
        print (f"starting level {i}, generation distance {generation_distance}")
        newly_found = []
        for content in frontier:
            neighbours = list(find_parents(content, dataset))
            neighbours.extend(find_children(content, dataset))
            for neighbour in neighbours:
                # Skip what is already known so no content is expanded twice.
                if neighbour not in seen_contents:
                    seen_contents[neighbour] = None
                    newly_found.append(neighbour)

        print (f"behind for loops")
        frontier = newly_found
        print(f"endind level {i}")

    # Explicit membership tests: `roles == "system-user" or "user"` was always
    # truthy, so the setting used to be ignored and both roles were emitted.
    include_user = mode in ("system-user", "user")
    include_system = mode in ("system-user", "system")

    messages = []
    for content in seen_contents:
        if include_user:
            messages.append([{"role":"user","content":content}])
        if include_system:
            messages.append([{"role":"system","content":content}])
    return messages

In [495]:
# One round of battles: every queued candidate challenges the current champion.
for candidate_message in candidate_messages:
    data = battle(best_message, best_response, candidate_message)
    dataset = pd.concat([dataset, data], ignore_index=True)
    # Persist after every battle so the history on disk is always current.
    dataset.to_json(dataset_path, lines=True, orient="records")
    candidate_response = data["Response1"].iloc[0]
    if data["Choice"].iloc[0] == "Yes":
        best_message = candidate_message
        best_response = candidate_response
        print(f"New best message:{best_message}")
        if battles_generation == "dense":
            # A new champion restarts the neighbourhood search around it.
            generation_distance = 0
    # Single quotes inside the f-string: reusing the outer quote character
    # there is a SyntaxError on Python versions before 3.12.
    print(f"Instruction1: {candidate_message[0]['content']}")

    if battles_generation == "only-new":
        # The candidate's response becomes a candidate for the next round.
        new_message = [{"role": candidate_message[0]["role"], "content": candidate_response}]
        if new_message not in next_candidate_messages:
            next_candidate_messages.append(new_message)
            if roles == "system-user":
                next_candidate_messages.append(switch_system_and_user(new_message))

# Prepare the queue for the next run of this cell.
if battles_generation == "only-new":
    candidate_messages = next_candidate_messages
    next_candidate_messages = []
if battles_generation == "dense":
    candidate_messages = list_candidate_messages(dataset, best_message, generation_distance)
    generation_distance += 1
print(f"all done, generation distance: {generation_distance}, number of candidate messages: {len(candidate_messages)}")

[2024-08-09 12:28:16,119] [registry.py:271] Loading registry from /Users/janvotava/Desktop/evals/evals/registry/evals
[2024-08-09 12:28:17,143] [registry.py:271] Loading registry from /Users/janvotava/.evals/evals
[2024-08-09 12:28:17,145] [oaieval.py:215] [1;35mRun started: 240809102817SPEGF3KD[0m
[2024-08-09 12:28:17,147] [registry.py:271] Loading registry from /Users/janvotava/Desktop/evals/evals/registry/modelgraded
[2024-08-09 12:28:17,286] [registry.py:271] Loading registry from /Users/janvotava/.evals/modelgraded
[2024-08-09 12:28:17,286] [data.py:94] Fetching /Users/janvotava/Desktop/evals/evals/registry/data/battles/samples.jsonl
[2024-08-09 12:28:17,286] [eval.py:36] Evaluating 1 samples
[2024-08-09 12:28:17,382] [eval.py:144] Running in threaded mode with 10 threads!
100%|█████████████████████████████████████████████| 1/1 [00:06<00:00,  6.36s/it]
[2024-08-09 12:28:23,764] [oaieval.py:275] Found 2/2 sampling events with usage data
[2024-08-09 12:28:23,764] [oaieval.py:283] 

In [488]:
# Show the 20 most recent battles.
dataset.tail(20)

Unnamed: 0,Instruction1,Response1,Instruction2,Response2,Sampled,Choice,Data
179,One technology trend that I find particularly ...,\n\nAnother technology trend that I am fascina...,I completely agree with you! The advancements ...,\n\nI couldn't agree more! The potential for A...,1. Response 1 discusses the technology trend o...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
180,One technology trend that I find particularly ...,AI and machine learning have already made sign...,I completely agree with you! The advancements ...,\n\nI couldn't agree more! The potential for A...,1. Response 1 provides a more detailed and com...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
181,One technology trend that I find particularly ...,"I agree, the advancements in AI and machine le...",I completely agree with you! The advancements ...,\n\nI couldn't agree more! The potential for A...,1. Response 1 discusses the use of AI in healt...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
182,One technology trend that I find particularly ...,AI and machine learning have the potential to ...,I completely agree with you! The advancements ...,\n\nI couldn't agree more! The potential for A...,1. Response 1 provides a detailed explanation ...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
183,AI and machine learning have the potential to ...,AI and machine learning have the potential to ...,I completely agree with you! The advancements ...,\n\nI couldn't agree more! The potential for A...,1. Response 1 focuses on the potential of AI a...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
184,AI and machine learning have the potential to ...,AI and machine learning have the potential to ...,I completely agree with you! The advancements ...,\n\nI couldn't agree more! The potential for A...,1. Both responses acknowledge the potential of...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
185,\nAI and machine learning have the potential t...,\n\nIt is also important to consider the poten...,I completely agree with you! The advancements ...,\n\nI couldn't agree more! The potential for A...,1. Response 1 discusses the importance of addr...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
186,\nAI and machine learning have the potential t...,"\nBy leveraging AI and machine learning, indus...",I completely agree with you! The advancements ...,\n\nI couldn't agree more! The potential for A...,1. Response 1 focuses more on the practical ap...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
187,AI has the potential to revolutionize industri...,\n\nAI has already shown great promise in vari...,I completely agree with you! The advancements ...,\n\nI couldn't agree more! The potential for A...,1. Response 1 provides a more detailed explana...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
188,AI has the potential to revolutionize industri...,\n\nAI has already shown great promise in fiel...,I completely agree with you! The advancements ...,\n\nI couldn't agree more! The potential for A...,1. Response 1 discusses the potential of AI to...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...


In [433]:
from IPython.display import display_markdown

In [494]:
# Render each field of the most recent winning battle as markdown,
# leaving out the bulky raw-log column.
winning_battles = dataset.loc[dataset["Choice"] == "Yes"]
latest_win = winning_battles.iloc[-1].drop("Data")
for entry in latest_win:
    display_markdown(entry, raw=True)

I completely agree with you! The advancements in artificial intelligence and machine learning are truly remarkable and have the potential to transform various industries. The ability of AI to analyze data at a scale and speed that humans cannot match opens up new possibilities for innovation and problem-solving.

In healthcare, AI is being used to improve diagnostics, personalize treatment plans, and streamline administrative tasks. In finance, AI-powered algorithms are enhancing fraud detection, risk assessment, and investment strategies. In transportation, autonomous vehicles are becoming a reality, thanks to advancements in machine learning.

The integration of AI with other technologies like IoT, blockchain, and 5G is creating a powerful ecosystem that can drive even more innovation. For example, AI-powered IoT devices can collect and analyze real-time data to optimize processes and improve efficiency. Blockchain technology can enhance the security and transparency of AI algorithms and data.

As AI continues to evolve, it is important to consider ethical and societal implications, such as bias in algorithms, data privacy, and job displacement. By addressing these challenges proactively, we can ensure that AI technologies are developed and deployed responsibly.

Overall, the future of AI and machine learning is incredibly exciting, and I look forward to seeing how these technologies will continue to shape our world in the years to come.



I couldn't agree more! The potential for AI and machine learning to revolutionize industries and improve our daily lives is truly remarkable. The possibilities seem endless, from personalized healthcare treatments to more efficient transportation systems to enhanced cybersecurity measures.

However, as you mentioned, it is crucial to address the ethical and societal implications of these technologies. Ensuring that AI algorithms are unbiased and transparent, protecting individuals' privacy and data, and preparing for potential job displacement are all important considerations that must be taken into account as AI continues to advance.

Collaboration between industry leaders, policymakers, and ethicists will be key in navigating these challenges and ensuring that AI is developed and deployed in a responsible manner. By working together, we can harness the full potential of AI while also mitigating any potential risks.

I am also excited to see how AI will continue to evolve and shape our future. The possibilities are truly endless, and I believe that with careful consideration and collaboration, we can harness the power of AI to create a better world for all.

One technology trend that I find particularly interesting and exciting right now is the advancement of artificial intelligence (AI) and machine learning. The rapid progress in this field has the potential to revolutionize various industries, from healthcare and finance to transportation and entertainment. AI-powered applications are becoming more sophisticated, enabling tasks such as natural language processing, image recognition, and autonomous decision-making.

Additionally, the combination of AI with other emerging technologies like Internet of Things (IoT), blockchain, and 5G is creating new opportunities for innovation and disruption. The ability of AI to analyze vast amounts of data and derive valuable insights is reshaping how businesses operate and how individuals interact with technology.

Overall, the continued development of AI and machine learning holds great promise for solving complex problems, improving efficiency, and enhancing user experiences across a wide range of applications. It is a trend that I believe will continue to shape the future of technology in exciting ways.

I completely agree with you! The advancements in artificial intelligence and machine learning are truly remarkable and have the potential to transform various industries. The ability of AI to analyze data at a scale and speed that humans cannot match opens up new possibilities for innovation and efficiency.

In healthcare, AI is being used to improve diagnostics, personalize treatment plans, and streamline administrative tasks. In finance, AI-powered algorithms are revolutionizing trading strategies, risk management, and fraud detection. In transportation, autonomous vehicles are becoming a reality, thanks to advancements in machine learning.

The integration of AI with other technologies like IoT, blockchain, and 5G is creating a powerful ecosystem that can drive even more innovation. For example, AI-powered IoT devices can collect and analyze data in real-time to optimize processes and improve decision-making. Blockchain technology can enhance the security and transparency of AI algorithms, while 5G networks can provide the high-speed connectivity needed for AI applications to function effectively.

Overall, the possibilities that AI and machine learning offer are truly exciting, and I look forward to seeing how these technologies continue to evolve and shape the future of technology.

1. Both responses show a strong understanding and appreciation for the advancements in artificial intelligence and machine learning.
2. Response 1 goes into more detail about the specific applications of AI in healthcare, finance, and transportation, showcasing a deeper understanding of the topic.
3. Response 1 also addresses the ethical and societal implications of AI, showing a more comprehensive view of the potential challenges and considerations.
4. Response 1 emphasizes the importance of collaboration between industry leaders, policymakers, and ethicists in navigating these challenges, showcasing a proactive approach to responsible AI development.
5. Response 1 concludes with a hopeful and optimistic tone about the future of AI and machine learning, expressing excitement for the possibilities ahead.

Based on the above reasoning, the first response is better than the second.

Yes

Yes

In [490]:
# Inspect the candidates currently queued for the next round of battles.
candidate_messages

[[{'role': 'user',
   'content': 'I completely agree with you! The advancements in artificial intelligence and machine learning are truly remarkable and have the potential to transform various industries. The ability of AI to analyze data at a scale and speed that humans cannot match opens up new possibilities for innovation and problem-solving.\n\nIn healthcare, AI is being used to improve diagnostics, personalize treatment plans, and streamline administrative tasks. In finance, AI-powered algorithms are enhancing fraud detection, risk assessment, and investment strategies. In transportation, autonomous vehicles are becoming a reality, thanks to advancements in machine learning.\n\nThe integration of AI with other technologies like IoT, blockchain, and 5G is creating a powerful ecosystem that can drive even more innovation. For example, AI-powered IoT devices can collect and analyze real-time data to optimize processes and improve efficiency. Blockchain technology can enhance the secu