## Building a BATTLE Eval

In [421]:
# Run configuration read by the cells below.
# new: True starts from an empty dataset seeded with `initial_message`;
#      False resumes from the dataset previously saved at `dataset_path`.
new = True
# Seed conversation used as the first "best" message when new is True.
initial_message = [{"role": "user", "content": ""}]
# How the next round of candidates is chosen after a round of battles:
#   "dense"    - rebuild candidates around the current best via list_candidate_messages
#   "only-new" - battle only the responses produced during the round just finished
battles_generation = "dense"      # methods: dense, only-new
# Which chat role(s) a candidate text is meant to be sent under.
roles = "system-user"             # methods: system-user, system, user

#!cd evals
#!git lfs fetch --all
#!git lfs pull

In [422]:
import os
from openai import OpenAI
import pandas as pd
import yaml
#import datetime
#from dotenv import load_dotenv

#load_dotenv()  # Load environment variables from .env file

#api_key = os.environ.get("OPENAI_API_KEY")
# Shared OpenAI client; used below to generate the first best_response.
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment - confirm.
client = OpenAI()

# Install Evals if you haven't already
# %pip install -e ../.
# pip install --upgrade openai
# %pip install pandas


In [423]:
# Filesystem layout. Assumes this notebook is run from examples/, next to evals/.
evals_path = os.path.join(os.getcwd(), "..", "evals")

# Registry entry describing the battles eval.
registry_path = os.path.join(evals_path, "registry", "evals", "battles.yaml")

# Samples file consumed by the eval (its folder is created on demand).
battles_data_dir = os.path.join(evals_path, "registry", "data", "battles")
os.makedirs(battles_data_dir, exist_ok=True)
data_path = os.path.join(battles_data_dir, "samples.jsonl")

# Record file written by the eval run: <cwd>/logs/logs
logs_dir = os.path.join(os.getcwd(), "logs")
os.makedirs(logs_dir, exist_ok=True)
json_logs_path = os.path.join(logs_dir, "logs")

# Accumulated battle history.
df_path = os.path.join(evals_path, "evallogs", "df")
os.makedirs(df_path, exist_ok=True)
dataset_path = os.path.join(df_path, "dataset")

In [424]:
# Registry yaml: declare the "battles" eval and the model-graded spec it points to,
# then write both entries to the registry file in one go.
registry_yaml = {
    "battles": {
        "id": "battles.test.v1",
        "metrics": ["accuracy"],
    },
    "battles.test.v1": {
        "class": "evals.elsuite.modelgraded.classify:ModelBasedClassify",
        "args": {
            "samples_jsonl": "battles/samples.jsonl",
            "eval_type": "cot_classify",
            "modelgraded_spec": "battle",
        },
    },
}

with open(registry_path, "w") as registry_file:
    yaml.dump(registry_yaml, registry_file)

In [425]:
# Initial state: either a fresh, empty battle history or the one saved on disk.
if new:
    # Empty history with the columns that battle() produces.
    dataset = pd.DataFrame(
        (),
        columns=[
            'Instruction1', 'Response1', 'Instruction2', 'Response2',
            'Sampled', 'Choice', 'Data',
        ],
    )
    best_message = initial_message
    # The seed message's own completion is the first response to beat.
    seed_completion = client.chat.completions.create(
        messages=initial_message,
        model="gpt-3.5-turbo",
    )
    best_response = seed_completion.choices[0].message.content
    few_shot_prompt = []
else:
    with open(dataset_path, "r") as dataset_file:
        dataset = pd.read_json(dataset_file, lines=True)
    # The winner of the most recent battle is the current best:
    # Choice "No" means side 2 won that battle, anything else means side 1 did.
    last_battle = dataset.iloc[-1]
    if last_battle["Choice"] == "No":
        best_content = last_battle["Instruction2"]
        best_response = last_battle["Response2"]
    else:
        best_content = last_battle["Instruction1"]
        best_response = last_battle["Response1"]
    best_message = [{"role": "user", "content": best_content}]

# Round bookkeeping: start by battling the best message against itself.
candidate_messages = [best_message]
next_candidate_messages = []
generation_distance = 0


In [426]:
def battle(best_message, best_response, candidate_message):
    """Run one battle between a candidate message and the current best one.

    Writes a single-sample file to ``data_path``, runs the ``battles`` eval
    through the ``oaieval`` command line, then reads the recorded events back
    from ``json_logs_path`` and extracts the two responses and the verdict.

    Args:
        best_message: Chat messages of the current best; stored as ``input2``.
        best_response: Completion already obtained for ``best_message``;
            stored as ``completion2``.
        candidate_message: Chat messages of the challenger; stored as ``input1``.

    Returns:
        A one-row DataFrame with the columns Instruction1, Response1,
        Instruction2, Response2, Sampled, Choice and Data, where Data holds
        the whole recorded log as a dict.
    """

    # One sample: the candidate is side 1, the current best (with its known completion) is side 2.
    dataset = [{"input1": candidate_message, "input2": best_message, "completion2":best_response}]

    df = pd.DataFrame(dataset)
    df.to_json(data_path, orient="records", lines=True)

    # IPython shell escape. The record path is relative to the working directory,
    # while json_logs_path is <cwd>/logs/logs, so both name the same file only
    # while the working directory is unchanged.
    !oaieval gpt-3.5-turbo battles --record_path logs/logs
    
    # Reuse `df` for the recorded events (one JSON object per line).
    with open(json_logs_path, "r") as f:
        df = pd.read_json(f, lines=True)

    #current_time = datetime.datetime.now()
    #formatted_time = current_time.strftime("%Y%m%d_%H%M%S")
    #df.to_json(os.path.join(df_path, formatted_time), lines=True, orient="records")

    # Only the last entry of each conversation is kept as the "instruction".
    instruction1 = candidate_message[-1]["content"]
    instruction2 = best_message[-1]["content"]

    # The second-to-last record carries the prompt that embeds both responses
    # (presumably the grader's sampling event - TODO confirm against the log format).
    battle_prompt_content = df["data"].iloc[-2]["prompt"][0]["content"]
    # Cut each response out of the prompt text using the section markers around it.
    response1 = battle_prompt_content.split("\n[Response 1]\n",)[1].split("\n\n[Instruction 2]\n")[0]
    # Turn literal backslash escapes back into a quote / a newline.
    response1 = response1.replace("\\'", "'").replace("\\n", "\n")
    response2 = battle_prompt_content.split("\n[Response 2]\n",)[1].split("\n\n\nIs the first response better than the second?")[0]
    response2 = response2.replace("\\'", "'").replace("\\n", "\n")
    print(f"Response1: {response1}")
    #print(f"response2: {response2}")

    # First sampled text of that same record.
    sampled = df["data"].iloc[-2]["sampled"][0]

    # The last record holds the verdict under "choice".
    choice = df["data"].iloc[-1]["choice"]

    data = {'Instruction1': instruction1, 'Response1': response1, 'Instruction2': instruction2, 'Response2': response2, 'Sampled': sampled, 'Choice': choice, 'Data': {}}
    data = pd.DataFrame([data])
    # Assign the log dict to the single cell after the frame is built (placeholder {} above).
    data.at[0, "Data"] = df.to_dict()
    return data

In [427]:
def switch_system_and_user(message):
    """Return a copy of a single-entry message with its role flipped.

    A "system" entry becomes a "user" entry; any other role becomes "system".
    Only the first entry of ``message`` is considered, and the input is not
    modified.

    Args:
        message: A chat message in the notebook's format, i.e. a list whose
            first element is a dict with "role" and "content" keys.

    Returns:
        A new message in the same flat format (``[{"role": ..., "content": ...}]``)
        so it can be used directly as a candidate message. An empty input
        yields an empty list.
    """
    if not message:
        return []
    first_entry = message[0]
    flipped_role = "user" if first_entry["role"] == "system" else "system"
    # Return the message itself, not a list wrapped around it: callers index
    # candidates as message[0]["content"].
    return [{"role": flipped_role, "content": first_entry["content"]}]

In [437]:
def find_parents(content, dataset):
    """Return the distinct instructions whose recorded response equals ``content``.

    ``dataset`` is anything ``pd.DataFrame`` accepts that has the columns
    "Instruction1" and "Response1". The result is the array produced by
    ``Series.unique()``, in order of first appearance.
    """
    battles = pd.DataFrame(dataset)
    produced_content = battles["Response1"] == content
    return battles.loc[produced_content, "Instruction1"].unique()

In [438]:
def find_children(content, dataset):
    """Return the distinct responses recorded for the instruction ``content``.

    ``dataset`` is anything ``pd.DataFrame`` accepts that has the columns
    "Instruction1" and "Response1". The result is the array produced by
    ``Series.unique()``, in order of first appearance.
    """
    battles = pd.DataFrame(dataset)
    asked_content = battles["Instruction1"] == content
    return battles.loc[asked_content, "Response1"].unique()

In [430]:
def list_candidate_messages(dataset, best_message, generation_distance, roles=None):
    """List candidate messages within a number of generations of the best one.

    Starting from the content of the last entry of ``best_message``, walks the
    battle history ``generation_distance`` levels outwards. At each level every
    content is expanded to its parents (instructions that produced it) and its
    children (responses it produced). Each distinct content found is then
    wrapped as a single-entry chat message for every requested role.

    Args:
        dataset: Battle history accepted by find_parents / find_children.
        best_message: Current best chat message (list of role/content dicts).
        generation_distance: Number of levels to expand; 0 yields only the
            best content itself.
        roles: "system-user", "user" or "system". When omitted, the
            notebook-level ``roles`` setting is used.

    Returns:
        A list of messages, each ``[{"role": ..., "content": ...}]``. Contents
        keep their order of first discovery; for "system-user" the user
        variant precedes the system variant of the same content.

    Raises:
        ValueError: If ``roles`` is not one of the three supported values.
    """
    if roles is None:
        # Fall back to the notebook-wide configuration value.
        roles = globals().get("roles", "system-user")
    if roles not in ("system-user", "user", "system"):
        raise ValueError(f"unsupported roles setting: {roles!r}")

    best_content = best_message[-1]["content"]
    seen = {best_content}
    list_of_contents = [best_content]
    frontier = [best_content]

    for i in range(generation_distance):
        print (f"starting level {i}, generation distance {generation_distance}")
        # Only contents first reached on this level are expanded on the next
        # one; everything reached earlier has already been expanded.
        next_frontier = []
        for content in frontier:
            neighbours = [*find_parents(content, dataset), *find_children(content, dataset)]
            for neighbour in neighbours:
                if neighbour not in seen:
                    seen.add(neighbour)
                    list_of_contents.append(neighbour)
                    next_frontier.append(neighbour)

        print (f"behind for loops")
        frontier = next_frontier
        print(f"endind level {i}")

    # Compare against each allowed value explicitly: `x == "a" or "b"` is
    # always truthy because the bare string "b" is truthy.
    include_user = roles in ("system-user", "user")
    include_system = roles in ("system-user", "system")

    messages = []
    for content in list_of_contents:
        if include_user:
            messages.append([{"role":"user","content":content}])
        if include_system:
            messages.append([{"role":"system","content":content}])
    return messages

In [601]:
# One round: battle every queued candidate against the current best message,
# then prepare the candidate queue for the next run of this cell.
for candidate_message in candidate_messages:
    #if few_shot_prompts == True:
        #candidate_message = few_shot_prompt.extend(candidate_message)
    data = battle(best_message, best_response, candidate_message)
    dataset = pd.concat([dataset, data],ignore_index=True)
    # Persist after every battle; this is the file read back when new is False.
    dataset.to_json(dataset_path, lines=True, orient="records")
    if data["Choice"].iloc[0] == "Yes":
        # The candidate won: it becomes the message to beat.
        best_message = candidate_message
        best_response = data["Response1"].iloc[0]
        print(f"New best message:{best_message}")
        if battles_generation == "dense":
            # Restart the neighbourhood search around the new best.
            generation_distance = 0
        #if few_shot_prompts == True:
            #few_shot_prompt.append([{"role": "system", "content": best_message, "name": "example_user"}])
            #few_shot_prompt.append([{"role": "system", "content": best_response, "name": "example_assistant"}])
    # Single quotes inside the replacement field keep this line valid on
    # Python versions older than 3.12.
    print(f"Instruction1: {candidate_message[0]['content']}")

    if battles_generation == "only-new":
        # The candidate's response becomes a candidate for the next round.
        new_message = [{"role": candidate_message[0]["role"], "content":data["Response1"].iloc[0]}]
        if new_message not in next_candidate_messages:
            next_candidate_messages.append(new_message)
            if roles == "system-user":
                switched_message = switch_system_and_user(new_message)
                # Every candidate must be a flat list of message dicts; unwrap
                # the result if the helper returned it inside an extra list.
                if switched_message and isinstance(switched_message[0], list):
                    switched_message = switched_message[0]
                if switched_message not in next_candidate_messages:
                    next_candidate_messages.append(switched_message)

if battles_generation == "only-new":
    candidate_messages = next_candidate_messages
    next_candidate_messages = []
if battles_generation == "dense":
    #print("going to list candidate messages")
    candidate_messages = list_candidate_messages(dataset, best_message, generation_distance)
    # Widen the search by one generation for the following round.
    generation_distance += 1
    #print("candidate messages listed")
print(f"all done, generation distance: {generation_distance}, number of candidate messages: {len(candidate_messages)}")

[2024-08-09 18:03:05,622] [registry.py:271] Loading registry from /Users/janvotava/Desktop/evals/evals/registry/evals
[2024-08-09 18:03:06,429] [registry.py:271] Loading registry from /Users/janvotava/.evals/evals
[2024-08-09 18:03:06,431] [oaieval.py:215] [1;35mRun started: 240809160306TLH5DJXU[0m
[2024-08-09 18:03:06,432] [registry.py:271] Loading registry from /Users/janvotava/Desktop/evals/evals/registry/modelgraded
[2024-08-09 18:03:06,457] [registry.py:271] Loading registry from /Users/janvotava/.evals/modelgraded
[2024-08-09 18:03:06,458] [data.py:94] Fetching /Users/janvotava/Desktop/evals/evals/registry/data/battles/samples.jsonl
[2024-08-09 18:03:06,458] [eval.py:36] Evaluating 1 samples
[2024-08-09 18:03:06,471] [eval.py:144] Running in threaded mode with 10 threads!
100%|█████████████████████████████████████████████| 1/1 [00:08<00:00,  8.50s/it]
[2024-08-09 18:03:14,982] [oaieval.py:275] Found 2/2 sampling events with usage data
[2024-08-09 18:03:14,982] [oaieval.py:283] 

In [433]:
from IPython.display import display_markdown

In [602]:
# Render the most recent battle won by a candidate, one field per markdown
# block (the bulky "Data" column is left out).
winning_battles = dataset[dataset["Choice"]=="Yes"]
if winning_battles.empty:
    # Without this guard .iloc[-1] raises IndexError before any candidate has won.
    print("No battle has been won by a candidate yet.")
else:
    for entry in winning_battles.iloc[-1].drop("Data"):
        display_markdown(entry, raw=True)

Together, let us embrace the potential of AI to drive progress and create a more inclusive and equitable society. Let us prioritize collaboration, transparency, and accountability in the development and deployment of AI technologies. Let us ensure that AI is used to amplify human capabilities, foster creativity, and enable opportunities for all.

Let us engage in open dialogue and debate to address the ethical and social implications of AI, and to ensure that its benefits are shared equitably among all members of society. Let us advocate for policies and regulations that promote fairness, privacy, and security in the use of AI, and that safeguard against discrimination and bias.

Let us celebrate diversity and inclusion in the AI community, and strive to amplify the voices of underrepresented groups in shaping the future of AI. Let us foster a culture of continuous learning and improvement, where we challenge assumptions, learn from our mistakes, and adapt our practices to reflect the values of empathy, integrity, and respect for all.

Let us seize this moment to shape a future where AI serves as a powerful tool for advancing social good and building a more just and sustainable world. Let us stand united in our commitment to harness the transformative potential of AI for the benefit of all, and to create a future where technology is a force for positive change in the hands of a diverse and inclusive global community.

Together, let us work towards a future where AI is used responsibly and ethically to enhance human potential and create a more equitable society. Let us strive to build a world where AI is a force for good, empowering individuals and communities to thrive and succeed. Let us embrace the opportunities that AI presents, while also being mindful of the challenges and risks that come with it.

Let us commit to upholding the values of fairness, transparency, and accountability in the development and deployment of AI technologies. Let us ensure that AI is used in ways that respect human rights, protect privacy, and promote equality for all. Let us work together to address issues of bias, discrimination, and inequality in AI systems, and to create a more inclusive and diverse AI ecosystem.

Let us engage in meaningful conversations and collaborations across disciplines, sectors, and borders to shape the future of AI in a way that benefits everyone. Let us listen to diverse perspectives, learn from each other, and co-create solutions that reflect the needs and aspirations of all people. Let us build a future where AI is a tool for empowerment, innovation, and social progress.

Together, let us embrace the potential of AI to drive positive change and build a better world for future generations. Let us seize this opportunity to shape a future where AI is a force for good, and where technology is used to create a more just, inclusive, and sustainable society for all. Let us stand together in our commitment to harness the power of AI for the greater good, and to ensure that it serves the needs and interests of all people, now and in the years to come.



Let us work together to ensure that AI is used to benefit all members of society, regardless of their background or circumstances. Let us strive to create AI systems that are fair, unbiased, and inclusive, and that promote equality and justice for all.

Let us collaborate across disciplines and industries to harness the full potential of AI in addressing complex challenges such as poverty, education, and healthcare. Let us leverage AI to create innovative solutions that improve the quality of life for people around the world and contribute to sustainable development.

Let us invest in education and training to equip individuals with the skills and knowledge needed to thrive in an AI-driven world. Let us empower people to understand and engage with AI technologies, and to participate in shaping their impact on society.

Let us commit to ongoing research and development to advance the capabilities of AI and to ensure that it continues to evolve in a responsible and ethical manner. Let us remain vigilant in monitoring and addressing potential risks and challenges associated with AI, and in safeguarding against misuse and abuse.

Let us approach the future of AI with optimism and determination, knowing that together, we can harness its power for the greater good. Let us work together to build a future where AI is a force for positive change, where it enhances our collective well-being and contributes to a more prosperous and sustainable world for generations to come.

Together, let us embrace the potential of AI to drive progress and create a more inclusive and equitable society. Let us prioritize collaboration, transparency, and accountability in the development and deployment of AI technologies. Let us ensure that AI is used to amplify human capabilities, foster creativity, and enable opportunities for all.

Let us engage in open dialogue and debate to address the ethical and social implications of AI, and to ensure that its benefits are shared equitably among all members of society. Let us advocate for policies and regulations that promote fairness, privacy, and security in the use of AI, and that safeguard against discrimination and bias.

Let us celebrate diversity and inclusion in the AI community, and strive to amplify the voices of underrepresented groups in shaping the future of AI. Let us foster a culture of continuous learning and improvement, where we challenge assumptions, learn from our mistakes, and adapt our practices to reflect the values of empathy, integrity, and respect for all.

Let us seize this moment to shape a future where AI serves as a powerful tool for advancing social good and building a more just and sustainable world. Let us stand united in our commitment to harness the transformative potential of AI for the benefit of all, and to create a future where technology is a force for positive change in the hands of a diverse and inclusive global community.

1. Both responses address the importance of using AI responsibly and ethically to create a more equitable society.
2. Response 1 emphasizes the values of fairness, transparency, and accountability in the development and deployment of AI technologies.
3. Response 2 focuses on collaboration across disciplines and industries to harness the full potential of AI in addressing complex challenges.
4. Both responses advocate for policies and regulations that promote fairness, privacy, and security in the use of AI.
5. Response 1 highlights the need to address issues of bias, discrimination, and inequality in AI systems.
6. Response 2 emphasizes the importance of investing in education and training to equip individuals with the skills needed in an AI-driven world.
7. Both responses stress the importance of continuous learning and improvement in the AI community.
8. Response 1 mentions the values of empathy, integrity, and respect for all in shaping the future of AI.
9. Response 2 discusses the need for ongoing research and development to advance the capabilities of AI in a responsible and ethical manner.
10. Both responses express a commitment to harnessing the transformative potential of AI for the benefit of all and creating a more just and sustainable world.

Based on the detailed comparison, Response 1 is better than Response 2.

Yes

Yes

In [603]:
# Peek at the 20 most recent battles.
dataset.tail(20)

Unnamed: 0,Instruction1,Response1,Instruction2,Response2,Sampled,Choice,Data
983,"Together, let us embrace the potential of AI t...","Together, let us work towards a future where A...",\n\nLet us work together to ensure that AI is ...,"Together, let us embrace the potential of AI t...",1. Both responses address the importance of us...,Yes,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
984,"Together, let us embrace the potential of AI t...",Let us work together to ensure that AI is deve...,"Together, let us embrace the potential of AI t...","Together, let us work towards a future where A...",1. Both responses address the instructions pro...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
985,"\n\nTogether, let us shape a future where AI i...","\n\nBy working together towards these goals, w...","Together, let us embrace the potential of AI t...","Together, let us work towards a future where A...",1. Both responses address the instructions pro...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
986,"\n\nTogether, let us shape a future where AI i...",\n\nBy working together towards a shared visio...,"Together, let us embrace the potential of AI t...","Together, let us work towards a future where A...",1. Response 1 emphasizes the importance of div...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
987,"\n\nTogether, let us embrace the potential of ...",\n\nBy working together and committing to thes...,"Together, let us embrace the potential of AI t...","Together, let us work towards a future where A...",1. Both responses address the instructions pro...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
988,"\n\nTogether, let us embrace the potential of ...",\n\nLet us work together to ensure that AI is ...,"Together, let us embrace the potential of AI t...","Together, let us work towards a future where A...",1. Both responses address the importance of us...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
989,"Together, let us embrace the potential of AI t...","Together, let us work towards a future where A...","Together, let us embrace the potential of AI t...","Together, let us work towards a future where A...",1. Both responses address the importance of us...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
990,"Together, let us embrace the potential of AI t...",Let us work together to ensure that AI is deve...,"Together, let us embrace the potential of AI t...","Together, let us work towards a future where A...",1. Both responses address the instructions pro...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
991,"Together, let us embrace the potential of AI t...","Together, let us work towards a future where A...","Together, let us embrace the potential of AI t...","Together, let us work towards a future where A...",1. Both responses address the same instruction...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
992,"Together, let us embrace the potential of AI t...",Let us work together to ensure that AI is deve...,"Together, let us embrace the potential of AI t...","Together, let us work towards a future where A...",1. Both responses address the importance of us...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...


In [490]:
# Inspect the candidate messages queued for the next round.
candidate_messages

[[{'role': 'user',
   'content': 'I completely agree with you! The advancements in artificial intelligence and machine learning are truly remarkable and have the potential to transform various industries. The ability of AI to analyze data at a scale and speed that humans cannot match opens up new possibilities for innovation and problem-solving.\n\nIn healthcare, AI is being used to improve diagnostics, personalize treatment plans, and streamline administrative tasks. In finance, AI-powered algorithms are enhancing fraud detection, risk assessment, and investment strategies. In transportation, autonomous vehicles are becoming a reality, thanks to advancements in machine learning.\n\nThe integration of AI with other technologies like IoT, blockchain, and 5G is creating a powerful ecosystem that can drive even more innovation. For example, AI-powered IoT devices can collect and analyze real-time data to optimize processes and improve efficiency. Blockchain technology can enhance the secu