## Building a BATTLE Eval

In [421]:
# True: start a fresh run. False: resume from the dataset saved by an earlier run.
new = True
# Seed conversation; its completion becomes the first "best" response on a fresh run.
initial_message = [dict(role="user", content="")]
# How the next round of candidates is chosen. methods: dense, only-new
battles_generation = "dense"
# Which chat roles candidate messages are sent as. methods: system-user, system, user
roles = "system-user"

#!cd evals
#!git lfs fetch --all
#!git lfs pull

In [422]:
import os
from openai import OpenAI
import pandas as pd
import yaml
#import datetime
#from dotenv import load_dotenv

#load_dotenv()  # Load environment variables from .env file

#api_key = os.environ.get("OPENAI_API_KEY")
# Shared API client for the notebook. It is constructed without arguments, so
# credentials are presumably taken from the environment (e.g. OPENAI_API_KEY) -- confirm.
client = OpenAI()

# Install Evals if you haven't already
# %pip install -e ../.
# pip install --upgrade openai
# %pip install pandas


In [423]:
# Paths. Assuming this notebook is in examples/
notebook_dir = os.getcwd()

# Root of the evals package and the registry entry this notebook writes.
evals_path = os.path.join(notebook_dir, "..", "evals")
registry_path = os.path.join(evals_path, "registry", "evals", "battles.yaml")

# Sample file consumed by the eval; its directory is created on demand.
battles_data_dir = os.path.join(evals_path, "registry", "data", "battles")
os.makedirs(battles_data_dir, exist_ok=True)
data_path = os.path.join(battles_data_dir, "samples.jsonl")

# Record file (named "logs") inside a local "logs" directory.
logs_dir = os.path.join(notebook_dir, "logs")
os.makedirs(logs_dir, exist_ok=True)
json_logs_path = os.path.join(logs_dir, "logs")

# Where the accumulated battle results are stored.
df_path = os.path.join(evals_path, "evallogs", "df")
os.makedirs(df_path, exist_ok=True)
dataset_path = os.path.join(df_path, "dataset")

In [424]:
# Registry yaml
# Two entries: the "battles" alias pointing at a versioned id, and the versioned
# eval definition itself (a model-graded classification over the samples file).
registry_yaml = {
    "battles": {
        "id": "battles.test.v1",
        "metrics": ["accuracy"],
    },
    "battles.test.v1": {
        "class": "evals.elsuite.modelgraded.classify:ModelBasedClassify",
        "args": {
            "samples_jsonl": "battles/samples.jsonl",
            "eval_type": "cot_classify",
            "modelgraded_spec": "battle",
        },
    },
}

# Overwrites the registry file on every run.
with open(registry_path, "w") as registry_file:
    yaml.dump(registry_yaml, registry_file)

In [425]:
# Initial data
if not new:
    # Resume: reload the saved battles and continue from the winner of the last one.
    with open(dataset_path, "r") as dataset_file:
        dataset = pd.read_json(dataset_file, lines=True)
    last_battle = dataset.iloc[-1]
    # Choice "No" means the first (candidate) side did not win, so side 2 is kept.
    winning_side = "2" if last_battle["Choice"] == "No" else "1"
    best_content = last_battle["Instruction" + winning_side]
    best_response = last_battle["Response" + winning_side]
    best_message = [{"role": "user", "content": best_content}]
else:
    # Fresh run: empty results table; the seed message's completion is the first best response.
    dataset = pd.DataFrame(
        (),
        columns=["Instruction1", "Response1", "Instruction2", "Response2", "Sampled", "Choice", "Data"],
    )
    best_message = initial_message
    first_completion = client.chat.completions.create(
        messages=initial_message,
        model="gpt-3.5-turbo",
    )
    best_response = first_completion.choices[0].message.content

# Round state: the first round battles the best message against itself.
candidate_messages = [best_message]
next_candidate_messages = []
generation_distance = 0


In [426]:
def battle(best_message, best_response, candidate_message):
    """Run one battle between a candidate message and the current best one.

    Writes a single sample to ``data_path`` (candidate as ``input1``, best
    message as ``input2`` with ``best_response`` as ``completion2``), runs the
    ``battles`` eval through the ``oaieval`` command line, then reads the
    recorded events back from ``json_logs_path``.

    Args:
        best_message: chat message list; element 0's "content" is stored as Instruction2.
        best_response: completion paired with ``best_message`` in the sample.
        candidate_message: chat message list; element 0's "content" is stored as Instruction1.

    Returns:
        A one-row DataFrame with columns Instruction1, Response1, Instruction2,
        Response2, Sampled, Choice and Data, where Data holds the whole event
        log as a dict.

    NOTE(review): no ``completion1`` is supplied, so response 1 is presumably
    generated by the eval run itself -- confirm against the modelgraded spec.
    NOTE(review): the shell command records to the relative path ``logs/logs``
    while the log is read from ``json_logs_path``; these are the same file only
    while the working directory is unchanged since ``json_logs_path`` was built.
    """

    # Single-sample input file for the eval.
    dataset = [{"input1": candidate_message, "input2": best_message, "completion2":best_response}]

    df = pd.DataFrame(dataset)
    df.to_json(data_path, orient="records", lines=True)

    # IPython shell escape: runs the eval and records its events to logs/logs.
    !oaieval gpt-3.5-turbo battles --record_path logs/logs
    
    # Reload the recorded events, one JSON object per line.
    with open(json_logs_path, "r") as f:
        df = pd.read_json(f, lines=True)

    #current_time = datetime.datetime.now()
    #formatted_time = current_time.strftime("%Y%m%d_%H%M%S")
    #df.to_json(os.path.join(df_path, formatted_time), lines=True, orient="records")

    instruction1 = candidate_message[0]["content"]
    instruction2 = best_message[0]["content"]

    # Both response texts are cut out of the prompt stored in the second-to-last
    # record, using the section markers as delimiters.
    # Assumes the last two records are the grader's sampling event and the final
    # choice event, in that order -- TODO confirm against the log format.
    battle_prompt_content = df["data"].iloc[-2]["prompt"][0]["content"]
    response1 = battle_prompt_content.split("\n[Response 1]\n",)[1].split("\n\n[Instruction 2]\n")[0]
    # Turn literal backslash escapes found in the prompt text back into real characters.
    response1 = response1.replace("\\'", "'").replace("\\n", "\n")
    response2 = battle_prompt_content.split("\n[Response 2]\n",)[1].split("\n\n\nIs the first response better than the second?")[0]
    response2 = response2.replace("\\'", "'").replace("\\n", "\n")
    print(f"Response1: {response1}")
    #print(f"response2: {response2}")

    # First sampled text of the second-to-last record.
    sampled = df["data"].iloc[-2]["sampled"][0]

    # Verdict stored in the last record; callers compare it with "Yes" / "No".
    choice = df["data"].iloc[-1]["choice"]

    data = {'Instruction1': instruction1, 'Response1': response1, 'Instruction2': instruction2, 'Response2': response2, 'Sampled': sampled, 'Choice': choice, 'Data': {}}
    data = pd.DataFrame([data])
    # Assigned after construction so the full log dict lands in a single cell.
    data.at[0, "Data"] = df.to_dict()
    return data

In [427]:
def switch_system_and_user(message):
    """Return a new single-entry chat message with the role flipped.

    A "system" message becomes a "user" message; any other role becomes
    "system". The content is carried over unchanged and the input is not
    modified.

    Args:
        message: chat message list; only element 0 is read, and it must have
            "role" and "content" keys.

    Returns:
        A message in the same shape as the input, ``[{"role": ..., "content": ...}]``,
        so it can be used directly as a candidate message. (The previous
        version wrapped the result in an extra list, which made it unusable
        wherever ``message[0]["content"]`` is read.)
    """
    first_entry = message[0]
    flipped_role = "user" if first_entry["role"] == "system" else "system"
    return [{"role": flipped_role, "content": first_entry["content"]}]

In [437]:
def find_parents(content, dataset):
    """Return the distinct Instruction1 values of rows whose Response1 equals ``content``.

    ``dataset`` is anything ``pd.DataFrame`` accepts that yields "Instruction1"
    and "Response1" columns. The result is the array produced by
    ``Series.unique()``, in order of first appearance.
    """
    battles = pd.DataFrame(dataset)
    produced_content = battles["Response1"] == content
    return battles.loc[produced_content, "Instruction1"].unique()

In [438]:
def find_children(content, dataset):
    """Return the distinct Response1 values of rows whose Instruction1 equals ``content``.

    ``dataset`` is anything ``pd.DataFrame`` accepts that yields "Instruction1"
    and "Response1" columns. The result is the array produced by
    ``Series.unique()``, in order of first appearance.
    """
    battles = pd.DataFrame(dataset)
    prompted_by_content = battles["Instruction1"] == content
    return battles.loc[prompted_by_content, "Response1"].unique()

In [430]:
def list_candidate_messages(dataset, best_message, generation_distance, role_mode=None):
    """Build the candidate messages within ``generation_distance`` steps of the best message.

    Starting from the best message's content, each level adds the parents
    (instructions that produced a content as Response1) and children
    (responses produced from a content as Instruction1) of the previous
    level's contents. The collected contents are de-duplicated in order of
    first appearance and wrapped as chat messages.

    Args:
        dataset: battle results with "Instruction1" and "Response1" columns.
        best_message: chat message list; element 0's "content" is the start point.
        generation_distance: number of parent/child levels to expand; 0 yields
            only the best message's own content.
        role_mode: "system-user", "system" or "user". When omitted, the
            notebook-level ``roles`` setting is used.

    Returns:
        A list of single-entry chat messages. For each content a "user"
        message is emitted when the mode includes user, followed by a
        "system" message when the mode includes system.
    """
    if role_mode is None:
        role_mode = roles

    best_content = best_message[0]["content"]
    frontier = [best_content]
    list_of_contents = [best_content]

    for i in range(generation_distance):
        print (f"starting level {i}, generation distance {generation_distance}")
        # Start each level from an empty list: carrying the previous levels over
        # only re-expands contents that are already collected (the final
        # de-duplication hides it) and makes the work grow with every level.
        next_frontier = []
        for content in frontier:
            next_frontier.extend(find_parents(content, dataset))
            next_frontier.extend(find_children(content, dataset))

        print (f"behind for loops")
        list_of_contents.extend(next_frontier)
        frontier = next_frontier
        print(f"endind level {i}")

    # Order-preserving de-duplication.
    list_of_contents = pd.array(list_of_contents).unique().tolist()

    # The previous test `roles == "system-user" or "user"` was always truthy,
    # so both roles were emitted regardless of the setting.
    include_user = role_mode in ("system-user", "user")
    include_system = role_mode in ("system-user", "system")

    messages = []
    for content in list_of_contents:
        if include_user:
            messages.append([{"role":"user","content":content}])
        if include_system:
            messages.append([{"role":"system","content":content}])
    return messages

In [546]:
# One round: battle every candidate against the current best message, record
# the result, and promote the candidate whenever the verdict is "Yes".
for candidate_message in candidate_messages:
    data = battle(best_message, best_response, candidate_message)
    dataset = pd.concat([dataset, data],ignore_index=True)
    # Saved after every battle so a later run with new = False can pick up from here.
    dataset.to_json(dataset_path, lines=True, orient="records")
    if data["Choice"].iloc[0] == "Yes":
        best_message = candidate_message
        best_response = data["Response1"].iloc[0]
        print(f"New best message:{best_message}")
        if battles_generation == "dense":
            # A new best restarts the neighbourhood search from distance 0.
            generation_distance = 0
    # Single quotes inside the replacement field: reusing the outer double
    # quotes is a SyntaxError on Python versions before 3.12.
    print(f"Instruction1: {candidate_message[0]['content']}")

    if battles_generation == "only-new":
        # The response just produced becomes a candidate instruction for the next round.
        new_message = [{"role": candidate_message[0]["role"], "content":data["Response1"].iloc[0]}]
        if new_message not in next_candidate_messages:
            next_candidate_messages.append(new_message)
            if roles == "system-user":
                # NOTE(review): this requires switch_system_and_user to return a
                # message in the same [{"role", "content"}] shape as new_message.
                next_candidate_messages.append(switch_system_and_user(new_message))

# Prepare the candidates for the next run of this cell.
if battles_generation == "only-new":
    candidate_messages = next_candidate_messages
    next_candidate_messages = []
if battles_generation == "dense":
    candidate_messages = list_candidate_messages(dataset, best_message, generation_distance)
    generation_distance += 1
print(f"all done, generation distance: {generation_distance}, number of candidate messages: {len(candidate_messages)}")

[2024-08-09 15:53:15,036] [registry.py:271] Loading registry from /Users/janvotava/Desktop/evals/evals/registry/evals
[2024-08-09 15:53:16,173] [registry.py:271] Loading registry from /Users/janvotava/.evals/evals
[2024-08-09 15:53:16,175] [oaieval.py:215] [1;35mRun started: 240809135316J5XPUWHO[0m
[2024-08-09 15:53:16,178] [registry.py:271] Loading registry from /Users/janvotava/Desktop/evals/evals/registry/modelgraded
[2024-08-09 15:53:16,220] [registry.py:271] Loading registry from /Users/janvotava/.evals/modelgraded
[2024-08-09 15:53:16,221] [data.py:94] Fetching /Users/janvotava/Desktop/evals/evals/registry/data/battles/samples.jsonl
[2024-08-09 15:53:16,222] [eval.py:36] Evaluating 1 samples
[2024-08-09 15:53:16,246] [eval.py:144] Running in threaded mode with 10 threads!
100%|█████████████████████████████████████████████| 1/1 [00:07<00:00,  7.52s/it]
[2024-08-09 15:53:23,765] [oaieval.py:275] Found 2/2 sampling events with usage data
[2024-08-09 15:53:23,765] [oaieval.py:283] 

In [433]:
from IPython.display import display_markdown

In [544]:
# Render every field (except the raw log) of the second-to-last battle the candidate won.
won_battles = dataset.loc[dataset["Choice"] == "Yes"]
second_last_win = won_battles.iloc[-2].drop("Data")
for entry in second_last_win:
    display_markdown(entry, raw=True)


By bringing together experts from various disciplines, we can address the ethical, legal, and social implications of AI technologies, ensuring that they are developed and deployed in a way that respects human rights, privacy, and dignity. This collaborative approach can help us anticipate and mitigate potential risks and biases in AI systems, promoting fairness, accountability, and transparency in their use.

Moreover, interdisciplinary collaboration can foster innovation and creativity in AI research and development, leading to breakthroughs in areas such as healthcare, education, environmental sustainability, and social welfare. By combining insights from different fields, we can design AI systems that are not only technically robust but also responsive to the needs and values of diverse communities.

In this rapidly evolving technological landscape, it is essential to embrace interdisciplinary collaboration as a fundamental principle in the governance of AI. By working together across disciplines and sectors, we can harness the transformative power of AI to address global challenges, promote human well-being, and advance the common good. Let us join forces to shape a future where AI technologies serve as tools for positive change and collective progress.


Together, we can ensure that AI technologies are developed and used in a way that benefits society as a whole, while also safeguarding against potential harms and risks. By engaging in interdisciplinary collaboration, we can build a more inclusive and responsible AI ecosystem that upholds ethical standards and values human rights.

Through dialogue and cooperation among experts in fields such as computer science, ethics, law, sociology, psychology, and more, we can create frameworks for AI governance that prioritize transparency, accountability, and fairness. This collaborative approach can help us navigate complex ethical dilemmas, such as issues related to bias, discrimination, privacy, and autonomy in AI systems.

Furthermore, interdisciplinary collaboration can enhance public trust and confidence in AI technologies by ensuring that they are developed and deployed in a manner that aligns with societal values and norms. By involving diverse perspectives and expertise in the design and implementation of AI systems, we can promote greater equity and inclusivity in their use across different contexts and communities.

In conclusion, interdisciplinary collaboration is essential for shaping the future of AI in a way that benefits humanity and upholds our shared values. By working together across disciplines, we can harness the potential of AI technologies to drive positive social change and create a more sustainable and equitable future for all. Let us embrace this collaborative spirit to build a better world with AI technologies that serve the common good.


By bringing together experts from various disciplines, we can address the ethical, legal, and social implications of AI technologies, ensuring that they are developed and deployed in a way that respects human rights, privacy, and dignity. This collaborative approach can help us anticipate and mitigate potential risks and biases in AI systems, promoting fairness, accountability, and transparency in their use.

Moreover, interdisciplinary collaboration can foster innovation and creativity in AI research and development, leading to breakthroughs in areas such as healthcare, education, environmental sustainability, and social welfare. By combining insights from different fields, we can design AI systems that are not only technically robust but also responsive to the needs and values of diverse communities.

In this rapidly evolving technological landscape, it is essential to embrace interdisciplinary collaboration as a fundamental principle in the governance of AI. By working together across disciplines and sectors, we can harness the transformative power of AI to address global challenges, promote human well-being, and advance the common good. Let us join forces to shape a future where AI technologies serve as tools for positive change and collective progress.



Together, we can build a more inclusive and equitable society where AI technologies are used responsibly and ethically to benefit all individuals and communities. By fostering interdisciplinary collaboration, we can ensure that AI is developed and deployed in a way that upholds fundamental human values and rights, while also driving innovation and progress in various fields.

Let us come together to shape a future where AI technologies are harnessed for the greater good, where they empower individuals, enhance decision-making processes, and contribute to a more sustainable and prosperous world for all. Through interdisciplinary collaboration, we can pave the way for a future where AI serves as a force for positive change and collective advancement.

1. Response 1 emphasizes the importance of interdisciplinary collaboration in addressing ethical, legal, and social implications of AI technologies.
2. Response 1 discusses the benefits of interdisciplinary collaboration in promoting fairness, accountability, and transparency in AI systems.
3. Response 1 highlights the role of interdisciplinary collaboration in fostering innovation and creativity in AI research and development.
4. Response 1 emphasizes the need for interdisciplinary collaboration to shape a future where AI technologies serve as tools for positive change and collective progress.

Based on the detailed analysis, Response 1 is better than Response 2.

Yes

Yes

Yes

In [535]:
# Show the 20 most recent battles.
dataset.tail(20)

Unnamed: 0,Instruction1,Response1,Instruction2,Response2,Sampled,Choice,Data
675,"\n\nThrough interdisciplinary collaboration, w...","\n\nBy working together, we can also address p...","\n\nThrough interdisciplinary collaboration, w...",\n\nBy fostering interdisciplinary collaborati...,1. Both responses address the importance of in...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
676,"\n\nThrough interdisciplinary collaboration, w...",\n\nBy fostering interdisciplinary collaborati...,"\n\nThrough interdisciplinary collaboration, w...",\n\nBy fostering interdisciplinary collaborati...,1. Both responses emphasize the importance of ...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
677,"\n\nThrough interdisciplinary collaboration, w...","\n\nBy working together, we can also address p...","\n\nThrough interdisciplinary collaboration, w...",\n\nBy fostering interdisciplinary collaborati...,1. Both responses address the importance of in...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
678,"\n\nThrough interdisciplinary collaboration, w...",\n\nBy fostering interdisciplinary collaborati...,"\n\nThrough interdisciplinary collaboration, w...",\n\nBy fostering interdisciplinary collaborati...,1. Response 1 emphasizes the importance of add...,Yes,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
679,\n\nLet us collaborate across sectors and disc...,"\n\nBy working together, we can ensure that AI...","\n\nThrough interdisciplinary collaboration, w...",\n\nBy fostering interdisciplinary collaborati...,1. Response 1 emphasizes the importance of col...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
680,\n\nLet us collaborate across sectors and disc...,"\n\nThrough interdisciplinary collaboration, w...","\n\nThrough interdisciplinary collaboration, w...",\n\nBy fostering interdisciplinary collaborati...,1. Response 1 emphasizes the importance of col...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
681,"\n\nBy working together, we can also address p...","Together, we can build a future where AI is us...","\n\nThrough interdisciplinary collaboration, w...",\n\nBy fostering interdisciplinary collaborati...,1. Response 1 emphasizes the importance of col...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
682,"\n\nBy working together, we can also address p...","Together, we can build a future where AI and m...","\n\nThrough interdisciplinary collaboration, w...",\n\nBy fostering interdisciplinary collaborati...,1. Response 1 emphasizes the benefits and posi...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
683,\n\nBy fostering interdisciplinary collaborati...,"\n\nTogether, we can build a future where AI t...","\n\nThrough interdisciplinary collaboration, w...",\n\nBy fostering interdisciplinary collaborati...,1. Response 1 emphasizes the importance of col...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...
684,\n\nBy fostering interdisciplinary collaborati...,"\nTogether, we can build a future where AI tec...","\n\nThrough interdisciplinary collaboration, w...",\n\nBy fostering interdisciplinary collaborati...,1. Both responses address the importance of in...,No,{'spec': {0: {'completion_fns': ['gpt-3.5-turb...


In [490]:
# Inspect the candidates queued for the next round of battles.
candidate_messages

[[{'role': 'user',
   'content': 'I completely agree with you! The advancements in artificial intelligence and machine learning are truly remarkable and have the potential to transform various industries. The ability of AI to analyze data at a scale and speed that humans cannot match opens up new possibilities for innovation and problem-solving.\n\nIn healthcare, AI is being used to improve diagnostics, personalize treatment plans, and streamline administrative tasks. In finance, AI-powered algorithms are enhancing fraud detection, risk assessment, and investment strategies. In transportation, autonomous vehicles are becoming a reality, thanks to advancements in machine learning.\n\nThe integration of AI with other technologies like IoT, blockchain, and 5G is creating a powerful ecosystem that can drive even more innovation. For example, AI-powered IoT devices can collect and analyze real-time data to optimize processes and improve efficiency. Blockchain technology can enhance the secu