## Building a BATTLE Eval

In [120]:
new = True
initial_message = [{"role": "user", "content": ""}]
battles_generation = "dense"      # methods: dense, only-new
roles = "system-user"             # methods: system-user, system, user

#!cd evals
#!git lfs fetch --all
#!git lfs pull

In [121]:
import os
from openai import OpenAI
import pandas as pd
import yaml
#import datetime
#from dotenv import load_dotenv

#load_dotenv()  # Load environment variables from .env file

#api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI()

# Install Evals if you haven't already
# %pip install -e ../.
# pip install --upgrade openai
# %pip install pandas


In [122]:
# Paths. Assuming this notebook is in examples/

evals_path = os.path.join(os.getcwd(), "..", "evals")

registry_path = os.path.join(evals_path, "registry", "evals", "battles.yaml")

data_path = os.path.join(evals_path, "registry", "data", "battles")
os.makedirs(data_path, exist_ok=True)
data_path = os.path.join(data_path, "samples.jsonl")

json_logs_path = os.path.join(os.getcwd(), "logs")
os.makedirs(json_logs_path, exist_ok=True)
json_logs_path = os.path.join(json_logs_path, "logs")

df_path = os.path.join(evals_path, "evallogs", "df")
os.makedirs(df_path, exist_ok=True)
dataset_path = os.path.join(df_path, "dataset")

In [123]:
# Registry yaml

registry_yaml = {}

registry_yaml["battles"] = {
    "id": "battles.test.v1",
    "metrics": ["accuracy"]
}
registry_yaml["battles.test.v1"] = {
    "class": "evals.elsuite.modelgraded.classify:ModelBasedClassify",
    "args": {
        "samples_jsonl": "battles/samples.jsonl",
        "eval_type": "cot_classify",
        "modelgraded_spec": "battle"
    }
}
with open(os.path.join(registry_path), "w") as f:
    yaml.dump(registry_yaml, f)

In [124]:
# Initial data
if new:
    dataset = []
    best_message = initial_message
    best_response = client.chat.completions.create(
        messages = initial_message,
        model = "gpt-3.5-turbo",
        ).choices[0].message.content
else:
    with open(dataset_path, "r") as f:
        dataset = pd.read_json(f, lines=True)
    if dataset.iloc[-1]["Choice"] == "No":
        best_content = dataset.iloc[-1]["Instruction2"]
        best_response = dataset.iloc[-1]["Response2"]
    else:
        best_content = dataset.iloc[-1]["Instruction1"]
        best_response = dataset.iloc[-1]["Response1"]
    best_message = [{"role": "user", "content": best_content}]
candidate_messages = [best_message]
next_candidate_messages = []
generation_distance = 0


In [79]:
def battle(best_message, best_response, candidate_message):

    dataset = [{"input1": candidate_message, "input2": best_message, "completion2":best_response}]

    df = pd.DataFrame(dataset)
    df.to_json(data_path, orient="records", lines=True)

    !oaieval gpt-3.5-turbo battles --record_path logs/logs
    
    with open(json_logs_path, "r") as f:
        df = pd.read_json(f, lines=True)

    #current_time = datetime.datetime.now()
    #formatted_time = current_time.strftime("%Y%m%d_%H%M%S")
    #df.to_json(os.path.join(df_path, formatted_time), lines=True, orient="records")

    instruction1 = candidate_message[0]["content"]
    instruction2 = best_message[0]["content"]

    battle_prompt_content = df["data"].iloc[-2]["prompt"][0]["content"]
    response1 = battle_prompt_content.split("\n[Response 1]\n",)[1].split("\n\n[Instruction 2]\n")[0]
    response1 = response1.replace("\\'", "'").replace("\\n", "\n")
    response2 = battle_prompt_content.split("\n[Response 2]\n",)[1].split("\n\n\nIs the first response better than the second?")[0]
    response2 = response2.replace("\\'", "'").replace("\\n", "\n")
    print(f"Response1: {response1}")
    #print(f"response2: {response2}")

    sampled = df["data"].iloc[-2]["sampled"][0]

    choice = df["data"].iloc[-1]["choice"]

    data = {'Instruction1': instruction1, 'Response1': response1, 'Instruction2': instruction2, 'Response2': response2, 'Sampled': sampled, 'Choice': choice, 'Data': df.to_dict()}
    return data

In [80]:
def switch_system_and_user(message):
    new_message = []
    if message[0]["role"] == "system":
        new_message.append([{"role": "user", "content": message[0]["content"]}])
    else:
        new_message.append([{"role": "system", "content": message[0]["content"]}])
    return new_message

In [81]:
def find_parents(content, dataset):
    print (f"finding parents {content}")
    dataset = pd.DataFrame(dataset)
    dataset = dataset[dataset["Response1"] == content]
    parents = dataset["Instruction1"].unique()
    return parents

In [82]:
def find_children(content, dataset):
    print (f"finding children {content}")
    dataset = pd.DataFrame(dataset)
    dataset = dataset[dataset["Instruction1"] == content]
    children = dataset["Response1"].unique()
    return children

In [83]:
def list_candidate_messages(dataset, best_message, generation_distance):
    best_content = best_message[0]["content"]
    last_up = [best_content]
    last_down = [best_content]
    list_of_contents = [best_content]

    next_up = []
    next_down = []
    for i in range(generation_distance):
        print (f"starting level {i}, generation distance {generation_distance}")
        for content in last_up:
            #print(f"last_up: {last_up}, now {content}")
            next_up.extend(find_parents(content, dataset))
        for content in last_down:
            #print(f"last_up: {last_down}, now {content}")
            next_down.extend(find_children(content, dataset))
        print (f"behind for loops")
        list_of_contents.extend(next_up)
        list_of_contents.extend(next_down)
        last_up = next_up.copy()
        last_down = next_down.copy()
        print(f"endind level {i}")
        

    list_of_contents = pd.array(list_of_contents).unique().tolist()
    
    #best_message_index = list_of_contents.index(best_message[0][content])
    #start = max(best_message_index - generation_distance, 0)
    #stop = min(len(list_of_contents))
    #stop = len(list_of_contents) - 1

    messages = []
    for content in list_of_contents:
        if roles == "system-user" or "user":
            messages.append([{"role":"user","content":content}])
        if roles == "system-user" or "system":
            messages.append([{"role":"system","content":content}])
    return messages

In [116]:
for candidate_message in candidate_messages:
    data = battle(best_message, best_response, candidate_message)
    dataset = pd.concat([dataset, pd.DataFrame(data)],ignore_index=True).to_json(os.path.join(df_path, "dataset"), lines=True, orient="records")
    if data["Choice"] == "Yes":
        best_message = candidate_message
        best_response = data["Response1"]
        print(f"New best message:{best_message}")
        if battles_generation == "dense":
            generation_distance = 0
    print(f"Instruction1: {candidate_message[0]["content"]}")

    if battles_generation == "only-new":
        new_message = [{"role": candidate_message[0]["role"], "content":data["Response1"]}]
        if new_message not in next_candidate_messages:
            next_candidate_messages.append(new_message)
            if roles == "system-user":
                next_candidate_messages.append(switch_system_and_user(new_message))

if battles_generation == "only-new":
    candidate_messages = next_candidate_messages
    next_candidate_messages = []
if battles_generation == "dense":
    print("going to list candidate messages")
    candidate_messages = list_candidate_messages(dataset, best_message, generation_distance)
    generation_distance += 1
    print("candidate messages listed")
print(f"all done, generation distance {generation_distance}")

[2024-08-08 13:47:49,762] [registry.py:271] Loading registry from /Users/janvotava/Desktop/evals/evals/registry/evals
[2024-08-08 13:47:51,898] [registry.py:271] Loading registry from /Users/janvotava/.evals/evals
[2024-08-08 13:47:51,900] [oaieval.py:215] [1;35mRun started: 240808114751NAAYJSN2[0m
[2024-08-08 13:47:51,902] [registry.py:271] Loading registry from /Users/janvotava/Desktop/evals/evals/registry/modelgraded
[2024-08-08 13:47:51,938] [registry.py:271] Loading registry from /Users/janvotava/.evals/modelgraded
[2024-08-08 13:47:51,939] [data.py:94] Fetching /Users/janvotava/Desktop/evals/evals/registry/data/battles/samples.jsonl
[2024-08-08 13:47:51,939] [eval.py:36] Evaluating 1 samples
[2024-08-08 13:47:51,967] [eval.py:144] Running in threaded mode with 10 threads!
100%|█████████████████████████████████████████████| 1/1 [00:03<00:00,  3.86s/it]
[2024-08-08 13:47:55,838] [oaieval.py:275] Found 2/2 sampling events with usage data
[2024-08-08 13:47:55,838] [oaieval.py:283] 

In [125]:
dataset.iloc[166:]

Unnamed: 0,Instruction1,Response1,Instruction2,Response2,Sampled,Choice,Data
166,"Based on the information provided, here is a s...",This outline provides a comprehensive structur...,"Based on the information provided, here is a s...",This outline provides a comprehensive structur...,1. Response 1 acknowledges the importance of d...,No,{'spec': {'0': {'completion_fns': ['gpt-3.5-tu...
167,"\n\nAdditionally, make sure to cite your sourc...","In conclusion, the impact of social media on m...","Based on the information provided, here is a s...",This outline provides a comprehensive structur...,1. Response 1 provides a more detailed and in-...,No,"{'0': {'completion_fns': ['gpt-3.5-turbo'], 'e..."
168,"\n\nAdditionally, make sure to cite your sourc...","In conclusion, the impact of social media on m...","Based on the information provided, here is a s...",This outline provides a comprehensive structur...,1. Response 1 provides a more detailed and in-...,No,"{'0': None, '1': {'counts/No': 1, 'score': 0.0..."
169,"\n\nAdditionally, make sure to cite your sourc...","In conclusion, the impact of social media on m...","Based on the information provided, here is a s...",This outline provides a comprehensive structur...,1. Response 1 provides a more detailed and in-...,No,"{'0': None, '1': '240808115006UNJ53WV5', '2': ..."
170,"\n\nAdditionally, make sure to cite your sourc...","In conclusion, the impact of social media on m...","Based on the information provided, here is a s...",This outline provides a comprehensive structur...,1. Response 1 provides a more detailed and in-...,No,"{'0': None, '1': None, '2': 0.0, '3': 1.0, '4'..."
171,"\n\nAdditionally, make sure to cite your sourc...","In conclusion, the impact of social media on m...","Based on the information provided, here is a s...",This outline provides a comprehensive structur...,1. Response 1 provides a more detailed and in-...,No,"{'0': None, '1': None, '2': 'battles.test.0', ..."
172,"\n\nAdditionally, make sure to cite your sourc...","In conclusion, the impact of social media on m...","Based on the information provided, here is a s...",This outline provides a comprehensive structur...,1. Response 1 provides a more detailed and in-...,No,"{'0': None, '1': None, '2': 'sampling', '3': '..."
173,"\n\nAdditionally, make sure to cite your sourc...","In conclusion, the impact of social media on m...","Based on the information provided, here is a s...",This outline provides a comprehensive structur...,1. Response 1 provides a more detailed and in-...,No,"{'0': None, '1': None, '2': {'prompt': [{'role..."
174,"\n\nAdditionally, make sure to cite your sourc...","In conclusion, the impact of social media on m...","Based on the information provided, here is a s...",This outline provides a comprehensive structur...,1. Response 1 provides a more detailed and in-...,No,"{'0': None, '1': None, '2': '', '3': '', '4': ''}"
175,"\n\nAdditionally, make sure to cite your sourc...","In conclusion, the impact of social media on m...","Based on the information provided, here is a s...",This outline provides a comprehensive structur...,1. Response 1 provides a more detailed and in-...,No,"{'0': None, '1': None, '2': 1723117812345, '3'..."


In [114]:
from IPython.display import display_markdown

In [128]:
display_markdown(best_message[0]["content"], raw=True)

Based on the information provided, here is a suggested outline for structuring your research findings on the impact of social media on mental health:

I. Introduction
   A. Overview of the topic
   B. Importance of studying the impact of social media on mental health
   C. Thesis statement

II. The Positive Effects of Social Media on Mental Health
   A. Connection and social support
   B. Access to information and resources
   C. Promoting self-expression and creativity

III. The Negative Effects of Social Media on Mental Health
   A. Comparison and self-esteem issues
   B. Cyberbullying and harassment
   C. Fear of missing out (FOMO) and anxiety

IV. Potential Strategies for Mitigating Negative Effects
   A. Setting boundaries and limits on social media use
   B. Promoting digital detox and mindfulness
   C. Encouraging positive online interactions and communities

V. Real-Life Examples and Case Studies
   A. Case study: Impact of cyberbullying on mental health
   B. Example: Positive influence of online support groups
   C. Case study: Social media addiction and its consequences

VI. Conclusion
   A. Summary of key findings
   B. Recommendations for individuals, social media platforms, and policymakers
   C. Final thoughts on the future of social media and mental health

When presenting your information, consider using a mix of text, visuals (such as graphs, charts, and infographics), and real-life stories to engage your audience. You could also consider creating a presentation, report, or infographic to effectively communicate your research findings.

Remember to maintain a balanced perspective throughout your project, acknowledging both the positive and negative impacts of social media on mental health. Good luck with your research project!

In [130]:
display_markdown(best_response, raw=True)

This outline provides a comprehensive structure for organizing your research findings on the impact of social media on mental health. By following this outline, you can effectively present both the positive and negative effects of social media on mental health, as well as potential strategies for mitigating the negative impacts. Additionally, including real-life examples and case studies can help illustrate the real-world implications of social media use on mental health.

When presenting your research, consider using a variety of formats such as text, visuals, and real-life stories to engage your audience and effectively communicate your findings. Whether you choose to create a presentation, report, or infographic, make sure to maintain a balanced perspective and provide recommendations for individuals, social media platforms, and policymakers.

Good luck with your research project, and remember to stay focused on your thesis statement while exploring the complexities of the relationship between social media and mental health.

In [131]:
#for entry in dataset[dataset["Choice"]=="Yes"].iloc[-1]:
    #display_markdown(entry, raw=True)

In [90]:
lenght = []
for response in pd.DataFrame(dataset)["Response1"]:
    lenght.append(len(response))
lenght[-30:]

In [119]:
len(candidate_messages)

52