# Simulation

## **Batch Sim**


In [18]:
import importlib
import pandas as pd
from openai import OpenAI
import os
import json
import traceback
import sys

import brikasutils as bu
import shared_utils as utils
import survey
importlib.reload(bu)
importlib.reload(utils)
importlib.reload(survey)

# Folder that receives each run's simulation CSV plus an "info/" sub-folder
# holding the settings/metadata JSON for that run.
SAVE_SIMULATION_DIR = "batch/output"
queue = bu.FileRunQueue(queue_folder_path="batch/queue", completed_folder_path="batch/done")
report_live_csv = bu.LiveCSV("batch/run_reports/monster_V6.csv")
timer = bu.Benchmarker()

# Each item yielded by the queue is the path of a JSON run file. Per run file:
#   1. load the run description and its prompt file,
#   2. send every prompt to the chat-completions endpoint,
#   3. save answers + metadata,
#   4. append one row describing the run (OK / Failed) to the live CSV report.
# A failure in one run is recorded and the loop continues with the next file.
for filepath in queue:
    timer.mark_start(filepath)

    # Reset all per-run state *before* anything can fail. Otherwise a run that
    # dies early (e.g. unreadable JSON) would either hit a NameError in the
    # error/report code or silently report values left over from the previous
    # iteration.
    filename = os.path.splitext(os.path.basename(filepath))[0]
    timestamp = None
    instructions = {}
    error_string = ""
    status = "Failed"

    try:
        timestamp = bu.get_timestamp()

        ########## Handle batch stuff ########
        with open(filepath, 'r') as f:
            rundata = json.load(f)
        instructions = rundata["instructions"]

        # Load prompt file
        with open(instructions["prompt_file"], 'r') as f:
            final_prompts = json.load(f)
        if not final_prompts:
            # Fail with a readable message instead of a ZeroDivisionError in
            # the average-token computation below.
            raise ValueError("Prompt file contains no prompts")

        # Make the surv
        if instructions["survey_type"] == "KanoSurvey":
            surv = survey.KanoSurvey()
        elif instructions["survey_type"] == "PersonalitySurvey":
            surv = survey.PersonalitySurvey()
        else:
            raise Exception("Invalid survey type")

        ######### Run Simulation ########
        SIMULATION_NAMEID = filename
        # Optional cap on how many prompts are sent; None means "send all".
        LIMIT = instructions.get("LIMIT")
        AUTO_INFO = {
            "date": timestamp,
            **rundata["info"], # unpacked from rundata
            "limit": LIMIT,
            "prompt_count": min(len(final_prompts), LIMIT) if LIMIT is not None else len(final_prompts),
            "avg_tokens_in_prompt": round(utils.describe_prompts(final_prompts)["total_all_prompt_tokens"]/len(final_prompts)),
        }
        SETTINGS = {
            **rundata["settings"], # unpacked from rundata
        }

        # client depends on if it's local or not
        if instructions["isLocal"]:
            client = OpenAI(
                base_url = 'http://localhost:11434/v1',
                api_key='ollama', # required, but unused
            )
        else:
            client = OpenAI(
                api_key=os.environ.get("OPENAI_API_KEY"),
            )

        completions = []
        l = len(final_prompts)

        # zip() pairs prompts with survey questions positionally and stops at
        # the shorter of the two sequences.
        for i, (prompt, question) in enumerate(zip(final_prompts, surv.questions)):
            # `>=` so that exactly LIMIT prompts are sent, matching the
            # "prompt_count" recorded in AUTO_INFO (the previous `>` sent one extra).
            if LIMIT is not None and i >= LIMIT:
                break

            print(f"{i}/{l}...", end="\t") # Print progress
            # Send the Request
            full_response = client.chat.completions.create(
                messages=prompt,
                **SETTINGS,
            )
            r = full_response.choices[0].message.content

            completions.append({'question': question, 'answer': r})

            print(f"{question}: {r}")

        ############ Save Important results
        df = pd.DataFrame(completions)
        bu.if_dir_not_exist_make(SAVE_SIMULATION_DIR)
        df.to_csv(f"{SAVE_SIMULATION_DIR}/{SIMULATION_NAMEID}_simulation.csv", index=False)
        bu.if_dir_not_exist_make(os.path.join(SAVE_SIMULATION_DIR, "info"))
        bu.quickJSON({"settings": SETTINGS, "info": AUTO_INFO}, f"{SAVE_SIMULATION_DIR}/info/{SIMULATION_NAMEID}_info.json")

        status = "OK"

    except Exception:
        print(f"##### Error while running {filename}.")
        error_string = traceback.format_exc()
        print(error_string)
        status = "Failed"

    ########### Time the run
    try:
        time_taken = timer.mark_end(filepath)
    except Exception:
        # Timing is best-effort; a failure here must not lose the run report.
        print("Error while timing run: ")
        print(traceback.format_exc())
        time_taken = None

    ########### Report the run
    try:
        new_report = {
            "filename": filename,
            "timestamp": timestamp,
            "time_taken": time_taken,
            "status": status,
            # Empty when the run file could not be read far enough to get its
            # instructions; never the previous run's instructions.
            **instructions,
            # Reset to "" at the top of every iteration, so it is only
            # non-empty when this run failed.
            "error": error_string,
        }

        tmp = bu.convert_dicts_to_table([new_report])
        report_live_csv.append_data(tmp[1], tmp[0])
    except Exception:
        print("Error while reporting: ")
        traceback.print_exc()

    print(f"Processed {filename}. Stauts: {status}")

timer.print_total_execution_time()

Recognized 9 headers in batch/run_reports/monster_V6.csv
[1/4] Running eli-pers-hybrid-250-05-02_mixtral-q2_K_V6_schema_1.json from queue. 
Using default Personality Survey CSV file: surveys/survey_personality-test.csv
0/50...	##### Error while running eli-pers-hybrid-250-05-02_mixtral-q2_K_V6_schema_1.
Traceback (most recent call last):
  File "/var/folders/bk/njz8h9d15b78r5q_bldv98zc0000gp/T/ipykernel_25160/223075676.py", line 78, in <module>
    full_response = client.chat.completions.create(
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/openai/_utils/_utils.py", line 275, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/openai/resources/chat/completions.py", line 581, in create
    return self._post(
           ^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/openai/_base_client.py", line 1233, in post
    return cast(ResponseT, self.

In [14]:
# Show the simulated answers and the reference survey table one after the other.
# `df` is not defined in this cell: it is whatever question/answer DataFrame a
# previously executed simulation cell left in the kernel (hidden state).
display(df)
# Builds a fresh PersonalitySurvey and shows its `df` attribute, presumably to
# compare the simulated answers against the survey's stored columns.
display(survey.PersonalitySurvey().df)

Unnamed: 0,question,answer
0,I am the life of the party.,SOMEWHAT DISAGREE
1,I don't talk a lot.,SOMEWHAT AGREE
2,I feel comfortable around people.,SOMEWHAT DISAGREE
3,I keep in the background.,SOMEWHAT AGREE
4,I start conversations.,SOMEWHAT AGREE
5,I have little to say.,SOMEWHAT DISAGREE
6,I talk to a lot of different people at parties.,SOMEWHAT AGREE
7,I don't like to draw attention to myself.,SOMEWHAT DISAGREE
8,I don't mind being the center of attention.,SOMEWHAT AGREEMENT
9,I am quiet around strangers.,SOMEWHAT AGREEMENT


Using default Personality Survey CSV file: surveys/survey_personality-test.csv


Unnamed: 0,question,airidas,elias,format,category,retrieval_1
0,I am the life of the party.,3,3,"A (1-5: 1=Disagree, 3=Neutral, 5=Agree)",EXT1,engaged or not engaged in parties
1,I don't talk a lot.,2,2,"A (1-5: 1=Disagree, 3=Neutral, 5=Agree)",EXT2,talking or not talking
2,I feel comfortable around people.,4,4,"A (1-5: 1=Disagree, 3=Neutral, 5=Agree)",EXT3,comfort or discomfort around people
3,I keep in the background.,2,4,"A (1-5: 1=Disagree, 3=Neutral, 5=Agree)",EXT4,staying or not staying in the background
4,I start conversations.,4,3,"A (1-5: 1=Disagree, 3=Neutral, 5=Agree)",EXT5,starting or not starting conversations
5,I have little to say.,1,1,"A (1-5: 1=Disagree, 3=Neutral, 5=Agree)",EXT6,having little or a lot to say
6,I talk to a lot of different people at parties.,5,4,"A (1-5: 1=Disagree, 3=Neutral, 5=Agree)",EXT7,talking to a lot or few people at parties
7,I don't like to draw attention to myself.,2,3,"A (1-5: 1=Disagree, 3=Neutral, 5=Agree)",EXT8,
8,I don't mind being the center of attention.,4,1,"A (1-5: 1=Disagree, 3=Neutral, 5=Agree)",EXT9,
9,I am quiet around strangers.,1,4,"A (1-5: 1=Disagree, 3=Neutral, 5=Agree)",EXT10,


## Run Live simulation (not batch)

In [None]:
# ---- Live (non-batch) run: configuration --------------------------------
# Read the prompt set for this run from disk.
# NOTE(review): the prompts are bound to `pre_final_prompts`, but the run cell
# reads `final_prompts` and `surv`, neither of which is assigned here — confirm
# where those are expected to come from before relying on Restart & Run All.
with open("batch/prompts/personality_batch-B_8k-dynamic_prompts.json", mode="r") as read_file:
    pre_final_prompts = json.loads(read_file.read())

# Request settings. Other models tried: mixtral, command-r-plus:104b-q2_K.
# Currently disabled options: "format": "json", "temperature": 0.5
SETTINGS = dict(
    model="llama3",
    stream=True,
)

##################################
# Identifier used in the names of the files written by the run cell.
SIM_ID = "eli-pers_8k-dynamic_llama3-7_json-test"
save = "_".join((SETTINGS["model"], SIM_ID))

# For testing purposes: cap on the number of prompts. None runs all of them.
LIMIT = None

# Metadata stored next to the results. Only the date is recorded right now;
# the fields below are disabled:
#   "EMBEDDING_NAMEID": EMBEDDING_NAMEID
#   "RETRIEVAL_PROMPT": RETRIEVAL_PROMPT
#   "CHUNKS_COUNT_IN_CTX": CHUNKS_COUNT_IN_CTX
#   "survey_type": str(type(surv))
#   "prompt_count": min(len(final_prompts), LIMIT) if LIMIT != None else len(final_prompts)
#   "avg_tokens_in_prompt": round(prompt_info["total_all_prompt_tokens"]/len(final_prompts))
AUTO_INFO = dict(date=bu.get_timestamp())

# Client pointed at the local OpenAI-compatible endpoint; the API key is
# required by the client but unused.
client = OpenAI(api_key="ollama", base_url="http://localhost:11434/v1")
##################################

In [None]:
###########################################
##### ==== THE FUNCTIONAL 1!!!! ===== #####
###########################################
# Live run: send each prompt to the model, collect one answer per survey
# question, then write the answers, the prompts and the run metadata to disk.
# NOTE(review): `final_prompts` and `surv` are not assigned by the live setup
# cell (it binds `pre_final_prompts`); this cell relies on them already being
# present in the kernel — confirm the intended source.
completions = []
l = len(final_prompts)
timer = bu.Benchmarker()
# zip() pairs prompts with survey questions positionally and stops at the
# shorter of the two sequences.
for i, (prompt, question) in enumerate(zip(final_prompts, surv.questions)):
    # `>=` so that exactly LIMIT prompts are sent (the previous `>` sent LIMIT + 1).
    if LIMIT is not None and i >= LIMIT:
        break
    timer.mark()
    print(f"{i}/{l}...", end="\t") # Print progress
    # Send the Request. Only the model name is taken from SETTINGS here; the
    # other SETTINGS entries are not forwarded.
    full_response = client.chat.completions.create(
        model=SETTINGS["model"],
        messages=prompt,
        # timeout=120,
        # temperature=SETTINGS["temperature"],
    )
    r = full_response.choices[0].message.content
    completions.append({'question': question, 'answer': r})
    print(f"{question}: {r}")

# Save results
# Create the output folders first so a long run is not lost to a missing
# directory at write time (same helper the batch cell uses).
bu.if_dir_not_exist_make("simulations")
bu.if_dir_not_exist_make("ignorefolder")
df = pd.DataFrame(completions)
df.to_csv(f"simulations/{SIM_ID}_simulation.csv", index=False)
bu.quickJSON(final_prompts, f"ignorefolder/{SIM_ID}_prompts.json")
bu.quickJSON({"settings": SETTINGS, "info": AUTO_INFO}, f"simulations/{SIM_ID}_info.json")