In [2]:
import json
import os
import random
import time
from collections import deque

from application.aws import AwsAPI
from application.game import Game
from application.utils import get_kill_events

# Realistic kill events test

For this test, kill events were generated to be as close to real ones as possible: a tree structure was generated to map events to users, each user was randomly assigned 0 to 6 kills, and the tree was traversed from the leaves to the root, so that in the end only 1 user is alive. The context window size was set to 20 messages for performance reasons, so the model generates each response from the initial instructions, up to 8 previous event batches (with their commentary), and the current event batch. Claude v2 was used; this notebook can also be used to test the quality of other models' generations on this task.

In [3]:
# Client used later to request streamed commentary from the model.
aws = AwsAPI()

# Load the recorded kill events and build the users tree from them;
# `root` is the entry point used when generating the ordered event stream.
kills = get_kill_events("data/kills.csv")
game = Game(kills)
root = game.create_users_tree()

In [4]:
# Opening exchange that primes the model: the first entry is the user
# instruction describing the kill-event schema and the commentary rules,
# the second is the assistant acknowledgement. These two messages are kept
# at the head of the conversation for every request.
init_prompts = [
    (
        "Imagine you're commenting the battle royale game match. You'll be getting kill events from the game like this one: "
        '[{"victim": {"username": str},"KillInstigator": {"username": str,"Distance": float,"first_kill": bool,"used_weapon": {"type": str,"name": str},"Headshot": bool,"OneShot": bool,"num_kills": int,"previous_victims": [str]},"location": str,"num_players_alive": int}]. '
        "Sometimes you will be getting more than one event in this list. "
        # Typo fix: the instruction previously read "senteces".
        "You'll need to comment on the kill events, using 3 sentences max. "
        "If there are multiple events, you can decide either to comment on all of them or comment the last of them while also keeping in mind other events. "
        "Even if there are multiple events, you should still be able to say everything in 3 sentences. "
        "You're not supposed to always use each field for the comment, but you can use them if you think they're relevant. "
        "Try to also remember previous events and if you see some patterns feel free to voice them. Understood?"
    ),
    "Understood. I'm ready to commentate on the battle royale game events.",
]

In [5]:
# Conversation history, seeded with the priming user/assistant exchange.
# Messages alternate user, assistant, user, ... from the left end.
q = deque(init_prompts)

# Upper bound on the number of messages kept in `q` (priming pair included).
context_window_size = 20

# Unix timestamp that gives this run's transcript a unique file name.
file_timestamp = int(time.time())

# Make sure the transcript directory exists so a fresh checkout does not
# fail with FileNotFoundError on the first write.
os.makedirs("output/text", exist_ok=True)

# Start the transcript with the priming exchange. UTF-8 is pinned so the
# file does not depend on the platform's default encoding.
with open(f"output/text/{file_timestamp}-dialogue.txt", "a", encoding="utf-8") as f:
    f.write(f"User: {q[-2]}\n")
    f.write(f"Assistant: {q[-1]}\n\n")

In [6]:
# Group the flat stream of kill events into consecutive batches of 1 to 4
# events, so a single user message may carry several events at once.
kill_events: list[list[dict]] = []
kill_events_flat = list(game.get_kill_event(root))

offset = 0
total_events = len(kill_events_flat)
while offset < total_events:
    # Draw the batch size first, then clamp it to what is left in the stream.
    batch_size = min(random.randint(1, 4), total_events - offset)
    kill_events.append(kill_events_flat[offset:offset + batch_size])
    offset += batch_size

In [7]:
# Roles are assigned purely by position in `q`: even indices are user
# messages, odd indices are assistant messages. Loop-invariant, so it is
# defined once instead of on every iteration.
roles = ("user", "assistant")

for kill_events_list in kill_events:
    # Each iteration adds two messages (event batch + commentary). When that
    # would exceed the window, drop the oldest non-priming exchange while
    # keeping the priming prompts at the head of the history.
    if len(q) + 2 > context_window_size:
        for _ in range(len(init_prompts)):
            q.popleft()
        q.popleft()
        q.popleft()
        for prompt in reversed(init_prompts):
            q.appendleft(prompt)

    # The user message is the JSON-encoded list of events in this batch.
    q.append(json.dumps(kill_events_list))

    messages = [
        {
            "role": roles[idx % 2],
            "content": [{"type": "text", "text": prompt}],
        }
        for idx, prompt in enumerate(q)
    ]

    response = []
    try:
        # NOTE(review): the second argument is the current time; presumably
        # it is the request start used for latency stats -- confirm in AwsAPI.
        for sentence in aws.get_streamed_response_claude(messages, time.time()):
            response.append(sentence)
    except KeyboardInterrupt:
        # Remove the unanswered user message so `q` keeps its strict
        # user/assistant alternation; otherwise re-running this cell after an
        # interrupt would assign the wrong role to every later message.
        q.pop()
        break

    print(aws.get_bedrock_stats())

    q.append("".join(response))

    # Append the latest exchange to this run's transcript. UTF-8 is pinned
    # because model output may contain non-ASCII characters that the
    # platform default encoding cannot represent.
    with open(f"output/text/{file_timestamp}-dialogue.txt", "a", encoding="utf-8") as f:
        f.write(f"User: {q[-2]}\n")
        f.write(f"Assistant: {q[-1]}\n\n")


Bedrock latencies: avg=0.95sec, min=0.38sec, max=1.52sec
Bedrock latencies: avg=0.69sec, min=0.30sec, max=1.19sec
Bedrock latencies: avg=1.72sec, min=0.53sec, max=2.92sec
Bedrock latencies: avg=1.68sec, min=0.53sec, max=2.84sec
Bedrock latencies: avg=1.25sec, min=0.61sec, max=1.89sec
Bedrock latencies: avg=1.70sec, min=0.68sec, max=2.71sec
Bedrock latencies: avg=1.59sec, min=1.06sec, max=2.11sec
Bedrock latencies: avg=1.77sec, min=1.15sec, max=2.39sec
Bedrock latencies: avg=1.41sec, min=0.38sec, max=3.08sec
Bedrock latencies: avg=2.23sec, min=1.00sec, max=3.46sec
Bedrock latencies: avg=2.37sec, min=1.01sec, max=3.72sec
Bedrock latencies: avg=1.48sec, min=0.78sec, max=2.59sec
