In [None]:
import openai
import datetime
import json
import csv
import pandas as pd
import pathlib
import base64

In [None]:
# Base directory: the resolved current working directory of the kernel.
# Every data path in this notebook is built relative to it.
path = pathlib.Path().resolve()

# Chapter to process; the same name selects the transcript CSV and the audio export.
chapter = "11_Monocos_Station"
dialogues_file_path = path / "data" / "csv" / f"{chapter}.csv"
audio_file_path = path / "data" / "audio" / "exported" / f"{chapter}.mp3"

In [None]:
# Load the chapter transcript, order it by chapter, dialogue and line index,
# and replace the index with a fresh 0..n-1 range so it follows the sorted order.
df = (
    pd.read_csv(dialogues_file_path)
    .sort_values(by=["chapter_index", "dialogue_index", "line_index"])
    .reset_index(drop=True)
)
df.head()

In [None]:
# Per-line key "<dialogue_index>_<line_index>"; a later cell joins the model's
# scores back onto the transcript through this column.
df["id"] = df["dialogue_index"].astype(str).str.cat(
    df["line_index"].astype(str), sep="_"
)

# Transcript row as sent to the model: "<id> | <speaker>: <line>".
df["outc"] = (
    df["id"]
    + " | "
    + df["speaker"]
    + ": "
    + df["line"]
)
df["outc"].head()

In [None]:
# Emotion labels the model is allowed to score, grouped by polarity.
negative_emotions = [
    "anger",
    "sadness",
    "fear",
]
positive_emotions = [
    "happiness",
    "ambitious",
    "surprise",
]
# Negative labels first, then positive ones. "neutral" is not part of this
# list; it is only named in the prompt below.
target_emotions = [*negative_emotions, *positive_emotions]

# System prompt: describes the task, enumerates the allowed labels (interpolated
# from the lists above) and asks for a JSON-only reply keyed per voice line.
system_message = f"""
## TASK
Evaluate the likelihood of the emotions in the dialogue.
Consider the actor's interpretation, the background music and the meaning of the words.
Only classify the following emotions:
- positive: [{', '.join(positive_emotions)}]
- negative: [{', '.join(negative_emotions)}]
- neutral: [neutral]

## REQUIREMENTS
- You will have the transcript of the dialogue. Use the row index as key when returning the estimate for the voice line.
- Make sure to not classify any other emotion apart from those listed.
- Don't mix positive and negative emotions in a single voice line.
- Your estimate should be between 0 and 1, and the total should add up to 1.
- If an emotion has a score lower than 0.1 , ignore it and add that score to the highest valued emotions.
- If an emotion is not scored, return it with a score of 0.0
- When you reply, do not add any other text. Just reply with a JSON formatted string.
"""

In [None]:
# Base64-encode the chapter audio so it can be embedded in the request body.
# Path.read_bytes() opens and closes the file itself; the previous bare
# open(...).read() left the file handle open until garbage collection.
audio_b64 = base64.b64encode(audio_file_path.read_bytes()).decode("utf-8")

# One transcript row per text line, in the row order of `df`.
dialogues_text = "\n".join(df["outc"].to_list())

In [None]:
# Read the API token from a local file under data/.
# read_text() closes the file itself (the previous bare open(...).read() did not),
# and strip() removes surrounding whitespace such as the trailing newline most
# editors append, which would otherwise become part of the key.
key = (path / "data/open_ai_token.txt").read_text().strip()
client = openai.OpenAI(api_key=key)

In [None]:
# https://platform.openai.com/docs/api-reference/chat/create
response = client.chat.completions.create(
  model="gpt-audio",
  temperature=0.1,
  messages=[
    {
      "role": "system",
      "content": system_message
    },
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": dialogues_text
        },
        {
          "type": "input_audio",
          "input_audio": {
            "data": audio_b64,
            "format": "mp3"
          }
        }
      ]
    }
  ]
)

In [None]:
# Plain-dict copy of the response; also written to disk by a later cell.
res_dict = response.to_dict()

# Any finish reason other than 'stop' is treated as an unusable reply:
# dump the whole response for inspection and abort.
if res_dict['choices'][0]['finish_reason'] != 'stop':
    print(json.dumps(res_dict, indent=2))
    raise ValueError("API response was not complete. Exiting...")

# The message content may be None; normalise to "" so the check below can
# report it explicitly instead of json.loads failing with a TypeError.
raw_content = response.choices[0].message.content or ""

# Models sometimes wrap the JSON in a Markdown code fence or add stray text
# despite the prompt. Parse only the outermost {...} span so those replies
# still load; a plain JSON object is parsed exactly as before.
json_start = raw_content.find("{")
json_end = raw_content.rfind("}")
if json_start == -1 or json_end < json_start:
    print(json.dumps(res_dict, indent=2))
    raise ValueError("API response did not contain a JSON object. Exiting...")

# Keys of the JSON object become the index, one row per scored voice line.
out_content = json.loads(raw_content[json_start:json_end + 1])
emotions_df = pd.DataFrame.from_dict(out_content, orient='index')
emotions_df.head()

In [None]:
# Attach the scores to the transcript: inner join of the "id" column of `df`
# against the index of `emotions_df`, so lines without a score are not kept.
# The helper columns built for the prompt ("outc", "id") are dropped afterwards.
joined_df = (
    df.merge(
        emotions_df,
        how="inner",
        left_on="id",
        right_on=emotions_df.index,
    )
    .drop(columns=["outc", "id"])
)
joined_df.head()

In [None]:
# File stem: "<chapter>_<dd-mm-YYYYTHH-MM>_<first three letters of each emotion>".
emotions_short = "-".join(e[:3] for e in target_emotions)
now = datetime.datetime.now().strftime("%d-%m-%YT%H-%M")
out_fname = f"{chapter}_{now}_{emotions_short}"
emotions_df_path = path/f"./data/emotions_scored/{out_fname}.csv"
api_response_path = path/f"./data/emotions_scored/api_responses/{out_fname}.json"

# Create both output directories up front. Previously only the CSV directory
# was created (and only when its parent already existed), so writing the API
# response failed on a fresh checkout where 'api_responses' was missing.
emotions_df_path.parent.mkdir(parents=True, exist_ok=True)
api_response_path.parent.mkdir(parents=True, exist_ok=True)

# Write dataframe (every field quoted).
joined_df.to_csv(
    emotions_df_path.as_posix(),
    quotechar='"',
    quoting=csv.QUOTE_ALL
)
# Report the name of the file actually written, not just the chapter name.
print(f"Written file '{emotions_df_path.name}'")

# Write API response alongside the scores, under the same file stem.
with open(api_response_path.as_posix(), "w") as f:
    json.dump(res_dict, f, indent=2)
print(f"Written API response '{api_response_path.name}'")