In [None]:
import json
import pandas as pd
from openai import OpenAI

In [None]:
# System prompt sent with every fine-tuning example. It must enumerate every
# class the assistant can answer with, i.e. all seven entries of `sentiments`
# ('disgusto' was previously missing), plus the 'idk' fallback.
assistant_content = (
    "Sei un chatbot che riconosce quale emozione tra 'gioia', 'vergogna', 'colpevolezza', 'paura', "
    "'rabbia', 'tristezza', 'disgusto' esprime la frase che gli viene posta. Se non conosci la risposta rispondi "
    "con 'idk'."
)

In [None]:
# Train / validation splits are stored as pipe-separated CSV files.
train_df = pd.read_csv("sample_data/train_isear_it.csv", sep="|")
eval_df = pd.read_csv("sample_data/val_isear_it.csv", sep="|")

# Emotion names ordered so that the list index is the label id:
# format_examples looks names up with each row's "label" value.
sentiments = [
    "gioia",         # 0
    "tristezza",     # 1
    "rabbia",        # 2
    "paura",         # 3
    "vergogna",      # 4
    "disgusto",      # 5
    "colpevolezza",  # 6
]

In [None]:
def format_examples(examples: pd.DataFrame, system_prompt=None, labels=None):
    """Serialize each row of ``examples`` as a chat fine-tuning record.

    Every row becomes a JSON string holding a ``messages`` list with three
    entries: the system prompt, the row's ``text`` as the user message, and the
    emotion name selected by the row's ``label`` as the assistant message.

    Args:
        examples: Frame with a ``text`` column and a ``label`` column whose
            values are valid indices into ``labels``.
        system_prompt: Content of the system message. Defaults to the
            module-level ``assistant_content``.
        labels: Sequence mapping a label id to its emotion name. Defaults to
            the module-level ``sentiments``.

    Returns:
        A new frame equal to ``examples`` plus a ``formatted_example`` column
        containing one JSON string per row; the input frame is not modified.

    Raises:
        KeyError: If the ``text`` or ``label`` column is missing.
        IndexError: If a label id is outside the range of ``labels``.
    """
    # Resolve the defaults at call time so later edits to the globals are picked up.
    if system_prompt is None:
        system_prompt = assistant_content
    if labels is None:
        labels = sentiments

    # Walk the two needed columns directly instead of iterrows(), which builds
    # a full Series object for every row.
    formatted_examples = [
        json.dumps(
            {
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": text},
                    {"role": "assistant", "content": labels[label]},
                ]
            }
        )
        for text, label in zip(examples["text"], examples["label"])
    ]
    return examples.assign(formatted_example=formatted_examples)

In [None]:
# Add the serialized chat record (`formatted_example` column) to both splits.
train_df, eval_df = (format_examples(split) for split in (train_df, eval_df))

In [None]:
# Dump each split as JSON Lines: one serialized chat example per line.
# The encoding is pinned so the files do not depend on the platform default.
with open("train.jsonl", "w", encoding="utf-8") as f:
    for fe in train_df["formatted_example"]:
        f.write(fe + "\n")
# The validation file must come from eval_df; writing train_df here would make
# the validation set a copy of the training set.
with open("val.jsonl", "w", encoding="utf-8") as f:
    for fe in eval_df["formatted_example"]:
        f.write(fe + "\n")

In [None]:
# No credentials are passed explicitly, so the client relies on its default
# configuration (per the OpenAI library docs, the OPENAI_API_KEY env variable).
client = OpenAI()

# Upload both JSONL files for fine-tuning. The handles are opened in context
# managers so they are closed once each upload call returns.
with open("train.jsonl", "rb") as train_fh:
    train_file = client.files.create(
        file=train_fh,
        purpose="fine-tune"
    )
print(f"Train file id: {train_file.id}")

with open("val.jsonl", "rb") as eval_fh:
    eval_file = client.files.create(
        file=eval_fh,
        purpose="fine-tune"
    )
print(f"Evaluation file id: {eval_file.id}")

In [None]:
# Start the fine-tuning job on the two uploaded files; the printed id is what
# identifies the job afterwards.
job = client.fine_tuning.jobs.create(
    model="gpt-3.5-turbo-0125",      # base model being fine-tuned
    suffix="emotions",               # suffix passed for the fine-tuned model
    training_file=train_file.id,     # id returned by the train.jsonl upload
    validation_file=eval_file.id,    # id returned by the val.jsonl upload
)
print(f"Job id: {job.id}")