In [None]:
import openai
import pandas as pd
import time
from tqdm import tqdm
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Fail fast with a clear message: without a key every later API call would
# fail, which is much harder to diagnose from inside the scoring loop.
_openai_key = os.getenv("OPENAI_KEY")
if not _openai_key:
    raise RuntimeError(
        "OPENAI_KEY is not set; define it in the environment or in a .env file"
    )
openai.api_key = _openai_key

In [None]:
# Raw statements table; the scoring loop reads its 'statements_text' and 'date' columns.
statements = pd.read_csv(filepath_or_buffer="../data/statements.csv")

In [None]:
# Lower-case stance label -> numeric score on a symmetric scale:
# negative values are dovish, positive values are hawkish, 0 is neutral.
label_to_score = dict(
    [
        ("dovish", -1),
        ("mostly dovish", -0.5),
        ("neutral", 0),
        ("mostly hawkish", 0.5),
        ("hawkish", 1),
    ]
)

In [None]:
def extract_score(label: str) -> float:
    """Convert a model reply into its numeric stance score.

    The reply is lower-cased, whitespace is collapsed, and surrounding
    quotes/punctuation are dropped, so replies such as ``"Neutral."`` or
    ``" Mostly Hawkish\\n"`` are recognised. If the cleaned reply is not itself
    a label, it is scanned for labels embedded in a longer sentence; the
    embedded label is used only when exactly one distinct label is found.

    Args:
        label: Raw text returned by the model.

    Returns:
        The score from ``label_to_score`` for the recognised label. If no
        label (or more than one) can be identified, the raw reply is printed
        and 0 is returned.
    """
    # e.g. '"Mostly  Dovish."\n' -> 'mostly dovish'
    cleaned = " ".join(label.lower().split()).strip(" .,;:!?\"'`*")
    if cleaned in label_to_score:
        return label_to_score[cleaned]

    # Scan longer labels first and blank them out once matched, so that
    # "mostly dovish" is not additionally counted as "dovish".
    remaining = cleaned
    found = []
    for name in sorted(label_to_score, key=len, reverse=True):
        if name in remaining:
            found.append(name)
            remaining = remaining.replace(name, " ")
    if len(found) == 1:
        return label_to_score[found[0]]

    # Unrecognised or ambiguous reply: surface it and fall back to 0.
    print(label)
    return 0

In [None]:
# Classification prompt sent to the model. The literal token INPUT is a
# placeholder that the scoring loop swaps for the statement text via str.replace.
prompt_template = """
<statement>
INPUT
</statement>
<labels>
Dovish: Strongly expresses a belief that the economy may be
growing too slowly and may need stimulus through
monetary policy.
Mostly dovish: Overall message expresses a belief that the economy may
be growing too slowly and may need stimulus through
monetary policy.
Neutral: Expresses neither a hawkish nor dovish view and is
mostly objective.
Mostly hawkish: Overall message expresses a belief that the economy is
growing too quickly and may need to be slowed down
through monetary policy.
Hawkish: Strongly expresses a belief that the economy is growing
too quickly and may need to be slowed down through monetary policy.
</labels>
Which label best applies to the statement (Dovish, Mostly Dovish, Neutral, Mostly Hawkish, Hawkish)?
"""

In [None]:
MAX_ATTEMPTS = 5       # give up on a statement after this many failed API calls
INITIAL_BACKOFF_S = 1  # delay before the first retry; doubles after each failure

# Maps each statement's date to its numeric stance score.
scores = {}
for _idx, statement in tqdm(statements.iterrows(), total=len(statements)):
    text = statement['statements_text']
    # Build the prompt outside the retry loop: a bad input (e.g. a non-string
    # cell) should fail immediately rather than be retried as an API error.
    prompt = prompt_template.replace("INPUT", text)

    # call GPT-4, retrying failures with exponential backoff
    response = None
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4",
                messages=[
                    {"role": "user", "content": prompt},
                ],
                temperature=0,
            )
            break
        except Exception as e:
            print(e)
            if attempt == MAX_ATTEMPTS:
                # Persistent failure (bad key, invalid request, outage):
                # stop instead of looping forever.
                raise
            time.sleep(INITIAL_BACKOFF_S * 2 ** (attempt - 1))

    score = extract_score(response["choices"][0]["message"]["content"])

    scores[statement["date"]] = score

In [None]:
import pandas as pd
# Create the output directory first so the write cannot fail on a missing
# folder after the (slow, paid) scoring run has finished.
os.makedirs("../results", exist_ok=True)
# One row per (date, score) pair, written without the DataFrame index.
pd.DataFrame(list(scores.items()), columns=["date", "score"]).to_csv("../results/statements_scores.csv", index=False)