In [None]:
# classifying statements by first classifying each sentence

import openai
import pandas as pd
import time
from tqdm import tqdm
from dotenv import load_dotenv
import os

import utils
from utils import extract_score

# Load environment variables via python-dotenv, then hand the value of
# OPENAI_KEY to the OpenAI client.
load_dotenv()
openai.api_key = os.getenv("OPENAI_KEY")

# Notebook-level alias for utils.label_to_score.
label_to_score = utils.label_to_score

In [None]:
# Load the statements table; later cells read its 'statements_text' and
# 'date' columns.
statements_csv = "../data/statements.csv"
statements = pd.read_csv(statements_csv)

In [None]:
# Zero-shot prompt sent to the model for every sentence. The literal token
# INPUT is replaced with the sentence to classify before each request.
# NOTE: fixed two text defects that were being sent verbatim to the model:
# a duplicated word ("applies applies") and a line-break hyphen that split
# "monetary" into "mon-" / "etary". Results produced with the old wording may
# differ slightly from results produced with this one.
prompt_template = """
<sentence>
INPUT
</sentence>
<labels>
Dovish: Strongly expresses a belief that the economy may be
growing too slowly and may need stimulus through
monetary policy.
Mostly dovish: Overall message expresses a belief that the economy may
be growing too slowly and may need stimulus through
monetary policy.
Neutral: Expresses neither a hawkish nor dovish view and is
mostly objective.
Mostly hawkish: Overall message expresses a belief that the economy is
growing too quickly and may need to be slowed down
through monetary policy.
Hawkish: Strongly expresses a belief that the economy is growing
too quickly and may need to be slowed down through monetary policy.
</labels>
Which label best applies to the sentence (Dovish, Mostly Dovish, Neutral, Mostly Hawkish, Hawkish)?
"""

In [None]:
# Few-shot mode: prepend labelled example sentences to the prompt template.
# Set few_shot = False to run the zero-shot prompt unchanged.
few_shot = True
if few_shot:
    shots_file = "../data/fewshot_statement_sentences.csv"
    shots = pd.read_csv(shots_file)
    # Positional row indices into `shots`, in the order the examples are shown.
    order = [3, 8, 1, 9, 4, 7, 0, 5, 6, 2]
    # order = [1, 3, 5, 7, 9]

    # One "<sentence>: <label>" line per example. The f-string also copes with
    # a non-string "score" column, where plain `+=` would raise TypeError.
    fewshot_prompt = "Here are a few examples of sentences and their label:\n"
    fewshot_prompt += "".join(
        f'{shots.iloc[i]["sentence"]}: {shots.iloc[i]["score"]}\n' for i in order
    )
    fewshot_prompt += "\n"

    # Keep the cell idempotent: re-running it must not stack a second copy of
    # the examples on top of an already-prefixed template. (If `order` is
    # edited between runs, re-run the cell that defines prompt_template first.)
    if not prompt_template.startswith(fewshot_prompt):
        prompt_template = fewshot_prompt + prompt_template
    print(prompt_template)

In [None]:
# Smoke test: send a single sentence through the prompt and print the raw reply.
# `shots` only exists in few-shot mode, so fall back to the first sentence of
# the first statement otherwise.
if few_shot:
    sample_sentence = shots.iloc[8]["sentence"]
else:
    sample_sentence = statements.iloc[0]["statements_text"].split('.')[0]

try:
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "user", "content": prompt_template.replace("INPUT", sample_sentence)},
        ],
        temperature=0,
    )
except Exception as e:
    # Report the failure; there is no retry here, so no need to sleep.
    print(e)
else:
    # Only print when the request succeeded. Otherwise `response` would be
    # undefined, or left over from an earlier run of this cell.
    print(response["choices"][0]["message"]["content"])

In [None]:
# Classify every sentence of every statement with GPT-4 and collect the
# per-sentence scores in a dict keyed by the statement's "date" value.
MAX_RETRIES = 10     # attempts per sentence before the error is re-raised
MAX_BACKOFF_S = 60   # upper bound (seconds) on the wait between attempts

global_sentence_scores = {}
# NOTE(review): the first five rows are skipped - confirm this is intentional
# and not a leftover from resuming an interrupted run.
statements_to_score = statements[5:]
for _, statement in tqdm(statements_to_score.iterrows(), total=len(statements_to_score)):
    text = statement['statements_text']
    # NOTE(review): splitting on '.' also breaks at decimal points and
    # abbreviations; left as-is so sentence positions match earlier results.
    sentences = text.split('.')
    sentence_scores = []
    for sentence in sentences:
        # Fragments shorter than 5 characters are not sent to the model.
        if len(sentence) < 5:
            sentence_scores.append("TOO SHORT")
            continue
        # Call GPT-4, retrying with exponential backoff (1s, 2s, 4s, ...). The
        # retry count is bounded so a permanent failure (bad key, invalid
        # request) surfaces as an exception instead of looping forever.
        response = None
        for attempt in range(MAX_RETRIES):
            try:
                response = openai.ChatCompletion.create(
                    model="gpt-4",
                    messages=[
                        {"role": "user", "content": prompt_template.replace("INPUT", sentence)},
                    ],
                    temperature=0,
                )
                break
            except Exception as e:
                print(e)
                if attempt == MAX_RETRIES - 1:
                    # Out of attempts; scores collected so far remain in
                    # global_sentence_scores.
                    raise
                time.sleep(min(2 ** attempt, MAX_BACKOFF_S))
        sentence_score = extract_score(response["choices"][0]["message"]["content"])
        sentence_scores.append(sentence_score)
    global_sentence_scores[statement["date"]] = sentence_scores

In [None]:
# Display the collected per-sentence scores, keyed by each statement's "date" value.
global_sentence_scores

In [None]:
import json

# Choose the output file; few-shot and zero-shot runs are saved separately.
file_path = '../results/'
if few_shot:
    file_path += "statements_scores_by_sentence_few_shot.json"
else:
    file_path += "statements_scores_by_sentence.json"

# Serialise before opening the file, so a serialisation error cannot leave a
# truncated or empty results file behind.
json_data = json.dumps(global_sentence_scores)

# Make sure the results directory exists. Otherwise the write fails with
# FileNotFoundError after the expensive API run has already completed.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Write the JSON data to the file
with open(file_path, 'w', encoding='utf-8') as file:
    file.write(json_data)