In [6]:
# classifying statements by first classifying each sentence

import openai
import pandas as pd
import time
from tqdm import tqdm
from dotenv import load_dotenv
import os

import utils
from utils import extract_score

# Local alias for the mapping defined in utils
# (presumably label -> numeric score; confirm against utils).
label_to_score = utils.label_to_score

# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()

# Fail fast when the key is missing: os.getenv would otherwise return None,
# and every API request made later in the notebook would fail.
_openai_key = os.getenv("OPENAI_KEY")
if not _openai_key:
    raise RuntimeError(
        "OPENAI_KEY is not set; define it in the environment or in a .env file."
    )
openai.api_key = _openai_key

In [7]:
# Load the statements table; later cells read its 'statements_text' and 'date' columns.
statements = pd.read_csv(filepath_or_buffer='../data/statements.csv')

In [8]:
# Prompt sent to the model for each sentence. The literal token INPUT is
# replaced with the sentence text before the request is made.
# Fixed two copy/paste artifacts in the prompt text: the duplicated word in the
# final question and the hyphenated line break inside "monetary".
# NOTE: any edit to this text changes what the model sees, so scores saved
# from an earlier version of the prompt should be regenerated.
prompt_template = """
<sentence>
INPUT
</sentence>
<labels>
Dovish: Strongly expresses a belief that the economy may be
growing too slowly and may need stimulus through
monetary policy.
Mostly dovish: Overall message expresses a belief that the economy may
be growing too slowly and may need stimulus through
monetary policy.
Neutral: Expresses neither a hawkish nor dovish view and is
mostly objective.
Mostly hawkish: Overall message expresses a belief that the economy is
growing too quickly and may need to be slowed down
through monetary policy.
Hawkish: Strongly expresses a belief that the economy is growing
too quickly and may need to be slowed down through monetary policy.
</labels>
Which label best applies to the sentence (Dovish, Mostly Dovish, Neutral, Mostly Hawkish, Hawkish)?
"""

In [9]:
MAX_RETRIES = 6           # attempts per sentence before the error is re-raised
INITIAL_BACKOFF_S = 1.0   # delay before the first retry; doubles after each failure
MAX_BACKOFF_S = 60.0      # upper bound on the delay between retries
MIN_SENTENCE_CHARS = 5    # shorter fragments are recorded as "TOO SHORT"


def _classify_sentence(sentence, max_retries=MAX_RETRIES, backoff=INITIAL_BACKOFF_S):
    """Ask GPT-4 to label one sentence and return the extracted score.

    The request is retried with exponential backoff. Unlike an unbounded
    retry loop, a persistent failure (bad API key, invalid request, ...)
    is re-raised after `max_retries` attempts instead of looping forever.

    Args:
        sentence: Text substituted for the INPUT placeholder of prompt_template.
        max_retries: Maximum number of attempts before giving up.
        backoff: Seconds to wait before the first retry.

    Returns:
        Whatever extract_score returns for the model's reply.

    Raises:
        Exception: the last error raised by the API call once all attempts
            have failed.
    """
    for attempt in range(1, max_retries + 1):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4",
                messages=[
                    {"role": "user", "content": prompt_template.replace("INPUT", sentence)},
                ],
                temperature=0,
            )
        except Exception as e:
            print(e)
            if attempt == max_retries:
                raise
            time.sleep(backoff)
            backoff = min(backoff * 2, MAX_BACKOFF_S)
        else:
            return extract_score(response["choices"][0]["message"]["content"])


# Maps each statement's date to the list of per-fragment results, in order.
global_sentence_scores = {}
# total= lets tqdm render a real progress bar and ETA for the iterrows() generator.
for _, statement in tqdm(statements.iterrows(), total=len(statements)):
    sentence_scores = []
    # NOTE(review): splitting on '.' also cuts at decimals and abbreviations;
    # kept as-is so fragment positions stay comparable with earlier runs.
    for sentence in statement['statements_text'].split('.'):
        # Ignore surrounding whitespace when measuring length so blank or
        # whitespace-only fragments are not sent to the API.
        if len(sentence.strip()) < MIN_SENTENCE_CHARS:
            sentence_scores.append("TOO SHORT")
        else:
            sentence_scores.append(_classify_sentence(sentence))
    global_sentence_scores[statement["date"]] = sentence_scores

34it [02:25,  6.58s/it]

The server is overloaded or not ready yet.


165it [36:49, 13.39s/it]


In [12]:
global_sentence_scores

{19940204: [1, 0.5, 0.5, 1, 'TOO SHORT'],
 19940322: [1, 0.5, 'TOO SHORT'],
 19940418: [1, 0.5, 'TOO SHORT'],
 19940517: [0, 1, 1, 0, 0, 0, 0, 'TOO SHORT'],
 19940816: [0.5, 0, 0, 0, 0, 0, 'TOO SHORT'],
 19941115: [1, 1, 1, 1, 0, 0, 0, 'TOO SHORT'],
 19950201: [1, 1, 0.5, 1, 0, 0, 0, 'TOO SHORT'],
 19950706: [-1, 0.5, -1, 'TOO SHORT'],
 19951219: [-1, -1, -1, 'TOO SHORT'],
 19960131: [0, 0],
 19970325: [1, 0.5, 0, 0, 0],
 19980929: [-1, 0, 'TOO SHORT', -1, 0, 0],
 19981015: [-1, -1, 0, 0, 0, 0],
 19981117: [0, -1, 0, 0, 0],
 19990518: [0.5, 0, 0, 0.5, 0],
 19990630: [0, 0, 0.5, 0],
 19990824: [1, 1, 'TOO SHORT', 0, 0, 0, 0, 0, 0],
 19991005: [0, 0, 0, 0.5, 0.5, 0, 0, 0],
 19991116: [1, 1, -0.5, 0.5, 0.5, 1, 'TOO SHORT', 0, 0, 0, 0, 0],
 19991221: [0, 1, 1, 0, 0, 0],
 20000202: [1, 1, 1, 1, 1, 0, 0, 0, 0],
 20000321: [1, 1, 0, 1, 1, 0, 0, 0, 0],
 20000516: [1, 1, 1, 1, 1, 0, 0, 0],
 20000628: [0, 0, 0, -0.5, 1, 0],
 20000822: [0, 0, 0, -1, 1, 0],
 20001003: [0, 0, 0, -1, 0.5, 0, 1, 0],


In [11]:
import json

# Destination of the per-sentence scores.
file_path = '../results/statements_scores_by_sentence.json'

# Create the output directory if needed so the write cannot fail with
# FileNotFoundError after the long scoring run.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Serialise before opening the file: if the data is not JSON-serialisable the
# error is raised here, and an existing results file is not truncated.
# Note that json converts the integer date keys to strings.
json_data = json.dumps(global_sentence_scores)

with open(file_path, 'w') as file:
    file.write(json_data)