In [1]:
# Import libraries
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import pandas as pd
import numpy as np
import re

In [2]:
import os

In [3]:
model = BertForSequenceClassification.from_pretrained("ProsusAI/finbert")

In [4]:
tokenizer = BertTokenizer.from_pretrained("ProsusAI/finbert")

In [5]:
# Function to perform sentiment analysis on text data
def predict_sentiment(text_data):
    # Tokenize the input text data
    tokens = tokenizer(text_data, padding=True, truncation=True, return_tensors='pt')
    
    # Pass the tokenized data through the finBERT model to get predicted sentiment scores
    with torch.no_grad():
        outputs = model(**tokens)
        logits = outputs.logits

    # Get predicted sentiment labels (positive, neutral, negative) from logits
    _, predicted_labels = torch.max(logits, dim=1)
    # Convert the logits to probabilities
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    # Convert probabilities tensor to numpy array
    probs_np = probs.detach().numpy()

    # Map numerical labels back to sentiment categories
    sentiment_mapping = {0: "negative", 1: "neutral", 2: "positive"}
    predicted_sentiments = [sentiment_mapping[label.item()] for label in predicted_labels]
    predicted_sentiments_pd=pd.DataFrame({"Text": text_data, "Predicted Sentiment": predicted_sentiments})
    # Create a new DataFrame with dates and probabilities
    df_probs = pd.DataFrame({
    'negative_prob': probs_np[:, 0],
    'neutral_prob': probs_np[:, 1],
    'positive_prob': probs_np[:, 2]
    })
    #print(probs_np)
    #print(df_results)
    predicted_sentiments_pr = pd.concat([predicted_sentiments_pd, df_probs], axis=1) 
    
   # print(predicted_sentiments_pr)
    return predicted_sentiments_pr

In [6]:
# The function that splits each statment text into seperate sentences. 
def sentence_split(text):
    return re.split(r'(?<=\w\.)\s+',text)


In [8]:
#   Load the text data 
text_dta = pd.read_excel('fomc_statements.xlsx', sheet_name='statements')

In [9]:
# Call the function to split the sentences for each date
text_dta["Statement_sentence"] = text_dta["Statement"].apply(sentence_split)

In [10]:
# Check the splitted text 
text_dta["Statement_sentence"][1]

['The Federal Open Market Committee voted today to raise its target for the federal funds rate by 25 basis points to 6 percent.',
 'In a related action, the Board of Governors approved a 25 basis point increase in the discount rate to 5-1/2 percent.Economic conditions and considerations addressed by the Committee are essentially the same as when the Committee met in February.',
 "The Committee remains concerned that increases in demand will continue to exceed the growth in potential supply, which could foster inflationary imbalances that would undermine the economy's record economic expansion.Against the background of its long-run goals of price stability and sustainable economic growth and of the information currently available, the Committee believes the risks are weighted mainly toward conditions that may generate heightened inflation pressures in the foreseeable future.In taking the discount rate action, the Federal Reserve Board approved requests submitted by the Boards of Directo

In [11]:
text_dta["Statement_sentence"][201]



['Recent indicators suggest that economic activity has continued to expand at a modest pace.',
 'Job gains have been robust in recent months, and the unemployment rate has remained low.',
 'Inflation remains elevated.',
 'The U.S.',
 'banking system is sound and resilient.',
 'Tighter credit conditions for households and businesses are likely to weigh on economic activity, hiring, and inflation.',
 'The extent of these effects remains uncertain.',
 'The Committee remains highly attentive to inflation risks.',
 'The Committee seeks to achieve maximum employment and inflation at the rate of 2 percent over the longer run.',
 'In support of these goals, the Committee decided to maintain the target range for the federal funds rate at 5 to 5-1/4 percent.',
 'Holding the target range steady at this meeting allows the Committee to assess additional information and its implications for monetary policy.',
 'In determining the extent of additional policy firming that may be appropriate to return 

In [20]:
if __name__ == "__main__":
    #text_data = ["Recent indicators suggest that economic activity has continued to expand at a modest pace.",  "Job gains have been robust in recent months, and the unemployment rate has remained low.",  "Inflation remains elevated.", "The U.S. banking system is sound and resilient.",  "Tighter credit conditions for households and businesses are likely to weigh on economic activity, hiring, and inflation.",  "The extent of these effects remains uncertain.",  "The Committee remains highly attentive to inflation risks.", "The Committee seeks to achieve maximum employment and inflation at the rate of 2 percent over the longer run.",  "In support of these goals, the Committee decided to maintain the target range for the federal funds rate at 5 to 5-1/4 percent.",  "Holding the target range steady at this meeting allows the Committee to assess additional information and its implications for monetary policy.",  "In determining the extent of additional policy firming that may be appropriate to return inflation to 2 percent over time, the Committee will take into account the cumulative tightening of monetary policy, the lags with which monetary policy affects economic activity and inflation, and economic and financial developments.", "In addition, the Committee will continue reducing its holdings of Treasury securities and agency debt and agency mortgage-backed securities, as described in its previously announced plans.",  "The Committee is strongly committed to returning inflation to its 2 percent objective.", "In assessing the appropriate stance of monetary policy, the Committee will continue to monitor the implications of incoming information for the economic outlook.",  "The Committee would be prepared to adjust the stance of monetary policy as appropriate if risks emerge that could impede the attainment of the Committee's goals.",  "The Committee's assessments will take into account a wide range of information, including readings on labor market conditions, inflation pressures and inflation expectations, and financial and international developments.", "Voting for the monetary policy action were Jerome H. Powell, Chair; John C. Williams, Vice Chair; Michael S. Barr; Michelle W. Bowman; Lisa D. Cook; Austan D. Goolsbee; Patrick Harker; Philip N. Jefferson; Neel Kashkari; Lorie K. Logan; and Christopher J. Waller."]
    text_data = text_dta["Statement_sentence"][201]
    predicted_sentiments = predict_sentiment(text_data)


In [21]:
predicted_sentiments

Unnamed: 0,Text,Predicted Sentiment,negative_prob,neutral_prob,positive_prob
0,Recent indicators suggest that economic activi...,negative,0.949405,0.020192,0.030403
1,"Job gains have been robust in recent months, a...",neutral,0.437481,0.532786,0.029733
2,Inflation remains elevated.,negative,0.659559,0.161488,0.178953
3,The U.S.,positive,0.089838,0.025371,0.884791
4,banking system is sound and resilient.,positive,0.317888,0.013248,0.668864
5,Tighter credit conditions for households and b...,neutral,0.013767,0.949047,0.037186
6,The extent of these effects remains uncertain.,positive,0.032487,0.121893,0.84562
7,The Committee remains highly attentive to infl...,positive,0.337256,0.020902,0.641842
8,The Committee seeks to achieve maximum employm...,positive,0.194831,0.015097,0.790072
9,"In support of these goals, the Committee decid...",positive,0.102464,0.026402,0.871135


In [14]:
# To get the FOMC Statements Sentiment Index first set 'neutral' sentiment scores to 0 and then calculate the difference b/w positive 
# and negative sentiment scores (probabilities)
# Then sum over the negative of differences and divide by the total number of sentiments or use the average over the number of sentiments-sentences with a sentiment as the index 
predicted_sentiments['fomc_indx_num'] = np.where(predicted_sentiments['Predicted Sentiment']== 'neutral', 0, -1*predicted_sentiments['negative_prob'] + 1*predicted_sentiments['positive_prob'])

In [15]:
predicted_sentiments

Unnamed: 0,Text,Predicted Sentiment,negative_prob,neutral_prob,positive_prob,fomc_indx_num
0,Recent indicators suggest that economic activi...,negative,0.949405,0.020192,0.030403,-0.919002
1,"Job gains have been robust in recent months, a...",neutral,0.437481,0.532786,0.029733,0.0
2,Inflation remains elevated.,negative,0.659559,0.161488,0.178953,-0.480605
3,The U.S.,positive,0.089838,0.025371,0.884791,0.794953
4,banking system is sound and resilient.,positive,0.317888,0.013248,0.668864,0.350976
5,Tighter credit conditions for households and b...,neutral,0.013767,0.949047,0.037186,0.0
6,The extent of these effects remains uncertain.,positive,0.032487,0.121893,0.84562,0.813133
7,The Committee remains highly attentive to infl...,positive,0.337256,0.020902,0.641842,0.304585
8,The Committee seeks to achieve maximum employm...,positive,0.194831,0.015097,0.790072,0.595241
9,"In support of these goals, the Committee decid...",positive,0.102464,0.026402,0.871135,0.768671


In [16]:
fomc_sent_indx = -predicted_sentiments['fomc_indx_num'].mean()

In [17]:
fomc_sent_indx

-0.5519564

In [24]:
for i in len(text_dta["Statement_sentence"]):
    sentiment_list = []
    sentiment_list.append(predict_sentiments(text_dta[i]))

TypeError: 'int' object is not iterable