In [1]:
from FedTools import MonetaryPolicyCommittee
from FedTools import BeigeBooks
from FedTools import FederalReserveMins


### downloading the press release document

In [2]:
monetary_policy = MonetaryPolicyCommittee(
            main_url = 'https://www.federalreserve.gov', 
            calendar_url = 'https://www.federalreserve.gov/monetarypolicy/fomccalendars.htm',
            start_year = 2016,    
            historical_split = 2017,
            verbose = True,
            thread_num = 10)
            
dataset = monetary_policy.find_statements()

Constructing links between 2016 and 2023
Extracting the past 67 FOMC Statements.
Retrieving articles.
...................................................................

In [3]:
dataset

Unnamed: 0,FOMC_Statements
2016-01-27,"The Federal Reserve, the central bank of the U..."
2016-03-16,"The Federal Reserve, the central bank of the U..."
2016-04-27,"The Federal Reserve, the central bank of the U..."
2016-06-15,"The Federal Reserve, the central bank of the U..."
2016-07-27,"The Federal Reserve, the central bank of the U..."
...,...
2023-03-22,"The Federal Reserve, the central bank of the U..."
2023-05-03,"The Federal Reserve, the central bank of the U..."
2023-06-14,"The Federal Reserve, the central bank of the U..."
2023-07-26,"The Federal Reserve, the central bank of the U..."


In [4]:
filtered_df = dataset[(dataset.index.year > 2017) & (dataset.index.year < 2023)]

filtered_df

Unnamed: 0,FOMC_Statements
2018-01-31,"The Federal Reserve, the central bank of the U..."
2018-03-21,"The Federal Reserve, the central bank of the U..."
2018-05-02,"The Federal Reserve, the central bank of the U..."
2018-06-13,"The Federal Reserve, the central bank of the U..."
2018-08-01,"The Federal Reserve, the central bank of the U..."
2018-09-26,"The Federal Reserve, the central bank of the U..."
2018-11-08,"The Federal Reserve, the central bank of the U..."
2018-12-19,"The Federal Reserve, the central bank of the U..."
2019-01-30,"The Federal Reserve, the central bank of the U..."
2019-03-20,"The Federal Reserve, the central bank of the U..."


In [5]:
import pandas as pd

# Assuming your DataFrame is named filtered_df
# You can use the str.extract() method with a regular expression to extract the date
filtered_df['Extracted_Date'] = filtered_df['FOMC_Statements'].str.extract(r'Resources  (\w+ \d+, \d+)')
filtered_df['Extracted_Date'] = pd.to_datetime(filtered_df['Extracted_Date'], format='%B %d, %Y')

# Set the 'Extracted_Date' column as the new index
filtered_df.set_index('Extracted_Date', inplace=True)
# Set the 'Extracted_Date' column as the index and remove the column title
filtered_df.index = filtered_df.index.rename(None)

# Filter for dates greater than or equal to 2017 and less than 2023
filtered_df = filtered_df[(filtered_df.index.year >= 2017) & (filtered_df.index.year < 2023)]


# Display the DataFrame with the updated index
filtered_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Extracted_Date'] = filtered_df['FOMC_Statements'].str.extract(r'Resources  (\w+ \d+, \d+)')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Extracted_Date'] = pd.to_datetime(filtered_df['Extracted_Date'], format='%B %d, %Y')


Unnamed: 0,FOMC_Statements
2018-01-31,"The Federal Reserve, the central bank of the U..."
2018-03-21,"The Federal Reserve, the central bank of the U..."
2018-06-13,"The Federal Reserve, the central bank of the U..."
2018-05-02,"The Federal Reserve, the central bank of the U..."
2018-09-26,"The Federal Reserve, the central bank of the U..."
2018-08-01,"The Federal Reserve, the central bank of the U..."
2018-11-08,"The Federal Reserve, the central bank of the U..."
2019-01-30,"The Federal Reserve, the central bank of the U..."
2019-03-20,"The Federal Reserve, the central bank of the U..."
2019-05-01,"The Federal Reserve, the central bank of the U..."


### Converting the paragraph to sentences

In [6]:
import re
def split_sentences(text):
    sentences = re.split(r'\.\s+', text)
    return sentences

# Apply the function to the 'Text' column and create a new column 'Sentences'
filtered_df['Sentences'] = filtered_df['FOMC_Statements'].apply(split_sentences)

# Print the DataFrame with the text split into sentences
filtered_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Sentences'] = filtered_df['FOMC_Statements'].apply(split_sentences)


Unnamed: 0,FOMC_Statements,Sentences
2018-01-31,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ..."
2018-03-21,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ..."
2018-06-13,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ..."
2018-05-02,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ..."
2018-09-26,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ..."
2018-08-01,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ..."
2018-11-08,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ..."
2019-01-30,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ..."
2019-03-20,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ..."
2019-05-01,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ..."


### sentimental analysis for the sentences to find postive , negative and neutral using the FinBert

In [7]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch
import numpy as np
import pandas as pd

# Load the finbert model and tokenizer
finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone', num_labels=3)
tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')



# Function to get sentiment label
def get_sentiment_label(logits):
    return labels[np.argmax(logits)]

# Sentiment labels mapping
labels = {0: 'neutral', 1: 'positive', 2: 'negative'}

# Create a new column for sentiment labels
filtered_df['SentimentLabels'] = None

# Iterate through rows of the DataFrame
for idx, row in filtered_df.iterrows():
    sentences = row['Sentences']
    sentiment_labels = []
    
    # Iterate through sentences in the list
    for sentence in sentences:
        # Tokenize the sentence
        inputs = tokenizer(sentence, return_tensors="pt", padding=True)
        
        # Perform sentiment analysis
        outputs = finbert(**inputs)[0]
        
        # Get the sentiment label
        sentiment_label = get_sentiment_label(outputs.detach().numpy()[0])
        sentiment_labels.append(sentiment_label)
    
    # Assign the sentiment labels to the 'SentimentLabels' column for the current row
    filtered_df.at[idx, 'SentimentLabels'] = sentiment_labels

# Print the updated DataFrame with sentiment labels
filtered_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['SentimentLabels'] = None


Unnamed: 0,FOMC_Statements,Sentences,SentimentLabels
2018-01-31,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, neutra..."
2018-03-21,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, negati..."
2018-06-13,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, positi..."
2018-05-02,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, positi..."
2018-09-26,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, positi..."
2018-08-01,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, positi..."
2018-11-08,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, positi..."
2019-01-30,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, positi..."
2019-03-20,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, negati..."
2019-05-01,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, negati..."


In [8]:
# Define a function to count sentiment labels for a row
def count_sentiment_labels(row):
    counts = {
        'neutral': 0,
        'positive': 0,
        'negative': 0
    }
    
    # Iterate through the sentiment labels in the row
    for label in row['SentimentLabels']:
        counts[label] += 1
    
    return pd.Series(counts)

# Apply the function to each row to count sentiment labels
sentiment_counts_per_row = filtered_df.apply(count_sentiment_labels, axis=1)

# Concatenate the counts with the original DataFrame
filtered_df = pd.concat([filtered_df, sentiment_counts_per_row], axis=1)

# Print the updated DataFrame with sentiment label counts per row
filtered_df

Unnamed: 0,FOMC_Statements,Sentences,SentimentLabels,neutral,positive,negative
2018-01-31,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, neutra...",21,5,1
2018-03-21,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, negati...",20,6,2
2018-06-13,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, positi...",16,6,1
2018-05-02,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, positi...",18,6,3
2018-09-26,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, positi...",17,5,1
2018-08-01,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, positi...",16,6,1
2018-11-08,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, positi...",17,5,1
2019-01-30,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, positi...",18,5,0
2019-03-20,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, negati...",18,4,1
2019-05-01,"The Federal Reserve, the central bank of the U...","[The Federal Reserve, the central bank of the ...","[positive, neutral, positive, positive, negati...",18,4,1


### extracting the sentiments

In [9]:
import pandas as pd

# Assuming you have 'positive' and 'negative' columns in your 'filtered_df'
# Create a new DataFrame with the same index
sentiment_df = pd.DataFrame(index=filtered_df.index)

# Compare 'positive' and 'negative' columns and classify as 'Hawkish', 'Dovish', or 'Neutral'
sentiment_df['Sentiment'] = 'Neutral'  # Default to 'Neutral'
sentiment_df.loc[filtered_df['positive'] > filtered_df['negative'], 'Sentiment'] = 'Hawkish'
sentiment_df.loc[filtered_df['negative'] > filtered_df['positive'], 'Sentiment'] = 'Dovish'


# Display the new DataFrame
sentiment_df


Unnamed: 0,Sentiment
2018-01-31,Hawkish
2018-03-21,Hawkish
2018-06-13,Hawkish
2018-05-02,Hawkish
2018-09-26,Hawkish
2018-08-01,Hawkish
2018-11-08,Hawkish
2019-01-30,Hawkish
2019-03-20,Hawkish
2019-05-01,Hawkish


### Extracting the dates for the press conference documents

In [10]:
import pandas as pd

# Assuming you have a DataFrame named filtered_df
# Create a new DataFrame with row indexes
date_df = pd.DataFrame({'Date': filtered_df.index})

date_df['Date'] = date_df['Date'].astype(str)

# Remove hyphens from the 'Date' column
date_df['Date'] = date_df['Date'].str.replace('-', '')

# Convert the 'Date' column to numeric
date_df['Date'] = pd.to_numeric(date_df['Date'])

# Print the updated DataFrame
date_df


Unnamed: 0,Date
0,20180131
1,20180321
2,20180613
3,20180502
4,20180926
5,20180801
6,20181108
7,20190130
8,20190320
9,20190501


In [11]:
print('done')

done


### downloading the press conference data

In [12]:
import pandas as pd
import requests
from PyPDF2 import PdfReader
import io  # Import the io module



# Create an empty DataFrame to store the extracted text
text_df = pd.DataFrame(columns=['Date', 'Text'])

# Iterate through each date
for date in date_df['Date']:
    # Convert the date to 'YYYYMMDD' format
    formatted_date = date

    # Construct the URL for the PDF based on the date
    pdf_url = f"https://www.federalreserve.gov/mediacenter/files/FOMCpresconf{formatted_date}.pdf"

    # Download the PDF content as bytes
    response = requests.get(pdf_url)

    # Check if the download was successful (status code 200)
    if response.status_code == 200:
        # Get the PDF content as bytes
        pdf_content = response.content

        # Create a PdfReader object
        pdf_reader = PdfReader(io.BytesIO(pdf_content))

        # Initialize a variable to store the extracted text
        text = ''

        # Iterate through each page and extract text
        for page in pdf_reader.pages:
            text += page.extract_text()

        # Create a DataFrame for the extracted text
        text_data = pd.DataFrame({'Date': [date], 'Text': [text]})

        # Concatenate the text_data DataFrame to text_df
        text_df = pd.concat([text_df, text_data], ignore_index=True)

    else:
        print(f"Failed to download the PDF for {date}")

    # Close the response
    response.close()

# Print the DataFrame with the extracted text
text_df


Failed to download the PDF for 20180131
Failed to download the PDF for 20180502
Failed to download the PDF for 20180801
Failed to download the PDF for 20181108
Failed to download the PDF for 20191011
Failed to download the PDF for 20200331
Failed to download the PDF for 20200323
Failed to download the PDF for 20200827


Unnamed: 0,Date,Text
0,20180321,"March 21, 2018 Chairman Powell’s Press Confere..."
1,20180613,"June 13, 2018 Chairman Powell’s Press Conferen..."
2,20180926,"\n \n \n \n \n \n \n September 26, 2018 Cha..."
3,20190130,"January 30, 2019 Chairman Powell’s Press C on..."
4,20190320,"March 20, 2019 Chair Powell’s Press Confe..."
5,20190501,"May 1, 2019 Chair Powell’s Press Conferenc..."
6,20190619,"June 19, 2019 Chair Powell’s Press Confere..."
7,20190731,"July 31, 2019 Chair Powell’s Press Conferen..."
8,20190918,"September 18, 2019 Chair Powell’s Press Conf..."
9,20191030,"October 30, 2019 Chair Powell’s Press Confer..."


In [13]:
import re
def split_sentences(text):
    sentences = re.split(r'\.\s+', text)
    return sentences

# Apply the function to the 'Text' column and create a new column 'Sentences'
text_df['Sentences'] = text_df['Text'].apply(split_sentences)

# Print the DataFrame with the text split into sentences
text_df

Unnamed: 0,Date,Text,Sentences
0,20180321,"March 21, 2018 Chairman Powell’s Press Confere...","[March 21, 2018 Chairman Powell’s Press Confer..."
1,20180613,"June 13, 2018 Chairman Powell’s Press Conferen...","[June 13, 2018 Chairman Powell’s Press Confere..."
2,20180926,"\n \n \n \n \n \n \n September 26, 2018 Cha...","[ \n \n \n \n \n \n \n September 26, 2018 Ch..."
3,20190130,"January 30, 2019 Chairman Powell’s Press C on...","[January 30, 2019 Chairman Powell’s Press C o..."
4,20190320,"March 20, 2019 Chair Powell’s Press Confe...","[March 20, 2019 Chair Powell’s Press Conf..."
5,20190501,"May 1, 2019 Chair Powell’s Press Conferenc...","[May 1, 2019 Chair Powell’s Press Conferen..."
6,20190619,"June 19, 2019 Chair Powell’s Press Confere...","[June 19, 2019 Chair Powell’s Press Confer..."
7,20190731,"July 31, 2019 Chair Powell’s Press Conferen...","[July 31, 2019 Chair Powell’s Press Confere..."
8,20190918,"September 18, 2019 Chair Powell’s Press Conf...","[September 18, 2019 Chair Powell’s Press Con..."
9,20191030,"October 30, 2019 Chair Powell’s Press Confer...","[October 30, 2019 Chair Powell’s Press Confe..."


In [14]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch
import numpy as np
import pandas as pd

# Load the finbert model and tokenizer
finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone', num_labels=3)
tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')



# Function to get sentiment label
def get_sentiment_label(logits):
    return labels[np.argmax(logits)]

# Sentiment labels mapping
labels = {0: 'neutral', 1: 'positive', 2: 'negative'}

# Create a new column for sentiment labels
text_df['SentimentLabels'] = None

# Iterate through rows of the DataFrame
for idx, row in text_df.iterrows():
    sentences = row['Sentences']
    sentiment_labels = []
    
    # Iterate through sentences in the list
    for sentence in sentences:
        # Tokenize the sentence
        inputs = tokenizer(sentence, return_tensors="pt", padding=True)
        
        # Perform sentiment analysis
        outputs = finbert(**inputs)[0]
        
        # Get the sentiment label
        sentiment_label = get_sentiment_label(outputs.detach().numpy()[0])
        sentiment_labels.append(sentiment_label)
    
    # Assign the sentiment labels to the 'SentimentLabels' column for the current row
    text_df.at[idx, 'SentimentLabels'] = sentiment_labels
    print(sentiment_labels)

# Print the updated DataFrame with sentiment labels
text_df


['neutral', 'positive', 'neutral', 'positive', 'positive', 'positive', 'positive', 'neutral', 'positive', 'positive', 'positive', 'positive', 'neutral', 'negative', 'neutral', 'neutral', 'negative', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'positive', 'neutral', 'positive', 'positive', 'positive', 'negative', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'negative', 'neutral', 'neutral', 'negative', 'neutral', 'negative', 'neutral', 'neutral', 'negative', 'neutral', 'negative', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 

['neutral', 'neutral', 'neutral', 'positive', 'positive', 'neutral', 'negative', 'positive', 'neutral', 'positive', 'positive', 'positive', 'negative', 'positive', 'neutral', 'positive', 'negative', 'positive', 'neutral', 'positive', 'positive', 'neutral', 'positive', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'positive', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'negative', 'negative', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', '

['neutral', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'positive', 'positive', 'positive', 'negative', 'negative', 'negative', 'negative', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'negative', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'positive', 'negative', 'positive', 'positive', 'neutral', 'negative', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral

['neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'positive', 'negative', 'neutral', 'positive', 'neutral', 'neutral', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'neutral', 'neutral', 'negative', 'negative', 'positive', 'positive', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'neutral', 'positive', 'negative', 'negative', 'negative', 'positive', 'positive', 'neutral', 'negative', 'positive', 'negative', 'positive', 'neutral', 'negative', 'negative', 'negative', 'neutral', 'neutral', 'positive', 'positive', 'negative', 'neutral', 'negative', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', 

['neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'positive', 'positive', 'positive', 'negative', 'negative', 'negative', 'negative', 'negative', 'neutral', 'negative', 'negative', 'positive', 'neutral', 'positive', 'positive', 'negative', 'neutral', 'neutral', 'positive', 'positive', 'positive', 'neutral', 'negative', 'neutral', 'negative', 'negative', 'positive', 'neutral', 'neutral', 'negative', 'positive', 'positive', 'negative', 'negative', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'neutral', 'positive', 'positive', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'positive', 'negative', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'n

['neutral', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'positive', 'positive', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'positive', 'neutral', 'neutral', 'negative', 'positive', 'positive', 'positive', 'neutral', 'negative', 'neutral', 'positive', 'neutral', 'negative', 'positive', 'neutral', 'positive', 'negative', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'negative', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutra

['neutral', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'positive', 'positive', 'positive', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'neutral', 'negative', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'negative', 'neutra

['neutral', 'positive', 'negative', 'negative', 'negative', 'negative', 'positive', 'neutral', 'positive', 'positive', 'negative', 'negative', 'positive', 'neutral', 'negative', 'negative', 'positive', 'negative', 'neutral', 'neutral', 'negative', 'negative', 'negative', 'negative', 'positive', 'negative', 'negative', 'negative', 'neutral', 'positive', 'positive', 'positive', 'negative', 'negative', 'positive', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'positive', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'negative', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', 'positive', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral'

['neutral', 'positive', 'positive', 'positive', 'neutral', 'positive', 'neutral', 'negative', 'positive', 'negative', 'positive', 'positive', 'neutral', 'negative', 'negative', 'negative', 'negative', 'neutral', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'neutral', 'positive', 'negative', 'negative', 'neutral', 'positive', 'negative', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'positive', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'negative', 'neutral', 'neutral', 'neutral', 'negative', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'p

['neutral', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'negative', 'positive', 'negative', 'positive', 'positive', 'neutral', 'neutral', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'neutral', 'negative', 'positive', 'neutral', 'neutral', 'negative', 'neutral', 'positive', 'negative', 'neutral', 'neutral', 'negative', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'negative', 'neutral', 'neutral', 'neutral', 'negative', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutr

['neutral', 'positive', 'neutral', 'positive', 'negative', 'negative', 'neutral', 'positive', 'positive', 'negative', 'positive', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'positive', 'negative', 'negative', 'neutral', 'negative', 'positive', 'positive', 'neutral', 'positive', 'neutral', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'negative', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'neutral',

['neutral', 'positive', 'positive', 'neutral', 'positive', 'positive', 'positive', 'positive', 'negative', 'positive', 'positive', 'negative', 'neutral', 'negative', 'positive', 'positive', 'negative', 'positive', 'negative', 'positive', 'positive', 'negative', 'negative', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'negative', 'negative', 'negative', 'positive', 'neutral', 'neutral', 'negative', 'negative', 'negative', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', 'positive', 'positive', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'positive', 'negative', 'neutral', 'negative', 'neutral', 'negative', 'neutral', 'neutral', 'neutral

['neutral', 'positive', 'positive', 'neutral', 'positive', 'positive', 'positive', 'positive', 'negative', 'negative', 'negative', 'negative', 'neutral', 'positive', 'positive', 'negative', 'negative', 'negative', 'negative', 'positive', 'negative', 'negative', 'negative', 'negative', 'positive', 'negative', 'neutral', 'negative', 'negative', 'positive', 'neutral', 'neutral', 'negative', 'negative', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', '

['neutral', 'positive', 'neutral', 'neutral', 'neutral', 'positive', 'positive', 'positive', 'negative', 'negative', 'negative', 'negative', 'positive', 'positive', 'positive', 'neutral', 'negative', 'positive', 'positive', 'negative', 'neutral', 'neutral', 'negative', 'neutral', 'negative', 'negative', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'negative', 'negative', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'positive', 'neutral', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'negative', 'positive', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'neutr

['neutral', 'positive', 'negative', 'neutral', 'negative', 'negative', 'positive', 'neutral', 'positive', 'negative', 'positive', 'negative', 'negative', 'positive', 'neutral', 'positive', 'positive', 'negative', 'positive', 'positive', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'negative', 'neutral', 'positive', 'neutral', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'positive', 'neutral', 'negative', 'negative', 'negative', 'neutral', 'negative', 'neutral', 'negative', 'negative', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'positive', 'positive', 'positive', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'negative', 'negative', 'neutral', 'negative', 'negative', 'neutral', 'neutral', 'neu

['neutral', 'positive', 'positive', 'positive', 'positive', 'positive', 'negative', 'neutral', 'negative', 'neutral', 'negative', 'negative', 'negative', 'negative', 'positive', 'negative', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'neutral', 'negative', 'positive', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'negative', 'neutral', 'negative', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral

['neutral', 'positive', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'negative', 'neutral', 'neutral', 'negative', 'negative', 'neutral', 'neutral', 'negative', 'neutral', 'negative', 'negative', 'negative', 'negative', 'negative', 'positive', 'positive', 'negative', 'neutral', 'positive', 'neutral', 'positive', 'positive', 'neutral', 'negative', 'negative', 'negative', 'positive', 'neutral', 'positive', 'positive', 'neutral', 'negative', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'positive', 'negative', 'positive', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'negative', 'neutral', 'neutral', 'negative', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'nega

Unnamed: 0,Date,Text,Sentences,SentimentLabels
0,20180321,"March 21, 2018 Chairman Powell’s Press Confere...","[March 21, 2018 Chairman Powell’s Press Confer...","[neutral, positive, neutral, positive, positiv..."
1,20180613,"June 13, 2018 Chairman Powell’s Press Conferen...","[June 13, 2018 Chairman Powell’s Press Confere...","[neutral, positive, neutral, neutral, neutral,..."
2,20180926,"\n \n \n \n \n \n \n September 26, 2018 Cha...","[ \n \n \n \n \n \n \n September 26, 2018 Ch...","[neutral, neutral, neutral, positive, positive..."
3,20190130,"January 30, 2019 Chairman Powell’s Press C on...","[January 30, 2019 Chairman Powell’s Press C o...","[neutral, neutral, neutral, neutral, positive,..."
4,20190320,"March 20, 2019 Chair Powell’s Press Confe...","[March 20, 2019 Chair Powell’s Press Conf...","[neutral, neutral, neutral, positive, neutral,..."
5,20190501,"May 1, 2019 Chair Powell’s Press Conferenc...","[May 1, 2019 Chair Powell’s Press Conferen...","[neutral, neutral, neutral, positive, neutral,..."
6,20190619,"June 19, 2019 Chair Powell’s Press Confere...","[June 19, 2019 Chair Powell’s Press Confer...","[neutral, neutral, positive, neutral, neutral,..."
7,20190731,"July 31, 2019 Chair Powell’s Press Conferen...","[July 31, 2019 Chair Powell’s Press Confere...","[neutral, neutral, neutral, neutral, positive,..."
8,20190918,"September 18, 2019 Chair Powell’s Press Conf...","[September 18, 2019 Chair Powell’s Press Con...","[neutral, neutral, neutral, positive, neutral,..."
9,20191030,"October 30, 2019 Chair Powell’s Press Confer...","[October 30, 2019 Chair Powell’s Press Confe...","[neutral, neutral, neutral, positive, positive..."


In [15]:
# Assuming you have already populated the 'SentimentLabels' column as shown in the previous code

# Define a function to count sentiment labels for a row
def count_sentiment_labels(row):
    counts = {
        'neutral': 0,
        'positive': 0,
        'negative': 0
    }
    
    # Iterate through the sentiment labels in the row
    for label in row['SentimentLabels']:
        counts[label] += 1
    
    return pd.Series(counts)

# Apply the function to each row to count sentiment labels
sentiment_counts_per_row = text_df.apply(count_sentiment_labels, axis=1)

# Concatenate the counts with the original DataFrame
text_df = pd.concat([text_df, sentiment_counts_per_row], axis=1)

text_df['Date'] = pd.to_datetime(text_df['Date'], format='%Y%m%d').dt.strftime('%Y-%m-%d')
text_df.set_index('Date', inplace=True)

# Remove the column title (index name)
text_df.index.name = None

text_df


Unnamed: 0,Text,Sentences,SentimentLabels,neutral,positive,negative
2018-03-21,"March 21, 2018 Chairman Powell’s Press Confere...","[March 21, 2018 Chairman Powell’s Press Confer...","[neutral, positive, neutral, positive, positiv...",293,46,49
2018-06-13,"June 13, 2018 Chairman Powell’s Press Conferen...","[June 13, 2018 Chairman Powell’s Press Confere...","[neutral, positive, neutral, neutral, neutral,...",366,70,58
2018-09-26,"\n \n \n \n \n \n \n September 26, 2018 Cha...","[ \n \n \n \n \n \n \n September 26, 2018 Ch...","[neutral, neutral, neutral, positive, positive...",417,67,63
2019-01-30,"January 30, 2019 Chairman Powell’s Press C on...","[January 30, 2019 Chairman Powell’s Press C o...","[neutral, neutral, neutral, neutral, positive,...",329,49,56
2019-03-20,"March 20, 2019 Chair Powell’s Press Confe...","[March 20, 2019 Chair Powell’s Press Conf...","[neutral, neutral, neutral, positive, neutral,...",293,53,66
2019-05-01,"May 1, 2019 Chair Powell’s Press Conferenc...","[May 1, 2019 Chair Powell’s Press Conferen...","[neutral, neutral, neutral, positive, neutral,...",266,61,48
2019-06-19,"June 19, 2019 Chair Powell’s Press Confere...","[June 19, 2019 Chair Powell’s Press Confer...","[neutral, neutral, positive, neutral, neutral,...",275,58,61
2019-07-31,"July 31, 2019 Chair Powell’s Press Conferen...","[July 31, 2019 Chair Powell’s Press Confere...","[neutral, neutral, neutral, neutral, positive,...",319,58,77
2019-09-18,"September 18, 2019 Chair Powell’s Press Conf...","[September 18, 2019 Chair Powell’s Press Con...","[neutral, neutral, neutral, positive, neutral,...",339,61,81
2019-10-30,"October 30, 2019 Chair Powell’s Press Confer...","[October 30, 2019 Chair Powell’s Press Confe...","[neutral, neutral, neutral, positive, positive...",333,77,52


### extracting the sentiment

In [16]:
import pandas as pd

# Assuming you have 'positive' and 'negative' columns in your 'filtered_df'
# Create a new DataFrame with the same index
sentiment_df_1 = pd.DataFrame(index=text_df.index)

# Compare 'positive' and 'negative' columns and classify as 'Hawkish', 'Dovish', or 'Neutral'
sentiment_df_1['Sentiment_1'] = 'Neutral'  # Default to 'Neutral'
sentiment_df_1.loc[text_df['positive'] > text_df['negative'], 'Sentiment_1'] = 'Hawkish'
sentiment_df_1.loc[text_df['negative'] > text_df['positive'], 'Sentiment_1'] = 'Dovish'


# Display the new DataFrame
sentiment_df_1


Unnamed: 0,Sentiment_1
2018-03-21,Dovish
2018-06-13,Hawkish
2018-09-26,Hawkish
2019-01-30,Dovish
2019-03-20,Dovish
2019-05-01,Hawkish
2019-06-19,Dovish
2019-07-31,Dovish
2019-09-18,Dovish
2019-10-30,Hawkish


### Sentiment_1 is the press conference and the Sentiment is the press release

In [17]:
import pandas as pd
#sentiment_df.index = sentiment_df.index.strftime('%Y-%m-%d')
# Assuming you have sentiment_df and sentiment_df_1 DataFrames with the date as the index
# Combine the DataFrames by the index (date), filling missing values with NaN
combined_df = sentiment_df.join(sentiment_df_1, how='outer')

# Display the combined DataFrame
combined_df


Unnamed: 0,Sentiment,Sentiment_1
2018-01-31 00:00:00,Hawkish,
2018-03-21 00:00:00,Hawkish,
2018-05-02 00:00:00,Hawkish,
2018-06-13 00:00:00,Hawkish,
2018-08-01 00:00:00,Hawkish,
...,...,...
2022-06-15,,Dovish
2022-07-27,,Dovish
2022-09-21,,Dovish
2022-11-02,,Dovish


### Importing the yields

In [18]:
import pandas as pd

# Replace 'your_file.csv' with the path to your CSV file
two_y = pd.read_csv('DGS2.csv')
ten_y= pd.read_csv('DGS10.csv')

two_y['2Y_t+1'] = two_y['DGS2'].shift(-1)  # Shift 'DGS2' values one day forward
two_y['2Y_t-1'] = two_y['DGS2'].shift(1)
ten_y['10Y_t+1'] = ten_y['DGS10'].shift(-1)  # Shift 'DGS2' values one day forward
ten_y['10Y_t-1'] = ten_y['DGS10'].shift(1)
ten_y

Unnamed: 0,DATE,DGS10,10Y_t+1,10Y_t-1
0,2017-01-03,2.45,2.46,
1,2017-01-04,2.46,2.37,2.45
2,2017-01-05,2.37,2.42,2.46
3,2017-01-06,2.42,2.38,2.37
4,2017-01-09,2.38,2.38,2.42
...,...,...,...,...
1559,2022-12-26,.,3.84,3.75
1560,2022-12-27,3.84,3.88,.
1561,2022-12-28,3.88,3.83,3.84
1562,2022-12-29,3.83,3.88,3.88


In [19]:
two_y

Unnamed: 0,DATE,DGS2,2Y_t+1,2Y_t-1
0,2017-01-03,1.22,1.24,
1,2017-01-04,1.24,1.17,1.22
2,2017-01-05,1.17,1.22,1.24
3,2017-01-06,1.22,1.21,1.17
4,2017-01-09,1.21,1.19,1.22
...,...,...,...,...
1559,2022-12-26,.,4.32,4.31
1560,2022-12-27,4.32,4.31,.
1561,2022-12-28,4.31,4.34,4.32
1562,2022-12-29,4.34,4.41,4.31


### Combining yields to the indicator

In [20]:
import pandas as pd

# Assuming you have DataFrames named combined_df and two_y
# Merge the two DataFrames based on the 'DATE' column
if not isinstance(two_y, pd.DataFrame):
    two_y = pd.DataFrame(two_y)

# Merge the two DataFrames based on the 'DATE' column
merged_df = pd.merge(combined_df, two_y, left_index=True, right_on='DATE', how='inner')

merged_df.set_index('DATE', inplace=True)
# Set the 'Extracted_Date' column as the index and remove the column title
merged_df.index = merged_df.index.rename(None)

# Display the DataFrame with the updated index
merged_df

Unnamed: 0,Sentiment,Sentiment_1,DGS2,2Y_t+1,2Y_t-1
2018-03-21,,Dovish,2.31,2.29,2.34
2018-06-13,,Hawkish,2.59,2.59,2.54
2018-09-26,,Hawkish,2.83,2.83,2.83
2019-01-30,,Dovish,2.52,2.45,2.56
2019-03-20,,Dovish,2.4,2.41,2.46
2019-05-01,,Hawkish,2.31,2.35,2.27
2019-06-19,,Dovish,1.74,1.72,1.86
2019-07-31,,Dovish,1.89,1.73,1.85
2019-09-18,,Dovish,1.77,1.74,1.72
2019-10-30,,Hawkish,1.61,1.52,1.64


In [21]:
import pandas as pd

# Assuming you have DataFrames named combined_df and two_y
# Merge the two DataFrames based on the 'DATE' column
if not isinstance(ten_y, pd.DataFrame):
    ten_y = pd.DataFrame(two_y)

# Merge the two DataFrames based on the 'DATE' column
merged_df = pd.merge(merged_df, ten_y, left_index=True, right_on='DATE', how='inner')

merged_df.set_index('DATE', inplace=True)
# Set the 'Extracted_Date' column as the index and remove the column title
merged_df.index = merged_df.index.rename(None)

merged_df['final_sentiment'] = merged_df['Sentiment_1'].fillna(merged_df['Sentiment'])
# Convert relevant columns to numeric (if they are not already)
numeric_columns = ['DGS2', '2Y_t-1', 'DGS10', '10Y_t-1']
merged_df[numeric_columns] = merged_df[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Calculate the change in yield for '2Y' and '10Y'
merged_df['Change_in_Yield_2Y'] = merged_df['DGS2'] - merged_df['2Y_t-1']
merged_df['Change_in_Yield_10Y'] = merged_df['DGS10'] - merged_df['10Y_t-1']

# Calculate the change in spread
merged_df['Change_in_Spread'] = (merged_df['DGS10'] - merged_df['DGS2']) - (merged_df['10Y_t-1'] - merged_df['2Y_t-1'])

# Display the updated DataFrame
merged_df

Unnamed: 0,Sentiment,Sentiment_1,DGS2,2Y_t+1,2Y_t-1,DGS10,10Y_t+1,10Y_t-1,final_sentiment,Change_in_Yield_2Y,Change_in_Yield_10Y,Change_in_Spread
2018-03-21,,Dovish,2.31,2.29,2.34,2.89,2.83,2.89,Dovish,-0.03,0.0,0.03
2018-06-13,,Hawkish,2.59,2.59,2.54,2.98,2.94,2.96,Hawkish,0.05,0.02,-0.03
2018-09-26,,Hawkish,2.83,2.83,2.83,3.06,3.06,3.1,Hawkish,0.0,-0.04,-0.04
2019-01-30,,Dovish,2.52,2.45,2.56,2.7,2.63,2.72,Dovish,-0.04,-0.02,0.02
2019-03-20,,Dovish,2.4,2.41,2.46,2.54,2.54,2.61,Dovish,-0.06,-0.07,-0.01
2019-05-01,,Hawkish,2.31,2.35,2.27,2.52,2.55,2.51,Hawkish,0.04,0.01,-0.03
2019-06-19,,Dovish,1.74,1.72,1.86,2.03,2.01,2.06,Dovish,-0.12,-0.03,0.09
2019-07-31,,Dovish,1.89,1.73,1.85,2.02,1.9,2.06,Dovish,0.04,-0.04,-0.08
2019-09-18,,Dovish,1.77,1.74,1.72,1.8,1.79,1.81,Dovish,0.05,-0.01,-0.06
2019-10-30,,Hawkish,1.61,1.52,1.64,1.78,1.69,1.84,Hawkish,-0.03,-0.06,-0.03


In [22]:
# Create a new DataFrame 'results' and extract the specified columns from 'merged_df'
results = merged_df[['final_sentiment', 'Change_in_Yield_2Y', 'Change_in_Yield_10Y', 'Change_in_Spread']]

# Display the 'results' DataFrame
results



Unnamed: 0,final_sentiment,Change_in_Yield_2Y,Change_in_Yield_10Y,Change_in_Spread
2018-03-21,Dovish,-0.03,0.0,0.03
2018-06-13,Hawkish,0.05,0.02,-0.03
2018-09-26,Hawkish,0.0,-0.04,-0.04
2019-01-30,Dovish,-0.04,-0.02,0.02
2019-03-20,Dovish,-0.06,-0.07,-0.01
2019-05-01,Hawkish,0.04,0.01,-0.03
2019-06-19,Dovish,-0.12,-0.03,0.09
2019-07-31,Dovish,0.04,-0.04,-0.08
2019-09-18,Dovish,0.05,-0.01,-0.06
2019-10-30,Hawkish,-0.03,-0.06,-0.03


In [23]:
# Convert 'final_sentiment' to a binary indicator variable
results['final_sentiment_indicator_hawkish'] = results['final_sentiment'].apply(lambda x: 1 if x == 'Hawkish' else 0)
results['final_sentiment_indicator_Dovish'] = results['final_sentiment'].apply(lambda x: 1 if x == 'Dovish' else 0)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results['final_sentiment_indicator_hawkish'] = results['final_sentiment'].apply(lambda x: 1 if x == 'Hawkish' else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results['final_sentiment_indicator_Dovish'] = results['final_sentiment'].apply(lambda x: 1 if x == 'Dovish' else 0)


### Calculating Correlation

In [24]:
# Calculate the correlation between 'final_sentiment_indicator' and 'Change_in_Yield_2Y'
correlation_0 = results['final_sentiment_indicator_hawkish'].corr(results['Change_in_Yield_2Y'])
correlation_1 = results['final_sentiment_indicator_hawkish'].corr(results['Change_in_Spread'])
correlation_2 = results['final_sentiment_indicator_Dovish'].corr(results['Change_in_Yield_2Y'])
correlation_3 = results['final_sentiment_indicator_Dovish'].corr(results['Change_in_Spread'])

# Display the correlation
print(f"Correlation between 'Hawkish' indicator and 'Change_in_Yield_2Y': {correlation_0}")
print(f"Correlation between 'Hawkish' indicator and 'Change_in_Spread': {correlation_1}")
print(f"Correlation between 'Dovish' indicator and 'Change_in_Yield_2Y': {correlation_2}")
print(f"Correlation between 'Dovish' indicator and 'Change_in_Spread': {correlation_3}")


Correlation between 'Hawkish' indicator and 'Change_in_Yield_2Y': 0.1959420324929
Correlation between 'Hawkish' indicator and 'Change_in_Spread': 0.002317013385391279
Correlation between 'Dovish' indicator and 'Change_in_Yield_2Y': -0.19144716192415462
Correlation between 'Dovish' indicator and 'Change_in_Spread': -0.023074218249538665


In [25]:
print('done')

done
