In [415]:
import os
import sys
import pandas as pd
from sklearn import datasets
import statsmodels.api as sm
from stargazer.stargazer import Stargazer
from IPython.display import display, HTML
from sklearn.datasets import load_iris
from pystout import pystout
import matplotlib.pyplot as plt

In [416]:
# Global Settings
pd.set_option("display.float_format", lambda x: "%.4f" % x)

In [417]:
cwd = os.getcwd()
# Find and import config file
config_path = os.getcwd()

sys.path.append(config_path)
import config

database = config.database
central_banks = config.central_banks
training_data = os.path.join(database, "Training Data")
fed_docs = config.fed_docs
ecb_docs = config.ecb_docs
boe_docs = config.boe_docs

sentiment = pd.date_range(start="1/1/1990", end="1/1/2024", freq="D")
sentiment = pd.DataFrame(sentiment, columns=["date"])

In [418]:
test = pd.read_csv(
    "/Users/kylenabors/Documents/Database/Models/FinBERT Models/taylor.csv"
)

In [419]:
sentiment = pd.date_range(start="1/1/2000", end="1/1/2024", freq="D")
sentiment = pd.DataFrame(sentiment, columns=["date"])

In [420]:
indexes = pd.read_excel(
    "/Users/kylenabors/Documents/Database/Other Data/Indexes/five_indexes.xlsx"
)
indexes["date"] = pd.to_datetime(indexes["date"])
sentiment = pd.merge(sentiment, indexes, how="left", left_on="date", right_on="date")

In [421]:
# HP Filter

url_map = pd.read_csv(os.path.join(cwd, "url_map.csv"))

for i in range(len(url_map)):
    sent = pd.read_csv(url_map["finbert_url"][i])
    sent = sent[["date", "sentiment"]]

    sent["date"] = pd.to_datetime(sent["date"])
    sent = sent.groupby("date").mean().reset_index()

    # Set 'date' as the index for resampling
    sent.set_index("date", inplace=True)

    # Resample to quarterly frequency
    sent = sent.resample("Q").mean()

    # Handle NaN values in 'sentiment'
    # Option 1: Interpolate missing values
    # sent["sentiment"] = sent["sentiment"].interpolate(method="time")

    # Option 2: Forward-fill missing values
    sent["sentiment"] = sent["sentiment"].ffill()

    # Option 3: Drop periods with NaNs
    # sent.dropna(subset=['sentiment'], inplace=True)

    # Reset index to turn 'date' back into a column
    sent.reset_index(inplace=True)

    filter_df = sent.copy(deep=True)

    # Ensure 'sentiment' has no NaNs before applying HP filter
    if filter_df["sentiment"].isnull().any():
        print(f"Warning: NaNs detected in 'sentiment' for {url_map['document'][i]}.")
        continue  # Skip this iteration or handle accordingly

    cycle, trend = sm.tsa.filters.hpfilter(filter_df["sentiment"], 1600)

    filter_df["sentiment_cycle"] = cycle
    filter_df["sentiment_trend"] = trend

    filter_df = filter_df[["date", "sentiment_cycle"]]
    sent = sent.drop(columns=["sentiment"])
    filter_df = filter_df.rename(columns={"sentiment_cycle": "sentiment"})
    sent = pd.merge(sent, filter_df, on="date", how="outer")
    sent = sent.groupby("date").mean().reset_index()

    sent = sent.rename(columns={"sentiment": url_map["document"][i]})
    sentiment = pd.merge(sentiment, sent, how="outer", left_on="date", right_on="date")

In [422]:
fedfunds = pd.read_csv(
    "/Users/kylenabors/Documents/Database/Market Data/Fed Funds/Fed Funds.csv"
)
fedfunds["date"] = pd.to_datetime(fedfunds["date"])
sentiment = pd.merge(sentiment, fedfunds, how="left", left_on="date", right_on="date")

In [423]:
# Resample the final 'sentiment' DataFrame to quarterly frequency
sentiment = sentiment.resample("Q", on="date").mean().reset_index()

In [424]:
# Create lagged columns
for column in sentiment.columns:
    if column != "date":  # Skip the 'date' column
        sentiment[f"{column}_1"] = sentiment[column].shift(1)

for i in range(1, 5):
    sentiment[f"fedfunds_forward_{i}"] = sentiment["fedfunds"].shift(-i)

In [425]:
sentiment = sentiment[
    (sentiment["date"] >= "2000-01-01") & (sentiment["date"] <= "2023-01-30")
]

In [426]:
# Define the independent variables for each regression
x = ["fed_minutes", "fed_minutes_1", "fedfunds_1"]

yvar0 = "fedfunds"
yvar1 = "fedfunds_forward_1"
yvar2 = "fedfunds_forward_2"
yvar3 = "fedfunds_forward_3"
yvar4 = "fedfunds_forward_4"


# Prepare the exogenous variables for each regression
exog0 = sm.add_constant(sentiment[x]).dropna()
exog1 = sm.add_constant(sentiment[x]).dropna()
exog2 = sm.add_constant(sentiment[x]).dropna()
exog3 = sm.add_constant(sentiment[x]).dropna()
exog4 = sm.add_constant(sentiment[x]).dropna()


# Fit the regression models
reg0 = sm.OLS(endog=sentiment[yvar0].loc[exog0.index], exog=exog0).fit(cov_type="HC0")
reg1 = sm.OLS(endog=sentiment[yvar1].loc[exog1.index], exog=exog1).fit(cov_type="HC0")
reg2 = sm.OLS(endog=sentiment[yvar2].loc[exog2.index], exog=exog2).fit(cov_type="HC0")
reg3 = sm.OLS(endog=sentiment[yvar3].loc[exog3.index], exog=exog3).fit(cov_type="HC0")
reg4 = sm.OLS(endog=sentiment[yvar4].loc[exog4.index], exog=exog4).fit(cov_type="HC0")


# Create the Stargazer object
stargazer = Stargazer([reg0, reg1, reg2, reg3, reg4])

# Customize the Stargazer output
stargazer.show_model_numbers(False)
stargazer.significant_digits(3)

# Display the Stargazer output
display(HTML(stargazer.render_html()))
print(stargazer.render_latex())

0,1,2,3,4,5
,,,,,
,,,,,
,,,,,
const,0.029,0.151,0.337***,0.566***,0.802***
,(0.050),(0.092),(0.128),(0.155),(0.174)
fed_minutes,1.875***,3.017***,3.922***,4.283**,4.969**
,(0.680),(1.130),(1.510),(1.751),(1.933)
fed_minutes_1,1.688***,3.209***,4.350***,5.552***,5.888***
,(0.529),(1.038),(1.429),(1.696),(1.848)
fedfunds_1,0.980***,0.897***,0.776***,0.627***,0.477***


\begin{table}[!htbp] \centering
\begin{tabular}{@{\extracolsep{5pt}}lccccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
\hline \\[-1.8ex]
 const & 0.029$^{}$ & 0.151$^{}$ & 0.337$^{***}$ & 0.566$^{***}$ & 0.802$^{***}$ \\
& (0.050) & (0.092) & (0.128) & (0.155) & (0.174) \\
 fed_minutes & 1.875$^{***}$ & 3.017$^{***}$ & 3.922$^{***}$ & 4.283$^{**}$ & 4.969$^{**}$ \\
& (0.680) & (1.130) & (1.510) & (1.751) & (1.933) \\
 fed_minutes_1 & 1.688$^{***}$ & 3.209$^{***}$ & 4.350$^{***}$ & 5.552$^{***}$ & 5.888$^{***}$ \\
& (0.529) & (1.038) & (1.429) & (1.696) & (1.848) \\
 fedfunds_1 & 0.980$^{***}$ & 0.897$^{***}$ & 0.776$^{***}$ & 0.627$^{***}$ & 0.477$^{***}$ \\
& (0.026) & (0.045) & (0.062) & (0.073) & (0.074) \\
\hline \\[-1.8ex]
 Observations & 92 & 92 & 92 & 92 & 92 \\
 $R^2$ & 0.949 & 0.839 & 0.695 & 0.549 & 0.424 \\
 Adjusted $R^2$ & 0.947 & 0.834 & 0.685 & 0.533 & 0.405 \\
 Residual Std. Error & 0.429 (df=88) & 0.752 (df=88) & 1.019 (df=88) & 1.217 (df=88) & 1.351 (df=88) \\
 F Statistic & 

In [427]:
# Define the independent variables for each regression
x0 = [
    "fed_minutes",
    "fed_minutes_1",
    "credit",
    "credit_1",
    "equity_valuation",
    "equity_valuation_1",
    "safe_assets",
    "safe_assets_1",
    "funding",
    "funding_1",
    "volatility",
    "volatility_1",
    "fedfunds_1",
]

x1 = [
    "fed_minutes",
    "fed_minutes_1",
    "credit",
    "credit_1",
    "equity_valuation",
    "equity_valuation_1",
    "safe_assets",
    "safe_assets_1",
    "funding",
    "funding_1",
    "volatility",
    "volatility_1",
    "fedfunds_1",
]

x2 = [
    "fed_minutes",
    "fed_minutes_1",
    "credit",
    "credit_1",
    "equity_valuation",
    "equity_valuation_1",
    "safe_assets",
    "safe_assets_1",
    "funding",
    "funding_1",
    "volatility",
    "volatility_1",
    "fedfunds_1",
]

x3 = [
    "fed_minutes",
    "fed_minutes_1",
    "credit",
    "credit_1",
    "equity_valuation",
    "equity_valuation_1",
    "safe_assets",
    "safe_assets_1",
    "funding",
    "funding_1",
    "volatility",
    "volatility_1",
    "fedfunds_1",
]

x4 = [
    "fed_minutes",
    "fed_minutes_1",
    "credit",
    "credit_1",
    "equity_valuation",
    "equity_valuation_1",
    "safe_assets",
    "safe_assets_1",
    "funding",
    "funding_1",
    "volatility",
    "volatility_1",
    "fedfunds_1",
]


# Define the dependent variables for each regression
yvar0 = "fedfunds"
yvar1 = "fedfunds_forward_1"
yvar2 = "fedfunds_forward_2"
yvar3 = "fedfunds_forward_3"
yvar4 = "fedfunds_forward_4"


# Prepare the exogenous variables for each regression
exog0 = sm.add_constant(sentiment[x0]).dropna()
exog1 = sm.add_constant(sentiment[x1]).dropna()
exog2 = sm.add_constant(sentiment[x2]).dropna()
exog3 = sm.add_constant(sentiment[x3]).dropna()
exog4 = sm.add_constant(sentiment[x4]).dropna()


# Fit the regression models
reg0 = sm.OLS(endog=sentiment[yvar0].loc[exog0.index], exog=exog0).fit(cov_type="HC0")
reg1 = sm.OLS(endog=sentiment[yvar1].loc[exog1.index], exog=exog1).fit(cov_type="HC0")
reg2 = sm.OLS(endog=sentiment[yvar2].loc[exog2.index], exog=exog2).fit(cov_type="HC0")
reg3 = sm.OLS(endog=sentiment[yvar3].loc[exog3.index], exog=exog3).fit(cov_type="HC0")
reg4 = sm.OLS(endog=sentiment[yvar4].loc[exog4.index], exog=exog4).fit(cov_type="HC0")


# Create the Stargazer object
stargazer = Stargazer([reg0, reg1, reg2, reg3, reg4])

stargazer.title("Fed Minute Sentiment Regressed on US 5y5y Extreme Inflation Prob.")
stargazer.custom_columns(
    [
        "Fedfunds(t)",
        "Fedfunds(t+1)",
        "Fedfunds(t+2)",
        "Fedfunds(t+3)",
        "Fedfunds(t+4)",
    ]
)

stargazer.covariate_order(
    [
        "fed_minutes",
        "fed_minutes_1",
        "credit",
        "credit_1",
        "equity_valuation",
        "equity_valuation_1",
        "safe_assets",
        "safe_assets_1",
        "funding",
        "funding_1",
        "volatility",
        "volatility_1",
        "fedfunds_1",
    ]
)

stargazer.rename_covariates(
    {
        "fed_minutes": "Fed Minutes",
        "fed_minutes_1": "Fed Minutes (t-1)",
        "credit": "Credit",
        "credit_1": "Credit (t-1)",
        "equity_valuation": "Equity Valuation",
        "equity_valuation_1": "Equity Valuation (t-1)",
        "safe_assets": "Safe Assets",
        "safe_assets_1": "Safe Assets (t-1)",
        "funding": "Funding",
        "funding_1": "Funding (t-1)",
        "volatility": "Volatility",
        "volatility_1": "Volatility (t-1)",
        "fedfunds_1": "Fed Funds (t-1)",
    }
)

# Customize the Stargazer output
stargazer.show_model_numbers(False)
stargazer.significant_digits(3)

# Display the Stargazer output
display(HTML(stargazer.render_html()))

# Modify the LaTeX output to remove all \\[-1.8ex]
latex_output = stargazer.render_latex().replace("\\[-1.8ex]", "")
latex_output = latex_output.replace("!htbp", "H")
print(latex_output)

0,1,2,3,4,5
,,,,,
,,,,,
,Fedfunds(t),Fedfunds(t+1),Fedfunds(t+2),Fedfunds(t+3),Fedfunds(t+4)
,,,,,
Fed Minutes,-0.532,0.331,0.754,1.325,3.086
,(0.746),(1.240),(1.757),(2.104),(2.290)
Fed Minutes (t-1),1.088,2.286,3.145*,4.511**,4.827**
,(0.711),(1.443),(1.673),(1.782),(1.882)
Credit,-0.177,-0.200,-0.314,-0.508*,-0.614**
,(0.141),(0.193),(0.254),(0.276),(0.288)


\begin{table}[H] \centering
  \caption{Fed Minute Sentiment Regressed on US 5y5y Extreme Inflation Prob.}
\begin{tabular}{@{\extracolsep{5pt}}lccccc}
\\hline
\hline \
\ & \multicolumn{1}{c}{Fedfunds(t)} & \multicolumn{1}{c}{Fedfunds(t+1)} & \multicolumn{1}{c}{Fedfunds(t+2)} & \multicolumn{1}{c}{Fedfunds(t+3)} & \multicolumn{1}{c}{Fedfunds(t+4)}  \\
\hline \
 Fed Minutes & -0.532$^{}$ & 0.331$^{}$ & 0.754$^{}$ & 1.325$^{}$ & 3.086$^{}$ \\
& (0.746) & (1.240) & (1.757) & (2.104) & (2.290) \\
 Fed Minutes (t-1) & 1.088$^{}$ & 2.286$^{}$ & 3.145$^{*}$ & 4.511$^{**}$ & 4.827$^{**}$ \\
& (0.711) & (1.443) & (1.673) & (1.782) & (1.882) \\
 Credit & -0.177$^{}$ & -0.200$^{}$ & -0.314$^{}$ & -0.508$^{*}$ & -0.614$^{**}$ \\
& (0.141) & (0.193) & (0.254) & (0.276) & (0.288) \\
 Credit (t-1) & -0.133$^{}$ & -0.396$^{**}$ & -0.572$^{**}$ & -0.599$^{**}$ & -0.603$^{**}$ \\
& (0.128) & (0.183) & (0.253) & (0.275) & (0.282) \\
 Equity Valuation & -0.079$^{}$ & -0.270$^{}$ & -0.334$^{}$ & -0.284$^{}$ &