In [None]:
#Base Libraries
import os 
import sys
import json
import csv

#Core Libraries 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pygwalker as pyg
import datetime as dt
#Model Libraries
from bertopic import BERTopic
from sklearn.metrics.pairwise import cosine_similarity
from umap import UMAP
#import umap.plot
from sentence_transformers import SentenceTransformer

#ML Libraries
import torch 


In [None]:

#Importing Configs
# Work from the repository root, which is where config.py is located
os.chdir('/Users/kylenabors/Documents/GitHub/Finance-ML-Modeling')
config_file_path = os.getcwd()
print(config_file_path)

# Make config.py importable. Skip the append when the path is already present
# so that re-running this cell does not pile up duplicate sys.path entries.
if config_file_path not in sys.path:
    sys.path.append(config_file_path)

import config

#Configs
# Pull the locations used by the rest of the notebook out of the config module.
database_file = config.database
database_folder = config.database_folder
bert_models = config.bert_models
bert_models_local = config.bert_models_local

In [None]:
# Select which body's documents and which topic model this notebook analyses.
Body = 'Fed'
Model = 'Monetary Policy Report'
Model_Subfolder = f'/{Body} Texts/{Model}'
Model_Folder = config.texts
Model_Folder = Model_Folder + Model_Subfolder

# Load the text segments together with the date and type recorded for each row.
df = pd.read_csv(f"{Model_Folder}/{Model}_texts.csv")
docs = df["segment"].to_list()
timestamps = df['date'].to_list()
# NOTE(review): `type` shadows the builtin of the same name. It is kept because
# the topics-per-class cell further down reads this exact variable.
type = df['type'].to_list()

# Load the edited model only. The previous version of this cell first loaded the
# unedited checkpoint (topic_model_{Model}.pt in the same folder) and then
# overwrote it immediately, deserialising a whole model for nothing.
# torch.load unpickles the file, so only point it at checkpoints you trust.
topic_model_policy = torch.load(f"{bert_models_local}/{Body}/{Model}/topic_model_{Model}_edited.pt")
print("Done Loading Model")

# Attach readable labels to topics 0-4 and display them as the cell output.
topic_model_policy.set_topic_labels({0: "Inflation", 1: "Bank", 2: "Employment", 3: "Spending", 4: "Uncertainty"})
topic_model_policy.custom_labels_

In [None]:
# Ask the model for per-topic frequencies across 100 time bins.
topics_over_time = topic_model_policy.topics_over_time(docs, timestamps, nr_bins=100)

# Keep the four columns of interest and store the bin timestamps as daily periods.
tot_columns = ['Topic', 'Words', 'Frequency', 'Timestamp']
df_tot = pd.DataFrame(topics_over_time, columns=tot_columns)
parsed_stamps = pd.to_datetime(df_tot['Timestamp'], format='%Y-%m-%d')
df_tot['Timestamp'] = parsed_stamps.dt.to_period('D')

# Persist the full table (index included) next to the local models.
df_tot.to_csv(f"{bert_models_local}/tot.csv", index=True)

In [None]:
# Topic ids to keep; these are the five topics that receive labels below.
a = [0, 1, 2, 3, 4]

# Drop the keyword column and keep only the selected topics. Both steps build a
# new frame and tolerate 'Words' already being gone, so the cell can be re-run
# without raising KeyError.
df_tot = df_tot.drop(columns=['Words'], errors='ignore')
df_tot = df_tot[df_tot['Topic'].isin(a)]
print(df_tot.head())

# One row per timestamp, one column per topic, cells hold the topic frequency.
df_tpt = df_tot.pivot_table(index='Timestamp', columns='Topic', values='Frequency')

# Replace the numeric topic ids with readable column names.
df_tpt = df_tpt.rename(columns={0: "Inflation", 1: "Bank", 2: "Employment", 3: "Spending", 4: "Uncertainty"})
print(df_tpt.head())

In [None]:
# Move 'Timestamp' out of the index into a regular column. The guard makes the
# cell safe to re-run: resetting an already-reset frame would insert a junk
# 'index' column instead.
# ('Topic' is only the name of the columns axis left by the pivot, not a column,
# so there is nothing to drop under that label.)
if 'Timestamp' not in df_tpt.columns:
    df_tpt = df_tpt.reset_index()
print(df_tpt.head())

In [None]:
# Quick look at the Fed funds workbook. No sheet_name is passed here, so this
# reads whichever sheet pandas picks by default; `funds` is reassigned from the
# 'Monthly' sheet in a later cell before it is merged.
funds = pd.read_excel('/Users/kylenabors/Documents/Database/Other Data/FedFundsRate.xlsx')
print(funds.head())

In [None]:
# Derive daily timestamps once, accepting either dtype. On a fresh run
# 'Timestamp' holds daily periods; after this cell has run it holds datetimes,
# and calling .dt.to_timestamp on those would fail on a re-run.
timestamp_col = df_tpt['Timestamp']
if isinstance(timestamp_col.dtype, pd.PeriodDtype):
    daily_stamps = timestamp_col.dt.to_timestamp(freq='D')
else:
    daily_stamps = pd.to_datetime(timestamp_col)

# df_tpt_m is an independent monthly copy; df_tpt itself keeps daily datetimes.
df_tpt_m = df_tpt.copy(deep=True)
df_tpt['Timestamp'] = daily_stamps
# Monthly periods are the join key for the monthly series merged in later cells.
df_tpt_m['Timestamp'] = daily_stamps.dt.to_period('M')
print(df_tpt_m.head())

In [None]:
# Load the energy CPI series and preview both sides of the join.
energy = pd.read_csv('/Users/kylenabors/Documents/Database/Other Data/Energy Prices/US Energy CPI.csv')
print(energy.head())
print(df_tpt_m.head())

# Convert DATE to monthly periods so it matches the dtype of df_tpt_m['Timestamp'].
energy["DATE"] = pd.to_datetime(energy["DATE"]).dt.to_period('M')
print(energy.head())

# Keep the merge result. It used to be discarded, so df_tpt_m never received the
# energy columns and the later drop of 'DATE' raised KeyError.
df_tpt_m = df_tpt_m.merge(energy, left_on='Timestamp', right_on='DATE', how='left')
df_tpt_m

In [None]:
# Read the monthly Fed funds sheet and express its date column as monthly periods.
funds = pd.read_excel('/Users/kylenabors/Documents/Database/Other Data/FedFundsRate.xlsx', sheet_name='Monthly')
funds_month = funds['Date Adjusted'].dt.to_period(freq='M')
funds['Date Adjusted'] = funds_month

# Left-join so every topic row is kept even when no rate is available for its month.
df_tpt_m = pd.merge(
    df_tpt_m,
    funds,
    how='left',
    left_on='Timestamp',
    right_on='Date Adjusted',
)

In [None]:
# The right-hand join keys duplicate 'Timestamp' once the merges are done; remove them.
df_tpt_m.drop(columns=['DATE', 'Date Adjusted'], inplace=True)

In [None]:
# Work on an independent copy so the level data in df_tpt_m is left untouched.
change_df_tpt_m = df_tpt_m.copy(deep=True)

# Replace each topic's frequency with its row-over-row difference.
for topic_col in ('Inflation', 'Bank', 'Employment', 'Spending', 'Uncertainty'):
    change_df_tpt_m[topic_col] = change_df_tpt_m[topic_col].diff()

# The rate level stays in 'FEDFUNDS'; its difference goes into a new column.
change_df_tpt_m['FEDFUNDS_Change'] = change_df_tpt_m['FEDFUNDS'].diff()
print(change_df_tpt_m.head())

In [None]:
# Topic representation per document type (`type` is the list built when the texts were loaded).
topics_per_class = topic_model_policy.topics_per_class(docs, classes=type)

# Build the topic-terms bar chart with the custom labels, then save it as an HTML file.
barchart_fig = topic_model_policy.visualize_barchart(
    top_n_topics=100,
    n_words=8,
    custom_labels=True,
)
barchart_fig.write_html(f"{bert_models}/barchart.html")

In [None]:
# Write the level table and the change table side by side in the model's folder.
for frame, file_name in (
    (df_tpt_m, "tpt monthly merged.csv"),
    (change_df_tpt_m, "tpt change monthly merged.csv"),
):
    frame.to_csv(f"{bert_models_local}/{Body}/{Model}/{file_name}", index=True)

In [None]:
# Open the merged monthly level table in pygwalker for interactive exploration.
gwalker = pyg.walk(df_tpt_m)

In [None]:
# Open the change table in pygwalker; this rebinds `gwalker` from the previous cell.
gwalker = pyg.walk(change_df_tpt_m)