In [77]:
#Base Libraries
import os 
import sys
import json
import csv

#Core Libraries 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pygwalker as pyg
import datetime as dt
#Model Libraries
from bertopic import BERTopic
from sklearn.metrics.pairwise import cosine_similarity
from umap import UMAP
#import umap.plot
from sentence_transformers import SentenceTransformer

#ML Libraries
import torch 


In [78]:

# Importing Configs
# Repository root that holds config.py. The FINANCE_ML_REPO environment
# variable overrides the default so the notebook is not tied to one machine;
# when it is unset the original hard-coded location is used.
repo_root = os.environ.get(
    'FINANCE_ML_REPO',
    '/Users/kylenabors/Documents/GitHub/Finance-ML-Modeling',
)
os.chdir(repo_root)
config_file_path = os.getcwd()
print(config_file_path)

# Make config.py importable. Skip the append when the path is already present
# so re-running this cell does not pile up duplicate sys.path entries.
if config_file_path not in sys.path:
    sys.path.append(config_file_path)

import config

# Configs: locations read from the project-level config module.
database_file = config.database
database_folder = config.database_folder
bert_models = config.bert_models
bert_models_local = config.bert_models_local

/Users/kylenabors/Documents/GitHub/Finance-ML-Modeling


In [79]:
# Select the corpus (issuing body + model name) this notebook analyses and
# derive the folder holding its segmented texts.
Body = 'Fed'
Model = 'Beige and Monetary'
Model_Subfolder = f'/{Body} Texts/{Model}'
Model_Folder = config.texts + Model_Subfolder


#-------------------------------------------------------------------------------------------------------------------
# Read the text segments and pull out the three columns used below.
df = pd.read_csv(f"{Model_Folder}/{Model}_texts.csv")
docs = df["segment"].to_list()
timestamps = df['date'].to_list()
# NOTE(review): `type` shadows the Python builtin. The name is kept because the
# topics_per_class cell passes it as `classes=type`.
type = df['type'].to_list()

#Initial Model
#topic_model = torch.load(f"{bert_models_local}/topic_model_{Model}.pt")

#Edited Model
# NOTE(review): torch.load unpickles the file, so only load model files from a
# trusted source.
edited_model_path = f"{bert_models_local}/topic_model_{Model}_edited.pt"
topic_model_policy = torch.load(edited_model_path)
print("Done Loading Model")

# Human-readable names for the first eight topics (ids 0-7); other topics keep
# their generated labels.
topic_labels = dict(enumerate([
    "Employment",
    "Housing",
    "Banking",
    "Inflation",
    "Agriculture",
    "Transportation",
    "Growth",
    "Oil",
]))
topic_model_policy.set_topic_labels(topic_labels)
topic_model_policy.custom_labels_


Wed Sep 20 18:37:07 2023 Building and compiling search function
Done Loading Model


['-1_residential_retail_labor_wage',
 'Employment',
 'Housing',
 'Banking',
 'Inflation',
 'Agriculture',
 'Transportation',
 'Growth',
 'Oil',
 '8_tourism_hospitality_seasonal_montana',
 '9_bureau_data_department_statistics',
 '10_raw_rose_goods_selling',
 '11_restaurant_dining_meat_industry',
 '12_staff_closed_fill_cutting',
 '13_steel_utilization_service_capacity',
 '14_beige_book_information_informal',
 '15_dow_indices_stock_merger',
 '16_pandemic_poultry_flu_covid',
 '17_affordable_insurance_medical_hospital',
 '18_nonfinancial_construction_moderate_development',
 '19_tax_stimulus_sequestration_federal',
 '20_confidence_interval_width_pa',
 '21_mall_attendance_broadway_ticket',
 '22_semiconductor_core_memory_chips',
 '23_south_northern_regional_region',
 '24_hurricane_casino_atlantic_storm',
 '25_range_number_distribution_table',
 '26_policy_gap_democratic_transparency',
 '27_pressure_downward_upward_home',
 '28_legal_underwriting_regulatory_discretionary',
 '29_saw_robust_freight

In [80]:
# Topic frequencies over time, computed with 200 time bins. The recorded run
# of this cell took several minutes.
topics_over_time = topic_model_policy.topics_over_time(docs, timestamps, nr_bins=200)

# Keep the four columns of interest and convert the timestamps to daily periods.
tot_columns = ['Topic', 'Words', 'Frequency', 'Timestamp']
df_tot = pd.DataFrame(topics_over_time, columns=tot_columns)
parsed_timestamps = pd.to_datetime(df_tot['Timestamp'], format='%Y-%m-%d')
df_tot['Timestamp'] = parsed_timestamps.dt.to_period('D')

# Persist the table, then show a preview.
df_tot.to_csv(f"{bert_models_local}/tot.csv", index=True)
print(df_tot.head())

139it [03:39,  1.58s/it]

   Topic                                             Words  Frequency  \
0     -1          fi, residential, housing, rate, increase        382   
1      0  compensation, employment, wage, inventory, wages        165   
2      1           san, residential, housing, york, rental        158   
3      2        reserve, federal, monetary, loan, treasury        138   
4      3        fi, spending, quarters, nominal, inflation        115   

    Timestamp  
0  2007-01-10  
1  2007-01-10  
2  2007-01-10  
3  2007-01-10  
4  2007-01-10  





In [81]:

# Topic ids that were given custom labels (0-7).
a = list(range(8))

# Drop the keyword column and keep only rows for the labelled topics.
df_tot = df_tot.drop(columns=['Words'])
df_tot = df_tot[df_tot['Topic'].isin(a)]

# Wide layout: one row per timestamp, one column per topic id.
df_tpt = df_tot.pivot_table(index='Timestamp', columns='Topic', values='Frequency')

# Swap the numeric topic ids for their readable names.
topic_names = {
    0: "Employment",
    1: "Housing",
    2: "Banking",
    3: "Inflation",
    4: "Agriculture",
    5: "Transportation",
    6: "Growth",
    7: "Oil",
}
df_tpt = df_tpt.rename(columns=topic_names)
print(df_tpt.head())


Topic       Employment  Housing  Banking  Inflation  Agriculture  \
Timestamp                                                          
2007-01-10       165.0    158.0    138.0      115.0         37.0   
2007-02-16       135.0    145.0     28.0       20.0         36.0   
2007-04-17       142.0    130.0     34.0       26.0         37.0   
2007-05-17       124.0    122.0     45.0       41.0         39.0   
2007-07-16       184.0    136.0    151.0      128.0         41.0   

Topic       Transportation  Growth   Oil  
Timestamp                                 
2007-01-10            33.0    37.0  62.0  
2007-02-16            31.0    23.0  45.0  
2007-04-17            32.0    37.0  38.0  
2007-05-17            22.0    33.0  40.0  
2007-07-16            33.0    48.0  55.0  


In [82]:
# Move the Timestamp index back into an ordinary column so later cells can
# transform it and use it as a merge key.
df_tpt = df_tpt.reset_index()

In [83]:
# Work on an independent copy for the monthly view so the daily frame and the
# monthly frame can be modified separately.
df_tpt_m = df_tpt.copy(deep=True)

# Daily frame: periods become plain timestamps.
daily_periods = df_tpt["Timestamp"]
df_tpt['Timestamp'] = daily_periods.dt.to_timestamp(freq='D')

# Monthly frame: go through a timestamp and back to a monthly period, which is
# the key type used by the merges that follow.
df_tpt_m["Timestamp"] = (
    df_tpt_m["Timestamp"]
    .dt.to_timestamp(freq='M')
    .dt.to_period('M')
)


In [84]:
# Energy CPI series; its DATE column is converted to monthly periods so it can
# be matched against the monthly Timestamp key.
energy = pd.read_csv('/Users/kylenabors/Documents/Database/Other Data/Energy Prices/US Energy CPI.csv')
energy_dates = pd.to_datetime(energy["DATE"])
energy["DATE"] = energy_dates.dt.to_period('M')

# Left join: every topic row is kept, energy values are attached where the
# month matches.
df_tpt_m = pd.merge(
    df_tpt_m,
    energy,
    how='left',
    left_on='Timestamp',
    right_on='DATE',
)

In [85]:
# The energy merge key duplicates Timestamp, so remove it.
df_tpt_m = df_tpt_m.drop(columns=['DATE'])

In [86]:
# Fed funds rate, read from the 'Monthly' sheet. 'Date Adjusted' is converted
# to monthly periods to line up with the Timestamp key.
funds = pd.read_excel(
    '/Users/kylenabors/Documents/Database/Other Data/FedFundsRate.xlsx',
    sheet_name='Monthly',
)
funds['Date Adjusted'] = funds['Date Adjusted'].dt.to_period(freq='M')

# Left join keeps every topic row and attaches the rate for the matching month.
df_tpt_m = pd.merge(
    df_tpt_m,
    funds,
    how='left',
    left_on='Timestamp',
    right_on='Date Adjusted',
)


In [87]:
# Remove the merge-key columns left over from the energy and fed-funds joins.
# The energy 'DATE' key is already dropped by an earlier cell, so 'DATE' is
# only present here if the funds sheet carries its own DATE column.
# errors='ignore' skips labels that are absent, so this cell no longer raises
# KeyError in that case or when it is re-run.
df_tpt_m.drop(['DATE', 'Date Adjusted'], axis=1, inplace=True, errors='ignore')

In [88]:
# Topic representation per document class; the classes come from the `type`
# column loaded with the texts.
topics_per_class = topic_model_policy.topics_per_class(docs, classes=type)

# Save topic-terms barcharts as HTML file
barchart_fig = topic_model_policy.visualize_barchart(
    top_n_topics=100,
    n_words=8,
    custom_labels=True,
)
barchart_fig.write_html(f"{bert_models}/barchart.html")

2it [00:05,  2.96s/it]


In [89]:

# Keep the four columns of interest from the per-class result and write them
# to disk.
tpc_columns = ['Topic', 'Words', 'Frequency', 'Class']
df_tpc = pd.DataFrame(topics_per_class, columns=tpc_columns)
df_tpc.to_csv(f"{bert_models_local}/tpc.csv", index=True)

In [90]:
# Every class value (with repeats), then the distinct values. The set gives
# `categories` no guaranteed order.
categories_all = df_tpc['Class'].to_list()
categories = [*set(categories_all)]

# Topic representations as returned by the model.
topics = topic_model_policy.get_topics()


In [91]:

# Wide table of topic frequencies (one row per timestamp, one column per
# topic id); missing combinations are filled with 0 before saving.
pivot_df_tot = (
    df_tot
    .pivot(index='Timestamp', columns='Topic', values='Frequency')
    .fillna(0)
)
pivot_df_tot.to_csv(f"{bert_models_local}/pivot_df_tot.csv", index=True)

In [92]:
print(df_tpt_m.head(10))

# Mean frequency of each labelled topic over the whole merged frame. The
# individual names are kept because the *_Mean_Diff cell reads them.
(
    Employment_mean,
    Housing_mean,
    Banking_mean,
    Inflation_mean,
    Agriculture_mean,
    Transportation_mean,
    Growth_mean,
    Oil_mean,
) = (
    df_tpt_m[topic_name].mean()
    for topic_name in (
        'Employment',
        'Housing',
        'Banking',
        'Inflation',
        'Agriculture',
        'Transportation',
        'Growth',
        'Oil',
    )
)



  Timestamp  Employment  Housing  Banking  Inflation  Agriculture  \
0   2007-01       165.0    158.0    138.0      115.0         37.0   
1   2007-02       135.0    145.0     28.0       20.0         36.0   
2   2007-04       142.0    130.0     34.0       26.0         37.0   
3   2007-05       124.0    122.0     45.0       41.0         39.0   
4   2007-07       184.0    136.0    151.0      128.0         41.0   
5   2007-08       141.0    134.0     78.0       36.0         35.0   
6   2007-10       122.0    114.0     53.0       34.0         37.0   
7   2007-11        12.0      9.0      9.0        3.0          7.0   
8   2008-01       110.0    125.0     58.0       34.0         25.0   
9   2008-02       161.0    154.0    280.0      172.0         34.0   

   Transportation  Growth   Oil  CPIENGSL  FEDFUNDS  
0            33.0    37.0  62.0   190.281      5.25  
1            31.0    23.0  45.0   192.310      5.26  
2            32.0    37.0  38.0   203.307      5.25  
3            22.0    33.

In [93]:
# Deviation of every topic's frequency from its own mean, stored as
# '<Topic>_Mean_Diff' columns in the same order as the topic columns.
topic_means = {
    'Employment': Employment_mean,
    'Housing': Housing_mean,
    'Banking': Banking_mean,
    'Inflation': Inflation_mean,
    'Agriculture': Agriculture_mean,
    'Transportation': Transportation_mean,
    'Growth': Growth_mean,
    'Oil': Oil_mean,
}
for topic_name, topic_mean in topic_means.items():
    df_tpt_m[f'{topic_name}_Mean_Diff'] = df_tpt_m[topic_name] - topic_mean


In [94]:

# Build a "changes" frame: an independent copy of the merged monthly frame in
# which each topic column is replaced by its row-to-row difference.
# NOTE: the rows are not consecutive calendar months (the previewed data skips
# e.g. 2007-03), so each difference is relative to the previous available row.
change_df_tpt_m = df_tpt_m.copy(deep=True)

topic_columns = [
    'Employment',
    'Housing',
    'Banking',
    'Inflation',
    'Agriculture',
    'Transportation',
    'Growth',
    'Oil',
]
for topic_column in topic_columns:
    change_df_tpt_m[topic_column] = change_df_tpt_m[topic_column].diff()

# Differences of the macro series are added as new columns; the level columns
# CPIENGSL and FEDFUNDS stay as they are.
energy_cpi_change = change_df_tpt_m['CPIENGSL'].diff()
# 'CPIENGLS' is a misspelling of 'CPIENGSL'. It is still written, in its
# original position, so existing readers of the saved CSV keep working.
change_df_tpt_m['CPIENGLS'] = energy_cpi_change
change_df_tpt_m['FEDFUNDS_Change'] = change_df_tpt_m['FEDFUNDS'].diff()
# Correctly named column, following the FEDFUNDS_Change convention.
change_df_tpt_m['CPIENGSL_Change'] = energy_cpi_change


In [95]:
# Row-to-row change in the fed funds rate, stored alongside the level series.
fedfunds_step = df_tpt_m['FEDFUNDS'].diff()
df_tpt_m['FEDFUNDS_Diff'] = fedfunds_step

In [96]:
# Write the level frame and the change frame next to the local BERTopic models.
monthly_csv_path = f"{bert_models_local}/tpt monthly merged.csv"
change_csv_path = f"{bert_models_local}/tpt change monthly merged.csv"

df_tpt_m.to_csv(monthly_csv_path, index=True)
change_df_tpt_m.to_csv(change_csv_path, index=True)


In [97]:
# Explore the long-format topics-over-time table with PyGWalker; the call
# renders the widget shown in this cell's output.
gwalker = pyg.walk(df_tot)

Box(children=(HTML(value='<div id="ifr-pyg-6" style="height: auto">\n    <head>\n        <meta http-equiv="Con…

In [98]:
# Explore the change frame with PyGWalker. This rebinds `gwalker`, replacing
# the handle created for df_tot.
gwalker = pyg.walk(change_df_tpt_m)


Box(children=(HTML(value='<div id="ifr-pyg-7" style="height: auto">\n    <head>\n        <meta http-equiv="Con…