In [117]:
#Base Libraries
import os 
import sys
import json
import csv

#Core Libraries 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pygwalker as pyg
import datetime as dt
#Model Libraries
from bertopic import BERTopic
from sklearn.metrics.pairwise import cosine_similarity
from umap import UMAP
#import umap.plot
from sentence_transformers import SentenceTransformer

#ML Libraries
import torch 


In [118]:

#Importing Configs
# Define the path where config.py is located
os.chdir('/Users/kylenabors/Documents/GitHub/Finance-ML-Modeling')
config_file_path = os.getcwd()
print(config_file_path)

# Add this path to the sys.path
sys.path.append(config_file_path)

import config

#Configs
database_file = config.database
database_folder = config.database_folder
bert_models = config.bert_models
bert_models_local = config.bert_models_local

/Users/kylenabors/Documents/GitHub/Finance-ML-Modeling


In [119]:
freq_ts = pd.read_csv('/Users/kylenabors/Documents/GitHub/Finance-ML-Modeling/Models/Word2Vec Models/keyword_info_ts.csv')
print(freq_ts.head())

  Year-Month  credit  energy  inflation  interest  market  trade  \
0    2007-02      19      39          2        26      42     16   
1    2007-07      26      32          3        19      37      8   
2    2008-02      68      30          2        27      71      8   
3    2008-07      69      32          7        31      92      9   
4    2009-02      96      28          3        35     132      7   

   unemployment  CPIENGSL        DATE  FEDFUNDS      CPI  
0            11   192.310  2007-01-31      5.26  203.499  
1            11   209.639  2007-06-30      5.26  208.299  
2            26   229.731  2008-01-31      2.98  211.693  
3            22   271.149  2008-06-30      2.01  219.964  
4            32   183.721  2009-01-31      0.22  212.193  


In [120]:
freq_ts_m = freq_ts.copy(deep=True)
freq_ts_m["Year-Month"] = pd.to_datetime(freq_ts_m["Year-Month-Day"])
freq_ts_m['Year-Month'] = freq_ts_m['Year-Month'].dt.to_period('M')
print(freq_ts_m.head())

KeyError: 'Year-Month-Day'

In [None]:
freq_ts_m = freq_ts_m.pivot_table(index='Year-Month', columns='Keyword', values='Frequency')
print(freq_ts_m.head())

Keyword     credit  energy  inflation  interest  market  trade  unemployment
Year-Month                                                                  
2007-02         19      39          2        26      42     16            11
2007-07         26      32          3        19      37      8            11
2008-02         68      30          2        27      71      8            26
2008-07         69      32          7        31      92      9            22
2009-02         96      28          3        35     132      7            32


In [None]:
freq_ts_m.columns
freq_ts_m.reset_index(inplace=True)
print(freq_ts_m.head())

Keyword Year-Month  credit  energy  inflation  interest  market  trade  \
0          2007-02      19      39          2        26      42     16   
1          2007-07      26      32          3        19      37      8   
2          2008-02      68      30          2        27      71      8   
3          2008-07      69      32          7        31      92      9   
4          2009-02      96      28          3        35     132      7   

Keyword  unemployment  
0                  11  
1                  11  
2                  26  
3                  22  
4                  32  


In [None]:
energy = pd.read_csv('/Users/kylenabors/Documents/Database/Other Data/Energy Prices/US Energy CPI.csv')
energy["DATE"] = pd.to_datetime(energy["DATE"]).dt.to_period('M')
freq_ts_m = freq_ts_m.merge(energy, left_on='Year-Month', right_on='DATE', how='left')
print(freq_ts_m.head())
freq_ts_m.drop(columns=['DATE'], inplace=True)

  Year-Month  credit  energy  inflation  interest  market  trade  \
0    2007-02      19      39          2        26      42     16   
1    2007-07      26      32          3        19      37      8   
2    2008-02      68      30          2        27      71      8   
3    2008-07      69      32          7        31      92      9   
4    2009-02      96      28          3        35     132      7   

   unemployment     DATE  CPIENGSL  
0            11  2007-02   192.310  
1            11  2007-07   209.639  
2            26  2008-02   229.731  
3            22  2008-07   271.149  
4            32  2009-02   183.721  


In [None]:
funds = pd.read_excel('/Users/kylenabors/Documents/Database/Other Data/FedFundsRate.xlsx', sheet_name='Monthly')
funds['Date Adjusted'] = funds['Date Adjusted'].dt.to_period(freq='M')
freq_ts_m = freq_ts_m.merge(funds, left_on='Year-Month', right_on='Date Adjusted', how='left')

In [None]:
cpi = pd.read_excel('/Users/kylenabors/Documents/Database/Other Data/Urban Consumer CPI.xlsx', sheet_name='Monthly')
print(cpi.head())
cpi["Year-Month"] = pd.to_datetime(cpi["Year-Month"]).dt.to_period('M')
freq_ts_m = freq_ts_m.merge(cpi, left_on='Year-Month', right_on='Year-Month', how='left')
freq_ts_m.drop(columns=['Date Adjusted'], inplace=True)
print(freq_ts_m.head())

  Year-Month  Value
0    2000-01  168.8
1    2000-02  169.8
2    2000-03  171.2
3    2000-04  171.3
4    2000-05  171.5
  Year-Month  credit  energy  inflation  interest  market  trade  \
0    2007-02      19      39          2        26      42     16   
1    2007-07      26      32          3        19      37      8   
2    2008-02      68      30          2        27      71      8   
3    2008-07      69      32          7        31      92      9   
4    2009-02      96      28          3        35     132      7   

   unemployment  CPIENGSL       DATE  FEDFUNDS    Value  
0            11   192.310 2007-01-31      5.26  203.499  
1            11   209.639 2007-06-30      5.26  208.299  
2            26   229.731 2008-01-31      2.98  211.693  
3            22   271.149 2008-06-30      2.01  219.964  
4            32   183.721 2009-01-31      0.22  212.193  


In [None]:
freq_ts_m.rename(columns={'Value_x': 'Energy CPI', 'Value_y': 'Fed Funds Rate', 'Value': 'CPI'}, inplace=True)
freq_ts_m.drop(columns=['DATE'], inplace=True)
print(freq_ts_m.head())

  Year-Month  credit  energy  inflation  interest  market  trade  \
0    2007-02      19      39          2        26      42     16   
1    2007-07      26      32          3        19      37      8   
2    2008-02      68      30          2        27      71      8   
3    2008-07      69      32          7        31      92      9   
4    2009-02      96      28          3        35     132      7   

   unemployment  CPIENGSL       DATE  FEDFUNDS      CPI  
0            11   192.310 2007-01-31      5.26  203.499  
1            11   209.639 2007-06-30      5.26  208.299  
2            26   229.731 2008-01-31      2.98  211.693  
3            22   271.149 2008-06-30      2.01  219.964  
4            32   183.721 2009-01-31      0.22  212.193  


In [None]:
freq_ts_m.to_csv('/Users/kylenabors/Documents/GitHub/Finance-ML-Modeling/Models/Word2Vec Models/keyword_info_ts.csv', index=False)

In [None]:
gwalker = pyg.walk(freq_ts_m)

Box(children=(HTML(value='<div id="ifr-pyg-7" style="height: auto">\n    <head>\n        <meta http-equiv="Con…