In [None]:
# Download all sp500 data for testing
import utils
import os
print("Downloading data...")
# The replacement list is only requested when its CSV is not on disk yet;
# utils.load_sp500_data is called on every run.
replacements_cached = os.path.exists("test_data/sp500_possible_replacements.csv")
if not replacements_cached:
    utils.get_sp500_possible_replacements("test_data/")
utils.load_sp500_data("test_data/", start="2024-01-01")

In [None]:
# Get the range of dates for each company in the S&P 500
import utils
import pandas as pd
sp500df = utils.get_repo_data("sp500_companies.csv")


def _ticker_date_ranges(frame):
    """Map every ticker in ``frame['tickers']`` to ``(first_label, last_date)``.

    ``frame['tickers']`` holds comma-separated ticker strings, one per row.
    For each ticker:

    * ``first_label`` is the index label of the first row (in iteration
      order) that lists the ticker.
    * ``last_date`` is ``None`` when the greatest index label listing the
      ticker equals the last index label; otherwise it is the index label of
      the row following that one, minus one day.

    The frame is walked once. Tickers are compared as exact tokens of the
    split string, so a row whose ``tickers`` value is a single ticker is
    matched too.

    NOTE(review): assumes the index is unique, sorted ascending and
    datetime-like (a ``pd.Timedelta`` is subtracted from its labels) —
    confirm against ``utils.get_repo_data``.
    """
    first_seen = {}
    last_seen = {}
    for label, tickers in frame['tickers'].items():
        for ticker in tickers.split(","):
            first_seen.setdefault(ticker, label)
            if ticker not in last_seen or label > last_seen[ticker]:
                last_seen[ticker] = label

    if not first_seen:
        # Empty frame: nothing to report (and frame.index[-1] would raise).
        return {}

    final_label = frame.index[-1]
    ranges = {}
    for ticker, first_label in first_seen.items():
        last_label = last_seen[ticker]
        if last_label == final_label:
            # Still listed in the most recent row: no end date.
            last_date = None
        else:
            # The ticker ends the day before the next row takes effect.
            following_label = frame.index[frame.index.get_loc(last_label) + 1]
            last_date = following_label - pd.Timedelta(days=1)
        ranges[ticker] = (first_label, last_date)
    return ranges


companies_dict = _ticker_date_ranges(sp500df)
companies = set(companies_dict)

# print the dictionary with one entry per line
for ticker, (first_date, last_date) in sorted(companies_dict.items()):
    print(ticker, "\t", first_date, "\t", last_date)


In [None]:
# Get the margin stats from the downloaded data
import pandas as pd
import numpy as np
# Load the margin statistics; the first CSV column becomes the index and is
# converted to datetimes.
df = pd.read_csv("repoData/margin_stats.csv", index_col=0)
df.index = pd.to_datetime(df.index)
# Missing values are replaced by 0.
df = df.replace(np.nan, 0)
# Shorten the verbose source column names.
df = df.rename(columns={
    "Debit Balances in Customers' Securities Margin Accounts": "Debit",
    "Free Credit Balances in Customers' Cash Accounts": "Credit Cash",
    "Free Credit Balances in Customers' Securities Margin Accounts": "Credit Securities",
})
# Merge the two credit columns into one "Credit" column. When they are not
# both present the merge is skipped, which is only valid if the file already
# carries a "Credit" column.
credit_parts = ["Credit Cash", "Credit Securities"]
if all(part in df.columns for part in credit_parts):
    df["Credit"] = df["Credit Cash"] + df["Credit Securities"]
    df = df.drop(columns=credit_parts)
elif "Credit" not in df.columns:
    raise KeyError(
        "repoData/margin_stats.csv has neither a 'Credit' column nor both "
        "'Credit Cash' and 'Credit Securities'"
    )
# calculate Leverage Ratio: debit/credit, shifted by 1.5, scaled by 2 and
# squashed with tanh into the range 0..100
# NOTE(review): a Credit of 0 (e.g. from the NaN replacement above) makes the
# raw ratio inf or NaN — confirm this is acceptable.
raw_ratio = df["Debit"] / df["Credit"]
df["Leverage Ratio"] = (np.tanh((raw_ratio - 1.5) * 2) + 1) * 50
# plot the leverage ratio
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df.index, df["Leverage Ratio"])
ax.set_title("Leverage Ratio")
ax.set_xlabel("Date")
ax.set_ylabel("Leverage Ratio")
plt.show()
# save the leverage ratio (disabled: it would overwrite the source file)
#df.to_csv("repoData/margin_stats.csv")

In [None]:
# load aaii data from the excel file to the correct format
import pandas as pd
import numpy as np
def fetch_investor_sentiment_data(source_path="data/downloads/sentiment.xls", output_path="aaii_sentiment.csv"):
    """Convert the AAII sentiment workbook into a normalized CSV series.

    Steps:
      1. Read ``source_path`` with the first column as index, parsing index
         values with the ``%m-%d-%Y`` format (unparseable values become NaT).
      2. Keep only the 6th column.
      3. Skip the first 4 rows and stop at the first row whose index is
         missing; when no index value is missing, keep everything to the end.
      4. Forward-fill gaps, replace remaining NaN with 0.
      5. Map the values into 0..100 with ``((tanh(3 * x) + 1) / 2) * 100``.
      6. Write the result to ``output_path``.

    Parameters:
        source_path: Excel file to read.
        output_path: CSV file to write.

    Returns:
        None. The result is only written to ``output_path``.
    """
    # TODO validate the dates of the data
    # TODO scrape the recent data from the website (https://www.aaii.com/sentimentsurvey/sent_results)
    date_parser = lambda x: pd.to_datetime(x, format="%m-%d-%Y", errors='coerce')

    # NOTE(review): ``date_parser`` is deprecated since pandas 2.0 in favour of
    # ``date_format``; kept as-is because the replacement's handling of the
    # non-date rows has not been verified against the workbook.
    aaii_sentiment = pd.read_excel(source_path, index_col=0, parse_dates=True, date_parser=date_parser)
    # only keep the 6th column
    aaii_sentiment = aaii_sentiment.iloc[:, 5]

    # Rows before this position are dropped (presumably header rows — confirm
    # against the workbook layout).
    header_rows = 4
    index_missing = np.asarray(aaii_sentiment.index.isna())[header_rows:]
    if index_missing.any():
        # cut at the first missing index value after the skipped rows
        stop = header_rows + int(index_missing.argmax())
    else:
        # no missing index value: there is nothing to cut off
        stop = len(aaii_sentiment)
    aaii_sentiment = aaii_sentiment.iloc[header_rows:stop]

    aaii_sentiment = aaii_sentiment.ffill().replace(np.nan, 0)
    # normalize with tanh
    aaii_sentiment = ((np.tanh(aaii_sentiment * 3) + 1) / 2) * 100
    # save the data
    aaii_sentiment.to_csv(output_path)

fetch_investor_sentiment_data()

# Test Metrics

In [None]:
import os
from metrics import *
import matplotlib
matplotlib.use('tkagg')
import plotting
# Directory used for the downloaded test data.
data_dir = "test_data/"
# exist_ok=True makes this idempotent: the directory is created when missing
# and left alone when it already exists, with no separate existence check.
os.makedirs(data_dir, exist_ok=True)
# Date range later passed to Metric.setPreferences.
start_date = '2000-01-01'
end_date = '2024-12-08'

In [None]:
def showMetricsCalculation(metric : Metric, y_axis = ["left", "left", "right"]):
    """Plot the processed and result stages of ``metric`` in one figure.

    Parameters:
        metric: object providing ``processed``, ``result`` and ``name``.
        y_axis: axis side per stage; entry 1 is used for the processed
            graph and entry 2 for the result graph. Entry 0 belongs to the
            raw-data graph, which is currently disabled.
    """
    # An SP500Momentum instance is still created and fetched on every call,
    # although the graph that would display it is disabled below.
    sp500 = SP500Momentum()
    sp500.get()

    # Disabled stages, kept for reference:
    #   plotting.Graph(metric.data, y_axis=y_axis[0], df_color="black", df_label="data")
    #   plotting.Graph(metric.test, y_axis=y_axis[2], df_color="blue", df_label="result")
    #   plotting.Graph(sp500.data, y_axis=y_axis[2], df_color="green", df_label="train")
    #   plotting.Graph(metric.test, y_axis=y_axis[2], df_color="orange", df_label="test")
    processed_graph = plotting.Graph(metric.processed, y_axis=y_axis[1], df_color="blue", df_label="processed")
    result_graph = plotting.Graph(metric.result, y_axis=y_axis[2], df_color="red", df_label="result")

    plotting.plot_graph([processed_graph, result_graph], neutral_line=None, title=metric.name)

In [None]:
# Apply the data directory and date range before any metric is created.
Metric.setPreferences(data_dir, start_date, end_date)
metrics = [
    T10YearYield(),
    JunkBondSpread(),
    SaveHavenDemand(),
    ConsumerSentiment(),
    SP500Momentum(),
    PutCallRatio(),
    InsiderTransactions(),
    AAIISentiment(),
    MarginStats(),
    VIX(),
]
# Only the first metric in the list is fetched and plotted.
for metric in metrics[:1]:
    metric.get()
    showMetricsCalculation(metric)