In [None]:
import os
import sys
import json
import pandas as pd
from datetime import datetime
from plotly import graph_objects as go
import plotly.express as px

# Project imports
sys.path.append(os.getcwd())
from src.py.analysis.yahoo.stocks.finance_df_utils import load_stock_csv, df_add_data, get_figure, filter_df_by_date, add_vline_annotation, save_fig, get_safe_filename, get_grouped_df


In [None]:
# Input and output locations, relative to the current working directory
path_index = "data/scraped/yahoo/crypto/index.json"
path_csv_root = "data/scraped/yahoo/crypto/csv"
path_output_root = "data/analysis/yahoo/crypto"

# Create the output directory together with any missing parents.
# exist_ok=True makes the cell safe to re-run and removes the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(path_output_root, exist_ok=True)

In [None]:
# Load index
# The context manager closes the file handle as soon as the JSON is parsed
with open(path_index, "r") as index_file:
	index = json.load(index_file)["urls"] # type: list
print(f"Loaded index with {len(index)} crypto currency links")
print("")
# Guard the preview so an empty index does not raise IndexError
if index:
	print(f"First entry: '{index[0]}'")

# Parse just the crypto symbol from url:
# keep the last "/"-separated segment and drop anything after "?"
index = [x.split("/")[-1].split("?")[0] for x in index]
if index:
	print(f"First entry (parsed): '{index[0]}'")

In [None]:
# Load all CSVs
# dfs_dict:  symbol -> DataFrame, for every symbol that loaded and passed the date checks
# dfs_fails: symbol -> reason string, for every symbol that was skipped
dfs_dict = {}
dfs_fails = {}
print(f"Loading {len(index)} crypto CSVs.")
print("")
for i, symbol in enumerate(index):
	# Progress indicator; end="\r" returns to the line start so the next print overwrites it
	print(f"{i+1}/{len(index)} [{symbol}]     ", end="\r")
	try:
		# Load CSV
		# NOTE(review): the comparisons below assume load_stock_csv returns a frame
		# with a sorted, datetime-comparable index - confirm in finance_df_utils
		df = load_stock_csv(os.path.join(path_csv_root, f"{symbol}.csv"))
		# Check if data is within range (first date must not be after 2018-10-01)
		if df.index[0] > datetime(2018, 10, 1):
			raise Exception("Data starts after 2018-10-01.")
		# Check if data is within range (last date must not be before 2023-12-01)
		if df.index[-1] < datetime(2023, 12, 1):
			# Message now states the cutoff that is actually tested above
			raise Exception("Data ends before 2023-12-01.")
		# Plotting interval is 2019-01-01 to 2023-12-26
		# start_date is set to 2018-10-01 to leave some for window
		df = filter_df_by_date(df, start_date="2018-10-01", end_date="2023-12-26")
		dfs_dict[symbol] = df
	except FileNotFoundError:
		# Missing CSV: detected by exception type rather than by message text
		dfs_fails[symbol] = "No CSV file."
	except Exception as e:
		# Any other failure (including an empty frame, where df.index[0] raises)
		# is recorded so the loop continues with the next symbol
		reason = str(e)
		# Fallback for a missing-file error that reaches here as a different exception type
		if "no such file or directory" in reason.lower():
			reason = "No CSV file."
		dfs_fails[symbol] = reason
print("")
print(f"Loaded {len(dfs_dict)} crypto CSVs.")

In [None]:
# Print fails grouped and sorted by error
print(f"Failed to load {len(dfs_fails)} crypto CSVs.")
print("")
# Group tickers by failure reason. Building the dict with setdefault keeps
# first-seen order, so groups of equal size print in a stable order across runs
# (seeding the keys from a set would make that order vary).
dfs_fails_grouped = {}
for ticker, e in dfs_fails.items():
	dfs_fails_grouped.setdefault(str(e), []).append(ticker)

# Largest groups first
for e, tickers in sorted(dfs_fails_grouped.items(), key=lambda x: len(x[1]), reverse=True):
	print(f"{len(tickers)} cryptos: {e}")
	# Show at most 10 tickers; add the ellipsis only when some were left out
	ellipsis = "..." if len(tickers) > 10 else ""
	print(f"{', '.join(tickers[:10])}{ellipsis}")
	print("")


In [None]:
# List the first ten keys of dfs_dict, numbered from 1
print("First 10 crypto symbols:")
for rank, symbol in enumerate(list(dfs_dict)[:10], start=1):
	print(f"{rank:2d}: {symbol}")

In [None]:
# Add Symbol column to start of each DataFrame
# Only values of existing keys are replaced, so the dict size never changes
# while it is being iterated.
for symbol, df in dfs_dict.items():
	if "Symbol" in df.columns: # ensure idempotence
		continue
	# Work on a copy so the stored frame is never mutated in place
	# (this also avoids SettingWithCopyWarning if that frame is a slice of another)
	df = df.copy()
	# insert() puts the new column at position 0 directly; no manual reorder needed
	df.insert(0, "Symbol", symbol)
	dfs_dict[symbol] = df

dfs_dict["BTC-USD"].head()

In [None]:
# Overlay the "Close" series of the first five entries of dfs_dict in one figure
fig = px.line()
for symbol in list(dfs_dict)[:5]:
	frame = dfs_dict[symbol]
	close_trace = go.Scatter(x=frame.index, y=frame["Close"], name=symbol)
	fig.add_trace(close_trace)
fig.update_layout(title="First 5 crypto symbols")
fig.show()

In [None]:
# TODO: move to utils

# Event records: each holds a date, a short annotation label and a longer description
events = [
	# COVID-19 market crash
	{
		"date": datetime(2020, 2, 20),
		"annotation": "MC",
		"description": "COVID-19 market crash",
	},
	# COVID-19 market crash end
	{
		"date": datetime(2020, 4, 7),
		"annotation": "MC end",
		"description": "COVID-19 market crash end",
	},
]

In [None]:
def get_grouped_fig(dfs_dict: dict, symbols: list, title: str, events: list) -> go.Figure:
	"""Build a single figure from the frames of the selected symbols.

	The frames for `symbols` are concatenated, passed through `get_grouped_df`
	with start_date="2019-01-01", and handed to `get_figure` together with
	`title`. Each entry of `events` is then passed to `add_vline_annotation`
	along with the figure.

	Args:
		dfs_dict: Mapping of symbol to DataFrame.
		symbols: Symbols to include; every one must be a key of `dfs_dict`.
			Repeated symbols are included once.
		title: Title forwarded to `get_figure`.
		events: Event records forwarded one by one to `add_vline_annotation`.

	Returns:
		The figure returned by `get_figure`, after the event annotations were applied.

	Raises:
		KeyError: If a symbol is not present in `dfs_dict`.
	"""
	# dict.fromkeys drops repeated symbols while keeping first-seen order
	selected_frames = [dfs_dict[symbol] for symbol in dict.fromkeys(symbols)]
	df_n = get_grouped_df(pd.concat(selected_frames), start_date="2019-01-01")
	fig = get_figure(df_n, title)
	for event in events:
		add_vline_annotation(fig, event)
	return fig
	
# Example with a single symbol: Bitcoin (BTC-USD)
# The symbol is named explicitly instead of taking whichever key happens to be
# first in dfs_dict, so the "Bitcoin" title always matches the plotted data.
# If BTC-USD was not loaded, get_grouped_fig raises KeyError for it.
btc_symbol = "BTC-USD"
fig = get_grouped_fig(dfs_dict, [btc_symbol], f"Bitcoin ({btc_symbol}) trends", events)
fig.show()

In [None]:
# Combined trend figure over the first n entries of dfs_dict
# (fewer are used when dfs_dict holds less than n symbols)
n = 1000
symbols = list(dfs_dict)[:n]
fig = get_grouped_fig(
	dfs_dict,
	symbols,
	f"Market trends for first {len(symbols)} cryptocurrencies",
	events,
)
fig.show()

# Save the figure under the output root; the file name records how many symbols were used
filename = f"crypto-market-trends-first-{len(symbols)}"
path_output = os.path.join(path_output_root, filename + ".png")
save_fig(fig, path_output)