# Import

In [1]:
%run init.ipynb

# Bitcoin

Idea: use the new price data to extend the current data

## Supply

In [None]:
# Read the raw export and keep only the list stored under "total-bitcoins".
with open("../../temp/total-bitcoins.json") as f:
    raw_payload = json.load(f)

total_btc = raw_payload["total-bitcoins"]
total_btc[:3]

In [None]:
# Turn the list of records loaded from the JSON file into a DataFrame.
total_btc = pd.DataFrame(data=total_btc)
total_btc.head(3)

In [None]:
# Rebuild the raw frame as a supply series indexed by timestamp.
# "x" holds epoch milliseconds (the previous code divided it by 10**3 to get seconds).
# pd.to_datetime(..., unit="ms") yields naive UTC timestamps, so the result no longer
# depends on the local timezone of the machine running the notebook, and the
# conversion is vectorized instead of mapping a Python function over every row.
# The column is selected explicitly so a one-row frame still yields a Series.
total_btc = pd.DataFrame({
    "ts": pd.to_datetime(total_btc["x"], unit="ms"),
    "n_coins": total_btc["y"],
}).set_index("ts")["n_coins"]
total_btc.head(3)

In [None]:
# Downsample to month-end buckets, keeping the last observation of each month.
month_end_buckets = total_btc.resample("ME")
total_btc = month_end_buckets.last()
total_btc

In [None]:
# Persist the monthly supply series under the key "total_btc"
# (dump_pickle is defined outside this view, presumably in init.ipynb).
dump_pickle(total_btc, "total_btc")

## Volume

### The Block

In [None]:
# Load The Block export; the "dt" column becomes a datetime index,
# parsed with the day-first "%d/%m/%y %H:%M" layout used in the file.
block_data = pd.read_csv(
    "../../temp/Volume_of_Bitcoin_Futures_(BTC_not_USD)_2025_12_18.csv",
    date_format="%d/%m/%y %H:%M",
    parse_dates=True,
    index_col="dt",
)
block_data.head(3)

In [None]:
# Total traded volume per calendar month (month-end labels).
volume_column = block_data["volume"]
block_data = volume_column.resample("ME").sum()
block_data.head(3)

In [None]:
# Persist the monthly The Block volume series under the key "btc_volume_block".
dump_pickle(block_data, "btc_volume_block")

### CMEG

* Downloaded data manually
* Starts on 2013-01

In [None]:
file_exists = pd.Series(
    False, 
    index=pd.date_range(start="2013-01-01", end="2025-08-01", freq="ME").strftime("%Y%m") + ".zip"
)
file_exists

In [None]:
# Check each expected archive on disk and record the result under the same index.
file_exists = pd.Series(
    [Path(f"../../temp/{archive}").exists() for archive in file_exists.index],
    index=file_exists.index,
)
file_exists

In [None]:
# Show only the archives that were not found on disk.
file_exists[~file_exists]
# author's note: all files are present

In [None]:
expected_files = list(
    "CMEG_" + pd.date_range(start="2013-01-01", end="2025-08-01", freq="ME").strftime("%Y%m") + ".pdf"
)
expected_files[:3]

In [None]:
# Collect the CMEG reports actually present on disk.
# The directory also holds other inputs read by this notebook (JSON, CSV, XLSX, zip
# archives), so only names with the "CMEG" prefix are kept — the same filter the
# extraction loop applies. This also leaves out dot files.
got_files = [
    entry.name
    for entry in Path("../../temp/").iterdir()
    if entry.name.startswith("CMEG")
]
got_files[:3]

In [None]:
# Sanity checks: same count, no duplicates, and identical names once both lists are sorted.
n_expected, n_got = len(expected_files), len(got_files)
assert n_expected == n_got, f"The number of actual files does not equal what is expected. Expected: {n_expected}, got {n_got}"

assert len(set(got_files)) == n_got, "There are duplicated in fetched files"

for wanted, found in zip(sorted(expected_files), sorted(got_files)):
    assert wanted == found, f"File names don't match: {wanted} vs. {found}"

# Only reached when every assertion above passed.
print("All expected files have been fetched and processed!")

In [None]:
# Collect one reading per monthly CMEG report, keyed by the first day of its month.
# Readings are gathered in a dict and turned into a Series once, instead of growing
# a dtype-less Series row by row.
volume_by_month = {}

for file in Path("../../temp/").iterdir():
    if not file.name.startswith("CMEG"):
        continue

    # Names follow "CMEG_YYYYMM...": characters 5-8 are the year, 9-10 the month.
    month_start = datetime(int(file.name[5:9]), int(file.name[9:11]), 1)

    # Try the asset labels in order and keep the first one that does not raise ValueError.
    for name in ["BITCOIN FUTURES", "BITCOIN"]:
        try:
            volume = extract_asset_volume(
                path=file,
                asset=name
            )
        except ValueError:
            continue
        break
    else:
        # Neither label worked: record the month as missing.
        volume = np.nan

    volume_by_month[month_start] = volume
    print(f"{month_start}: {volume}")

# Assumes extract_asset_volume returns a numeric value — TODO confirm.
srs = pd.Series(volume_by_month, dtype="float64").sort_index()
# NOTE(review): the factor 5 is presumably the contract size (BTC per contract) — confirm.
srs = srs * 5
srs

In [None]:
# Persist the scaled CMEG volume series under the key "CMEG_volume".
dump_pickle(srs, "CMEG_volume")

## Concat

In [None]:
# Reload the The Block series and blank out the index name
# (it was "dt" when the CSV was read).
block_volume = load_pickle("btc_volume_block").rename_axis("")
block_volume

In [None]:
# Reload the CMEG series and move its month-start labels to month-end buckets
# so that it aligns with the The Block series.
cmeg_volume = load_pickle("CMEG_volume").resample("ME").last()
cmeg_volume

In [None]:
# Sum both sources month by month; months missing from either source become NaN
# in the aligned sum and are dropped.
combined_volume = block_volume + cmeg_volume
total_volume = combined_volume.dropna()
total_volume

In [None]:
# Reload the monthly supply series stored earlier under the key "total_btc".
total_btc = load_pickle("total_btc")
total_btc

In [None]:
# Combine supply and trading volume into one month-end frame.
# Column names are given explicitly through the mapping keys, so they do not depend
# on the names the two Series happen to carry (the volume sum is unnamed and would
# otherwise surface as column 0).
df = (
    pd.concat(
        {"supply": total_btc, "trading_volume": total_volume},
        axis="columns",
        join="outer",
    )
    # Restrict to the month-end dates in the studied window ...
    .reindex(pd.date_range("2010-01-01", "2025-08-01", freq="ME"))
    # ... and keep only months where both supply and volume are known.
    .dropna(axis="rows", how="any")
)
df

In [None]:
# Move the date index into a regular column (named "index" because the index is unnamed)
# and render it as a single-quoted ISO date string.
# "%Y-%m-%d" is used instead of "%F": the latter is a platform-specific strftime
# directive and is not available everywhere.
# NOTE(review): the literal quotes suggest the values are spliced into SQL text by
# insert_into_db — confirm, and prefer parameterized queries if so.
df = df.reset_index()
df["index"] = "'" + df["index"].dt.strftime("%Y-%m-%d") + "'"
df

In [None]:
# Dry run with the same connection string, table and payload as the real insert.
# NOTE(review): dry_insert_into_db is defined outside this view — presumably it
# previews the statement without writing; confirm.
dry_insert_into_db(
    conn_str=CONNECTION_STRING, 
    table="bitcoin_trading_metadata",
    data=df.to_numpy()
)

In [None]:
# Write the rows (date string, supply, trading volume) to "bitcoin_trading_metadata".
# NOTE(review): re-running this cell presumably inserts the same rows again —
# verify how insert_into_db handles duplicates.
insert_into_db(
    conn_str=CONNECTION_STRING, 
    table="bitcoin_trading_metadata",
    data=df.to_numpy()
)

In [None]:
# Mark the end of the Bitcoin section in the log.
logger.info("Bitcoin supply and demand have been acquired and inserted")

# S&P 500

## Supply

* downloaded data manually from the source

In [None]:
# Load the date column ("per") and the outstanding-shares column from the
# "Historisch" sheet; "--" marks missing values, numbers use "," as the thousands
# separator and "." as the decimal point.
df = pd.read_excel(
    "../../temp/snp_500_data.xlsx",
    sheet_name="Historisch",
    header=0,
    usecols=["per", "Umlaufende Anteile"],
    na_values="--",
    thousands=",",
    decimal=".",
)
df.head(3)

In [None]:
# Split string dates into day, month, year.
# Some have the form "08.Okt.2025", some lack the full stop after the month ("14.März2025").
# The pattern therefore captures the day, skips the point, captures the month name,
# allows an optional point, and captures the four-digit year.
pattern = re.compile(r"(\d{1,2})\.(Jan|Feb|März|Apr|Mai|Juni|Juli|Aug|Sept|Okt|Nov|Dez)\.?(\d{4})")
# Month abbreviation -> month number (1-12), in calendar order.
month_mapping = {k:v for k, v in zip("Jan|Feb|März|Apr|Mai|Juni|Juli|Aug|Sept|Okt|Nov|Dez".split("|"),range(1,13))}


def parse_german_date(raw):
    """Convert one "per" value such as "08.Okt.2025" into a datetime.

    Raises:
        ValueError: if ``raw`` is not a string or does not start with a date in
            the expected layout. The message names the offending value so a bad
            row is easy to locate.
    """
    match_object = pattern.match(raw) if isinstance(raw, str) else None
    if match_object is None:
        raise ValueError(f"Unrecognised date in column 'per': {raw!r}")
    day, month_name, year = match_object.groups()
    return datetime(int(year), month_mapping[month_name], int(day))


dates = [parse_german_date(raw) for raw in df["per"]]
dates[:3]

In [None]:
# Swap the raw strings for the parsed dates, give both columns English names,
# and reduce the frame to a date-indexed, chronologically sorted series.
df = (
    df.assign(per=dates)
    .rename(columns={"per": "dt", "Umlaufende Anteile": "outstanding_supply"})
    .set_index("dt")
    .squeeze()
    .sort_index()
)
df

In [None]:
# The raw supply shows significant downward fluctuations,
# so a monthly average is taken in the next step.
df.plot.line()

In [None]:
# Average within each calendar month (month-end labels), then keep the studied window.
monthly_mean = df.resample("ME").mean()
df = monthly_mean.loc["2010-01-01":"2025-08-01"].copy()
df

In [None]:
# Same line plot as before, now on the monthly averages.
df.plot.line()

In [None]:
# Persist the monthly-averaged supply series under the key "snp_aggregated_supply_data".
dump_pickle(
    df,
    "snp_aggregated_supply_data"
)

## Volume

In [2]:
# Which ticker column of the iShares listings table to use.
TICKER_TYPE = "RIC"

# Cookies exported from a logged-in browser session.
with open("../../temp/www.ishares.com_cookies.json") as f:
    cookies = json.load(f)

driver = webdriver.Chrome()
try:
    # The site must be opened once before cookies for its domain can be added.
    driver.get("https://www.ishares.com/")

    for cookie in cookies: # cookies from a logged-in page must be already defined.
        driver.add_cookie({
            "name":cookie["name"],
            "value":cookie["value"],
            "domain":cookie["domain"]}
            )

    driver.get("https://www.ishares.com/uk/individual/en/products/253743/ishares-sp-500-b-ucits-etf-acc-fund")

    html_dump = driver.page_source
finally:
    # quit() ends the whole driver session (close() only closes the current window),
    # and running it in `finally` releases the browser even if a step above fails.
    driver.quit()

soup = BeautifulSoup(html_dump, "html.parser")
table = soup.find("table",attrs={"id":"listingsTable"})
if table is None:
    # Fail with a clear message instead of handing the text "None" to read_html.
    raise ValueError("Listings table (id='listingsTable') was not found in the fetched page")

df = pd.read_html(
    StringIO(str(table)),
    na_values="-"
)[0]

# Drop the large intermediates; only `df` is needed from here on.
del driver, html_dump, soup, table

# Exchanges without a ticker of the chosen type, and the tickers that are available.
na_exchanges = list(df.loc[df[TICKER_TYPE].isna(), "Exchange"])
yf_tickers = list(df[TICKER_TYPE].dropna())

print(f"{na_exchanges=}\n")
print(f"{yf_tickers=}")

na_exchanges=['Bolsa De Valores De Colombia', 'Santiago Stock Exchange']

yf_tickers=['CSPXN.MX', 'CSSPX.MI', 'SXR8.DE', 'CSP1.AS', 'CSPX.L', 'CSP1.L', 'CSSPX.S', 'iSFF702.TA']


# NASDAQ

# Dow Jones