In [None]:
import os
import json
import pandas as pd
from plotly import graph_objs as go

In [None]:
# Root folder that holds the scraped Yahoo currency data.
path_root = "data/scraped/yahoo/currencies"
# JSON index of the available currencies, i.e. "data/scraped/yahoo/currencies/index.json"
path_index = os.path.join(path_root, "index.json")
# Folder with one "<pair>.csv" file per currency pair, i.e. "data/scraped/yahoo/currencies/csv"
path_csv_root = os.path.join(path_root, "csv")

In [None]:
# Load the currency index; the context manager guarantees the file handle is
# closed again (a bare `open(...)` passed to json.load is never closed explicitly).
with open(path_index, "r") as index_file:
	index = json.load(index_file)
print(f"Currency names: {len(index)}")
for key, currency in index.items(): # key is the currency code and is the same as name
	print(f"- {currency['name']}")

In [None]:
# Base currency used by the cells below.
base_currency = "USD"
# Pair codes to work with (an earlier candidate list was "USD-EUR", "USD-GBP", "USD-JPY").
desired_currencies = ["EUR-USD", "GBP-USD", "USD-JPY"]
# Report for every requested pair whether the loaded index contains it.
for currency in desired_currencies:
	print(
		f"OK - Found {currency} in index"
		if currency in index
		else f"ERR - Did not find {currency} in index"
	)

In [None]:
def get_df(currency: str) -> pd.DataFrame:
	"""Read the CSV of one currency pair and return its closing prices indexed by date.

	Args:
		currency: Pair code; the file ``<currency>.csv`` is read from ``path_csv_root``.

	Returns:
		A frame with the single column ``Close`` whose index is the parsed ``Date`` column.
	"""
	csv_path = os.path.join(path_csv_root, f"{currency}.csv")
	raw = pd.read_csv(csv_path)
	raw["Date"] = pd.to_datetime(raw["Date"])
	# Index by date first, then keep only the closing price column.
	return raw.set_index("Date")[["Close"]]

# Quick check: load a single pair and preview its first rows.
eur_usd = get_df("EUR-USD")
eur_usd.head()

In [None]:
def invert_currency_pair(df: pd.DataFrame) -> pd.DataFrame:
	"""Return a copy of ``df`` whose ``Close`` column holds the reciprocal rate.

	The frame passed in is not modified.
	"""
	return df.assign(Close=1 / df["Close"])

def invert_currency_pair_name(currency_pair: str) -> str:
	"""Swap the first two dash-separated codes of a pair name, e.g. "USD-JPY" -> "JPY-USD"."""
	tokens = currency_pair.split("-")
	first, second = tokens[0], tokens[1]
	return "-".join((second, first))

def get_plot_pairs(dfs: dict, base_currency: str) -> go.Figure:
	"""Draw the ``Close`` series of every pair as one line in a single figure.

	Pairs whose name starts with ``base_currency`` are inverted first (values and
	name), so they are plotted in the same direction as the remaining pairs.

	Args:
		dfs: Mapping of pair name to a frame that has a ``Close`` column.
		base_currency: Currency code used to decide which pairs get inverted.

	Returns:
		The figure with one scatter trace per pair.
	"""
	figure = go.Figure()
	for pair_name, frame in dfs.items():
		must_invert = pair_name.startswith(base_currency)
		series_frame = invert_currency_pair(frame) if must_invert else frame
		label = invert_currency_pair_name(pair_name) if must_invert else pair_name
		figure.add_trace(go.Scatter(x=series_frame.index, y=series_frame["Close"], name=label))
	figure.update_layout(title_text=f"Currency pairs with {base_currency}")
	return figure

# Load every selected pair and plot them together in one figure.
fig = get_plot_pairs({currency: get_df(currency) for currency in desired_currencies}, base_currency)
fig.show()

In [None]:
# Explanation:
# EUR-USD # if not currency.startswith(base_currency): the pair is used as-is
# -> for 1 EUR you got 1.43 USD in 2010
#
# USD-EUR # if currency.startswith(base_currency): the pair gets inverted to EUR-USD
# -> for 1 USD you got 0.69 EUR in 2010

# To convert local prices (DE in EUR, UK in GBP, JP in JPY) into the base currency (USD),
# the base currency (USD) has to be on the right-hand side of the pair (e.g. EUR-USD).
# The formula for calculating the adjusted price series is then:
# adjusted_price = local_price * x_rate

In [None]:
def get_plot_pairs_index(dfs: dict, base_currency: str, index: str) -> go.Figure:
	"""Plot every pair rescaled to 100 at a reference date, averaged per month.

	Pairs whose name starts with ``base_currency`` are inverted first (values and
	name). The frames passed in ``dfs`` are never modified.

	Args:
		dfs: Mapping of pair name to a frame with a ``Close`` column and a date index.
		base_currency: Currency code used to decide which pairs get inverted.
		index: Index label (date string) whose ``Close`` value becomes 100.

	Returns:
		The figure with one scatter trace per pair.

	Raises:
		KeyError: If ``index`` is not present in a frame's index.
	"""
	fig = go.Figure()
	title = f"Currency pairs with {base_currency} - normalized to '{index}' = 100"
	for currency, df in dfs.items():
		name = currency
		if currency.startswith(base_currency):
			df = invert_currency_pair(df)
			name = invert_currency_pair_name(currency)
		# set index as 100
		value_at = df.loc[index, "Close"]
		# Build a new Series instead of assigning into df["Close"]: for pairs that
		# were not inverted, df is still the caller's object and must stay untouched.
		normalized = df["Close"] / value_at * 100 # type: ignore
		# resample to monthly
		monthly = normalized.resample("M").mean()
		fig.add_trace(go.Scatter(x=monthly.index, y=monthly, name=name))
	fig.update_layout(title_text=title)
	return fig

# Same pairs again, this time normalized so that the value at "2010-01-01" equals 100.
fig = get_plot_pairs_index({currency: get_df(currency) for currency in desired_currencies}, base_currency, "2010-01-01")
fig.show()

In [None]:
# DONE:
# - resample to monthly and export selected for easier processing of other data

In [None]:
def get_df_joint(curriencies: list, base_currency: str, resample: str) -> pd.DataFrame:
	"""Load several pairs and merge their closing prices into one frame.

	Pairs whose name starts with ``base_currency`` are inverted (values and name).
	Gaps are forward-filled before and after the optional resampling step.

	Args:
		curriencies: Pair codes to load via ``get_df``.
		base_currency: Currency code used to decide which pairs get inverted.
		resample: One of "D", "W", "M", "Q", "A", "Y"; the data is averaged per
			period. Any other value leaves the data unresampled.

	Returns:
		A frame with one column per pair, named after the (possibly inverted) pair.
	"""
	dfs = {currency: get_df(currency) for currency in curriencies}
	# invert if necessary
	for currency in list(dfs.keys()):
		if not currency.startswith(base_currency):
			continue
		# Remove the original entry and store the inverted frame under the
		# inverted name (the re-inserted entry ends up last in the dict).
		dfs[invert_currency_pair_name(currency)] = invert_currency_pair(dfs.pop(currency))
	df = pd.concat(dfs, axis=1)
	# concat on a dict yields (pair, "Close") columns; keep only the pair level
	df.columns = df.columns.droplevel(1)
	# ffill() replaces the deprecated fillna(method="ffill")
	df = df.ffill()
	# resample to the requested frequency
	# NOTE(review): newer pandas releases deprecate some of these aliases for
	# resample() (e.g. "M" in favour of "ME") - confirm against the installed version.
	if resample in ["D", "W", "M", "Q", "A", "Y"]:
		df = df.resample(resample).mean()
		# if M or Q, drop days component of the index
		if resample in ["M", "Q"]:
			df.index = df.index.to_period("M") # type: ignore
		if resample in ["A", "Y"]:
			df.index = df.index.to_period(resample) # type: ignore
		# periods are converted back to timestamps; D and W were never converted
		if resample not in ["W", "D"]:
			df.index = df.index.to_timestamp() # type: ignore
	df = df.ffill() # refill if resampled missing
	return df

# Build the merged table at daily frequency and preview its first rows.
df = get_df_joint(desired_currencies, base_currency, "D")
df.head()

In [None]:
# Save to CSV: rebuild the daily merged table and write it into the data root folder.
df = get_df_joint(desired_currencies, base_currency, "D")
path_csv = os.path.join(path_root, "currency_pairs_merged_selection.csv")
df.to_csv(path_csv)

In [None]:
# # print number of missing values
# df.isna().sum()

# # print all rows with missing values
# print(df[df.isna().any(axis=1)])

# # fill missing values with the last available value
# df.fillna(method="ffill", inplace=True)

# # print number of missing values
# df.isna().sum()

In [None]:
# Plot the joint dataframe - compare M to Y
# Monthly ("M") and yearly ("Y") means of every pair are overlaid in one figure;
# each trace name is prefixed with the frequency it was resampled to.
fig = go.Figure()
title = f"Currency pairs with {base_currency}"
for freq in ("M", "Y"):
	df = get_df_joint(desired_currencies, base_currency, freq)
	for currency in df.columns:
		fig.add_trace(go.Scatter(x=df.index, y=df[currency], name=f"{freq}-{currency}"))
fig.update_layout(title_text=title)
fig.show()

