# Initial data exploration and visualization

In [None]:
from datetime import date, timedelta

import altair as alt
import numpy as np
import pandas as pd
import pandas_market_calendars as mcal
import yfinance as yf

import finstar.evaluation.returns as returns
import finstar.strategies.sma as sma

In [None]:
# determine NYSE market days
def years_before(day, years):
    """Return the calendar date `years` years before `day`.

    Feb 29 has no counterpart in a non-leap target year; in that case the
    result falls back to Feb 28 instead of raising ValueError.
    """
    target_year = day.year - years
    try:
        return day.replace(year=target_year)
    except ValueError:
        # Only Feb 29 can fail here (for a valid target year), so clamp to
        # the last day of February that always exists.
        return day.replace(year=target_year, day=28)


# Ten-year window ending today.
end = date.today()
start = years_before(end, 10)

# Look up the NYSE calendar and keep the index of its schedule between
# `start` and `end` (one entry per scheduled row).
nyse = mcal.get_calendar("NYSE")
nyse_schedule = nyse.schedule(start_date=start, end_date=end)
nyse_days = nyse_schedule.index

In [None]:
# Column-less frame indexed by the NYSE market days; the index is labelled
# "Date" so it can be used as the join key below.
days_df = pd.DataFrame(index=nyse_days)
days_df.index.name = "Date"

In [None]:
# Load the AAPL parquet file from the local raw-data directory and show it.
ticker_df = pd.read_parquet(
    "../data/raw/20210620-SP500-max/20210620/AAPL.snappy.parquet"
)
ticker_df

In [None]:
# Left-join the ticker's "Close" column onto the market-day frame, matching
# on "Date"; every row of days_df is kept, and days without a matching
# price get NaN.
joined = days_df.join(
    ticker_df["Close"],
    on="Date",
    how="left",
)

In [None]:
# Display the joined frame to inspect the result of the left join.
joined

In [None]:
# Show the boolean missing-value mask of the joined frame (True where a
# value is NaN). The method must be called; a bare `joined.isna` only
# displays the bound method object.
joined.isna()

In [None]:
# Keep only the "Close" column as a one-column frame and move the index
# into a regular column.
hist_df = joined[["Close"]].reset_index()

In [None]:
# Preview the first rows of hist_df.
hist_df.head()

In [None]:
# Preview the last rows of hist_df.
hist_df.tail()

In [None]:
# Window parameters handed to sma.sma (presumably short/long moving-average
# lengths, per the names -- confirm against finstar.strategies.sma).
short_sma, long_sma = 1, 200

hist_df["Position"] = sma.sma(hist_df["Close"], short_sma, long_sma)
# Drop every row that still contains a NaN in any column before computing
# returns.
hist_df.dropna(inplace=True)

hist_df["Returns"] = returns.log_returns(hist_df["Close"])
hist_df["Strategy"] = returns.strategy_returns(
    hist_df["Position"],
    hist_df["Returns"],
)

# exp of the running sum of each return column; if the helpers yield
# per-period log returns (as the name log_returns suggests), this is the
# cumulative growth factor.
hist_df["Returns_Sum"] = np.exp(hist_df["Returns"].cumsum())
hist_df["Strategy_Sum"] = np.exp(hist_df["Strategy"].cumsum())

In [None]:
# exp of the column sums of "Returns" and "Strategy" over the whole frame.
np.exp(hist_df[["Returns", "Strategy"]].sum())

In [None]:
# Display hist_df with the position, return and cumulative columns.
hist_df

In [None]:
# Reshape Date/Close into long form (one row per Date and Type) for Altair.
plot_df = hist_df[["Date", "Close"]].melt(
    id_vars="Date",
    var_name="Type",
    value_name="Price",
)

# Interactive line chart of Price over Date, one colour per Type.
price_lines = alt.Chart(plot_df).mark_line()
base = price_lines.encode(x="Date:T", y="Price:Q", color="Type:N").interactive()

base

In [None]:
# Number of rows in hist_df.
len(hist_df)

In [None]:
# Reshape the position and cumulative columns into long form (one row per
# Date and Type) so they can share one chart.
plot_df = hist_df[["Date", "Position", "Returns_Sum", "Strategy_Sum"]].melt(
    id_vars="Date",
    var_name="Type",
    value_name="Returns",
)

# Static line chart of the melted values over Date, one colour per Type.
returns_lines = alt.Chart(plot_df).mark_line()
base = returns_lines.encode(x="Date:T", y="Returns:Q", color="Type:N")

base