In [1]:
import papermill as pm
import scrapbook as sb
import pandas as pd
import sqlite3
from datetime import datetime, timedelta
import os

%load_ext sql

  from pyarrow import HadoopFileSystem


We are instructed to run the analysis for every token in the database over the last 2 complete weeks of data. (Note: I interpret "last 2 complete weeks" as the most recent 14 days observed in the database.) We therefore first need to inspect the database to find the available tokens and the most recent timestamp.

In [None]:
# Open a direct sqlite3 connection and cursor on the database file that the %%sql cells also query.
# NOTE(review): neither `conn` nor `cur` is used by the visible cells and the connection is never
# closed — confirm whether this cell is still needed.
conn = sqlite3.connect('data/data.db')
cur = conn.cursor()

In [7]:
%%sql sqlite:///data/data.db

SELECT
    token
FROM ohlc
GROUP BY 1
ORDER BY 1 -- explicit sort, SQLite leaves the row order undefined without ORDER BY

Done.


token
AAVE
ADA
ATOM
AVAX
BTC
COMP
CRV
DOT
ETH
SOL


In [17]:
%%sql tokens <<

SELECT
    token
FROM ohlc
GROUP BY 1
ORDER BY 1 -- explicit sort so the per-token runs always happen in the same order

 * sqlite:///data/data.db
Done.
Returning data to local variable tokens


The output above lists every token we need to consider. We can retrieve each token name directly from the **tokens** object, so we leave it in its SQL result-set form.

In [20]:
%%sql

SELECT DISTINCT
    ts
FROM ohlc
ORDER BY ts DESC
LIMIT 5 -- the five most recent distinct timestamps, newest first

 * sqlite:///data/data.db
Done.


ts
2021-12-15 00:00:00
2021-12-14 23:00:00
2021-12-14 22:00:00
2021-12-14 21:00:00
2021-12-14 20:00:00


The last recorded time is **2021-12-15 00:00:00**. Therefore, the 14-day window to analyse (plot and calculate) starts at **2021-12-01 00:00:00**. The subsequent cells run the **my_template.ipynb** notebook once for each token over this most recent 2-week window.

In [38]:
# Database URL handed to the template notebook as its `connection_string` parameter.
connection_string = 'sqlite:///data/data.db'

# Start of each analysis window; a single window beginning 2021-12-01 00:00:00.
run_dates = [datetime(year=2021, month=12, day=1)]

In [39]:
# Echo the configured window start dates as a quick sanity check.
run_dates

[datetime.datetime(2021, 12, 1, 0, 0)]

In [40]:
# Folder that receives the executed notebooks written by papermill.
# expanduser() only rewrites a leading "~", so a plain relative path is returned unchanged.
base_output_folder = os.path.expanduser('my_outputs')

# exist_ok=True makes this cell safe to re-run and removes the check-then-create race
# of testing os.path.exists() before calling makedirs().
os.makedirs(base_output_folder, exist_ok=True)

In [44]:
# Execute the template notebook once per (window start, token) pair and store each
# executed copy in the output folder.
for run_date in run_dates:
    # The window end is identical for every token, so compute it once per start date.
    window_end = run_date + timedelta(days=14)
    for token in tokens:
        # Each row of the SQL result carries the token symbol in its first column.
        symbol = token[0]
        # The trailing '\r' returns the cursor to the start of the line after each status message.
        print(f'running for {symbol} for date {run_date} to {window_end}', end='\r')
        notebook_params = {
            'from_date': f'{run_date}',
            'to_date': f'{window_end}',
            'token': symbol,
            'connection_string': connection_string,
        }
        res = pm.execute_notebook(
            'my_template.ipynb',
            f'{base_output_folder}/my_analysis_{symbol}_{run_date}.ipynb',
            parameters=notebook_params,
        )

running for AAVE for date 2021-12-01 00:00:00 to 2021-12-15 00:00:00

Executing: 100%|██████████| 22/22 [00:04<00:00,  4.94cell/s]


running for ADA for date 2021-12-01 00:00:00 to 2021-12-15 00:00:00

Executing: 100%|██████████| 22/22 [00:04<00:00,  5.13cell/s]


running for ATOM for date 2021-12-01 00:00:00 to 2021-12-15 00:00:00

Executing: 100%|██████████| 22/22 [00:04<00:00,  5.09cell/s]


running for AVAX for date 2021-12-01 00:00:00 to 2021-12-15 00:00:00

Executing: 100%|██████████| 22/22 [00:04<00:00,  5.31cell/s]


running for BTC for date 2021-12-01 00:00:00 to 2021-12-15 00:00:00

Executing: 100%|██████████| 22/22 [00:04<00:00,  5.13cell/s]


running for COMP for date 2021-12-01 00:00:00 to 2021-12-15 00:00:00

Executing: 100%|██████████| 22/22 [00:04<00:00,  4.81cell/s]


running for CRV for date 2021-12-01 00:00:00 to 2021-12-15 00:00:00

Executing: 100%|██████████| 22/22 [00:04<00:00,  5.05cell/s]


running for DOT for date 2021-12-01 00:00:00 to 2021-12-15 00:00:00

Executing: 100%|██████████| 22/22 [00:04<00:00,  5.03cell/s]


running for ETH for date 2021-12-01 00:00:00 to 2021-12-15 00:00:00

Executing: 100%|██████████| 22/22 [00:04<00:00,  5.01cell/s]


running for SOL for date 2021-12-01 00:00:00 to 2021-12-15 00:00:00

Executing: 100%|██████████| 22/22 [00:04<00:00,  5.06cell/s]


running for USDT for date 2021-12-01 00:00:00 to 2021-12-15 00:00:00

Executing: 100%|██████████| 22/22 [00:04<00:00,  4.99cell/s]


To check whether the above process gave the desired results, I inspect the BTC output notebook as an example in the following few cells.

In [55]:
# Load the executed BTC notebook as a spot check of the papermill run.
# NOTE(review): this path is hard-coded; it has to match the output folder and the
# `my_analysis_<token>_<run_date>.ipynb` naming used when the notebooks were written.
nb = sb.read_notebook('my_outputs/my_analysis_BTC_2021-12-01 00:00:00.ipynb')

In [62]:
# Preview the first rows of the 'prices' scrap stored in the BTC notebook.
nb.scraps['prices'].data.head()

Unnamed: 0_level_0,open,high,low,close,volume,volumeUSD,token,chain,return,volatility,max_drawdown
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-12-01 00:00:00,57321.41,57451.05,56814.34,56987.97,388.482022,22184300.0,BTC,BTC,-0.005834,,0.0
2021-12-01 01:00:00,56998.35,57726.45,56705.06,57616.41,599.791578,34371530.0,BTC,BTC,0.010967,,0.0
2021-12-01 02:00:00,57618.55,57620.0,56972.97,57030.83,591.6872,33870670.0,BTC,BTC,-0.010215,,-0.010163
2021-12-01 03:00:00,57029.79,57396.87,56841.01,57307.59,702.560364,40078160.0,BTC,BTC,0.004841,0.144076,-0.010163
2021-12-01 04:00:00,57306.55,57456.82,57026.11,57404.01,859.591535,49205030.0,BTC,BTC,0.001681,0.105531,-0.010163


Now I read all notebook files and summarize the values requested by our homework instructions.

In [50]:
# Load every notebook found in the output folder so their scraps can be summarized together.
nbs = sb.read_notebooks('my_outputs') # read all notebooks

In [73]:
def _summarize_scraps(scraps):
    """Condense the scraps of one executed notebook into a single summary record.

    Parameters
    ----------
    scraps : mapping
        Scraps of one output notebook. Reads ``scraps['token'].data`` and
        ``scraps['prices'].data``; the latter is used as a DataFrame with
        ``return``, ``volatility``, ``max_drawdown`` and ``close`` columns.

    Returns
    -------
    dict
        Keys ``token``, ``mean_return``, ``mean_volatility``, ``max_drawdown``
        and ``price_range``.

    Raises
    ------
    IndexError
        If the prices frame has no rows (there is no first/last close to read).
    """
    prices = scraps['prices'].data
    close = prices['close']
    return {
        'token': scraps['token'].data,
        'mean_return': prices['return'].mean(),
        'mean_volatility': prices['volatility'].mean(),
        # Smallest value of the max_drawdown column over the window.
        'max_drawdown': prices['max_drawdown'].min(),
        # Last close minus first close. Use .iloc for positional access: the frame is
        # indexed by timestamp, and integer keys such as close[-1] on a non-integer
        # index are deprecated in recent pandas releases.
        'price_range': close.iloc[-1] - close.iloc[0],
    }


# One summary row per executed notebook, indexed by token.
pd.DataFrame(
    [_summarize_scraps(scraps) for scraps in nbs.notebook_scraps.values()]
).set_index('token')

Unnamed: 0_level_0,mean_return,mean_volatility,max_drawdown,price_range
token,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AAVE,-0.001252962,0.237465,-0.404689,-88.318
ADA,-0.0006161726,0.250735,-0.31368,-0.2849
ATOM,-0.0007509726,0.33888,-0.358851,-5.828
AVAX,-0.001001603,0.318885,-0.394209,-35.14
BTC,-0.0005103292,0.169846,-0.205844,-8698.98
COMP,-0.001238412,0.255816,-0.378144,-93.62
CRV,-0.001077334,0.362593,-0.394862,-1.6295
DOT,-0.001149871,0.276084,-0.348681,-11.882
ETH,-0.0005734818,0.196771,-0.218952,-794.46
SOL,-0.0008333406,0.278316,-0.373707,-49.726
