In [1]:
#!pip install -r requirements.txt

In [2]:
import papermill as pm
import scrapbook as sb
import pandas as pd
from datetime import datetime, timedelta
import os
import sqlite3

  from pyarrow import HadoopFileSystem


In [3]:
# Resolve the directory three levels above the current working directory.
curr_path = os.getcwd()
parent_path = os.path.dirname(os.path.dirname(os.path.dirname(curr_path)))
# Path of the SQLite database file. os.path.join is used instead of gluing
# '/' by hand so the separators match the host platform and no doubled
# separator appears when parent_path is the filesystem root.
connection_string = os.path.join(parent_path, 'Lectures', 'Lecture 4', 'data', 'data.db')
# Forwarded to the template notebook as its `events_db` parameter.
events_db = 'crypto_events_database'

In [4]:
def last_dow(d: datetime, dow: int) -> datetime:
    '''
    Return the most recent date on or before ``d`` that falls on weekday ``dow``.

    Weekdays follow the ``datetime.weekday()`` numbering (Monday is 0, Sunday is 6).
    If ``d`` already falls on ``dow`` the result equals ``d``. Only whole days are
    subtracted, so the time-of-day component of ``d`` is carried over as-is.
    '''
    current_dow = d.weekday()
    if current_dow >= dow:
        # The target weekday is today or earlier in the current week.
        days_back = current_dow - dow
    else:
        # The target weekday comes later in the week, so reach into the previous week.
        days_back = (7 - dow) + current_dow
    return d - timedelta(days=days_back)

In [5]:
# sqlite3.connect() creates a brand-new empty database when the file is missing,
# which would only surface later as a misleading "no such table" error, so the
# path is checked up front.
if not os.path.isfile(connection_string):
    raise FileNotFoundError(f'SQLite database not found: {connection_string}')
conn = sqlite3.connect(connection_string)
# One-row, one-column frame holding the largest `ts` value stored in `ohlc`.
end_date = pd.read_sql('''
SELECT 
    max(ts)
FROM ohlc 
''', conn)
end_date.iloc[0,0]

'2021-12-15 00:00:00'

In [6]:
# Anchor the run on the latest Monday (weekday 0) on or before the newest timestamp.
latest_ts = pd.to_datetime(end_date.iloc[0,0])
run_date = last_dow(latest_ts, 0)
# The reported window starts 14 days before run_date.
window_start = run_date - timedelta(days=14)
print("start date: {}, end date: {}".format(window_start, run_date))

start date: 2021-11-29 00:00:00, end date: 2021-12-13 00:00:00


In [7]:
# List the calendar days present in `ohlc` together with their weekday number,
# so the start-to-end window can be checked by eye for missing dates.
dates = (
    pd.read_sql(f'''
SELECT 
    DISTINCT ts
FROM ohlc 
''', conn)
    # Collapse each timestamp to its calendar date.
    .assign(ts=lambda frame: pd.to_datetime(frame['ts']).dt.date)
    .drop_duplicates()
    # Monday is 0 and Sunday is 6, as returned by date.weekday().
    .assign(dow=lambda frame: frame['ts'].apply(lambda day: day.weekday()))
)
dates.tail(17)

Unnamed: 0,ts,dow
672,2021-11-29,0
696,2021-11-30,1
720,2021-12-01,2
744,2021-12-02,3
768,2021-12-03,4
792,2021-12-04,5
816,2021-12-05,6
840,2021-12-06,0
864,2021-12-07,1
888,2021-12-08,2


In [8]:
# Fetch every distinct token symbol stored in `ohlc`.
token_frame = pd.read_sql(f'''
SELECT 
     DISTINCT token
FROM ohlc 
''', conn)
# Flatten the single result column into a plain Python list.
tokens = token_frame.iloc[:, 0].tolist()
pd.DataFrame(tokens,columns=['token'])

Unnamed: 0,token
0,BTC
1,ETH
2,USDT
3,SOL
4,ADA
5,DOT
6,AVAX
7,ATOM
8,CRV
9,AAVE


In [9]:
# Folder that receives the executed per-token notebooks.
base_output_folder = os.path.expanduser('outputs')

# exist_ok=True keeps the cell safe to re-run and removes the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(base_output_folder, exist_ok=True)

In [10]:
# Length, in days, of the window handed to every per-token run.
lookback_days = 14
# The window is the same for all tokens, so its start is computed once.
window_start = run_date - timedelta(days=lookback_days)

# Execute the template notebook once per token, writing each result to the output folder.
for token in tokens:
    print(f'running for {token} for date {window_start} to {run_date}', end='\r')
    # NOTE(review): run_date is interpolated with its time part, so the output
    # file name contains a space and colons; presumably fine on POSIX but likely
    # rejected on Windows - confirm before running there.
    output_path = f'{base_output_folder}/hw_4_jingyi_han_{token}_{run_date}.ipynb'
    run_parameters = {
        'from_date': f'{window_start}',
        'to_date': f'{run_date}',
        'token': token,
        'connection_string': connection_string,
        'events_db': events_db
    }
    res = pm.execute_notebook(
        'hw_4_jingyi_han_template.ipynb',
        output_path,
        parameters=run_parameters
    )

running for BTC for date 2021-11-29 00:00:00 to 2021-12-13 00:00:00

Executing:   0%|          | 0/18 [00:00<?, ?cell/s]

running for ETH for date 2021-11-29 00:00:00 to 2021-12-13 00:00:00

Executing:   0%|          | 0/18 [00:00<?, ?cell/s]

running for USDT for date 2021-11-29 00:00:00 to 2021-12-13 00:00:00

Executing:   0%|          | 0/18 [00:00<?, ?cell/s]

running for SOL for date 2021-11-29 00:00:00 to 2021-12-13 00:00:00

Executing:   0%|          | 0/18 [00:00<?, ?cell/s]

running for ADA for date 2021-11-29 00:00:00 to 2021-12-13 00:00:00

Executing:   0%|          | 0/18 [00:00<?, ?cell/s]

running for DOT for date 2021-11-29 00:00:00 to 2021-12-13 00:00:00

Executing:   0%|          | 0/18 [00:00<?, ?cell/s]

running for AVAX for date 2021-11-29 00:00:00 to 2021-12-13 00:00:00

Executing:   0%|          | 0/18 [00:00<?, ?cell/s]

running for ATOM for date 2021-11-29 00:00:00 to 2021-12-13 00:00:00

Executing:   0%|          | 0/18 [00:00<?, ?cell/s]

running for CRV for date 2021-11-29 00:00:00 to 2021-12-13 00:00:00

Executing:   0%|          | 0/18 [00:00<?, ?cell/s]

running for AAVE for date 2021-11-29 00:00:00 to 2021-12-13 00:00:00

Executing:   0%|          | 0/18 [00:00<?, ?cell/s]

running for COMP for date 2021-11-29 00:00:00 to 2021-12-13 00:00:00

Executing:   0%|          | 0/18 [00:00<?, ?cell/s]

In [11]:
# Read the executed notebooks back from the same folder variable the per-token
# runs wrote to, instead of repeating the folder name as a literal.
nbs = sb.read_notebooks(base_output_folder)

In [12]:
# Build one summary row per executed notebook from the scraps it recorded.
summary_rows = []
for scrap in nbs.notebook_scraps.values():
    # The 'prices' scrap is read once and reused for every metric below.
    price_frame = scrap['prices'].data
    closes = price_frame['close']
    summary_rows.append(pd.Series({
        'token': scrap['token'].data,
        'from_date': scrap['from_date'].data,
        'to_date': scrap['to_date'].data,
        'return': price_frame['return'].mean(),
        'volatility': price_frame['volatility'].mean(),
        'max_drawdown': price_frame['max_drawdown'].min(),
        # Last close minus first close of the recorded price frame.
        'spread': closes.iloc[-1] - closes.iloc[0]
        }))
pd.DataFrame(summary_rows)

Unnamed: 0,token,from_date,to_date,return,volatility,max_drawdown,spread
0,AAVE,2021-11-29 00:00:00,2021-12-13 00:00:00,-0.0008404275,0.212356,-0.379501,-58.645
1,ADA,2021-11-29 00:00:00,2021-12-13 00:00:00,-0.0004966113,0.217477,-0.313607,-0.2445
2,ATOM,2021-11-29 00:00:00,2021-12-13 00:00:00,-0.0003163733,0.299425,-0.383038,-2.756
3,AVAX,2021-11-29 00:00:00,2021-12-13 00:00:00,-0.0006933415,0.286536,-0.388197,-22.93
4,BTC,2021-11-29 00:00:00,2021-12-13 00:00:00,-0.000388808,0.154894,-0.21369,-6999.84
5,COMP,2021-11-29 00:00:00,2021-12-13 00:00:00,-0.001094735,0.224354,-0.384274,-88.28
6,CRV,2021-11-29 00:00:00,2021-12-13 00:00:00,-0.0007114679,0.328177,-0.40427,-1.0362
7,DOT,2021-11-29 00:00:00,2021-12-13 00:00:00,-0.0005483337,0.245633,-0.35632,-6.005
8,ETH,2021-11-29 00:00:00,2021-12-13 00:00:00,-0.0001000714,0.174104,-0.19012,-141.71
9,SOL,2021-11-29 00:00:00,2021-12-13 00:00:00,-0.0004254425,0.242432,-0.321246,-26.687


In [13]:
#[nbs[nb_name].reglue('summary_plot') for nb_name in nbs]

In [14]:
# Close the SQLite connection that was opened for the queries earlier in the notebook.
conn.close()