In [1]:
# Standard library
import os
from datetime import datetime, timedelta

# Third-party
import pandas as pd
import papermill as pm
import scrapbook as sb
# NOTE(review): HadoopFileSystem is not referenced in the cells visible here -- confirm it is still needed.
from pyarrow import HadoopFileSystem


2. Create a runner notebook that uses papermill to run the template notebook for every token in the database over each of the last 2 complete weeks in the database. Summarize the following in a single DataFrame in the runner notebook:
    - avg hourly return by run
    - avg volatility by run
    - maximum drawdown over the entire period of each run
    - range of close price, computed as last close price - first close price

In [2]:
# Database URL for the SQLite file holding the `ohlc` table. It is passed to
# pd.read_sql in this notebook and forwarded to every executed notebook as a parameter.
# NOTE(review): the path is relative, so it presumably resolves against the kernel's working directory -- confirm.
connection_string = 'sqlite:///../../../Lectures/Lecture 4/data/data.db'

# Get all tokens

In [3]:
# Distinct token symbols stored in the ohlc table, as a plain Python list.
all_tokens = (
    pd.read_sql('''
SELECT DISTINCT
    token
FROM
    ohlc
''', connection_string)
    .iloc[:, 0]  # the query selects a single column
    .tolist()
)
all_tokens

['BTC',
 'ETH',
 'USDT',
 'SOL',
 'ADA',
 'DOT',
 'AVAX',
 'ATOM',
 'CRV',
 'AAVE',
 'COMP']

# Get the last two complete weeks

## Get the end date of the database

In [4]:
# Latest timestamp in the ohlc table: the 1x1 query result is read out as a
# string and parsed into a datetime.
latest_ts = pd.read_sql('''
SELECT
    MAX(ts)  
FROM
    ohlc
''', connection_string).iloc[0, 0]

end_date_db = datetime.strptime(latest_ts, '%Y-%m-%d %H:%M:%S')
end_date_db

datetime.datetime(2021, 12, 15, 0, 0)

## Get the last two complete weeks according to the end date of the database

In [5]:
def last_date_of_week(d: datetime, to_day_of_last_week: int) -> datetime:
    '''
    Step back from `d` to the most recent date whose weekday is `to_day_of_last_week`.

    Weekdays follow the `datetime.weekday()` convention: Monday is 0 and Sunday is 6.
    If `d` already falls on the requested weekday, `d` itself is returned.
    Only whole days are subtracted, so the time-of-day of `d` is preserved.

    Parameters
    ----------
    d : datetime
        Reference date.
    to_day_of_last_week : int
        Target weekday, from 0 (Monday) to 6 (Sunday).

    Returns
    -------
    datetime
        `d` moved back by 0 to 6 days so that it lands on the target weekday.

    Raises
    ------
    ValueError
        If `to_day_of_last_week` is outside 0..6. Without this check an
        out-of-range value would silently produce a wrong (possibly future) date.
    '''
    if not 0 <= to_day_of_last_week <= 6:
        raise ValueError(
            f'to_day_of_last_week must be between 0 (Monday) and 6 (Sunday), got {to_day_of_last_week!r}'
        )

    day_of_week_for_d = d.weekday()
    # The modulo covers both cases in one expression:
    #   target weekday earlier in the same week -> day_of_week_for_d - to_day_of_last_week
    #   target weekday in the previous week     -> (7 - to_day_of_last_week) + day_of_week_for_d
    days_delta = (day_of_week_for_d - to_day_of_last_week) % 7
    return d - timedelta(days=days_delta)

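- Explanation of the calculation in the function above:  
A. If `day_of_week_for_d` < `to_day_of_last_week`:  
the target weekday last occurred in the previous week, so the gap consists of two parts:
(1) the days left in the previous week counted from the target weekday, `7 - to_day_of_last_week`, and (2) the days already elapsed in the current week, `day_of_week_for_d`.  
B. If `day_of_week_for_d` >= `to_day_of_last_week`:  
the target weekday falls in the same week as `d`, so the gap is
`day_of_week_for_d` - `to_day_of_last_week`. For example, a Wednesday (`2`) with a Monday target (`0`) gives a gap of 2 days.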

In [6]:
# Week boundaries sit on this weekday (datetime.weekday(): 0 is Monday), so every
# end date below is a Monday.
# NOTE(review): for hourly data this presumably makes Sunday 23:00 the last bar of each week,
# depending on how the template treats `to_date` -- confirm.
WEEK_BOUNDARY_WEEKDAY = 0
# Number of complete weeks to analyse.
N_COMPLETE_WEEKS = 2

# Truncate to midnight before stepping back: last_date_of_week keeps the time-of-day,
# so a latest row at e.g. 13:00 would otherwise shift every week boundary to 13:00.
latest_week_end = last_date_of_week(
    end_date_db.replace(hour=0, minute=0, second=0, microsecond=0),
    WEEK_BOUNDARY_WEEKDAY,
)

# Oldest week first, so the runs execute in chronological order.
end_dates_of_last2weeks = [
    latest_week_end - timedelta(weeks=weeks_back)
    for weeks_back in reversed(range(N_COMPLETE_WEEKS))
]
end_dates_of_last2weeks

[datetime.datetime(2021, 12, 6, 0, 0), datetime.datetime(2021, 12, 13, 0, 0)]

In [7]:
# Folder that receives the executed notebooks written by the papermill runs.
output_folder = os.path.expanduser('outputs')

# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race
# of testing os.path.exists first.
os.makedirs(output_folder, exist_ok=True)

# Notebook execution

In [8]:
# Execute the template once per (week, token) pair; every executed copy is
# written to its own file under output_folder.
for end_date_of_week in end_dates_of_last2weeks:
    # Each run starts seven days before the week's end date.
    start_date_of_week = end_date_of_week - timedelta(days=7)
    for token in all_tokens:
        # end='\r' returns the cursor to the start of the line instead of emitting a newline.
        print(f'running for {token} for date {start_date_of_week} to date {end_date_of_week}', end='\r')
        # Date part (YYYY-MM-DD) of the week start, used to label the output file.
        date_label = start_date_of_week.date().isoformat()
        output_path = f'{output_folder}/analysis_{token}_{date_label}.ipynb'
        run_parameters = {
            'from_date': str(start_date_of_week),
            'to_date': str(end_date_of_week),
            'token': token,
            'connection_string': connection_string,
        }
        res = pm.execute_notebook(
            'analysis_template.ipynb',
            output_path,
            parameters=run_parameters,
        )

running for BTC for date 2021-11-29 00:00:00 to date 2021-12-06 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for ETH for date 2021-11-29 00:00:00 to date 2021-12-06 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for USDT for date 2021-11-29 00:00:00 to date 2021-12-06 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for SOL for date 2021-11-29 00:00:00 to date 2021-12-06 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for ADA for date 2021-11-29 00:00:00 to date 2021-12-06 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for DOT for date 2021-11-29 00:00:00 to date 2021-12-06 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for AVAX for date 2021-11-29 00:00:00 to date 2021-12-06 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for ATOM for date 2021-11-29 00:00:00 to date 2021-12-06 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for CRV for date 2021-11-29 00:00:00 to date 2021-12-06 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for AAVE for date 2021-11-29 00:00:00 to date 2021-12-06 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for COMP for date 2021-11-29 00:00:00 to date 2021-12-06 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for BTC for date 2021-12-06 00:00:00 to date 2021-12-13 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for ETH for date 2021-12-06 00:00:00 to date 2021-12-13 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for USDT for date 2021-12-06 00:00:00 to date 2021-12-13 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for SOL for date 2021-12-06 00:00:00 to date 2021-12-13 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for ADA for date 2021-12-06 00:00:00 to date 2021-12-13 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for DOT for date 2021-12-06 00:00:00 to date 2021-12-13 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for AVAX for date 2021-12-06 00:00:00 to date 2021-12-13 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for ATOM for date 2021-12-06 00:00:00 to date 2021-12-13 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for CRV for date 2021-12-06 00:00:00 to date 2021-12-13 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for AAVE for date 2021-12-06 00:00:00 to date 2021-12-13 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

running for COMP for date 2021-12-06 00:00:00 to date 2021-12-13 00:00:00

Executing:   0%|          | 0/20 [00:00<?, ?cell/s]

In [9]:
# Read the executed notebooks back from the same folder the runs wrote to, using
# output_folder rather than repeating the literal so the two cannot drift apart.
# NOTE(review): this presumably loads every notebook found in the folder, including
# leftovers from earlier runs -- confirm the folder only holds the current runs.
notebooks = sb.read_notebooks(output_folder)

In [10]:
# Build one summary row per executed notebook from the scraps it recorded.
run_summaries = []
for scraps in notebooks.notebook_scraps.values():
    # presumably a DataFrame with return / volatility / mdd / close columns -- verify against the template
    prices = scraps['prices'].data
    close = prices['close']
    run_summaries.append({
        'token': scraps['token'].data,
        'from_date': scraps['from_date'].data,
        'to_date': scraps['to_date'].data,
        'avg_return': prices['return'].mean(),
        'avg_volatility': prices['volatility'].mean(),
        # smallest value of the mdd column
        'max_drawdown': prices['mdd'].min(),
        # last close minus first close of the run
        'range_of_close': close.iloc[-1] - close.iloc[0],
    })

info_by_run = pd.DataFrame(run_summaries)

info_by_run

Unnamed: 0,token,from_date,to_date,avg_return,avg_volatility,max_drawdown,range_of_close
0,AAVE,2021-11-29 00:00:00,2021-12-06 00:00:00,-0.001305232,0.221054,-0.360386,-46.808
1,AAVE,2021-12-06 00:00:00,2021-12-13 00:00:00,-0.0003755434,0.2004,-0.198313,-11.673
2,ADA,2021-11-29 00:00:00,2021-12-06 00:00:00,-0.0009344545,0.226749,-0.287682,-0.2305
3,ADA,2021-12-06 00:00:00,2021-12-13 00:00:00,-0.000130232,0.203766,-0.196004,-0.0297
4,ATOM,2021-11-29 00:00:00,2021-12-06 00:00:00,-0.0006543502,0.340796,-0.308052,-2.837
5,ATOM,2021-12-06 00:00:00,2021-12-13 00:00:00,-9.716086e-07,0.254383,-0.141268,-0.004
6,AVAX,2021-11-29 00:00:00,2021-12-06 00:00:00,-0.001575971,0.320661,-0.407316,-25.6
7,AVAX,2021-12-06 00:00:00,2021-12-13 00:00:00,8.111556e-05,0.249173,-0.181074,1.18
8,BTC,2021-11-29 00:00:00,2021-12-06 00:00:00,-0.0008976678,0.175543,-0.231254,-7979.41
9,BTC,2021-12-06 00:00:00,2021-12-13 00:00:00,9.997437e-05,0.134449,-0.09362,833.11
