In [1]:
# Standard library
import os

# Third-party
import numpy as np
import pandas as pd
import papermill as pm
import scrapbook as sb
# Kept at column 0: an indented top-level import raises IndentationError.
from pyarrow import HadoopFileSystem


In [2]:
# Folder that receives the executed per-token notebooks.
# The path is relative to the current working directory; expanduser() only
# has an effect when the path starts with '~', so here it returns 'outputs'.
base_output_folder = os.path.expanduser('outputs')

# exist_ok=True keeps the cell idempotent on re-runs and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(base_output_folder, exist_ok=True)

In [3]:
# Collect the distinct token symbols stored in the ohlc table.
db = 'sqlite:///data/data.db'
token_query = '''
SELECT distinct(token)
FROM ohlc
'''
token_frame = pd.read_sql(token_query, db)
tokens = list(token_frame['token'])

In [4]:
def _last_two_complete_weeks(midnights):
    """Return ``(start, end)`` spanning the last two Monday-to-Sunday weeks.

    Parameters
    ----------
    midnights : iterable of pandas.Timestamp
        Midnight timestamps available for one token, in any order.

    Returns
    -------
    tuple of pandas.Timestamp or None
        ``end`` is the most recent Sunday found in ``midnights`` and ``start``
        is the Monday 13 calendar days earlier. ``None`` is returned when no
        Sunday is present or the history does not reach back to ``start``.
    """
    sundays = [day for day in midnights if day.weekday() == 6]  # Monday == 0
    if not sundays:
        return None
    end = max(sundays)
    # Calendar arithmetic instead of "13 rows further down": a missing day in
    # the data must not shift the start away from a Monday.
    start = end - pd.Timedelta(days=13)
    if min(midnights) > start:
        return None
    return start, end


for token in tokens:
    # Choose the last 2 complete weeks in the database on a token-by-token basis.
    # A 'complete week' is assumed to start on Monday and end on Sunday.
    # NOTE(review): a Sunday 00:00 row only shows that Sunday has started, not
    # that the whole Sunday is present -- confirm how the template uses end_date.
    # The query interpolates `token` directly; this is acceptable only while the
    # tokens are read from the ohlc table itself rather than from user input.
    days = pd.read_sql(
    f'''
    SELECT ts
    FROM ohlc
    WHERE token = '{token}'
    ORDER BY 1 DESC
    ''',db)
    # Keep only the rows stamped exactly at midnight (one marker per day).
    midnights = (
        days.assign(ts=pd.to_datetime(days['ts']))
        .set_index('ts')
        .between_time('00:00:00', '00:00:00')
        .index
    )
    window = _last_two_complete_weeks(midnights)
    if window is None:
        # Explicit skip instead of an opaque IndexError that aborts the batch.
        print(f'skipping {token}: fewer than two complete weeks of data')
        continue
    # Zero-padded ISO dates compare correctly against ISO timestamps and give
    # consistently sortable output file names.
    start_date, end_date = (day.strftime('%Y-%m-%d') for day in window)
    print(f'running for {token} for date {start_date} to {end_date}', end='\r')
    res = pm.execute_notebook(
        'data_analysis_template.ipynb',
        f'{base_output_folder}/market_analysis_{token}_{end_date}.ipynb',
        parameters = {
            'start_date': start_date,
            'end_date': end_date,
            'token': token,
            'db': db,
        }
    )

running for BTC for date 2021-11-29 to 2021-12-12

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

running for ETH for date 2021-11-29 to 2021-12-12

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

running for USDT for date 2021-11-29 to 2021-12-12

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

running for SOL for date 2021-11-29 to 2021-12-12

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

running for ADA for date 2021-11-29 to 2021-12-12

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

running for DOT for date 2021-11-29 to 2021-12-12

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

running for AVAX for date 2021-11-29 to 2021-12-12

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

running for ATOM for date 2021-11-29 to 2021-12-12

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

running for CRV for date 2021-11-29 to 2021-12-12

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

running for AAVE for date 2021-11-29 to 2021-12-12

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

running for COMP for date 2021-11-29 to 2021-12-12

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

In [5]:
# Read the executed notebooks back from the folder they were written to,
# reusing base_output_folder so the read and write locations cannot drift apart.
nbs = sb.read_notebooks(base_output_folder)

In [7]:
def _summarize_scraps(scraps):
    """Build one summary row from the scraps of a single executed notebook.

    Parameters
    ----------
    scraps : mapping
        Scraps of one notebook; reads the 'token', 'start_date', 'end_date'
        and 'df' entries through their ``.data`` attribute.

    Returns
    -------
    dict
        Column name -> value for the summary table.
    """
    frame = scraps['df'].data
    close = frame['close']
    return {
        'token': scraps['token'].data,
        'start_date': scraps['start_date'].data,
        'end_date': scraps['end_date'].data,
        'avg_hourly_ret': frame['return'].mean(),
        'avg_hourly_vol': frame['vol'].mean(),
        'maximum_drawdown': frame['maximum_drawdown'].iloc[-1],
        # Last close minus first close. .iloc makes the lookup positional:
        # close[-1] / close[-0] are label lookups that only worked through the
        # deprecated positional fallback of Series.__getitem__.
        'close_range': close.iloc[-1] - close.iloc[0],
    }


# One row per executed notebook; the bare expression is the cell's display output.
pd.DataFrame(
    [_summarize_scraps(scraps) for scraps in nbs.notebook_scraps.values()]
)

Unnamed: 0,token,start_date,end_date,avg_hourly_ret,avg_hourly_vol,maximum_drawdown,close_range
0,AAVE,2021-11-29,2021-12-12,-0.0008621106,0.219778,0.621282,-60.395
1,ADA,2021-11-29,2021-12-12,-0.0004457646,0.218779,0.687173,-0.237
2,ATOM,2021-11-29,2021-12-12,-0.0002576143,0.309231,0.67043,-3.226
3,AVAX,2021-11-29,2021-12-12,-0.0006759914,0.296748,0.61421,-24.97
4,BTC,2021-11-29,2021-12-12,-0.0004805457,0.160116,0.793538,-8464.0
5,COMP,2021-11-29,2021-12-12,-0.001175144,0.230745,0.622129,-93.14
6,CRV,2021-11-29,2021-12-12,-0.0007421424,0.337309,0.605138,-1.2162
7,DOT,2021-11-29,2021-12-12,-0.0007196422,0.254175,0.6492,-8.035
8,ETH,2021-11-29,2021-12-12,-0.0001703973,0.179537,0.811899,-273.04
9,SOL,2021-11-29,2021-12-12,-0.0004416283,0.251964,0.682793,-30.357
