In [1]:
import pandas as pd
import papermill as pm
import scrapbook as sb
import os

  from pyarrow import HadoopFileSystem


In [2]:
def last_two_complete_weeks(ts):
    """Return the date window spanning the two most recent complete weeks.

    Weeks run Monday through Sunday. A calendar day only counts as complete
    when ``ts`` lies in its 23:00 hour; otherwise the previous day is taken
    as the last complete one.

    Parameters
    ----------
    ts : anything accepted by ``pd.Timestamp``
        Latest available timestamp.

    Returns
    -------
    tuple of (str, str)
        ``(start_date, end_date)`` as ``YYYY-MM-DD`` strings. ``end_date`` is
        the Monday immediately after the last complete week and
        ``start_date`` lies exactly 14 days before it.
    """
    stamp = pd.Timestamp(ts)

    # Start by assuming the day of `ts` is complete, then fall back one day
    # when the 23:00 hour has not been reached yet.
    last_full_day = stamp
    if stamp.hour < 23:
        last_full_day = stamp - pd.Timedelta(days=1)

    weekday = last_full_day.dayofweek  # Monday == 0 ... Sunday == 6
    if weekday < 6:
        # Mid-week: rewind to the Monday of the (still incomplete) current week.
        window_end = last_full_day - pd.Timedelta(days=weekday)
    else:
        # Sunday closes its week, so the window ends on the following Monday.
        window_end = last_full_day - pd.Timedelta(days=-1)
    window_start = window_end - pd.Timedelta(days=14)

    return str(window_start.date()), str(window_end.date())

# last_two_complete_weeks('2021-11-14 22:00:00')  ('2021-10-25', '2021-11-08')
# last_two_complete_weeks('2021-11-14 23:00:00')  ('2021-11-01', '2021-11-15')
# last_two_complete_weeks('2021-11-15 22:00:00')  ('2021-11-01', '2021-11-15')
# last_two_complete_weeks('2021-11-15 23:00:00')  ('2021-11-01', '2021-11-15')

In [3]:
# Distinct token symbols stored in the `ohlc` table, as a flat Python list.
token_query = '''
SELECT 
    DISTINCT token
FROM 
    ohlc
'''
# The query yields a single column, so its first column holds every value.
tokens = pd.read_sql(token_query, 'sqlite:///data/data.db').iloc[:, 0].tolist()  # ['BTC', 'ETH', 'USDT', 'SOL', 'ADA', 'DOT', 'AVAX', 'ATOM', 'CRV', 'AAVE', 'COMP']

In [4]:
# Newest timestamp stored in the `ohlc` table; the aggregate query returns a
# one-row, one-column result, so the value sits at position (0, 0).
latest_ts_query = '''
SELECT 
    MAX(ts)
FROM 
    ohlc
'''
ts_max = pd.read_sql(latest_ts_query, 'sqlite:///data/data.db').iat[0, 0]  # '2021-12-15 00:00:00'

# Reporting window derived from the newest timestamp.
start_date, end_date = last_two_complete_weeks(ts_max)  # '2021-11-29', '2021-12-13'

In [5]:
# Folder that receives the executed notebooks. expanduser() only rewrites a
# leading '~', so the plain relative name 'output' passes through unchanged.
base_output_folder = os.path.expanduser('output')

# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create
# race of a separate os.path.exists() test. It still raises if the path
# exists but is not a directory, instead of silently carrying on.
os.makedirs(base_output_folder, exist_ok=True)

In [6]:
# Execute one parameterised copy of 'template.ipynb' per token.
for token in tokens:
    # end='\r' returns the cursor to the start of the line instead of
    # emitting a newline after the status message.
    print(f'running for {token}', end='\r')

    # The output file name encodes the token and the date window.
    output_path = f'{base_output_folder}/market_analysis_{token}_between_{start_date}_and_{end_date}(excluded).ipynb'
    notebook_params = dict(start_date=start_date, end_date=end_date, token=token)

    # `res` is rebound on every iteration, so it ends up holding the result
    # of the last execution only.
    res = pm.execute_notebook('template.ipynb', output_path, parameters=notebook_params)

running for BTC

Executing:   0%|          | 0/11 [00:00<?, ?cell/s]

running for ETH

Executing:   0%|          | 0/11 [00:00<?, ?cell/s]

running for USDT

Executing:   0%|          | 0/11 [00:00<?, ?cell/s]

running for SOL

Executing:   0%|          | 0/11 [00:00<?, ?cell/s]

running for ADA

Executing:   0%|          | 0/11 [00:00<?, ?cell/s]

running for DOT

Executing:   0%|          | 0/11 [00:00<?, ?cell/s]

running for AVAX

Executing:   0%|          | 0/11 [00:00<?, ?cell/s]

running for ATOM

Executing:   0%|          | 0/11 [00:00<?, ?cell/s]

running for CRV

Executing:   0%|          | 0/11 [00:00<?, ?cell/s]

running for AAVE

Executing:   0%|          | 0/11 [00:00<?, ?cell/s]

running for COMP

Executing:   0%|          | 0/11 [00:00<?, ?cell/s]

In [7]:
# Build one summary row per notebook that scrapbook reads from 'output'.
# NOTE(review): this presumably also picks up notebooks left behind by earlier
# runs with a different date range — confirm the folder only holds the
# current batch.
summary_rows = []
for _notebook_name, scraps in sb.read_notebooks('output').notebook_scraps.items():
    metrics = scraps['df'].data  # data of the scrap named 'df'
    close = metrics['close']
    summary_rows.append(pd.Series({
        'token': scraps['token'].data,
        'avgReturn': metrics['return'].mean(),
        'avgVolatility': metrics['volatility'].mean(),
        # Smallest value in the 'maximumDrawdown' column.
        'maximumDrawdown': metrics['maximumDrawdown'].min(),
        # Last close minus first close.
        'rangeOfClose': close.iloc[-1] - close.iloc[0],
    }))

# Bare last expression so the notebook renders the table.
pd.DataFrame(summary_rows)

Unnamed: 0,token,avgReturn,avgVolatility,maximumDrawdown,rangeOfClose
0,AAVE,-0.0007697463,0.212356,-0.378718,-58.645
1,ADA,-0.0004241618,0.217477,-0.312827,-0.2445
2,ATOM,-0.0001776982,0.299425,-0.32957,-2.756
3,AVAX,-0.0005540979,0.286536,-0.38579,-22.93
4,BTC,-0.0003569235,0.154894,-0.206462,-6999.84
5,COMP,-0.001015116,0.224354,-0.377871,-88.28
6,CRV,-0.0005385247,0.328177,-0.394862,-1.0362
7,DOT,-0.0004540131,0.245633,-0.3508,-6.005
8,ETH,-6.065377e-05,0.174104,-0.188101,-141.71
9,SOL,-0.0003446384,0.242432,-0.317207,-26.687
