In [1]:
import papermill as pm
import scrapbook as sb
import pandas as pd
from datetime import datetime, timedelta
import os

  from pyarrow import HadoopFileSystem


Get the list of all tokens in the dataset

In [2]:
# SQLAlchemy-style URL of the SQLite database that contains the `ohlc` table.
connection_string = 'sqlite:///../../../Lectures/Lecture 4/data/data.db'

# The query yields a single `token` column; flatten it into a plain Python list.
token_frame = pd.read_sql('SELECT DISTINCT token FROM ohlc', connection_string)
tokens = token_frame['token'].tolist()
tokens

['BTC',
 'ETH',
 'USDT',
 'SOL',
 'ADA',
 'DOT',
 'AVAX',
 'ATOM',
 'CRV',
 'AAVE',
 'COMP']

Get the list of all dates in the dataset

In [3]:
# Fetch every distinct `ts` value and parse the column into pandas datetimes.
timestamps = pd.read_sql('SELECT DISTINCT ts FROM ohlc', connection_string)
timestamps = timestamps.assign(ts=pd.to_datetime(timestamps['ts']))

# Count the distinct timestamps per calendar day, listed in date order.
# (sort_index() fixes the final ordering, so no pre-sort of the values is needed.)
per_day_counts = timestamps['ts'].dt.date.value_counts().sort_index()
per_day_counts

2021-11-01    24
2021-11-02    24
2021-11-03    24
2021-11-04    24
2021-11-05    24
2021-11-06    24
2021-11-07    24
2021-11-08    24
2021-11-09    24
2021-11-10    24
2021-11-11    24
2021-11-12    24
2021-11-13    24
2021-11-14    24
2021-11-15    24
2021-11-16    24
2021-11-17    24
2021-11-18    24
2021-11-19    24
2021-11-20    24
2021-11-21    24
2021-11-22    24
2021-11-23    24
2021-11-24    24
2021-11-25    24
2021-11-26    24
2021-11-27    24
2021-11-28    24
2021-11-29    24
2021-11-30    24
2021-12-01    24
2021-12-02    24
2021-12-03    24
2021-12-04    24
2021-12-05    24
2021-12-06    24
2021-12-07    24
2021-12-08    24
2021-12-09    24
2021-12-10    24
2021-12-11    24
2021-12-12    24
2021-12-13    24
2021-12-14    24
2021-12-15     1
Name: ts, dtype: int64

Extract the Mondays in the dataset, each shifted back one week to serve as the start date of a weekly run

In [4]:
# Calendar dates present in the data, in order of first appearance.
# A separate name is used instead of rebinding `timestamps`, so the DataFrame is
# left intact and this cell can be re-run without re-running the query cell.
unique_dates = timestamps['ts'].dt.date.unique()

# Keep only the Mondays (weekday() == 0) and shift each one back by a week, so
# every entry is a start date whose 7-day window ends on a Monday in the data.
# As a consequence the earliest entry can fall before the first date in the data.
mondays = [day - timedelta(days=7) for day in unique_dates if day.weekday() == 0]
mondays

[datetime.date(2021, 10, 25),
 datetime.date(2021, 11, 1),
 datetime.date(2021, 11, 8),
 datetime.date(2021, 11, 15),
 datetime.date(2021, 11, 22),
 datetime.date(2021, 11, 29),
 datetime.date(2021, 12, 6)]

Run the template notebook for the last two weeks for every token, saving each executed notebook in the `outputs` folder

In [5]:
# Folder that receives one executed copy of the template per (token, week) pair.
base_output_folder = 'outputs'
# exist_ok=True replaces the check-then-create pattern and keeps the cell re-runnable.
os.makedirs(base_output_folder, exist_ok=True)

for token in tokens:
    # Only the two most recent weekly start dates are executed.
    for run_date in mondays[-2:]:
        end_date = run_date + timedelta(days=7)
        print(f'running for {token} for date {run_date} to {end_date}', end='\r')
        # The parameters are passed to the template as strings; the notebook
        # object returned by papermill is not needed, so it is not kept.
        pm.execute_notebook(
            'hw_4_somanshu_dhingra_template.ipynb',
            f'{base_output_folder}/market_analysis_{token}_{run_date}.ipynb',
            parameters={
                'from_date': f'{run_date}',
                'to_date': f'{end_date}',
                'token': token,
                'connection_string': connection_string,
            },
        )

running for BTC for date 2021-11-29 to 2021-12-06

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for BTC for date 2021-12-06 to 2021-12-13

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for ETH for date 2021-11-29 to 2021-12-06

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for ETH for date 2021-12-06 to 2021-12-13

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for USDT for date 2021-11-29 to 2021-12-06

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for USDT for date 2021-12-06 to 2021-12-13

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for SOL for date 2021-11-29 to 2021-12-06

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for SOL for date 2021-12-06 to 2021-12-13

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for ADA for date 2021-11-29 to 2021-12-06

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for ADA for date 2021-12-06 to 2021-12-13

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for DOT for date 2021-11-29 to 2021-12-06

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for DOT for date 2021-12-06 to 2021-12-13

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for AVAX for date 2021-11-29 to 2021-12-06

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for AVAX for date 2021-12-06 to 2021-12-13

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for ATOM for date 2021-11-29 to 2021-12-06

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for ATOM for date 2021-12-06 to 2021-12-13

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for CRV for date 2021-11-29 to 2021-12-06

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for CRV for date 2021-12-06 to 2021-12-13

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for AAVE for date 2021-11-29 to 2021-12-06

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for AAVE for date 2021-12-06 to 2021-12-13

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for COMP for date 2021-11-29 to 2021-12-06

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

running for COMP for date 2021-12-06 to 2021-12-13

Executing:   0%|          | 0/17 [00:00<?, ?cell/s]

Read all the executed notebooks and build the required summary DataFrame

In [6]:
# Read the executed notebooks back from the folder the runs were written to.
nbs = sb.read_notebooks(base_output_folder)

# Build one summary record per executed notebook from the scraps it recorded.
summary_rows = []
for scraps in nbs.notebook_scraps.values():
    # Look the price frame up once instead of once per metric.
    prices = scraps['prices'].data
    summary_rows.append({
        'token': scraps['token'].data,
        'from_date': scraps['from_date'].data,
        'to_date': scraps['to_date'].data,
        'avg_volatility': prices.volatility.mean(),
        'avg_hourly_return': prices.returns.mean(),
        # Takes the minimum of the drawdown column.
        'max_drawdown': prices.drawdown.min(),
        # NOTE(review): this is first close minus last close, so it is signed and
        # depends on row order; confirm that matches the intended "range".
        'close_price_range': prices.close.iloc[0] - prices.close.iloc[-1],
    })

# A list of plain dicts is enough; wrapping each row in a Series adds nothing.
summary = pd.DataFrame(summary_rows)
summary

Unnamed: 0,token,from_date,to_date,avg_volatility,avg_hourly_return,max_drawdown,close_price_range
0,AAVE,2021-11-29,2021-12-06,0.221054,-0.001235,-0.433883,46.808
1,AAVE,2021-12-06,2021-12-13,0.2004,-0.000304,-0.219344,11.673
2,ADA,2021-11-29,2021-12-06,0.226749,-0.00085,-0.333333,0.2305
3,ADA,2021-12-06,2021-12-13,0.203766,-7e-05,-0.216532,0.0297
4,ATOM,2021-11-29,2021-12-06,0.340796,-0.000489,-0.360772,2.837
5,ATOM,2021-12-06,2021-12-13,0.254383,0.000112,-0.151733,0.004
6,AVAX,2021-11-29,2021-12-06,0.320661,-0.001428,-0.502779,25.6
7,AVAX,2021-12-06,2021-12-13,0.249173,0.000211,-0.198504,-1.18
8,BTC,2021-11-29,2021-12-06,0.175543,-0.000861,-0.26018,7979.41
9,BTC,2021-12-06,2021-12-13,0.134449,0.000127,-0.098142,-833.11
