# Python - HW4 - Runner

## Zheng Nan Yang

In [3]:
import papermill as pm
import scrapbook as sb
import pandas as pd
from datetime import datetime, timedelta
import os
import sqlite3
from tqdm import tqdm_notebook as tqdm
%load_ext sql

  from pyarrow import HadoopFileSystem


### Data Exploration

In [2]:
# Open the SQLite database file used for the exploration below.
# NOTE(review): the path is relative to the notebook's working directory, and
# the connection is never closed in the cells visible here.
conn = sqlite3.connect('data/data.db')

In [3]:
# Load the whole `ohlc` table into memory; the exploration cells below only
# need the `token` and `ts` columns, but the full frame is previewed first.
df = pd.read_sql_query("SELECT * FROM ohlc", con=conn)

In [4]:
# Preview the first rows to confirm the columns returned from the `ohlc` table.
df.head()

Unnamed: 0,ts,open,high,low,close,volume,volumeUSD,token,chain
0,2021-11-01 00:00:00,61421.37,61669.14,61239.6,61343.68,256.433869,15757510.0,BTC,BTC
1,2021-11-01 01:00:00,61346.17,61709.82,61171.22,61610.93,332.481185,20445580.0,BTC,BTC
2,2021-11-01 02:00:00,61610.94,61779.87,61299.89,61333.17,314.25072,19353900.0,BTC,BTC
3,2021-11-01 03:00:00,61333.17,61457.28,60050.0,60589.06,1059.931358,64146250.0,BTC,BTC
4,2021-11-01 04:00:00,60590.23,60655.0,59752.92,59971.89,621.419878,37447440.0,BTC,BTC


#### Finding all tokens in the database

In [5]:
# Distinct token symbols, in order of first appearance in the table.
tokens = df['token'].drop_duplicates().tolist()

In [6]:
# Show the token list that the papermill loop further down iterates over.
tokens

['BTC',
 'ETH',
 'USDT',
 'SOL',
 'ADA',
 'DOT',
 'AVAX',
 'ATOM',
 'CRV',
 'AAVE',
 'COMP']

#### Finding the last 2 complete weeks in the database

In [7]:
# One row per distinct timestamp, sorted ascending, so the first and last
# observations in the database can be read off directly.
date_df = df[['ts']].drop_duplicates().sort_values('ts')

In [8]:
# Display the sorted timestamps; the last rows show where the data ends.
date_df

Unnamed: 0,ts
0,2021-11-01 00:00:00
1,2021-11-01 01:00:00
2,2021-11-01 02:00:00
3,2021-11-01 03:00:00
4,2021-11-01 04:00:00
...,...
1052,2021-12-14 20:00:00
1053,2021-12-14 21:00:00
1054,2021-12-14 22:00:00
1055,2021-12-14 23:00:00


The data ends on Tuesday, Dec. 14, so the last complete week ends on Sunday, Dec. 12. The last two complete weeks therefore run from Monday, Nov. 29 (inclusive) to Monday, Dec. 13 (exclusive), which gives our start and end dates.

In [9]:
# Analysis window passed to every templated run: the last two complete weeks.
# to_date is intended as an exclusive bound; that is enforced (or not) by the
# template notebook, which is not visible here.
from_date, to_date = '2021-11-29', '2021-12-13'

### Data Summary

Summarize the following metrics for each run (one row per token) in a single DataFrame in this runner notebook:
* average hourly return by run
* average volatility by run
* maximum drawdown over the entire period of each run
* range, defined as the last close price minus the first close price

In [10]:
# Folder (relative to the working directory) that receives one executed
# notebook per token. The path contains no '~', so no user expansion is needed.
base_output_folder = 'outputs'

# exist_ok=True keeps the cell idempotent on re-runs and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(base_output_folder, exist_ok=True)

In [11]:
# Execute the template notebook once per token over the same date window.
# papermill injects `parameters` into the template and writes the executed
# copy to the output folder, from where the results are collected later.
for token in tokens:
    # '\r' rewinds the cursor so each status line overwrites the previous one.
    print(f'running for {token} for date {from_date} to {to_date}', end='\r')
    # `res` holds the value returned by the most recent execution; it is not
    # read in the cells visible here but is kept in the notebook namespace.
    res = pm.execute_notebook(
        'hw_4_template_zheng_nan_yang.ipynb',
        f'{base_output_folder}/market_analysis_{token}_{from_date}.ipynb',
        parameters={
            # from_date / to_date are already strings, so they are passed as-is.
            'from_date': from_date,
            'to_date': to_date,
            'token': token,
        },
    )

running for BTC for date 2021-11-29 to 2021-12-13

Executing:   0%|          | 0/24 [00:00<?, ?cell/s]

running for ETH for date 2021-11-29 to 2021-12-13

Executing:   0%|          | 0/24 [00:00<?, ?cell/s]

running for USDT for date 2021-11-29 to 2021-12-13

Executing:   0%|          | 0/24 [00:00<?, ?cell/s]

running for SOL for date 2021-11-29 to 2021-12-13

Executing:   0%|          | 0/24 [00:00<?, ?cell/s]

running for ADA for date 2021-11-29 to 2021-12-13

Executing:   0%|          | 0/24 [00:00<?, ?cell/s]

running for DOT for date 2021-11-29 to 2021-12-13

Executing:   0%|          | 0/24 [00:00<?, ?cell/s]

running for AVAX for date 2021-11-29 to 2021-12-13

Executing:   0%|          | 0/24 [00:00<?, ?cell/s]

running for ATOM for date 2021-11-29 to 2021-12-13

Executing:   0%|          | 0/24 [00:00<?, ?cell/s]

running for CRV for date 2021-11-29 to 2021-12-13

Executing:   0%|          | 0/24 [00:00<?, ?cell/s]

running for AAVE for date 2021-11-29 to 2021-12-13

Executing:   0%|          | 0/24 [00:00<?, ?cell/s]

running for COMP for date 2021-11-29 to 2021-12-13

Executing:   0%|          | 0/24 [00:00<?, ?cell/s]

In [4]:
# Collect the scraps from every executed notebook in the output folder.
# Reuse base_output_folder so this cell cannot drift from the folder the
# papermill runs wrote to.
# NOTE(review): any notebooks left in the folder by earlier runs are read too.
nbs = sb.read_notebooks(base_output_folder)

In [5]:
def _summarize_run(scrap):
    """Build the summary record for one executed notebook.

    Parameters
    ----------
    scrap : mapping of scrap name -> scrap object
        Scraps recorded by one templated run. The keys 'token', 'from_date',
        'to_date' and 'prices' are read; 'prices' is presumably a DataFrame
        with log_return, volatility, max_drawdown_pct and close columns
        (assumption based on the attribute access below; confirm against the
        template notebook).

    Returns
    -------
    dict
        One row of the summary table.
    """
    # Look the prices scrap up once instead of once per metric.
    prices = scrap['prices'].data
    return {
        'token': scrap['token'].data,
        'from_date': scrap['from_date'].data,
        'to_date': scrap['to_date'].data,
        'average hourly return': prices.log_return.mean(),
        'volatility': prices.volatility.mean(),
        # Max drawdown is given as a negative percentage, so the worst
        # drawdown over the period is the minimum value.
        'max drawdown': prices.max_drawdown_pct.min(),
        # Last close price minus first close price over the run.
        'range': prices.close.iloc[-1] - prices.close.iloc[0],
    }


# One row per executed notebook; only the scraps are needed, not the names.
pd.DataFrame([_summarize_run(scrap) for scrap in nbs.notebook_scraps.values()])

Unnamed: 0,token,from_date,to_date,average hourly return,volatility,max drawdown,range
0,AAVE,2021-11-29,2021-12-13,-0.0008404275,0.212356,-0.394588,-58.645
1,ADA,2021-11-29,2021-12-13,-0.0004966113,0.217477,-0.322694,-0.2445
2,ATOM,2021-11-29,2021-12-13,-0.0003163733,0.299425,-0.392483,-2.756
3,AVAX,2021-11-29,2021-12-13,-0.0006933415,0.286536,-0.396504,-22.93
4,BTC,2021-11-29,2021-12-13,-0.000388808,0.154894,-0.285516,-6999.84
5,COMP,2021-11-29,2021-12-13,-0.001094735,0.224354,-0.393657,-88.28
6,CRV,2021-11-29,2021-12-13,-0.0007114679,0.328177,-0.439781,-1.0362
7,DOT,2021-11-29,2021-12-13,-0.0005483337,0.245633,-0.368782,-6.005
8,ETH,2021-11-29,2021-12-13,-0.0001000714,0.174104,-0.252795,-141.71
9,SOL,2021-11-29,2021-12-13,-0.0004254425,0.242432,-0.336889,-26.687
