In [147]:
import papermill as pm
import scrapbook as sb
import pandas as pd
import os

import sqlite3
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from datetime import datetime, timedelta

import matplotlib.dates as mdates

In [157]:
def get_data(token=None, from_date=None, to_date=None,
             db_path='../../../Lectures/Lecture 4/data/data.db'):
    '''
    Retrieve rows of the `ohlc` table from the SQLite database.

    Every filter is optional; a filter left as None is not applied, so
    calling get_data() with no arguments returns the whole table.

    Inputs:
    token (str, optional) - name of token to keep
    from_date (str, optional) - inclusive start of date range (ts >= from_date)
    to_date (str, optional) - exclusive end of date range (ts < to_date)
    db_path (str) - path of the SQLite database file to read

    Outputs:
    df (DataFrame) - the requested rows, indexed by `ts` parsed as datetime

    '''
    # Pair each SQL condition with its value and keep only the filters supplied.
    # Values are bound as query parameters instead of being pasted into the SQL
    # text, so quotes inside a value can neither break nor alter the query.
    filters = (('token = ?', token),
               ('ts >= ?', from_date),
               ('ts < ?', to_date))
    active = [(clause, value) for clause, value in filters if value is not None]

    query = 'SELECT * FROM ohlc'
    if active:
        query += ' WHERE ' + ' AND '.join(clause for clause, _ in active)
    # str() lets datetime-like arguments be bound using their text form.
    params = [str(value) for _, value in active]

    conn = sqlite3.connect(db_path)
    try:
        data = conn.execute(query, params).fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    df = pd.DataFrame(data,
                      columns=['ts', 'open', 'high', 'low',
                               'close', 'volume', 'volumeUSD',
                               'token', 'chain'])

    df['ts'] = pd.to_datetime(df['ts'])
    return df.set_index('ts')

In [150]:
# Load the OHLC rows into `df`; later cells read its `token` column and its index.
# NOTE(review): this call passes no arguments, while get_data is declared with
# (token, from_date, to_date) - confirm the call matches the signature in use.
df = get_data()

In [151]:
# Unique token names in sorted order, so the batch below runs in the same order
# on every execution (iteration order of a set of strings is not stable
# between interpreter sessions). Missing token values are skipped.
tokens = sorted(df['token'].dropna().unique())

# Take the latest timestamp explicitly rather than trusting the last row to be
# the newest one, then step back one day at a time: run_dates[0] is the newest
# date and run_dates[-1] the oldest (13 days earlier).
latest_ts = df.index.max()
run_dates = [latest_ts - timedelta(days=offset) for offset in range(14)]

# Values forwarded unchanged as parameters to the executed notebooks.
connection_string = 'sqlite:///data/data.db'
events_db = 'crypto_events_database'

In [152]:
# Folder that receives the executed notebooks.
base_output_folder = os.path.expanduser('outputs')

# exist_ok=True creates the folder when missing without a separate existence
# check, and raises if the path exists but is not a directory instead of
# letting later notebook writes fail.
os.makedirs(base_output_folder, exist_ok=True)

In [153]:
# Every token is analysed over the same window: run_dates[-1] is the oldest
# date and run_dates[0] the newest.
window_start = run_dates[-1]
window_end = run_dates[0]

# Date-only stamp for the output file names. The loop previously referenced an
# undefined `run_date`; deriving the stamp here also keeps spaces and colons
# out of the file names.
run_stamp = window_end.strftime('%Y-%m-%d')

# NOTE(review): the recorded output shows papermill warning that the input
# notebook has no cell tagged 'parameters' - confirm the template picks up
# the injected values as intended.
for token in tokens:
    print(f'running for {token} for date {window_start} to {window_end}', end='\r')

    res = pm.execute_notebook(
        'hw_4_artem_shuvalov.ipynb',
        f'{base_output_folder}/market_analysis_{token}_{run_stamp}.ipynb',
        parameters={
            'from_date': f'{window_start}',
            'to_date': f'{window_end}',
            'token': token,
            'connection_string': connection_string,
            'events_db': events_db})

Input notebook does not contain a cell with tag 'parameters'


running for AVAX for date 2021-12-02 00:00:00 to 2021-12-15 00:00:00

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

Input notebook does not contain a cell with tag 'parameters'


running for ATOM for date 2021-12-02 00:00:00 to 2021-12-15 00:00:00

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

Input notebook does not contain a cell with tag 'parameters'


running for SOL for date 2021-12-02 00:00:00 to 2021-12-15 00:00:00

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

Input notebook does not contain a cell with tag 'parameters'


running for AAVE for date 2021-12-02 00:00:00 to 2021-12-15 00:00:00

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

Input notebook does not contain a cell with tag 'parameters'


running for USDT for date 2021-12-02 00:00:00 to 2021-12-15 00:00:00

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

Input notebook does not contain a cell with tag 'parameters'


running for ADA for date 2021-12-02 00:00:00 to 2021-12-15 00:00:00

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

Input notebook does not contain a cell with tag 'parameters'


running for CRV for date 2021-12-02 00:00:00 to 2021-12-15 00:00:00

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

Input notebook does not contain a cell with tag 'parameters'


running for COMP for date 2021-12-02 00:00:00 to 2021-12-15 00:00:00

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

Input notebook does not contain a cell with tag 'parameters'


running for BTC for date 2021-12-02 00:00:00 to 2021-12-15 00:00:00

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

Input notebook does not contain a cell with tag 'parameters'


running for DOT for date 2021-12-02 00:00:00 to 2021-12-15 00:00:00

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

Input notebook does not contain a cell with tag 'parameters'


running for ETH for date 2021-12-02 00:00:00 to 2021-12-15 00:00:00

Executing:   0%|          | 0/10 [00:00<?, ?cell/s]

In [158]:
# Read the executed notebooks back from the folder they were written to.
# Reusing base_output_folder keeps the read path tied to the write path.
nbs = sb.read_notebooks(base_output_folder)

In [159]:
# Show the descriptive statistics for the data retrieved

def _summarise_scraps(scrap):
    '''
    Build one summary row from the scraps recorded by a single notebook.

    Inputs:
    scrap - scraps of one notebook; reads the 'token', 'from_date',
            'to_date' and 'df' entries

    Outputs:
    row (Series) - token, parsed date range and summary statistics
    '''
    date_format = '%Y-%m-%d %H:%M:%S'
    frame = scrap['df'].data
    close = frame['close']
    return pd.Series({
        'token': scrap['token'].data,
        'from_date': datetime.strptime(scrap['from_date'].data, date_format),
        'to_date': datetime.strptime(scrap['to_date'].data, date_format),
        'avg_return_pct': frame['return'].mean() * 100,
        'avg_volatility': frame['volatility'].mean(),
        'max_dd': frame['max_dd'].min(),
        # .iloc is explicitly positional; plain [-1] / [0] relies on the
        # deprecated integer-as-position fallback of Series indexing.
        'last_minus_first_close_price': close.iloc[-1] - close.iloc[0]})


# Only the scraps are needed, so iterate over the values and ignore the names.
pd.DataFrame([_summarise_scraps(scrap) for scrap in nbs.notebook_scraps.values()])

Unnamed: 0,token,from_date,to_date,avg_return_pct,avg_volatility,max_dd,last_minus_first_close_price
0,AAVE,2021-12-02,2021-12-15,-0.127007,0.167643,-0.362095,-82.061
1,ADA,2021-12-02,2021-12-15,-0.06336881,0.179683,-0.31368,-0.2777
2,ATOM,2021-12-02,2021-12-15,-0.06595935,0.23898,-0.358851,-4.963
3,AVAX,2021-12-02,2021-12-15,-0.1058374,0.246056,-0.357131,-33.68
4,BTC,2021-12-02,2021-12-15,-0.0544274,0.113296,-0.186418,-8937.51
5,COMP,2021-12-02,2021-12-15,-0.1235197,0.191621,-0.352782,-87.02
6,CRV,2021-12-02,2021-12-15,-0.1231736,0.265236,-0.346191,-1.5932
7,DOT,2021-12-02,2021-12-15,-0.1103513,0.201529,-0.316263,-10.727
8,ETH,2021-12-02,2021-12-15,-0.056928,0.130935,-0.192979,-746.46
9,SOL,2021-12-02,2021-12-15,-0.1183258,0.186069,-0.373707,-70.978
