<h1>QQQ vs BTC</h1>
<br />
<p>This notebook assumes that you have a Python environment with the following additional modules</p>
<ul>
    <li>pandas_datareader</li>
    <li>cbpro</li>
</ul>

In [54]:
import time as epoch_time
import cbpro
import pandas_datareader.data as web
import numpy as np
import pandas as pd
from datetime import time
from datetime import date
from datetime import datetime
from datetime import timedelta
from scipy import stats
from scipy.stats.mstats import winsorize

start_date = date(2020, 10, 1)
end_date = date.today()

## Get QQ price time series

In [55]:
df = web.DataReader(name='QQQ', data_source='yahoo', start=start_date, end=end_date)
df = df.reset_index().drop_duplicates(subset='Date').set_index('Date')

## Check QQQ prices for outliers (3 sigma values) and winsorize if found

In [56]:
if np.any(stats.zscore(df['Adj Close']) > 3):
    print('Outliers detected, winsorizing...')
    df['Adj Close'] = winsorize(df['Adj Close'])

<h1>Get BTC price time series</h1>
<p>We must walk through time in chunks to avoid rate limits when requesting historical data</p>

In [57]:
if isinstance(start_date, date):
    start_date = datetime.combine(start_date, time(0))

if isinstance(end_date, date):
    end_date = datetime.combine(end_date, time(0))
    
granularity = 60
walk_seconds = 200 * int(granularity)
partial_end = start_date + timedelta(0, walk_seconds)

data = []
public_client = cbpro.PublicClient()
print('Loading BTC historical data in chunks...')
while start_date < end_date:
    res = public_client.get_product_historic_rates(start=start_date, end=partial_end,
                                                   granularity=granularity, product_id='BTC-USD')
    if 'message' not in res and len(res) > 0:
        data += res
        start_date = partial_end
        partial_end += timedelta(0, walk_seconds)
        epoch_time.sleep(.25)
    else:
        print(f'{res}\nError, retrying...')

bf = pd.DataFrame(data=data, columns=['tmStamp', 'low', 'high', 'open', 'close', 'volume'])
bf['tmStamp'] = bf['tmStamp'].apply(lambda x: np.datetime64(
    epoch_time.strftime('%Y-%m-%d %H:%M:%S', epoch_time.localtime(x))))
bf.set_index('tmStamp', inplace=True)

Loading BTC historical data in chunks...


In [58]:
bf_orig = bf.copy()

## Check BTC prices for outliers (3 sigma values) and winsorize if found

In [59]:
if np.any(stats.zscore(bf['close']) > 3):
    print('Outliers detected, winsorizing...')
    bf['close'] = winsorize(bf['close'])

In [74]:
df['dt'] = df.index.date
xs = df.groupby('dt')['Close'].count() > 1
xs
# print(xs.loc(True))

#if len(xs) > 1:
#    print('Datetime frequency greater than daily, downsample...')

dt
2020-10-01    False
2020-10-02    False
2020-10-05    False
2020-10-06    False
2020-10-07    False
              ...  
2021-01-14    False
2021-01-15    False
2021-01-19    False
2021-01-20    False
2021-01-21    False
Name: Close, Length: 77, dtype: bool

In [13]:
cols = {'close': 'BTC', 'tmStamp': 'Date'}
bf = bf.rename(columns=cols)[cols.values()]
idx = bf.set_index('tmStamp')
if pd.infer_freq(idx)
    # xf = bf.resample('24h').mean().reset_index().rename(columns=cols)[cols.values()]
    xf = bf.resample('24h').mean().reset_index()

TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'

In [40]:
res = pd.infer_freq(bf.set_index('Date').index, warn=False)
pd.infer_freq(bf.set_index('Date').index)
bf.set_index('Date').index.freq

In [None]:
## Merge the data

In [11]:
tf = pd.merge(df.rename(columns={'Adj Close': 'QQQ'})[['Date', 'QQQ']], xf, how='inner').set_index('Date')

KeyError: "['Date'] not in index"

In [None]:
## Plot the data

In [None]:
ax = tf.plot(x='BTC', y='QQQ', figsize=(20, 10), kind='scatter', title=f'QQQ Close vs BTC 24h mean between {start_date} and {end_date}')

In [None]:
# Overall correclation
tf['QQQ'].corr(tf['BTC'])
print(f'Total period correlation is {corr:.2f}')

In [None]:
tf['QQQ'].rolling(20).corr(tf['BTC']).plot(figsize=(10, 10))