## Exploratory Time Series Analysis with Reddit Based Lucey Uncertainty Index
* Identifying if our baseline ```Reddit UCRY``` indices serve as a leading indicator for various cryptocurrencies
* Further analysis on its relationship with existing volatility and returns indicators for crypto
* Locate any relationships with other asset classes (E.g. SP500, Gold, etc.)

### Set Up

In [1]:
# NB config
%load_ext autoreload
%autoreload 2

# Load Libraries
import os
os.chdir("..")
import toml
import warnings
from typing import Dict, Optional
import numpy as np
import scipy.stats as stats
import altair as alt
from altair import pipe, limit_rows, to_values
from pathlib import Path
import pandas as pd
from darts import TimeSeries
from darts.utils import statistics as dstats
from sqlalchemy import create_engine

# Suppress warnings
warnings.filterwarnings("ignore")

  VALID_INDEX_TYPES = (pd.DatetimeIndex, pd.RangeIndex, pd.Int64Index)
  times: Union[pd.DatetimeIndex, pd.Int64Index],
  def time_index(self) -> Union[pd.DatetimeIndex, pd.Int64Index]:
  pd.Int64Index,


In [2]:
# Increase Max Rows for Altair
t = lambda data: pipe(data, limit_rows(max_rows=10000), to_values)
alt.data_transformers.register('custom', t)
alt.data_transformers.enable('custom')

DataTransformerRegistry.enable('custom')

In [3]:
# Connect to postgres
pg_config = (
    toml.load(Path() / "config" / "etl_config.toml")
    ["postgres"]

)
pg_engine = (
    create_engine(pg_config["default_local_uri"],
                  echo=True)
)

### Pull Relevant Data

In [4]:
# Set Date Range
START_DATE, END_DATE = "2014-01-01", "2021-12-31"

In [5]:
# Original Lucey UCRY Index Data
index_data_path = Path("/Users/christopherliew/Desktop/Y4S1/HT/crypto_uncertainty_index/pipelines/crypto_index") / "index_data"

ucry_original = pd.read_csv(index_data_path / "ucry_lucey_original.csv")
ucry_original_policy = ucry_original[["Timeline", "UCRY Policy Index"]]
ucry_original_price = ucry_original[["Timeline", "UCRY Price Index"]]

# Process dates
# Break up timeline column
ucry_original_policy[["start_date", "end_date"]] = (
    ucry_original_policy["Timeline"]
    .str.split(" - ", expand=True)
)

ucry_original_policy.rename(columns={"UCRY Policy Index": "index_value"}, inplace=True)
ucry_original_policy.drop(columns=["Timeline"], inplace=True)
ucry_original_policy["type"] = "Lucey-Original-Policy"

ucry_original_price[["start_date", "end_date"]] = (
    ucry_original_price["Timeline"]
    .str.split(" - ", expand=True)
).rename(columns={"UCRY Price Index": "index_value"})

ucry_original_price.rename(columns={"UCRY Price Index": "index_value"}, inplace=True)
ucry_original_price.drop(columns=["Timeline"], inplace=True)
ucry_original_price["type"] = "Lucey-Original-Price"


In [6]:
# UCRY Index Data
# Lucey Price
ucry_query = f"""
    SELECT *
    FROM ucry_index ui
    """

ucry_reddit = pd.read_sql(sql=ucry_query, con=pg_engine)
ucry_reddit["type"] = (
    ucry_reddit["type"]
    .map({"lucey-price": "Lucey-Reddit-Price", "lucey-policy": "Lucey-Reddit-Policy"})
)

ucry_price = ucry_reddit[ucry_reddit.type == "Lucey-Reddit-Price"]
ucry_policy = ucry_reddit[ucry_reddit.type == "Lucey-Reddit-Policy"]

2022-02-13 21:14:10,118 INFO sqlalchemy.engine.Engine select pg_catalog.version()
2022-02-13 21:14:10,119 INFO sqlalchemy.engine.Engine [raw sql] {}
2022-02-13 21:14:10,124 INFO sqlalchemy.engine.Engine select current_schema()
2022-02-13 21:14:10,124 INFO sqlalchemy.engine.Engine [raw sql] {}
2022-02-13 21:14:10,127 INFO sqlalchemy.engine.Engine show standard_conforming_strings
2022-02-13 21:14:10,127 INFO sqlalchemy.engine.Engine [raw sql] {}
2022-02-13 21:14:10,130 INFO sqlalchemy.engine.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2022-02-13 21:14:10,131 INFO sqlalchemy.engine.Engine [generated in 0.00068s] {'name': '\n    SELECT *\n    FROM ucry_index ui\n    '}
2022-02-13 21:14:10,136 INFO sqlalchemy.engine.Engine 
    SELECT *
    FROM ucry_index ui
    
2022-02-13 21:14:10,136 INFO sqlalchemy.engine.Engine [raw sql] {}


In [7]:
# Combined UCRY
ucry_combined = (
    pd.concat([ucry_reddit, ucry_original_price, ucry_original_policy], axis=0)
)
ucry_combined["start_date"] = pd.to_datetime(ucry_combined["start_date"], utc=True)
ucry_combined["end_date"] = pd.to_datetime(ucry_combined["end_date"], utc=True)
ucry_combined.head()

Unnamed: 0,start_date,end_date,doc_count,index_value,type
0,2013-12-30 00:00:00+00:00,2014-01-05 23:59:59.999999+00:00,1.0,99.160104,Lucey-Reddit-Price
1,2014-01-06 00:00:00+00:00,2014-01-12 23:59:59.999999+00:00,5.0,99.426172,Lucey-Reddit-Price
2,2014-01-13 00:00:00+00:00,2014-01-19 23:59:59.999999+00:00,9.0,99.69224,Lucey-Reddit-Price
3,2014-01-20 00:00:00+00:00,2014-01-26 23:59:59.999999+00:00,6.0,99.492689,Lucey-Reddit-Price
4,2014-01-27 00:00:00+00:00,2014-02-02 23:59:59.999999+00:00,4.0,99.359655,Lucey-Reddit-Price


**Load and Process CRIX and VCRIX data**

In [8]:
# Load Data
vcrix = (
    pd.read_csv('/Users/christopherliew/Desktop/Y4S1/HT/crypto_uncertainty_index/etl/raw_data_dump/crix/vcrix.csv')
)[['date', 'vcrix']].rename(columns={'date': 'start_date', 'vcrix': 'index_value'})
vcrix['type'] = 'VCRIX'
vcrix['start_date'] = pd.to_datetime(vcrix['start_date'])
vcrix = vcrix.set_index('start_date')

# Aggregate to Weekly Level
vcrix = (
    vcrix.resample('W-MON')
    .agg({'index_value': 'mean'})
    .reset_index()
)
vcrix['type'] = 'VCRIX'

In [9]:
# Update UCRY combined
ucry_combined = pd.concat([ucry_combined, vcrix], axis=0)
ucry_combined['start_date'] = pd.to_datetime(ucry_combined['start_date'], utc=True)

# Get rid of UTC Time
ucry_combined['start_date'] = ucry_combined['start_date'].dt.tz_localize(None)
ucry_combined['end_date'] = ucry_combined['end_date'].dt.tz_localize(None)

In [10]:
# Crypto Price Data
asset_types_query = """
    SELECT DISTINCT ticker
    FROM asset_prices
    """

asset_types = [i for i in pg_engine.execute(asset_types_query)]

2022-02-13 21:14:10,327 INFO sqlalchemy.engine.Engine 
    SELECT DISTINCT ticker
    FROM asset_prices
    
2022-02-13 21:14:10,328 INFO sqlalchemy.engine.Engine [raw sql] {}


In [11]:
# Get respective time series
combined_df = pd.read_sql(sql="SELECT * FROM asset_prices", con=pg_engine)
btc_usd = combined_df[combined_df.ticker == "BTC-USD"]
eth_usd = combined_df[combined_df.ticker == "ETH-USD"]
luna_usd = combined_df[combined_df.ticker == "LUNA-USD"]
tether_usd = combined_df[combined_df.ticker == "USDT-USD"]
ripple_usd = combined_df[combined_df.ticker == "XRP-USD"]
polkadot_usd = combined_df[combined_df.ticker == "DOT-USD"]
cardano_usd = combined_df[combined_df.ticker == "ADA-USD"]
binance_usd = combined_df[combined_df.ticker == "BNB-USD"]

2022-02-13 21:14:10,383 INFO sqlalchemy.engine.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2022-02-13 21:14:10,384 INFO sqlalchemy.engine.Engine [cached since 0.2536s ago] {'name': 'SELECT * FROM asset_prices'}
2022-02-13 21:14:10,395 INFO sqlalchemy.engine.Engine SELECT * FROM asset_prices
2022-02-13 21:14:10,396 INFO sqlalchemy.engine.Engine [raw sql] {}


In [12]:
# Other Data
sp500 = combined_df[combined_df.ticker == "^GSPC"]
gold = combined_df[combined_df.ticker == "GC=F"]

### Exploratory Analysis: Identifying if UCRY Reddut Lucey (Reddit Based) is a Leading Indicator
#### Overview of Approach
1. Convert to Darts' TimeSeries
2. Price and Volume at different Horizons
   * Adjusted Closing Price (hereafter ```Price```)
   * Trade Volumes 
3. Returns and Volatility at different Horizons
   * Price Returns
4. Compute Cross Correlations for each Pair of Asset Price / Volume / Price Returns across different horizons and UCRY


#### 1. Convert to Darts TimeSeries

In [13]:
# Helper to convert to TS Dataset
def pd_to_darts(df: pd.DataFrame, time_col: str = "date") -> TimeSeries:
    new_df = TimeSeries.from_dataframe(
        df.reset_index(drop=True)
        .drop(columns=["ticker"]),
        time_col=time_col
    )
    return new_df

In [14]:
# Convert to Time Series Datasets
btc_usd_ts = pd_to_darts(btc_usd)
eth_usd_ts = pd_to_darts(eth_usd)
luna_usd_ts = pd_to_darts(luna_usd)
tether_usd_ts = pd_to_darts(tether_usd)
ripple_usd_ts = pd_to_darts(ripple_usd)
polkadot_usd_ts = pd_to_darts(polkadot_usd)
cardano_usd_ts = pd_to_darts(cardano_usd)
binance_usd_ts = pd_to_darts(binance_usd)
sp500_ts = pd_to_darts(sp500)
gold_ts = pd_to_darts(gold)

In [58]:
# Create TS datasets for relevant indices
ucry_reddit_price_ts = TimeSeries.from_dataframe(ucry_combined[ucry_combined.type == "Lucey-Reddit-Price"][["start_date", "index_value"]], time_col="start_date")
ucry_reddit_policy_ts = TimeSeries.from_dataframe(ucry_combined[ucry_combined.type == "Lucey-Reddit-Policy"][["start_date", "index_value"]], time_col="start_date")
ucry_vcrix_ts = TimeSeries.from_dataframe(ucry_combined[ucry_combined.type == "VCRIX"][["start_date", "index_value"]], time_col="start_date")

#### 2 Price and Volume Analyses
##### 2.1 UCRY against Asset Prices Analysis
Plot UCRY price and policy against Prices = Adjusted Closing Price (Weekly Granularity)
1. UCRY vs Prices at ***h = 0 weeks***
2. UCRY vs Prices at ***h = 4 weeks*** 
3. UCRY vs Prices at ***h = 12 weeks***
4. UCRY vs Prices at ***h = 24 weeks***


**Dataset Prep**

In [16]:
# Prep Datasets
asset = (
    combined_df[["date", "ticker", "adj_close", "volume"]]
    .rename(columns={"date": "Date", "ticker": "Ticker", "adj_close": "Price", "volume": "Volume"})
)

# Select relevant UCRY indices
ucry = (
    ucry_combined[["start_date", "type", "index_value"]]
    .rename(columns={"start_date": "Date", "type": "Index", "index_value": "Index Value (Standardised)"})
    [ucry_combined["type"].isin(["Lucey-Reddit-Price", "Lucey-Reddit-Policy", "VCRIX"])]
)
ucry['Date'] = (
    pd.to_datetime(ucry['Date'], utc=True)
    .dt.tz_localize(None)
)

In [17]:
# Generate lags
lags_week = [4, 12, 24, 52]

for lag in lags_week:
    asset[f'Price (Lag={lag})'] = asset['Price'].shift(-lag)

asset.head()

Unnamed: 0,Date,Ticker,Price,Volume,Price (Lag=4),Price (Lag=12),Price (Lag=24),Price (Lag=52)
0,2014-09-15,BTC-USD,398.821014,156903400.0,389.54599,351.631989,274.354004,231.212006
1,2014-09-22,BTC-USD,377.181,186772600.0,354.70401,320.842987,286.393005,232.757004
2,2014-09-29,BTC-USD,320.51001,276657896.0,325.891998,317.239014,267.959991,238.259003
3,2014-10-06,BTC-USD,378.549011,341152804.0,363.264008,264.195007,242.712997,247.050003
4,2014-10-13,BTC-USD,389.54599,156902070.0,387.881989,265.660004,260.597992,261.643005


**Helper to Generate Faceted Chart of Price against UCRY**

In [18]:
def gen_Y_vs_ucry_chart(lags_in_weeks=None, Y='Price'):

    # Select horizon
    y_label = f"{Y} (Lag={lags_in_weeks})" if lags_in_weeks is not None else "Price"
    data = asset[["Date", "Ticker", y_label]]


    # Asset Class Line Chart
    asset_line = alt.Chart().mark_line(interpolate='basis').encode(
        x='Date:T',
        y=alt.Y(f'{y_label}:Q', scale=alt.Scale(type="log"), title=f'Lagged {Y} (Log-Scale) by {0 if lags_in_weeks is None else lags_in_weeks} Weeks'),
        # color='Ticker:N',
        color=alt.value("#FF0000"),
        tooltip=['Date', y_label, 'Ticker']
    )

    # UCRY Lucey Line Chart
    ucry_line = alt.Chart(ucry[ucry.Index.str.contains('Lucey')]).mark_line(interpolate='basis').encode(
        x='Date:T',
        y=alt.Y('Index Value (Standardised):Q', scale=alt.Scale(domain=[93, 108])),
        color=alt.Color('Index:N', scale=alt.Scale(scheme='plasma')),
        strokeDash='Index:N',
        tooltip=['Date', 'Index Value (Standardised)', 'Index']
    )

    # Generate multiple charts for each Ticker
    price_chart_gen = (
        alt.layer(asset_line, ucry_line, data=data.dropna(), title=f"{asset_name} {Y} vs UCRY Indices" )
        .transform_filter(alt.datum.Ticker == asset_name)
        .resolve_scale(y="independent", x="independent")
        for asset_name in asset.Ticker.unique()
    )

    alt.concat(*price_chart_gen, columns=4).display()

**Helper to create Correlation Matrix**

In [19]:
def get_pearson_corr(ts1: pd.DataFrame,
                     ts2: pd.DataFrame,
                     ts1_col: str,
                     ts2_col: str,
                     ts1_query: Optional[str] = None,
                     ts2_query: Optional[str] = None,
                     date_col: str = 'Date'
                     ) -> Dict[str, np.float]:
    # Handle Dates (Same time span)
    ts1 = ts1.dropna()
    if ts1_query:
        ts1 = ts1.query(ts1_query)
    ts2 = ts2.dropna()
    if ts2_query:
        ts2 = ts2.query(ts2_query)
    ts_combined = ts1.merge(ts2, on=date_col)

    # Compute correlation coefficient
    r, p = stats.pearsonr(
        ts_combined[ts1_col],
        ts_combined[ts2_col]
    )
    return {'corr': r, 'p-val': p}

In [20]:
def gen_ucry_Y_corr_mat(asset_df: pd.DataFrame, ucry_df: pd.DataFrame, asset_col: str, ucry_index_col: str):
    tickers = asset_df.Ticker.unique().tolist()
    ucry = ucry_df.Index.unique().tolist()
    corr_mat = pd.DataFrame(columns=tickers, index=ucry)
    for t in tickers:
        for i in ucry:
            corr_mat.loc[i, t] = get_pearson_corr(asset_df,
                                                  ucry_df,
                                                  asset_col,
                                                  ucry_index_col,
                                                  f"Ticker == '{t}'",
                                                  f"Index == '{i}'").get('corr', None)
    return corr_mat

**Prices where h = 0 weeks**

In [21]:
gen_Y_vs_ucry_chart()

In [22]:
gen_ucry_Y_corr_mat(asset, ucry[ucry.Index.str.contains("Lucey")], 'Price', 'Index Value (Standardised)')

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,0.101091,0.131824,0.019884,0.300295,0.051938,0.157283,0.064965,0.383884,-0.023505,0.067516
Lucey-Reddit-Policy,0.265019,0.244571,-0.004834,0.273914,0.179743,0.257297,0.037917,0.259864,0.15568,0.01081


In [23]:
get_pearson_corr(asset, ucry[ucry.Index == "VCRIX"], 'Price', 'Index Value (Standardised)')

{'corr': 0.03567562139198773, 'p-val': 0.09562719802446512}

**Price where h = 4 Weeks**

In [24]:
gen_Y_vs_ucry_chart(4)

In [25]:
gen_ucry_Y_corr_mat(asset, ucry, 'Price (Lag=4)', 'Index Value (Standardised)')

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,0.117383,0.139804,0.022557,0.403514,0.052793,0.125873,0.043548,-0.017817,-0.021533,0.054573
Lucey-Reddit-Policy,0.276411,0.240693,0.016076,0.263648,0.187642,0.18509,0.039473,-0.016171,0.151218,0.009888
VCRIX,0.23971,0.153108,0.005511,0.085491,0.145123,0.141064,0.389756,-0.008486,0.174187,0.114027


**Price where h = 12 Weeks**

In [26]:
gen_Y_vs_ucry_chart(12)

In [27]:
gen_ucry_Y_corr_mat(asset, ucry, 'Price (Lag=12)', 'Index Value (Standardised)')

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,0.135021,0.177718,0.013744,0.04389,0.131996,0.102322,0.241176,-0.036516,-0.023441,0.043838
Lucey-Reddit-Policy,0.308309,0.304112,-0.004232,0.05327,0.27863,0.212292,0.207103,-0.001814,0.121861,0.008962
VCRIX,0.221762,0.204757,-0.037071,0.009349,0.197574,0.128649,0.437493,-0.016698,0.18106,0.144644


**Price where h = 24 Weeks**

In [28]:
gen_Y_vs_ucry_chart(24)

In [29]:
gen_ucry_Y_corr_mat(asset, ucry, 'Price (Lag=24)', 'Index Value (Standardised)')

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,0.130248,0.190113,-0.019991,-0.017711,0.153337,-0.002439,0.247382,-0.049308,-0.03831,0.042819
Lucey-Reddit-Policy,0.268254,0.331604,-0.017867,0.03868,0.309985,0.068786,0.256314,0.011396,0.088661,0.012098
VCRIX,0.037098,-0.029091,-0.049255,0.025683,-0.030909,0.034233,-0.317621,0.107868,0.138068,0.135776


##### 2.2 UCRY against Asset Volume Analysis
Plot UCRY price and policy against Prices = Trading Volume (Weekly Granularity)
1. UCRY vs Volume at ***h = 0***
2. UCRY vs Volume at ***h = 4*** 
3. UCRY vs Volume at ***h = 12***
4. UCRY vs Volume at ***h = 24***

In [30]:
# Generate lags
for lag in lags_week:
    asset[f'Volume (Lag={lag})'] = asset['Volume'].shift(-lag)

asset.head()

Unnamed: 0,Date,Ticker,Price,Volume,Price (Lag=4),Price (Lag=12),Price (Lag=24),Price (Lag=52),Volume (Lag=4),Volume (Lag=12),Volume (Lag=24),Volume (Lag=52)
0,2014-09-15,BTC-USD,398.821014,156903400.0,389.54599,351.631989,274.354004,231.212006,156902070.0,141754100.0,242425100.0,126654700.0
1,2014-09-22,BTC-USD,377.181,186772600.0,354.70401,320.842987,286.393005,232.757004,113691800.0,184757400.0,259502000.0,137307600.0
2,2014-09-29,BTC-USD,320.51001,276657896.0,325.891998,317.239014,267.959991,238.259003,107075700.0,107137540.0,206778200.0,136796200.0
3,2014-10-06,BTC-USD,378.549011,341152804.0,363.264008,264.195007,242.712997,247.050003,116793470.0,143354400.0,179497700.0,142478600.0
4,2014-10-13,BTC-USD,389.54599,156902070.0,387.881989,265.660004,260.597992,261.643005,208239500.0,164241500.0,150120700.0,199808300.0


**Volume when h=0**

In [31]:
gen_Y_vs_ucry_chart(Y='Volume')

In [32]:
gen_ucry_Y_corr_mat(asset, ucry, 'Volume', 'Index Value (Standardised)')

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,0.049546,0.094788,0.041371,0.097569,0.141936,0.211877,0.586345,0.333965,0.006118,-0.106259
Lucey-Reddit-Policy,0.280303,0.329672,0.268334,0.24423,0.265457,0.363052,0.48442,0.19947,-0.033594,-0.006728
VCRIX,0.202344,0.04347,0.003109,-0.07348,0.075818,0.111463,0.244522,0.187532,0.045041,0.098642


**Volume when h=4**

In [33]:
gen_Y_vs_ucry_chart(4, Y='Volume')

In [34]:
gen_ucry_Y_corr_mat(asset, ucry, 'Volume (Lag=4)', 'Index Value (Standardised)')

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,-0.009504,-0.067393,-0.106979,0.005507,0.046933,0.08482,0.080627,0.39296,-0.00318,-0.130101
Lucey-Reddit-Policy,0.177984,0.12674,0.098426,0.096329,0.200298,0.201562,0.091643,0.116375,-0.000899,-0.065199
VCRIX,0.126593,-0.073025,-0.058302,-0.157956,0.061761,0.06391,0.068754,-0.0499,0.019046,0.003177


**Volume when h=12**

In [35]:
gen_Y_vs_ucry_chart(12, Y='Volume')

In [36]:
gen_ucry_Y_corr_mat(asset, ucry, 'Volume (Lag=12)', 'Index Value (Standardised)')

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,-0.05058,-0.121683,-0.088214,0.019206,0.072385,0.108015,0.133564,0.010479,-0.010895,-0.127459
Lucey-Reddit-Policy,0.128629,0.062812,0.098806,0.1472,0.201047,0.229635,0.089037,0.037919,0.026433,-0.074275
VCRIX,0.038193,-0.139157,-0.086295,-0.101102,0.030302,0.02825,0.045341,-0.02972,0.024069,-0.091162


**Volume when h=24**

In [37]:
gen_Y_vs_ucry_chart(24, Y='Volume')

In [38]:
gen_ucry_Y_corr_mat(asset, ucry, 'Volume (Lag=24)', 'Index Value (Standardised)')

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,-0.077056,-0.215725,-0.192643,-0.20619,-0.05569,-0.097036,-0.082234,-0.017702,-0.014196,-0.21066
Lucey-Reddit-Policy,0.049475,-0.055712,-0.014861,-0.112349,0.087209,0.024935,-0.052351,-0.009706,0.042425,-0.121658
VCRIX,0.017942,-0.260695,-0.252406,-0.256092,-0.174731,-0.199984,-0.498247,0.056268,0.145153,-0.125614


#### 3 Price Returns

Price Return = $\frac{ \sum_{i=1}^{N}{n_iP_i} } { D }$

* Weekly, where weeks = 1
* Monthly, where weeks = 4
* Annual, where weeks = 52

NOTE: Should we be using number of trading days in a month instead (Like 21?)

In [39]:
# Get Weekly, Monthly and Annual Returns 
asset['Weekly Price Returns'] = asset.groupby('Ticker')['Price'].pct_change(1)
asset['Monthly Price Returns'] = asset.groupby('Ticker')['Price'].pct_change(4)
asset['Yearly Price Returns'] = asset.groupby('Ticker')['Price'].pct_change(52)

**Weekly Price Returns**

In [40]:
gen_Y_vs_ucry_chart(Y='Weekly Price Returns')

In [41]:
gen_ucry_Y_corr_mat(asset, ucry, 'Weekly Price Returns', 'Index Value (Standardised)')

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,-0.120147,-0.271833,0.018289,-0.174973,-0.18628,-0.171453,-0.402909,-0.035094,0.034824,-0.062075
Lucey-Reddit-Policy,-0.195381,-0.22292,0.108915,-0.213609,-0.195837,-0.057857,-0.325905,0.080364,-0.002443,-0.156295
VCRIX,0.014475,0.001493,0.017294,-0.014888,0.093496,0.057235,0.57664,-0.027645,0.106673,0.040685


**Monthly Price Returns**

In [42]:
gen_Y_vs_ucry_chart(Y='Monthly Price Returns')

In [43]:
gen_ucry_Y_corr_mat(asset, ucry, 'Monthly Price Returns', 'Index Value (Standardised)')

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,0.096276,-0.078541,0.032396,-0.143338,-0.050523,0.038268,-0.265444,0.008488,0.021373,-0.003331
Lucey-Reddit-Policy,-0.029289,-0.039484,0.115343,-0.147616,-0.097028,0.028658,-0.330482,-0.086367,0.066331,-0.100221
VCRIX,-0.098089,-0.124391,0.174596,-0.131071,0.013483,-0.018818,0.777844,-0.051597,0.212215,-0.051029


**Annual Price Returns**

In [44]:
gen_Y_vs_ucry_chart(Y='Yearly Price Returns')

In [45]:
gen_ucry_Y_corr_mat(asset, ucry, 'Yearly Price Returns', 'Index Value (Standardised)')

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,0.48058,0.357784,-0.145971,0.348679,0.322932,0.320689,-0.304562,0.032447,-0.170438,0.296982
Lucey-Reddit-Policy,0.32399,0.405694,-0.141242,0.365296,0.329109,0.347194,0.328492,0.11792,-0.034058,0.199116
VCRIX,0.15478,0.116453,0.179743,0.105704,0.133793,0.111589,0.040587,-0.098843,-0.087007,-0.229659


### Stationarity Checks and Inter-Index Correlations
1. Stationarity Check of Time Series
2. Correlation Across Uncertainty Indices (Lucey Reddit, Lucery Original and VRIX)

#### Stationarity of Reddit Lucey and VCRIX

In [68]:
adf_val, p_val, num_lags, _, _, _ = dstats.stationarity_test_adf(ucry_reddit_price_ts)

print(f"""
UCRY REDDIT PRICE
ADF Value: {adf_val}
P-Value: {p_val}
Number of Lags: {num_lags}
Is Stationary (5%): {p_val < 0.05}
""")


UCRY REDDIT PRICE
ADF Value: -3.315264785073671
P-Value: 0.014209774949259426
Number of Lags: 10
Is Stationary (5%): True



In [69]:
adf_val, p_val, num_lags, _, _, _ = dstats.stationarity_test_adf(ucry_reddit_policy_ts)

print(f"""
UCRY REDDIT POLICY
ADF Value: {adf_val}
P-Value: {p_val}
Number of Lags: {num_lags}
Is Stationary (5%): {p_val < 0.05}
""")


UCRY REDDIT POLICY
ADF Value: -7.107347609941217
P-Value: 4.021562435323942e-10
Number of Lags: 2
Is Stationary (5%): True



In [70]:
adf_val, p_val, num_lags, _, _, _ = dstats.stationarity_test_adf(ucry_vcrix_ts)

print(f"""
VCRIX
ADF Value: {adf_val}
P-Value: {p_val}
Number of Lags: {num_lags}
Is Stationary (5%): {p_val < 0.05}
""")


VCRIX
ADF Value: -4.715913069837575
P-Value: 7.854730573159154e-05
Number of Lags: 9
Is Stationary (5%): True



We see that for our Lucey-Reddit Index TS values and for VCRIX, they are stationary at the 5% significance level

#### Correlation between VCRIX and Lucey Reddit

In [74]:
price_vcrix = ucry_combined[ucry_combined.type == 'Lucey-Reddit-Price'].dropna().merge(ucry_combined[ucry_combined.type == 'VCRIX'], on='start_date')

r, p = stats.pearsonr(
   price_vcrix['index_value_x'],
   price_vcrix['index_value_y']
)

print(f"VCRIX vs Lucey Reddit Price Index Correlation: {r} | P-Value: {p}")

VCRIX vs Lucey Reddit Policy Index Correlation: 0.13878203678316667 | P-Value: 0.018660221916462737


In [75]:
policy_vcrix = ucry_combined[ucry_combined.type == 'Lucey-Reddit-Policy'].dropna().merge(ucry_combined[ucry_combined.type == 'VCRIX'], on='start_date')

r, p = stats.pearsonr(
   policy_vcrix['index_value_x'],
   policy_vcrix['index_value_y']
)

print(f"VCRIX vs Lucey Reddit Price Index Correlation: {r} | P-Value: {p}")

VCRIX vs Lucey Reddit Price Index Correlation: 0.19173500100962493 | P-Value: 0.0010971179972081222
