## Exploratory Time Series Analysis with Reddit Based Lucey Uncertainty Index
* Identifying if our baseline ```Reddit UCRY``` indices serve as a leading indicator for various cryptocurrencies
* Further analysis on its relationship with existing volatility and returns indicators for crypto
* Locate any relationships with other asset classes (E.g. SP500, Gold, etc.)

### Set Up

In [1]:
# NB config
%load_ext autoreload
%autoreload 2

# Load Libraries
import os

os.chdir("..")
import toml
import warnings
import isoweek
from datetime import datetime
from typing import Dict, Optional
import numpy as np
import scipy.stats as stats
import altair as alt
from altair import pipe, limit_rows, to_values
from pathlib import Path
import pandas as pd
from darts import TimeSeries
from darts.utils import statistics as dstats
from sqlalchemy import create_engine

# Suppress warnings
warnings.filterwarnings("ignore")

  VALID_INDEX_TYPES = (pd.DatetimeIndex, pd.RangeIndex, pd.Int64Index)
  times: Union[pd.DatetimeIndex, pd.Int64Index],
  def time_index(self) -> Union[pd.DatetimeIndex, pd.Int64Index]:
  pd.Int64Index,


In [2]:
# Increase Max Rows for Altair
t = lambda data: pipe(data, limit_rows(max_rows=10000), to_values)
alt.data_transformers.register("custom", t)
alt.data_transformers.enable("custom")

DataTransformerRegistry.enable('custom')

In [3]:
# Connect to postgres
pg_config = toml.load(Path() / "config" / "etl_config.toml")["postgres"]
pg_engine = create_engine(pg_config["default_local_uri"], echo=True)

### Pull Relevant Data

In [4]:
# Set Date Range
START_DATE, END_DATE = "2014-01-01", "2021-12-31"

In [5]:
def get_week_start_end_from_date(date):
    date = datetime.strptime(date, "%Y-%m-%d")
    year, week = date.isocalendar()[0], date.isocalendar()[1]  # year, week, weekday
    w = isoweek.Week(year, week)
    return w.monday(), w.sunday()

In [125]:
# Original Lucey UCRY Index Data
index_data_path = (
    Path(
        "/Users/christopherliew/Desktop/Y4S1/HT/crypto_uncertainty_index/pipelines/crypto_index"
    )
    / "index_data"
)

ucry_original = pd.read_csv(index_data_path / "ucry_lucey_original_.csv")
ucry_original_policy = ucry_original[["Date", "UCRY Policy"]]
ucry_original_price = ucry_original[["Date", "UCRY Price"]]

# Process dates
# Break up timeline column

# Create start date and end date
ucry_original_policy[["start_date", "end_date"]] = pd.DataFrame(
    ucry_original_policy["Date"]
    .apply(lambda x: get_week_start_end_from_date(x))
    .tolist(),
    index=ucry_original_policy.index,
)

ucry_original_policy.drop(columns=["Date"], inplace=True)
ucry_original_policy.rename(columns={"UCRY Policy": "index_value"}, inplace=True)
ucry_original_policy["type"] = "Lucey-Original-Policy"


ucry_original_price[["start_date", "end_date"]] = pd.DataFrame(
    ucry_original_price["Date"]
    .apply(lambda x: get_week_start_end_from_date(x))
    .tolist(),
    index=ucry_original_price.index,
)

ucry_original_price.drop(columns=["Date"], inplace=True)
ucry_original_price.rename(columns={"UCRY Price": "index_value"}, inplace=True)
ucry_original_price["type"] = "Lucey-Original-Price"

In [126]:
# UCRY Index Data
# Lucey Price
ucry_query = f"""
    SELECT *
    FROM ucry_index
    """

ucry_reddit = pd.read_sql(sql=ucry_query, con=pg_engine)
ucry_reddit["type"] = ucry_reddit["type"].map(
    {
        "lucey-price": "Lucey-Reddit-Price",
        "lucey-policy": "Lucey-Reddit-Policy",
        "lda-price": "LDA-Reddit-Price",
        "lda-policy": "LDA-Reddit-Policy",
        "top2vec-price": "Top2Vec-Reddit-Price",
        "top2vec-policy": "Top2Vec-Reddit-Policy",
    }
)

ucry_price = ucry_reddit[ucry_reddit.type == "Lucey-Reddit-Price"]
ucry_policy = ucry_reddit[ucry_reddit.type == "Lucey-Reddit-Policy"]
ucry_lda_price = ucry_reddit[ucry_reddit.type == "LDA-Reddit-Price"]
ucry_lda_policy = ucry_reddit[ucry_reddit.type == "LDA-Reddit-Policy"]
ucry_t2v_price = ucry_reddit[ucry_reddit.type == "Top2Vec-Reddit-Price"]
ucry_t2v_policy = ucry_reddit[ucry_reddit.type == "Top2Vec-Reddit-Policy"]

2022-02-21 23:17:04,764 INFO sqlalchemy.engine.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2022-02-21 23:17:04,764 INFO sqlalchemy.engine.Engine [cached since 4.903e+04s ago] {'name': '\n    SELECT *\n    FROM ucry_index\n    '}
2022-02-21 23:17:04,796 INFO sqlalchemy.engine.Engine 
    SELECT *
    FROM ucry_index
    
2022-02-21 23:17:04,797 INFO sqlalchemy.engine.Engine [raw sql] {}


In [128]:
# Combined UCRY
ucry_combined = pd.concat(
    [ucry_reddit, ucry_original_price, ucry_original_policy], axis=0
)
ucry_combined["start_date"] = pd.to_datetime(ucry_combined["start_date"], utc=True)
ucry_combined["end_date"] = pd.to_datetime(ucry_combined["end_date"], utc=True)

**Load and Process CRIX and VCRIX data**

In [9]:
# Load Data
vcrix = (
    pd.read_csv(
        "/Users/christopherliew/Desktop/Y4S1/HT/crypto_uncertainty_index/etl/raw_data_dump/crix/vcrix.csv"
    )
)[["date", "vcrix"]].rename(columns={"date": "start_date", "vcrix": "index_value"})
vcrix["type"] = "VCRIX"
vcrix["start_date"] = pd.to_datetime(vcrix["start_date"])
vcrix = vcrix.set_index("start_date")

# Aggregate to Weekly Level
vcrix = vcrix.resample("W-MON").agg({"index_value": "mean"}).reset_index()
vcrix["type"] = "VCRIX"

In [136]:
# Update UCRY combined
ucry_combined = pd.concat([ucry_combined, vcrix], axis=0)
ucry_combined["start_date"] = pd.to_datetime(ucry_combined["start_date"], utc=True)

# Get rid of UTC Time
ucry_combined["start_date"] = ucry_combined["start_date"].dt.tz_localize(None)
ucry_combined["end_date"] = ucry_combined["end_date"].dt.tz_localize(None)

In [11]:
# Crypto Price Data
asset_types_query = """
    SELECT DISTINCT ticker
    FROM asset_prices
    """

asset_types = [i for i in pg_engine.execute(asset_types_query)]

2022-02-21 03:51:39,137 INFO sqlalchemy.engine.Engine 
    SELECT DISTINCT ticker
    FROM asset_prices
    
2022-02-21 03:51:39,138 INFO sqlalchemy.engine.Engine [raw sql] {}


In [12]:
# Get respective time series
combined_df = pd.read_sql(sql="SELECT * FROM asset_prices", con=pg_engine)
btc_usd = combined_df[combined_df.ticker == "BTC-USD"]
eth_usd = combined_df[combined_df.ticker == "ETH-USD"]
luna_usd = combined_df[combined_df.ticker == "LUNA-USD"]
tether_usd = combined_df[combined_df.ticker == "USDT-USD"]
ripple_usd = combined_df[combined_df.ticker == "XRP-USD"]
polkadot_usd = combined_df[combined_df.ticker == "DOT-USD"]
cardano_usd = combined_df[combined_df.ticker == "ADA-USD"]
binance_usd = combined_df[combined_df.ticker == "BNB-USD"]

2022-02-21 03:51:39,187 INFO sqlalchemy.engine.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2022-02-21 03:51:39,188 INFO sqlalchemy.engine.Engine [cached since 0.2923s ago] {'name': 'SELECT * FROM asset_prices'}
2022-02-21 03:51:39,193 INFO sqlalchemy.engine.Engine SELECT * FROM asset_prices
2022-02-21 03:51:39,193 INFO sqlalchemy.engine.Engine [raw sql] {}


In [13]:
# Other Data
sp500 = combined_df[combined_df.ticker == "^GSPC"]
gold = combined_df[combined_df.ticker == "GC=F"]

### Exploratory Analysis: Identifying if UCRY Reddut Lucey (Reddit Based) is a Leading Indicator
#### Overview of Approach
1. Convert to Darts' TimeSeries
2. Price and Volume at different Horizons
   * Adjusted Closing Price (hereafter ```Price```)
   * Trade Volumes 
3. Returns and Volatility at different Horizons
   * Price Returns
4. Compute Cross Correlations for each Pair of Asset Price / Volume / Price Returns across different horizons and UCRY


#### 1. Convert to Darts TimeSeries

In [14]:
# Helper to convert to TS Dataset
def pd_to_darts(df: pd.DataFrame, time_col: str = "date") -> TimeSeries:
    new_df = TimeSeries.from_dataframe(
        df.reset_index(drop=True).drop(columns=["ticker"]), time_col=time_col
    )
    return new_df

In [15]:
# Convert to Time Series Datasets
btc_usd_ts = pd_to_darts(btc_usd)
eth_usd_ts = pd_to_darts(eth_usd)
luna_usd_ts = pd_to_darts(luna_usd)
tether_usd_ts = pd_to_darts(tether_usd)
ripple_usd_ts = pd_to_darts(ripple_usd)
polkadot_usd_ts = pd_to_darts(polkadot_usd)
cardano_usd_ts = pd_to_darts(cardano_usd)
binance_usd_ts = pd_to_darts(binance_usd)
sp500_ts = pd_to_darts(sp500)
gold_ts = pd_to_darts(gold)

In [16]:
# Create TS datasets for relevant indices
ucry_reddit_price_ts = TimeSeries.from_dataframe(
    ucry_combined[ucry_combined.type == "Lucey-Reddit-Price"][
        ["start_date", "index_value"]
    ],
    time_col="start_date",
)
ucry_reddit_policy_ts = TimeSeries.from_dataframe(
    ucry_combined[ucry_combined.type == "Lucey-Reddit-Policy"][
        ["start_date", "index_value"]
    ],
    time_col="start_date",
)
ucry_vcrix_ts = TimeSeries.from_dataframe(
    ucry_combined[ucry_combined.type == "VCRIX"][["start_date", "index_value"]],
    time_col="start_date",
)

#### 2 Price and Volume Analyses
##### 2.1 UCRY against Asset Prices Analysis
Plot UCRY price and policy against Prices = Adjusted Closing Price (Weekly Granularity)
1. UCRY vs Prices at ***h = 0 weeks***
2. UCRY vs Prices at ***h = 4 weeks*** 
3. UCRY vs Prices at ***h = 12 weeks***
4. UCRY vs Prices at ***h = 24 weeks***


**Dataset Prep**

In [143]:
# Prep Datasets
asset = combined_df[["date", "ticker", "adj_close", "volume"]].rename(
    columns={
        "date": "Date",
        "ticker": "Ticker",
        "adj_close": "Price",
        "volume": "Volume",
    }
)

# Select relevant UCRY indices
ucry = ucry_combined[["start_date", "type", "index_value"]].rename(
    columns={
        "start_date": "Date",
        "type": "Index",
        "index_value": "Index Value (Standardised)",
    }
)[
    ucry_combined["type"].isin(
        [
            "Lucey-Original-Price",
            "Lucey-Original-Policy",
            "Lucey-Reddit-Price",
            "Lucey-Reddit-Policy",
            "VCRIX",
            "LDA-Reddit-Price",
            "LDA-Reddit-Policy",
            "Top2Vec-Reddit-Price",
            "Top2Vec-Reddit-Policy",
        ]
    )
]
ucry["Date"] = pd.to_datetime(ucry["Date"], utc=True).dt.tz_localize(None)

In [139]:
# Correlation P-val  Helper for DataFrame
def calculate_pvalues(df):
    df = df.dropna()._get_numeric_data()
    dfcols = pd.DataFrame(columns=df.columns)
    pvalues = dfcols.transpose().join(dfcols, how="outer")
    for r in df.columns:
        for c in df.columns:
            pvalues[r][c] = round(stats.pearsonr(df[r], df[c])[1], 5)
    return pvalues

In [144]:
# Correaltion Matrix (Remove VCRIX first)
corr_df = ucry.dropna().pivot_table(
    index=ucry["Date"], values="Index Value (Standardised)", columns="Index"
)
corr_mat = corr_df.corr()
corr_mat[["VCRIX"]]

Index,VCRIX
Index,Unnamed: 1_level_1
LDA-Reddit-Policy,0.153195
LDA-Reddit-Price,0.147077
Lucey-Original-Policy,0.258539
Lucey-Original-Price,0.25511
Lucey-Reddit-Policy,0.191735
Lucey-Reddit-Price,0.138782
Top2Vec-Reddit-Policy,0.183765
Top2Vec-Reddit-Price,0.124188
VCRIX,1.0


In [146]:
calculate_pvalues(corr_df)["VCRIX"]

Index
LDA-Reddit-Policy        0.00934
LDA-Reddit-Price         0.01262
Lucey-Original-Policy    0.00001
Lucey-Original-Price     0.00001
Lucey-Reddit-Policy       0.0011
Lucey-Reddit-Price       0.01866
Top2Vec-Reddit-Policy    0.00177
Top2Vec-Reddit-Price     0.03548
VCRIX                        0.0
Name: VCRIX, dtype: object

In [63]:
alt.Chart(
    ucry[ucry["Index"].isin(["Top2Vec-Reddit-Price", "Top2Vec-Reddit-Policy"])]
).mark_line().encode(
    x="Date:T",
    y=alt.Y("Index Value (Standardised):Q", scale=alt.Scale(domain=[95, 115])),
    color=alt.Color("Index:N", scale=alt.Scale(scheme="plasma")),
    strokeDash="Index:N",
).properties(
    width=600
)

In [62]:
alt.Chart(
    ucry[ucry["Index"].isin(["Lucey-Reddit-Price", "Lucey-Reddit-Policy"])]
).mark_line().encode(
    x="Date:T",
    y=alt.Y("Index Value (Standardised):Q", scale=alt.Scale(domain=[95, 115])),
    color=alt.Color("Index:N", scale=alt.Scale(scheme="plasma")),
    strokeDash="Index:N",
).properties(
    width=600
)

In [60]:
alt.Chart(
    ucry[ucry["Index"].isin(["LDA-Reddit-Price", "LDA-Reddit-Policy"])]
).mark_line().encode(
    x="Date:T",
    y=alt.Y("Index Value (Standardised):Q", scale=alt.Scale(domain=[95, 115])),
    color=alt.Color("Index:N", scale=alt.Scale(scheme="plasma")),
    strokeDash="Index:N",
).properties(
    width=600
)

In [148]:
# Generate lags
lags_week = [1, 4, 12, 24, 52]

for lag in lags_week:
    asset[f"Price (Lag={lag})"] = asset["Price"].shift(-lag)

asset.head()

Unnamed: 0,Date,Ticker,Price,Volume,Price (Lag=1),Price (Lag=4),Price (Lag=12),Price (Lag=24),Price (Lag=52)
0,2014-09-15,BTC-USD,398.821014,156903400.0,377.181,389.54599,351.631989,274.354004,231.212006
1,2014-09-22,BTC-USD,377.181,186772600.0,320.51001,354.70401,320.842987,286.393005,232.757004
2,2014-09-29,BTC-USD,320.51001,276657896.0,378.549011,325.891998,317.239014,267.959991,238.259003
3,2014-10-06,BTC-USD,378.549011,341152804.0,389.54599,363.264008,264.195007,242.712997,247.050003
4,2014-10-13,BTC-USD,389.54599,156902070.0,354.70401,387.881989,265.660004,260.597992,261.643005


**Helper to Generate Faceted Chart of Price against UCRY**

In [191]:
def gen_Y_vs_ucry_chart(lags_in_weeks=None, Y="Price"):

    # Select horizon
    y_label = f"{Y} (Lag={lags_in_weeks})" if lags_in_weeks is not None else "Price"
    data = asset[asset["Ticker"] == "BTC-USD"][["Date", "Ticker", y_label]]

    # Asset Class Line Chart
    asset_line = (
        alt.Chart()
        .mark_line(interpolate="basis")
        .encode(
            x="Date:T",
            y=alt.Y(
                f"{y_label}:Q",
                scale=alt.Scale(type="log"),
                title="Weekly Price Returns (Log-Scale)",
            ),  # title=f'Lagged {Y} (Log-Scale) by {1 if lags_in_weeks is None else lags_in_weeks} Weeks'), # Change back to 0
            # color='Ticker:N',
            color=alt.value("#FF0000"),
            tooltip=["Date", y_label, "Ticker"],
        )
        .properties(width=600)
    )

    # UCRY Lucey Line Chart
    ucry_line = (
        alt.Chart(
            ucry[
                ucry.Index.str.contains(
                    "(Lucey-Reddit-Policy|Lucey-Reddit-Price|Top2Vec-Reddit-Policy)"
                )
            ]
        )
        .mark_line(interpolate="basis")
        .encode(
            x="Date:T",
            y=alt.Y("Index Value (Standardised):Q", scale=alt.Scale(domain=[93, 108])),
            color=alt.Color("Index:N", scale=alt.Scale(scheme="plasma")),
            strokeDash="Index:N",
            tooltip=["Date", "Index Value (Standardised)", "Index"],
        )
        .properties(width=600)
    )

    # Generate multiple charts for each Ticker
    price_chart_gen = (
        alt.layer(
            asset_line,
            ucry_line,
            data=data.dropna(),
            title=f"{asset_name} {Y} vs UCRY Indices",
        )
        .transform_filter(alt.datum.Ticker == asset_name)
        .resolve_scale(y="independent", x="independent")
        for asset_name in data.Ticker.unique()
    )

    alt.concat(*price_chart_gen, columns=1).display()

**Helper to create Correlation Matrix**

In [150]:
def get_pearson_corr(
    ts1: pd.DataFrame,
    ts2: pd.DataFrame,
    ts1_col: str,
    ts2_col: str,
    ts1_query: Optional[str] = None,
    ts2_query: Optional[str] = None,
    date_col: str = "Date",
) -> Dict[str, np.float]:
    # Handle Dates (Same time span)
    ts1 = ts1.dropna()
    if ts1_query:
        ts1 = ts1.query(ts1_query)
    ts2 = ts2.dropna()
    if ts2_query:
        ts2 = ts2.query(ts2_query)
    ts_combined = ts1.merge(ts2, on=date_col)

    # Compute correlation coefficient
    r, p = stats.pearsonr(ts_combined[ts1_col], ts_combined[ts2_col])
    return {"corr": r, "p-val": p}

In [151]:
def gen_ucry_Y_corr_mat(
    asset_df: pd.DataFrame, ucry_df: pd.DataFrame, asset_col: str, ucry_index_col: str
):
    tickers = asset_df.Ticker.unique().tolist()
    ucry = ucry_df.Index.unique().tolist()
    corr_mat = pd.DataFrame(columns=tickers, index=ucry)
    for t in tickers:
        for i in ucry:
            corr_mat.loc[i, t] = get_pearson_corr(
                asset_df,
                ucry_df,
                asset_col,
                ucry_index_col,
                f"Ticker == '{t}'",
                f"Index == '{i}'",
            ).get("corr", None)
    return corr_mat

**Prices where h = 0 weeks**

In [152]:
gen_Y_vs_ucry_chart()

In [23]:
gen_ucry_Y_corr_mat(
    asset,
    ucry[ucry.Index.str.contains("(Price|Policy)")],
    "Price",
    "Index Value (Standardised)",
)

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,0.101091,0.131824,0.019884,0.300295,0.051938,0.157283,0.064965,0.383884,-0.023505,0.067516
Lucey-Reddit-Policy,0.265019,0.244571,-0.004834,0.273914,0.179743,0.257297,0.037917,0.259864,0.15568,0.01081
LDA-Reddit-Price,0.367246,0.303458,-0.029495,0.341462,0.243288,0.295243,0.238912,0.175651,0.2154,0.250785
Top2Vec-Reddit-Price,0.144817,0.105305,0.016821,0.253818,0.038888,0.116872,0.084014,0.262345,0.020279,0.136767
Top2Vec-Reddit-Policy,0.171289,0.198207,-0.001427,0.167363,0.144245,0.193976,-0.054886,0.109901,0.056401,-0.134136
LDA-Reddit-Policy,0.065569,0.1547,0.060182,0.213308,0.098142,0.162788,0.030385,0.177875,-0.060617,-0.187774


In [24]:
get_pearson_corr(
    asset, ucry[ucry.Index == "VCRIX"], "Price", "Index Value (Standardised)"
)

{'corr': 0.03567562139198773, 'p-val': 0.09562719802446512}

**Prices where h = 1 weeks**

In [153]:
gen_Y_vs_ucry_chart(1)

In [154]:
gen_ucry_Y_corr_mat(asset, ucry, "Price (Lag=1)", "Index Value (Standardised)")

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,0.099712,0.133166,0.00557,0.348313,0.052816,0.154498,0.047642,-0.008764,-0.019614,0.060027
Lucey-Reddit-Policy,0.26648,0.241599,0.023902,0.305926,0.1773,0.235874,0.012384,-0.031017,0.159052,0.008357
LDA-Reddit-Price,0.35881,0.315226,0.043056,0.352563,0.238954,0.281846,0.222819,-0.041092,0.219974,0.2434
Top2Vec-Reddit-Price,0.143405,0.115724,0.049237,0.291068,0.041909,0.115736,0.082409,-0.043806,0.025796,0.12998
Top2Vec-Reddit-Policy,0.169431,0.201353,0.012823,0.212265,0.148797,0.198559,-0.061471,-0.012863,0.064996,-0.133793
LDA-Reddit-Policy,0.062057,0.156092,0.043499,0.252292,0.088704,0.145754,-0.002023,-0.039521,-0.054189,-0.186743
Lucey-Original-Price,0.88402,0.835709,-0.035339,0.552955,0.788036,0.805288,0.655177,0.025641,0.642212,0.573689
Lucey-Original-Policy,0.885022,0.83889,-0.043376,0.569696,0.791496,0.805109,0.67918,0.03518,0.637021,0.496681
VCRIX,0.237869,0.162244,0.010959,0.182929,0.11864,0.206882,0.317039,-0.00261,0.159141,0.096357


**Price where h = 4 Weeks**

In [156]:
gen_Y_vs_ucry_chart(4)

In [157]:
gen_ucry_Y_corr_mat(asset, ucry, "Price (Lag=4)", "Index Value (Standardised)")

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,0.117383,0.139804,0.022557,0.403514,0.052793,0.125873,0.043548,-0.017817,-0.021533,0.054573
Lucey-Reddit-Policy,0.276411,0.240693,0.016076,0.263648,0.187642,0.18509,0.039473,-0.016171,0.151218,0.009888
LDA-Reddit-Price,0.355278,0.304888,0.057045,0.386761,0.219083,0.186645,0.112622,-0.052563,0.222182,0.250028
Top2Vec-Reddit-Price,0.158451,0.127106,0.054833,0.382085,0.037833,0.08911,0.061507,-0.045374,0.032583,0.132945
Top2Vec-Reddit-Policy,0.175219,0.185098,-0.006492,0.237431,0.134772,0.167996,-0.075297,0.011405,0.050196,-0.12023
LDA-Reddit-Policy,0.062409,0.140486,0.00156,0.31138,0.068039,0.122665,-0.094444,0.004218,-0.059186,-0.174849
Lucey-Original-Price,0.851859,0.803536,-0.136804,0.53082,0.765617,0.694263,0.595699,0.129056,0.631143,0.578664
Lucey-Original-Policy,0.844089,0.796844,-0.164461,0.553887,0.76028,0.714497,0.591628,0.157838,0.628831,0.50158
VCRIX,0.23971,0.153108,0.005511,0.085491,0.145123,0.141064,0.389756,-0.008486,0.174187,0.114027


**Price where h = 12 Weeks**

In [158]:
gen_Y_vs_ucry_chart(12)

In [159]:
gen_ucry_Y_corr_mat(asset, ucry, "Price (Lag=12)", "Index Value (Standardised)")

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,0.135021,0.177718,0.013744,0.04389,0.131996,0.102322,0.241176,-0.036516,-0.023441,0.043838
Lucey-Reddit-Policy,0.308309,0.304112,-0.004232,0.05327,0.27863,0.212292,0.207103,-0.001814,0.121861,0.008962
LDA-Reddit-Price,0.321736,0.253919,-0.101825,0.044,0.238672,0.27468,0.211734,0.097151,0.212114,0.226133
Top2Vec-Reddit-Price,0.176499,0.153492,-0.003847,0.016658,0.115438,0.084937,0.31294,-0.032721,0.033483,0.12521
Top2Vec-Reddit-Policy,0.233186,0.275811,0.01138,0.069113,0.262158,0.204979,0.260295,0.011836,0.016965,-0.114016
LDA-Reddit-Policy,0.075961,0.167118,-0.031862,0.059415,0.126092,0.160922,0.161012,0.038591,-0.087632,-0.183611
Lucey-Original-Price,0.703282,0.72031,0.044761,0.310477,0.73389,0.710446,0.223252,0.329819,0.615758,0.573015
Lucey-Original-Policy,0.692311,0.711002,0.027557,0.32568,0.724766,0.718614,0.201663,0.341297,0.610432,0.49432
VCRIX,0.221762,0.204757,-0.037071,0.009349,0.197574,0.128649,0.437493,-0.016698,0.18106,0.144644


**Price where h = 24 Weeks**

In [160]:
gen_Y_vs_ucry_chart(24)

In [161]:
gen_ucry_Y_corr_mat(asset, ucry, "Price (Lag=24)", "Index Value (Standardised)")

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,0.130248,0.190113,-0.019991,-0.017711,0.153337,-0.002439,0.247382,-0.049308,-0.03831,0.042819
Lucey-Reddit-Policy,0.268254,0.331604,-0.017867,0.03868,0.309985,0.068786,0.256314,0.011396,0.088661,0.012098
LDA-Reddit-Price,0.277623,0.279661,-0.003745,0.090004,0.263532,0.087751,0.110853,0.084288,0.209166,0.231485
Top2Vec-Reddit-Price,0.156781,0.177098,-0.045316,-0.033926,0.147186,-0.0219,0.221221,-0.050984,0.023436,0.125464
Top2Vec-Reddit-Policy,0.2018,0.319217,-0.070614,0.022895,0.309016,0.083608,0.272925,0.014111,-0.007784,-0.102975
LDA-Reddit-Policy,0.085658,0.236213,0.015938,0.030325,0.235481,0.050889,0.260105,-0.006134,-0.105353,-0.166964
Lucey-Original-Price,0.416685,0.41623,0.042597,0.58987,0.396505,0.559263,-0.34276,0.591329,0.589414,0.576503
Lucey-Original-Policy,0.412882,0.425152,0.030004,0.587596,0.400849,0.576204,-0.356796,0.587482,0.583774,0.507809
VCRIX,0.037098,-0.029091,-0.049255,0.025683,-0.030909,0.034233,-0.317621,0.107868,0.138068,0.135776


##### 2.2 UCRY against Asset Volume Analysis
Plot UCRY price and policy against Prices = Trading Volume (Weekly Granularity)
1. UCRY vs Volume at ***h = 0***
2. UCRY vs Volume at ***h = 4*** 
3. UCRY vs Volume at ***h = 12***
4. UCRY vs Volume at ***h = 24***

In [165]:
# Generate lags
for lag in lags_week:
    asset[f"Volume (Lag={lag})"] = asset["Volume"].shift(-lag)

asset.head()

Unnamed: 0,Date,Ticker,Price,Volume,Price (Lag=1),Price (Lag=4),Price (Lag=12),Price (Lag=24),Price (Lag=52),Volume (Lag=1),Volume (Lag=4),Volume (Lag=12),Volume (Lag=24),Volume (Lag=52)
0,2014-09-15,BTC-USD,398.821014,156903400.0,377.181,389.54599,351.631989,274.354004,231.212006,186772600.0,156902070.0,141754100.0,242425100.0,126654700.0
1,2014-09-22,BTC-USD,377.181,186772600.0,320.51001,354.70401,320.842987,286.393005,232.757004,276657896.0,113691800.0,184757400.0,259502000.0,137307600.0
2,2014-09-29,BTC-USD,320.51001,276657896.0,378.549011,325.891998,317.239014,267.959991,238.259003,341152804.0,107075700.0,107137540.0,206778200.0,136796200.0
3,2014-10-06,BTC-USD,378.549011,341152804.0,389.54599,363.264008,264.195007,242.712997,247.050003,156902070.0,116793470.0,143354400.0,179497700.0,142478600.0
4,2014-10-13,BTC-USD,389.54599,156902070.0,354.70401,387.881989,265.660004,260.597992,261.643005,113691800.0,208239500.0,164241500.0,150120700.0,199808300.0


**Volume when h=0**

In [166]:
gen_Y_vs_ucry_chart(Y="Volume")

In [167]:
gen_ucry_Y_corr_mat(asset, ucry, "Volume", "Index Value (Standardised)")

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,0.049546,0.094788,0.041371,0.097569,0.141936,0.211877,0.586345,0.333965,0.006118,-0.106259
Lucey-Reddit-Policy,0.280303,0.329672,0.268334,0.24423,0.265457,0.363052,0.48442,0.19947,-0.033594,-0.006728
LDA-Reddit-Price,0.325541,0.285043,0.26785,0.260661,0.275058,0.338219,0.520108,0.163974,0.038712,0.013398
Top2Vec-Reddit-Price,0.121523,0.102364,0.067324,0.142264,0.124766,0.193981,0.570213,0.2555,0.030805,-0.057751
Top2Vec-Reddit-Policy,0.190467,0.309589,0.258672,0.210731,0.227667,0.3191,0.481463,0.099714,-0.033642,-0.008681
LDA-Reddit-Policy,0.053906,0.169809,0.149839,0.160491,0.159617,0.229386,0.403611,0.191925,-0.010301,-0.030259
Lucey-Original-Price,0.739147,0.73868,0.776206,0.596481,0.762255,0.845826,0.697166,-0.085309,0.01182,0.289512
Lucey-Original-Policy,0.731012,0.727377,0.767409,0.603894,0.76061,0.831135,0.68845,-0.066947,0.006233,0.279031
VCRIX,0.202344,0.04347,0.003109,-0.07348,0.075818,0.111463,0.244522,0.187532,0.045041,0.098642


**Volume when h=4**

In [168]:
gen_Y_vs_ucry_chart(4, Y="Volume")

In [169]:
gen_ucry_Y_corr_mat(asset, ucry, "Volume (Lag=4)", "Index Value (Standardised)")

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,-0.009504,-0.067393,-0.106979,0.005507,0.046933,0.08482,0.080627,0.39296,-0.00318,-0.130101
Lucey-Reddit-Policy,0.177984,0.12674,0.098426,0.096329,0.200298,0.201562,0.091643,0.116375,-0.000899,-0.065199
LDA-Reddit-Price,0.25665,0.146549,0.118951,0.223399,0.180379,0.231675,0.131623,0.279047,-0.010605,-0.051555
Top2Vec-Reddit-Price,0.056727,-0.046262,-0.081926,0.066455,0.0309,0.083518,0.109356,0.42699,-0.01847,-0.106019
Top2Vec-Reddit-Policy,0.103606,0.113183,0.086637,0.125101,0.145592,0.189443,0.013563,0.11501,0.005169,-0.103203
LDA-Reddit-Policy,-0.013576,0.023818,0.001434,0.120822,0.037039,0.065631,-0.130912,0.263439,-0.002856,-0.115456
Lucey-Original-Price,0.623808,0.55802,0.600703,0.394727,0.648445,0.677874,0.363649,-0.098872,0.143614,0.135061
Lucey-Original-Policy,0.62151,0.567079,0.607536,0.396388,0.663233,0.684773,0.377407,-0.078102,0.172696,0.136844
VCRIX,0.126593,-0.073025,-0.058302,-0.157956,0.061761,0.06391,0.068754,-0.0499,0.019046,0.003177


**Volume when h=12**

In [36]:
gen_Y_vs_ucry_chart(12, Y="Volume")

In [177]:
gen_ucry_Y_corr_mat(asset, ucry, "Volume (Lag=12)", "Index Value (Standardised)")

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,-0.125957,0.127421,0.183514,0.212094,0.301717,0.32396,-0.04872,-0.006235,-0.021849,-0.159184
Lucey-Reddit-Policy,0.088979,0.18407,0.231946,0.235263,0.300652,0.322287,-0.157536,0.030884,0.029088,-0.044683
LDA-Reddit-Price,0.102345,0.185291,0.227804,0.182896,0.363274,0.352453,-0.198447,0.106795,0.122734,-0.023117
Top2Vec-Reddit-Price,-0.057187,0.126414,0.167338,0.157834,0.28385,0.301725,-0.159074,-0.019121,-0.007603,-0.11635
Top2Vec-Reddit-Policy,0.080596,0.173372,0.232466,0.266051,0.302595,0.339356,-0.217079,0.01995,0.024331,-0.023886
LDA-Reddit-Policy,0.001451,0.16778,0.187252,0.163992,0.230642,0.301129,-0.337438,0.042562,0.024695,-0.028451
Lucey-Original-Price,0.387165,0.33651,0.36998,0.186277,0.536476,0.48842,0.40956,0.085396,0.356911,0.170422
Lucey-Original-Policy,0.378956,0.333514,0.365491,0.181186,0.547844,0.494837,0.33141,0.091655,0.365283,0.150295
VCRIX,0.038193,-0.124795,-0.058699,-0.081039,0.06242,0.059666,0.690779,-0.025531,0.024069,-0.091162


**Volume when h=24**

In [38]:
gen_Y_vs_ucry_chart(24, Y="Volume")

In [178]:
gen_ucry_Y_corr_mat(asset, ucry, "Volume (Lag=24)", "Index Value (Standardised)")

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,-0.155178,-0.050205,-0.013212,-0.12892,0.100485,0.031834,0.178577,0.012178,-0.029983,-0.259426
Lucey-Reddit-Policy,0.00319,0.020553,0.072208,-0.081486,0.162781,0.083333,-0.035929,-0.001038,0.046707,-0.100207
LDA-Reddit-Price,0.025591,0.023151,0.022121,-0.078524,0.159043,0.091848,0.031926,-0.001213,0.125641,-0.186902
Top2Vec-Reddit-Price,-0.091415,-0.002942,-0.00247,-0.105298,0.075622,0.011049,-0.088313,0.00125,-0.012613,-0.254203
Top2Vec-Reddit-Policy,0.031612,0.078851,0.095715,-0.046758,0.180702,0.133018,-0.225901,-0.015966,0.019622,-0.06076
LDA-Reddit-Policy,-0.034429,0.06337,0.085461,0.010061,0.18685,0.100994,-0.302478,0.030576,-0.01803,-0.094389
Lucey-Original-Price,0.115746,-0.100831,-0.070791,-0.145844,0.136366,0.20649,0.460128,0.112892,0.609748,-0.072235
Lucey-Original-Policy,0.102646,-0.099255,-0.073055,-0.152422,0.136378,0.211816,0.545205,0.126295,0.599441,-0.094483
VCRIX,0.017942,-0.26495,-0.261751,-0.258647,-0.169764,-0.201649,0.248602,0.070932,0.145153,-0.125614


#### 3 Price Returns

Price Return = $\frac{ \sum_{i=1}^{N}{n_iP_i} } { D }$

* Weekly, where weeks = 1
* Monthly, where weeks = 4
* Annual, where weeks = 52

NOTE: Should we be using number of trading days in a month instead (Like 21?)

In [170]:
# Get Weekly, Monthly and Annual Returns
asset["Weekly Price Returns"] = asset.groupby("Ticker")["Price"].pct_change(1)
asset["Monthly Price Returns"] = asset.groupby("Ticker")["Price"].pct_change(4)
asset["Yearly Price Returns"] = asset.groupby("Ticker")["Price"].pct_change(52)

**Weekly Price Returns**

In [192]:
gen_Y_vs_ucry_chart(Y="Weekly Price Returns")

In [172]:
gen_ucry_Y_corr_mat(asset, ucry, "Weekly Price Returns", "Index Value (Standardised)")

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,-0.120147,-0.271833,0.018289,-0.174973,-0.18628,-0.171453,-0.402909,-0.035094,0.034824,-0.062075
Lucey-Reddit-Policy,-0.195381,-0.22292,0.108915,-0.213609,-0.195837,-0.057857,-0.325905,0.080364,-0.002443,-0.156295
LDA-Reddit-Price,0.003793,-0.046299,0.029891,0.000715,-0.056219,0.030446,0.096359,0.005257,0.011177,-0.037757
Top2Vec-Reddit-Price,-0.010764,-0.173455,0.071864,-0.063332,-0.107853,-0.062208,-0.067419,0.020039,0.011654,-0.073776
Top2Vec-Reddit-Policy,-0.08912,-0.151576,0.055446,-0.133846,-0.108633,-0.018026,-0.196156,0.071391,-0.030117,-0.181231
LDA-Reddit-Policy,-0.011058,-0.192674,0.064093,-0.126332,-0.089033,-0.081109,-0.113549,0.063033,-0.007354,-0.060587
Lucey-Original-Price,-0.060773,-0.004703,-0.005583,0.06903,0.068426,0.072871,-0.362422,-0.105009,-0.024935,-0.000833
Lucey-Original-Policy,-0.051837,-0.003684,-0.01165,0.056705,0.067818,0.064271,-0.223576,-0.102207,-0.021134,0.012205
VCRIX,0.014475,0.001493,0.017294,-0.014888,0.093496,0.057235,0.57664,-0.027645,0.106673,0.040685


**Monthly Price Returns**

In [173]:
gen_Y_vs_ucry_chart(Y="Monthly Price Returns")

In [174]:
gen_ucry_Y_corr_mat(asset, ucry, "Monthly Price Returns", "Index Value (Standardised)")

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,0.096276,-0.078541,0.032396,-0.143338,-0.050523,0.038268,-0.265444,0.008488,0.021373,-0.003331
Lucey-Reddit-Policy,-0.029289,-0.039484,0.115343,-0.147616,-0.097028,0.028658,-0.330482,-0.086367,0.066331,-0.100221
LDA-Reddit-Price,0.244276,0.142306,0.011594,-0.022486,0.080349,0.147566,0.267869,-0.055777,0.041022,0.014928
Top2Vec-Reddit-Price,0.203787,-0.000281,0.070486,-0.090303,-0.013279,0.056649,-0.0282,-0.009006,0.021903,-0.013469
Top2Vec-Reddit-Policy,0.087367,-0.035405,0.153328,-0.164556,-0.062344,0.034373,-0.1741,-0.099817,0.001762,-0.15043
LDA-Reddit-Policy,0.172129,-0.106097,0.083571,-0.142898,-0.015836,-0.006604,-0.123714,-0.036491,0.006854,-0.080906
Lucey-Original-Price,0.020325,0.175288,-0.021602,0.190661,0.238798,0.252294,-0.035775,-0.092218,-0.023603,0.050665
Lucey-Original-Policy,0.016379,0.159791,-0.026867,0.202369,0.24534,0.239824,-0.021226,-0.087762,-0.018523,0.056883
VCRIX,-0.098089,-0.124391,0.174596,-0.131071,0.013483,-0.018818,0.777844,-0.051597,0.212215,-0.051029


**Annual Price Returns**

In [175]:
gen_Y_vs_ucry_chart(Y="Yearly Price Returns")

In [176]:
gen_ucry_Y_corr_mat(asset, ucry, "Yearly Price Returns", "Index Value (Standardised)")

Unnamed: 0,BTC-USD,ETH-USD,USDT-USD,XRP-USD,BNB-USD,ADA-USD,DOT-USD,LUNA-USD,GC=F,^GSPC
Lucey-Reddit-Price,0.48058,0.357784,-0.145971,0.348679,0.322932,0.320689,-0.304562,0.032447,-0.170438,0.296982
Lucey-Reddit-Policy,0.32399,0.405694,-0.141242,0.365296,0.329109,0.347194,0.328492,0.11792,-0.034058,0.199116
LDA-Reddit-Price,0.445062,0.481594,-0.195477,0.442516,0.406151,0.391862,0.612048,0.105003,-0.103535,0.306315
Top2Vec-Reddit-Price,0.473518,0.294153,-0.189319,0.288759,0.253946,0.246605,0.199198,-0.014624,-0.146053,0.279902
Top2Vec-Reddit-Policy,0.248991,0.313702,-0.136802,0.256754,0.23175,0.239038,0.03081,0.057651,-0.121059,0.122663
LDA-Reddit-Policy,0.333444,0.260564,-0.138651,0.237557,0.202724,0.213179,0.290301,0.127837,-0.178021,0.198144
Lucey-Original-Price,0.334271,0.805346,-0.077495,0.741259,0.720019,0.767637,0.016083,0.132591,-0.004323,0.22401
Lucey-Original-Policy,0.349737,0.81371,-0.08555,0.753761,0.731345,0.77503,0.086928,0.143731,-0.004268,0.186635
VCRIX,0.15478,0.116453,0.179743,0.105704,0.133793,0.111589,0.040587,-0.098843,-0.087007,-0.229659


### Stationarity Checks and Inter-Index Correlations
1. Stationarity Check of Time Series
2. Correlation Across Uncertainty Indices (Lucey Reddit, Lucery Original and VRIX)

#### Stationarity of Reddit Lucey and VCRIX

In [47]:
adf_val, p_val, num_lags, _, _, _ = dstats.stationarity_test_adf(ucry_reddit_price_ts)

print(
    f"""
UCRY REDDIT PRICE
ADF Value: {adf_val}
P-Value: {p_val}
Number of Lags: {num_lags}
Is Stationary (5%): {p_val < 0.05}
"""
)


UCRY REDDIT PRICE
ADF Value: -3.315264785073671
P-Value: 0.014209774949259426
Number of Lags: 10
Is Stationary (5%): True



In [48]:
adf_val, p_val, num_lags, _, _, _ = dstats.stationarity_test_adf(ucry_reddit_policy_ts)

print(
    f"""
UCRY REDDIT POLICY
ADF Value: {adf_val}
P-Value: {p_val}
Number of Lags: {num_lags}
Is Stationary (5%): {p_val < 0.05}
"""
)


UCRY REDDIT POLICY
ADF Value: -7.107347609941217
P-Value: 4.021562435323942e-10
Number of Lags: 2
Is Stationary (5%): True



In [49]:
adf_val, p_val, num_lags, _, _, _ = dstats.stationarity_test_adf(ucry_vcrix_ts)

print(
    f"""
VCRIX
ADF Value: {adf_val}
P-Value: {p_val}
Number of Lags: {num_lags}
Is Stationary (5%): {p_val < 0.05}
"""
)


VCRIX
ADF Value: -4.715913069837575
P-Value: 7.854730573159154e-05
Number of Lags: 9
Is Stationary (5%): True



We see that for our Lucey-Reddit Index TS values and for VCRIX, they are stationary at the 5% significance level

#### Correlation between VCRIX and Lucey Reddit

In [50]:
price_vcrix = (
    ucry_combined[ucry_combined.type == "Lucey-Reddit-Price"]
    .dropna()
    .merge(ucry_combined[ucry_combined.type == "VCRIX"], on="start_date")
)

r, p = stats.pearsonr(price_vcrix["index_value_x"], price_vcrix["index_value_y"])

print(f"VCRIX vs Lucey Reddit Price Index Correlation: {r} | P-Value: {p}")

ValueError: x and y must have length at least 2.

In [None]:
policy_vcrix = (
    ucry_combined[ucry_combined.type == "Lucey-Reddit-Policy"]
    .dropna()
    .merge(ucry_combined[ucry_combined.type == "VCRIX"], on="start_date")
)

r, p = stats.pearsonr(policy_vcrix["index_value_x"], policy_vcrix["index_value_y"])

print(f"VCRIX vs Lucey Reddit Price Index Correlation: {r} | P-Value: {p}")

VCRIX vs Lucey Reddit Price Index Correlation: 0.19173500100962493 | P-Value: 0.0010971179972081222
