## Time Series Correlation Analysis across various UCRY indices
### Set Up

In [30]:
# Load Libraries
import os

# Move the working directory one level up before anything is loaded.
# NOTE(review): presumably this is so the relative "crypto_index/..." path used
# below resolves from the project root — confirm. Because the move is relative,
# re-running this cell without restarting the kernel moves up one more level.
os.chdir("..")
import pandas as pd
import numpy as np
import scipy.stats as stats
from pathlib import Path
from datetime import datetime

In [13]:
# Index data
index_data_path = Path("crypto_index") / "index_data"

# Original Lucey: a single CSV that carries both the policy and the price series.
ucry_original = pd.read_csv(index_data_path / "ucry_lucey_original.csv")
# Take explicit copies of the column subsets. Later cells add columns to these
# frames; without .copy() pandas treats them as derived from `ucry_original`
# and emits SettingWithCopyWarning on assignment.
ucry_original_policy = ucry_original[["Timeline", "UCRY Policy Index"]].copy()
ucry_original_price = ucry_original[["Timeline", "UCRY Price Index"]].copy()

# Reddit based index
ucry_reddit_price = pd.read_csv(index_data_path / "ucry_lucey_price.csv")
ucry_reddit_policy = pd.read_csv(index_data_path / "ucry_lucey_policy.csv")

In [25]:
# Break up timeline column
def _with_timeline_bounds(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a new frame with `start_date` / `end_date` split out of `Timeline`.

    `Timeline` is split on " - ". A new frame is returned instead of assigning
    columns onto `frame`, so no SettingWithCopyWarning is raised when `frame`
    is itself a column subset of another DataFrame, and the cell can be re-run
    safely (existing `start_date` / `end_date` columns are simply overwritten).

    Raises:
        ValueError: if the split does not produce exactly two parts.
    """
    bounds = frame["Timeline"].str.split(" - ", expand=True)
    # Renaming fails loudly (length mismatch) unless there are exactly two parts.
    bounds.columns = ["start_date", "end_date"]
    return frame.assign(
        start_date=bounds["start_date"],
        end_date=bounds["end_date"],
    )


ucry_original_policy = _with_timeline_bounds(ucry_original_policy)

ucry_original_price = _with_timeline_bounds(ucry_original_price)

In [34]:
# Convert to datetime
# Parse the start/end date columns of both Reddit-based frames as UTC timestamps.
for reddit_frame in (ucry_reddit_policy, ucry_reddit_price):
    parsed_start = pd.to_datetime(reddit_frame["start_date"], utc=True)
    parsed_end = pd.to_datetime(reddit_frame["end_date"], utc=True)
    reddit_frame["start_date"] = parsed_start
    reddit_frame["end_date"] = parsed_end

### Pearson Correlation Analysis
**Price Correlation Analysis**

In [40]:
# Filter Reddit based indices to end at 21/02/2021
# NOTE(review): the cut-off below keeps rows with end_date <= 2021-02-22, while
# the comment above says 21/02/2021 — confirm which boundary is intended.
reddit_end_cutoff = "2021-02-22"

ucry_reddit_price_trim = ucry_reddit_price[
    ucry_reddit_price.end_date <= reddit_end_cutoff
]
# Each frame is filtered by its OWN end_date column. The policy frame was
# previously masked with the price frame's end_date, which only gives the right
# rows if both frames happen to share identical indexes and dates.
ucry_reddit_policy_trim = ucry_reddit_policy[
    ucry_reddit_policy.end_date <= reddit_end_cutoff
]

In [41]:
# Pearson correlation: original Lucey price index vs trimmed Reddit price index.
# Rows containing any NaN are dropped from each frame independently; pearsonr
# then pairs the remaining values by position.
lucey_price_values = ucry_original_price.dropna()["UCRY Price Index"]
reddit_price_values = ucry_reddit_price_trim.dropna()["index_value"]
r, p = stats.pearsonr(lucey_price_values, reddit_price_values)

print(f"Price Index Correlation: {r} | P-Value: {p}")

Price Index Correlation: 0.30448683031496315 | P-Value: 1.9247203819116855e-09


**Policy Correlation Analysis**

In [42]:
# Pearson correlation: original Lucey policy index vs trimmed Reddit policy index.
# Rows containing any NaN are dropped from each frame independently; pearsonr
# then pairs the remaining values by position.
lucey_policy_values = ucry_original_policy.dropna()["UCRY Policy Index"]
reddit_policy_values = ucry_reddit_policy_trim.dropna()["index_value"]
r, p = stats.pearsonr(lucey_policy_values, reddit_policy_values)

print(f"Policy Index Correlation: {r} | P-Value: {p}")

Policy Index Correlation: 0.31024076417114127 | P-Value: 9.161402448878682e-10


**Reddit Price Vs Policy**

In [44]:
# Pearson correlation between the two trimmed Reddit-based indices (price vs policy).
# Each frame is NaN-filtered on its own before the values are paired by position.
reddit_price_values = ucry_reddit_price_trim.dropna()["index_value"]
reddit_policy_values = ucry_reddit_policy_trim.dropna()["index_value"]
r, p = stats.pearsonr(reddit_price_values, reddit_policy_values)

print(f"Reddit Price vs Policy Index Correlation: {r} | P-Value: {p}")

Reddit Price vs Policy Index Correlation: 0.6029701377767512 | P-Value: 2.6916336775149503e-38


**Reddit Price Vs Lucey Policy**

In [45]:
# Pearson correlation: trimmed Reddit price index vs original Lucey policy index.
# Each frame is NaN-filtered on its own before the values are paired by position.
reddit_price_values = ucry_reddit_price_trim.dropna()["index_value"]
lucey_policy_values = ucry_original_policy.dropna()["UCRY Policy Index"]
r, p = stats.pearsonr(reddit_price_values, lucey_policy_values)

print(f"Reddit Price vs Lucey Policy Index Correlation: {r} | P-Value: {p}")

Reddit Price vs Lucey Policy Index Correlation: 0.30305844782443936 | P-Value: 2.308465440296555e-09


**Reddit Policy Vs Lucey Price**

In [46]:
# Pearson correlation: trimmed Reddit policy index vs original Lucey price index.
# Each frame is NaN-filtered on its own before the values are paired by position.
reddit_policy_values = ucry_reddit_policy_trim.dropna()["index_value"]
lucey_price_values = ucry_original_price.dropna()["UCRY Price Index"]
r, p = stats.pearsonr(reddit_policy_values, lucey_price_values)

print(f"Lucey Price vs Reddit Policy Index Correlation: {r} | P-Value: {p}")

Lucey Price vs Reddit Policy Index Correlation: 0.30795859373219925 | P-Value: 1.2321787201605176e-09


**Lucey Vs Lucey**

In [48]:
# Pearson correlation between the two original Lucey indices (policy vs price).
# Each frame is NaN-filtered on its own before the values are paired by position.
lucey_policy_values = ucry_original_policy.dropna()["UCRY Policy Index"]
lucey_price_values = ucry_original_price.dropna()["UCRY Price Index"]
r, p = stats.pearsonr(lucey_policy_values, lucey_price_values)

print(f"Lucey Price vs Lucey Policy Index Correlation: {r} | P-Value: {p}")

Lucey Price vs Lucey Policy Index Correlation: 0.9746483492453702 | P-Value: 2.4241584203953726e-243
