## Time Series Correlation Analysis across various UCRY indices
### Set Up

In [1]:
# Load Libraries
import os
os.chdir("..")
import pandas as pd
from datetime import datetime
import isoweek
import numpy as np
import scipy.stats as stats
from pathlib import Path
from datetime import datetime

In [2]:
# Index data
# The path is relative to the working directory set by os.chdir("..") in the
# imports cell.
index_data_path = Path("pipelines/crypto_index") / "index_data"

# Original Lucey
ucry_original = pd.read_csv(index_data_path / "ucry_lucey_original_.csv")
# Explicit copies: these frames get new columns added later on, and writing
# into a bare column selection of `ucry_original` raises
# SettingWithCopyWarning (and may not write where you expect).
ucry_original_policy = ucry_original[["Date", "UCRY Policy"]].copy()
ucry_original_price = ucry_original[["Date", "UCRY Price"]].copy()

# Reddit based index
ucry_reddit_price = pd.read_csv(index_data_path / "ucry_lucey_price.csv")
ucry_reddit_policy = pd.read_csv(index_data_path / "ucry_lucey_policy.csv")

In [3]:
def get_week_start_end_from_date(date):
    """Return the Monday and Sunday of the ISO week containing ``date``.

    Parameters
    ----------
    date : str
        Calendar date formatted as ``YYYY-MM-DD``.

    Returns
    -------
    tuple of datetime.date
        ``(monday, sunday)`` of the ISO week that contains ``date``.

    Raises
    ------
    ValueError
        If ``date`` does not match the ``%Y-%m-%d`` format.
    """
    # Function-scope import: the notebook's import cell only brings in `datetime`.
    from datetime import timedelta

    day = datetime.strptime(date, '%Y-%m-%d').date()
    # ISO weeks run Monday..Sunday and weekday() is 0 on Monday, so stepping
    # back weekday() days lands on the week's Monday using only the standard
    # library (no year/week round trip needed).
    monday = day - timedelta(days=day.weekday())
    return monday, monday + timedelta(days=6)

In [4]:
# Create start date and end date
def _with_week_bounds(index_frame):
    """Return a new frame where "Date" is replaced by its week bounds.

    Parameters
    ----------
    index_frame : pandas.DataFrame
        Frame holding a "Date" column of ``YYYY-MM-DD`` strings.

    Returns
    -------
    pandas.DataFrame
        The remaining columns of ``index_frame`` followed by "start_date" and
        "end_date" as produced by ``get_week_start_end_from_date``. The row
        index is preserved and the input frame is not modified.
    """
    week_bounds = pd.DataFrame(
        index_frame["Date"].map(get_week_start_end_from_date).tolist(),
        columns=["start_date", "end_date"],
        index=index_frame.index,
    )
    return index_frame.drop(columns=["Date"]).join(week_bounds)


# Rebuild both frames from `ucry_original` instead of mutating them in place:
# the cell can then be re-run safely (an in-place drop of "Date" fails with a
# KeyError the second time) and nothing is assigned into a slice.
ucry_original_policy = _with_week_bounds(ucry_original[["Date", "UCRY Policy"]])
ucry_original_price = _with_week_bounds(ucry_original[["Date", "UCRY Price"]])

In [5]:
# Convert to datetime
ucry_reddit_policy["start_date"], ucry_reddit_policy["end_date"] = (
    pd.to_datetime(ucry_reddit_policy["start_date"], utc=True),
    pd.to_datetime(ucry_reddit_policy["end_date"], utc=True),
)

ucry_reddit_price["start_date"], ucry_reddit_price["end_date"] = (
    pd.to_datetime(ucry_reddit_price["start_date"], utc=True),
    pd.to_datetime(ucry_reddit_price["end_date"], utc=True),
)

### Pearson Correlation Analysis
**Price Correlation Analysis**

In [6]:
# Pair the two price indices week by week before correlating. Selecting the
# value columns after two independent dropna() calls pairs rows purely by
# position, so a gap in either series silently shifts every later observation.
lucey_price = ucry_original_price.dropna()
reddit_price = ucry_reddit_price.dropna()

# NOTE(review): assumes the Reddit index weeks start on Monday (UTC), like the
# ISO-week bounds computed for the Lucey data — confirm against the pipeline.
price_pairs = pd.merge(
    lucey_price.assign(
        week_start=pd.to_datetime(lucey_price["start_date"], utc=True)
    )[["week_start", "UCRY Price"]],
    reddit_price.assign(
        week_start=reddit_price["start_date"].dt.normalize()
    )[["week_start", "index_value"]],
    on="week_start",
    validate="one_to_one",  # a repeated week would duplicate observations
)
assert len(price_pairs) >= 2, "Lucey and Reddit price indices share no weeks"

r, p = stats.pearsonr(price_pairs["UCRY Price"], price_pairs["index_value"])

print(f"Price Index Correlation: {r} | P-Value: {p}")

Price Index Correlation: 0.278213014756129 | P-Value: 7.222720390237522e-09


**Policy Correlation Analysis**

In [7]:
# Pair the two policy indices week by week before correlating. Selecting the
# value columns after two independent dropna() calls pairs rows purely by
# position, so a gap in either series silently shifts every later observation.
lucey_policy = ucry_original_policy.dropna()
reddit_policy = ucry_reddit_policy.dropna()

# NOTE(review): assumes the Reddit index weeks start on Monday (UTC), like the
# ISO-week bounds computed for the Lucey data — confirm against the pipeline.
policy_pairs = pd.merge(
    lucey_policy.assign(
        week_start=pd.to_datetime(lucey_policy["start_date"], utc=True)
    )[["week_start", "UCRY Policy"]],
    reddit_policy.assign(
        week_start=reddit_policy["start_date"].dt.normalize()
    )[["week_start", "index_value"]],
    on="week_start",
    validate="one_to_one",  # a repeated week would duplicate observations
)
assert len(policy_pairs) >= 2, "Lucey and Reddit policy indices share no weeks"

r, p = stats.pearsonr(policy_pairs["UCRY Policy"], policy_pairs["index_value"])

print(f"Policy Index Correlation: {r} | P-Value: {p}")

Policy Index Correlation: 0.37919918520286205 | P-Value: 9.633226768940495e-16


**Reddit Price Vs Policy**

In [8]:
# Match the Reddit price and policy indices on their week start before
# correlating. Pairing the value columns by position after two independent
# dropna() calls misaligns the series as soon as either one has a gap.
reddit_pairs = pd.merge(
    ucry_reddit_price.dropna()[["start_date", "index_value"]],
    ucry_reddit_policy.dropna()[["start_date", "index_value"]],
    on="start_date",
    suffixes=("_price", "_policy"),
    validate="one_to_one",  # a repeated week would duplicate observations
)
assert len(reddit_pairs) >= 2, "Reddit price and policy indices share no weeks"

r, p = stats.pearsonr(
    reddit_pairs["index_value_price"],
    reddit_pairs["index_value_policy"],
)

print(f"Reddit Price vs Policy Index Correlation: {r} | P-Value: {p}")

Reddit Price vs Policy Index Correlation: 0.642649657345846 | P-Value: 4.567062351223928e-50


**Reddit Price Vs Lucey Policy**

In [10]:
# Pair the Reddit price index with the Lucey policy index week by week before
# correlating. Selecting the value columns after two independent dropna()
# calls pairs rows purely by position, so a gap in either series silently
# shifts every later observation.
reddit_price = ucry_reddit_price.dropna()
lucey_policy = ucry_original_policy.dropna()

# NOTE(review): assumes the Reddit index weeks start on Monday (UTC), like the
# ISO-week bounds computed for the Lucey data — confirm against the pipeline.
price_policy_pairs = pd.merge(
    reddit_price.assign(
        week_start=reddit_price["start_date"].dt.normalize()
    )[["week_start", "index_value"]],
    lucey_policy.assign(
        week_start=pd.to_datetime(lucey_policy["start_date"], utc=True)
    )[["week_start", "UCRY Policy"]],
    on="week_start",
    validate="one_to_one",  # a repeated week would duplicate observations
)
assert len(price_policy_pairs) >= 2, "Reddit price and Lucey policy indices share no weeks"

r, p = stats.pearsonr(
    price_policy_pairs["index_value"],
    price_policy_pairs["UCRY Policy"],
)

print(f"Reddit Price vs Lucey Policy Index Correlation: {r} | P-Value: {p}")

Reddit Price vs Lucey Policy Index Correlation: 0.2734892581431104 | P-Value: 1.319883396166118e-08


**Reddit Policy Vs Lucey Price**

In [11]:
# Pair the Reddit policy index with the Lucey price index week by week before
# correlating. Selecting the value columns after two independent dropna()
# calls pairs rows purely by position, so a gap in either series silently
# shifts every later observation.
reddit_policy = ucry_reddit_policy.dropna()
lucey_price = ucry_original_price.dropna()

# NOTE(review): assumes the Reddit index weeks start on Monday (UTC), like the
# ISO-week bounds computed for the Lucey data — confirm against the pipeline.
policy_price_pairs = pd.merge(
    reddit_policy.assign(
        week_start=reddit_policy["start_date"].dt.normalize()
    )[["week_start", "index_value"]],
    lucey_price.assign(
        week_start=pd.to_datetime(lucey_price["start_date"], utc=True)
    )[["week_start", "UCRY Price"]],
    on="week_start",
    validate="one_to_one",  # a repeated week would duplicate observations
)
assert len(policy_price_pairs) >= 2, "Reddit policy and Lucey price indices share no weeks"

r, p = stats.pearsonr(
    policy_price_pairs["index_value"],
    policy_price_pairs["UCRY Price"],
)

print(f"Lucey Price vs Reddit Policy Index Correlation: {r} | P-Value: {p}")

Lucey Price vs Reddit Policy Index Correlation: 0.37698508935483005 | P-Value: 1.4544683744023738e-15


**Lucey Policy Vs Lucey Price**

In [12]:
# Both Lucey frames are column subsets of the same `ucry_original` rows and
# keep its row index, so an inner join on that index pairs each policy value
# with the price value from the same row. Calling dropna() on each frame
# separately and pairing by position would misalign them if only one of the
# two columns had a missing value.
lucey_pairs = ucry_original_policy.dropna()[["UCRY Policy"]].join(
    ucry_original_price.dropna()[["UCRY Price"]],
    how="inner",
)
assert len(lucey_pairs) >= 2, "Lucey policy and price indices share no rows"

r, p = stats.pearsonr(lucey_pairs["UCRY Policy"], lucey_pairs["UCRY Price"])

print(f"Lucey Price vs Lucey Policy Index Correlation: {r} | P-Value: {p}")

Lucey Price vs Lucey Policy Index Correlation: 0.9884880677506681 | P-Value: 0.0
