# Principal Component Correlation Robustness Data Analysis at Crypto Market

Date: 2024-08-26
  

In [1]:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Define the ticker symbols
commodity_tickers = ["GC=F", "CL=F", "DX-Y.NYB"]
equity_tickers = ["SPY", "^VIX", "TLT", "QQQ", "NVDA"]
crypto_tickers = ["BTC-USD", "SOL-USD"]

tickers = commodity_tickers + equity_tickers + crypto_tickers

In [3]:
# Specify the period and interval
commodity_data = yf.download(commodity_tickers, period="1d", interval="1m", group_by='ticker')
index_data = yf.download(equity_tickers, period="1d", interval="1m", group_by='ticker')
crypto_data = yf.download(crypto_tickers, period="1d", interval="1m", group_by='ticker')

eth_data = yf.download('ETH-USD', period="1d", interval="1m", group_by='ticker')

# Combine all data into a single DataFrame
data = pd.concat([commodity_data, index_data, crypto_data], axis=1)

[*********************100%%**********************]  3 of 3 completed
[*********************100%%**********************]  5 of 5 completed
[*********************100%%**********************]  2 of 2 completed
[*********************100%%**********************]  1 of 1 completed


In [4]:
# Extract the 'Close' data for each ticker
close_data = data.xs('Close', level=1, axis=1)
rt_data = close_data.pct_change()
target_rt = eth_data['Close'].pct_change()
rt_data.dropna(inplace=True)
target_rt.dropna(inplace=True)

In [5]:
rt_data

Ticker,GC=F,CL=F,DX-Y.NYB,^VIX,SPY,QQQ,NVDA,TLT,BTC-USD,SOL-USD
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-08-27 13:31:00+00:00,0.000629,0.000000,0.000069,-0.001804,0.000000,0.000071,0.000442,0.000050,0.000000,0.001305
2024-08-27 13:32:00+00:00,0.000000,0.001564,0.000139,0.002410,-0.000170,-0.000155,-0.001205,-0.000204,-0.000553,0.000238
2024-08-27 13:33:00+00:00,-0.000197,-0.000651,-0.000079,0.000000,0.000447,0.001119,0.006825,-0.000359,-0.000880,-0.000611
2024-08-27 13:34:00+00:00,-0.000039,0.000391,0.000040,-0.002404,-0.000322,-0.000802,-0.000159,0.000205,0.001321,-0.000091
2024-08-27 13:35:00+00:00,0.001297,-0.000911,-0.000198,0.004819,-0.000250,-0.000443,-0.000634,0.000564,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...
2024-08-27 19:00:00+00:00,0.000000,0.000000,0.000000,0.000000,-0.000014,-0.000126,-0.000087,-0.000204,0.000000,0.000000
2024-08-27 19:01:00+00:00,0.000000,0.000000,0.000000,0.000000,-0.000003,0.000053,-0.000179,-0.000153,0.000000,0.000619
2024-08-27 19:02:00+00:00,0.000000,0.000000,0.000000,0.000000,-0.000125,-0.000210,-0.000319,0.000102,-0.000363,0.000000
2024-08-27 19:03:00+00:00,0.000000,0.000000,0.000000,0.000000,-0.000206,-0.000220,-0.000421,0.000102,0.000000,0.000000


In [8]:
close_data

Ticker,GC=F,CL=F,DX-Y.NYB,^VIX,SPY,QQQ,NVDA,TLT,BTC-USD,SOL-USD
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-08-27 00:00:00+00:00,,,,,,,,,62856.585938,
2024-08-27 00:01:00+00:00,,,,,,,,,,157.201462
2024-08-27 00:02:00+00:00,,,,,,,,,62844.976562,157.242035
2024-08-27 00:03:00+00:00,,,,,,,,,62794.175781,157.246384
2024-08-27 00:05:00+00:00,,,,,,,,,,157.487518
...,...,...,...,...,...,...,...,...,...,...
2024-08-27 19:00:00+00:00,,,,,561.581970,476.849915,128.598007,97.849998,,
2024-08-27 19:01:00+00:00,,,,,561.580017,476.875000,128.574997,97.834999,,155.451172
2024-08-27 19:02:00+00:00,,,,,561.510010,476.774994,128.533997,97.845001,62080.730469,
2024-08-27 19:03:00+00:00,,,,,561.394287,476.670013,128.479904,97.855003,,


In [16]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA

def rolling_window_pca(df, window_size, n_components, step=1):
    transformed_data = []
    index_list = []
    print(len(df))
    # Loop over the data using a rolling window
    for start in range(0, len(df) - window_size + 1, step):
        end = start + window_size
        window_data = df.iloc[start:end]
        # Perform PCA on the current window
        window_data = (window_data - window_data.mean(axis=0)) / window_data.std(axis=0)
        pca = PCA(n_components=n_components)
        window_transformed = pca.fit_transform(window_data)[-1, :]
        
        # Store the result and the corresponding index
        transformed_data.append(window_transformed)
        index_list.append(df.index[end-1])

    # Combine the results into a DataFrame
    transformed_df = pd.DataFrame(
        transformed_data,
        index=index_list,
        columns=[f'PC{i+1}' for i in range(n_components)]
    )

    return transformed_df


In [17]:
pca_df = rolling_window_pca(rt_data.copy(), window_size=120, n_components=len(rt_data.columns), step=1)

34


In [18]:
pca_df['target'] = target_rt

In [19]:
target_rt

Datetime
2024-08-27 00:01:00+00:00    0.000119
2024-08-27 00:02:00+00:00    0.000153
2024-08-27 00:03:00+00:00   -0.000135
2024-08-27 00:05:00+00:00    0.001047
2024-08-27 00:07:00+00:00    0.000765
2024-08-27 00:08:00+00:00   -0.000580
2024-08-27 00:09:00+00:00    0.000179
2024-08-27 00:10:00+00:00    0.000227
2024-08-27 00:12:00+00:00    0.000676
2024-08-27 00:13:00+00:00   -0.000791
2024-08-27 00:14:00+00:00    0.000173
2024-08-27 00:15:00+00:00   -0.000060
2024-08-27 00:17:00+00:00   -0.000003
2024-08-27 00:19:00+00:00    0.000074
2024-08-27 00:20:00+00:00    0.000858
2024-08-27 00:21:00+00:00    0.000512
2024-08-27 00:22:00+00:00    0.000128
2024-08-27 00:23:00+00:00   -0.000215
2024-08-27 00:25:00+00:00   -0.000653
2024-08-27 00:26:00+00:00    0.000622
2024-08-27 00:27:00+00:00    0.000246
2024-08-27 00:29:00+00:00   -0.000286
2024-08-27 00:31:00+00:00   -0.000009
2024-08-27 00:32:00+00:00   -0.000764
2024-08-27 00:33:00+00:00    0.000384
Name: Close, dtype: float64