In [22]:
from pandas import read_csv as rc
from pandas import DataFrame as df
from tqdm import tqdm
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller
from datetime import datetime
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
from pmdarima.arima import auto_arima
from feature_calculations import *
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA
from scipy.stats import pearsonr
from statsmodels.tsa.api import VAR

In [24]:
def train_and_forecast_VAR(data, ar_order=1, k=1):
    """Fit a VAR model on ``data`` and forecast the first variable.

    Parameters
    ----------
    data : array-like
        Observations used both to fit the model and as the forecast's
        starting values. Presumably 2-D with one column per variable and the
        variable of interest in column 0 -- confirm against the caller.
    ar_order : int, default 1
        Lag order passed to ``VAR(...).fit``.
    k : int, default 1
        Number of steps to forecast.

    Returns
    -------
    The ``k`` forecasts for column 0, in REVERSED order (the furthest
    horizon first). With the default ``k=1`` this is a single value.
    """
    fitted_var = VAR(data).fit(ar_order)
    all_forecasts = fitted_var.forecast(y=data, steps=k)

    # Reverse the horizon axis and keep only the first variable's column.
    return all_forecasts[::-1, 0]

In [6]:
# Raw per-asset frames keyed as "Asset_<id>"; range(1, 2) loads asset 1 only.
# NOTE(review): the path below is absolute and machine-specific.
df_dict = dict()

for asset_id in range(1, 2):
    df_dict.update(
        {f'Asset_{asset_id}': rc(f"/Users/ryanlucas/Desktop/G-Research/Data/Asset-{asset_id}.csv")}
    )


In [5]:
# Pre-computed feature frames keyed as "Asset_<id>"; range(1, 2) loads asset 1 only.
# NOTE(review): the path below is absolute and machine-specific.
features_dict = dict()

for asset_id in range(1, 2):
    features_dict.update(
        {f'Asset_{asset_id}': rc(f"/Users/ryanlucas/Desktop/G-Research/Data/Asset-{asset_id}_features.csv")}
    )

In [7]:
# Drop the four leading "Unnamed: 0..." columns from the Asset_1 feature frame.
# The slice is positional, so it is guarded: on a re-run of this cell the
# leading columns are real features and the frame is left untouched instead of
# silently losing four more columns.
asset_1_features = features_dict['Asset_1']
if all(str(column).startswith('Unnamed') for column in asset_1_features.columns[:4]):
    features_dict['Asset_1'] = asset_1_features.iloc[:, 4:]

In [8]:
def train_and_forecast_AR1(data, ar_order=1):
    """Fit an ARIMA(ar_order, 1, 0) model on ``data`` and forecast one step.

    Parameters
    ----------
    data : array-like
        Series the model is fitted on.
    ar_order : int, default 1
        Autoregressive order; the differencing order is fixed at 1 and the
        moving-average order at 0.

    Returns
    -------
    The result of ``forecast(1)`` on the fitted model (a one-step forecast).
    """
    arima_spec = ARIMA(data, order=(ar_order, 1, 0))
    fitted_arima = arima_spec.fit(method="yule_walker")
    return fitted_arima.forecast(1)
    

In [18]:
def cross_validate_var(data, step_size, window_size, forecast_func = train_and_forecast_VAR):
    """Rolling-window evaluation of a one-step-ahead forecaster.

    For every window start ``s`` in ``range(0, len(data) - window_size,
    step_size)`` the forecaster is fitted on rows ``[s, s + window_size)`` and
    its single forecast is paired with the realised value of column 0 in the
    row immediately after the window.

    Parameters
    ----------
    data : 2-D numpy array
        Indexed as ``data[rows, columns]``; column 0 is the series whose
        forecasts are scored.
    step_size : int
        Distance between consecutive window starts.
    window_size : int
        Number of rows in each training window.
    forecast_func : callable
        Called with the training window; must return exactly one value
        (a scalar or a size-1 array/Series).

    Returns
    -------
    float
        Pearson correlation between the forecasts and the realised values.
    """
    forecasts = []
    actual_values = []
    window_starts = range(0, len(data) - window_size, step_size)
    for integer_position in tqdm(window_starts, position = 0, leave = True):
        window_end = integer_position + window_size
        train_data = data[integer_position:window_end, :]

        # .item() extracts the single forecast without relying on float() of a
        # 1-element array (deprecated by NumPy) and raises if the forecaster
        # returned more than one value.
        forecast = np.asarray(forecast_func(train_data)).item()
        forecasts.append(float(forecast))

        # The window covers rows [start, window_end), so the one-step-ahead
        # target is row `window_end` itself. Using window_end + 1 would score
        # the forecast against the wrong row and can index past the end of
        # `data` for the last window.
        actual_values.append(float(data[window_end, 0]))

    correlation, _ = pearsonr(forecasts, actual_values)
    return correlation

In [26]:
window_size_dict = {}

# The VAR system does not depend on the window size, so build it once
# instead of on every loop iteration.
exog = features_dict["Asset_1"]['Upper Shadow'], features_dict["Asset_1"]['Lower Shadow']
# Back-fill first, then forward-fill whatever is still missing at the tail.
# .bfill()/.ffill() replace the deprecated fillna(method=...) spelling.
# NOTE(review): back-filling copies later Target values into earlier rows.
endog = df_dict['Asset_1']["Target"].bfill().ffill()
# Column 0 is the target; columns 1 and 2 are the two shadow features.
var_system = np.column_stack([endog, exog[0], exog[1]])

for window_size in [40000, 100000, 500000]:
    # 10000 is the step between consecutive rolling-window starts.
    correlation = cross_validate_var(var_system, 10000, window_size, forecast_func=train_and_forecast_VAR)
    window_size_dict[window_size] = correlation

100%|██████████| 192/192 [00:20<00:00,  9.46it/s]
100%|██████████| 186/186 [00:41<00:00,  4.49it/s]
100%|██████████| 146/146 [02:30<00:00,  1.03s/it]


In [None]:
# Show the forecast/actual correlation recorded for each window size.
# NOTE(review): the saved output below holds (value, value) tuples and window
# sizes (2000 ... 20000) that the sweep loop in this notebook does not
# produce, so it appears to come from an earlier version -- re-run to refresh.
window_size_dict

{2000: (0.6240050604839891, 1.5083860668869513e-22),
 3000: (0.6928640850258662, 2.300779302557752e-29),
 5000: (0.7504413460938983, 9.806627145177511e-37),
 10000: (0.7755041211877594, 2.083058178585332e-40),
 20000: (0.7748152733083014, 4.280439587183226e-40),
 40000: (0.7777761088618098, 3.5736703630084055e-40),
 100000: (0.7836379688040276, 6.764631040792074e-40),
 500000: (0.8151816109575423, 5.716896011270598e-36)}

In [58]:
# Inspect the loaded feature frames (a dict keyed by "Asset_<id>").
# NOTE(review): the saved output below still lists the leading "Unnamed: 0..."
# columns that the `.iloc[:, 4:]` cell removes, so it was presumably captured
# on an unsliced frame -- confirm the execution order.
features_dict

{'Asset_1':          Unnamed: 0  Unnamed: 0.1  Unnamed: 0.1.1  Unnamed: 0.1.1.1  \
 0                 0             0               0                 0   
 1                 1             1               1                 1   
 2                 2             2               2                 2   
 3                 3             3               3                 3   
 4                 4             4               4                 4   
 ...             ...           ...             ...               ...   
 1956277     1956277       1956277         1956277           1956277   
 1956278     1956278       1956278         1956278           1956278   
 1956279     1956279       1956279         1956279           1956279   
 1956280     1956280       1956280         1956280           1956280   
 1956281     1956281       1956281         1956281           1956281   
 
          VWAP_Delta_5  VWAP_Delta_1  Upper Shadow  Lower Shadow  \
 0           -0.008725      0.000961    163.624000    1