# Vector Autoregressive 

In [1]:
import pandas as pd
import numpy as np

## Plotting
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import mean_squared_error

from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import acf, pacf, grangercausalitytests

## Import TimeSeriesSplit
from sklearn.model_selection import TimeSeriesSplit

import yfinance as yf

from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss

## Read the file

In [2]:
df = pd.read_csv('../Data/dataset_others.csv')
# Extract _change_in_price columns
change_in_price_cols = [col for col in df.columns if '_change_in_price' in col]
df_change = df[change_in_price_cols].copy()

In [3]:
df_change

Unnamed: 0,CADUSD=X_change_in_price,GM_change_in_price,JCI_change_in_price,TM_change_in_price,TRYUSD=X_change_in_price,^IXIC_change_in_price,F_change_in_price
0,-0.000418,0.099998,0.330002,-0.569992,-0.000812,15.439453,0.010000
1,0.001932,0.230000,0.160000,-0.309998,-0.000532,57.250000,0.090000
2,-0.000607,-0.139999,-0.279999,-1.449997,0.000039,-39.709961,-0.040000
3,-0.000238,-0.119999,-0.560001,0.059998,0.000314,-97.479492,-0.050000
4,-0.001081,-0.350002,-0.480000,0.459991,0.000223,-47.350586,-0.120000
...,...,...,...,...,...,...,...
1232,-0.001776,4.799999,-0.529999,0.759995,0.000026,39.759766,0.230000
1233,0.000905,-0.810001,-0.159996,1.400009,-0.000006,-303.119141,-0.049999
1234,-0.001072,-0.199997,0.379997,-0.900009,-0.000013,138.839844,0.179999
1235,-0.000866,-0.650002,-0.120003,1.570007,-0.000016,103.119141,-0.170000


## Tickers list

In [4]:
tickers = []
for col in df.columns:
    if '_RSI' in col:
        tickers.append(col.split('_')[0])

tickers.pop()

'F'

## Train - test set

In [5]:
# Train set for cross-validation
train = df_change.iloc[:-5].copy()
# Test set is saved for the best model
test = df_change.iloc[-5:].copy()

## Cross validation for each ticker model

In [6]:
## Test
df_test = df_change[['F_change_in_price', 'GM_change_in_price']]

# Train set for cross-validation
train_test = df_test.iloc[:-5].copy()
# Test set is saved for the best model
test_test = df_test.iloc[-5:].copy()

cv = TimeSeriesSplit(10, test_size=5)
MSE = np.zeros(40) # Smallest order should be 1

for train_index, test_index in cv.split(train_test):
    t_tt = train_test.iloc[train_index]
    t_ho = train_test.iloc[test_index]
    model = VAR(t_tt)
    for i in range(1, 41):
        var_model = model.fit(i)
        pred = var_model.forecast(t_tt[-i:].values, steps=5)
        MSE[i - 1] = np.sqrt(mean_squared_error(t_ho['F_change_in_price'].values, pred[:, 0]))

order_optimal = np.argmin(MSE) + 1

In [7]:
MSE

array([0.13739331, 0.13458652, 0.13366308, 0.12661237, 0.12812163,
       0.12954037, 0.12711169, 0.12143149, 0.12191102, 0.12018846,
       0.11872364, 0.11582741, 0.116997  , 0.12224228, 0.11686215,
       0.11724692, 0.13767682, 0.13759634, 0.12031377, 0.12560544,
       0.12594169, 0.12461681, 0.13660581, 0.13837208, 0.14369521,
       0.1428337 , 0.14319373, 0.13815179, 0.13867744, 0.13178538,
       0.13647811, 0.13568821, 0.13197315, 0.1315245 , 0.13030578,
       0.1193876 , 0.14046661, 0.14534909, 0.16048401, 0.16014163])

In [8]:
order_optimal

np.int64(12)

In [9]:
def best_VAR(train, forecast_length = 5):
    cv = TimeSeriesSplit(10, test_size=forecast_length)
    MSE = np.zeros(40) # Smallest order should be 1

    for train_index, test_index in cv.split(train):
        t_tt = train.iloc[train_index]
        t_ho = train.iloc[test_index]
        model = VAR(t_tt)
        for i in range(1, 41):
            var_model = model.fit(i)
            pred = var_model.forecast(t_tt[-i:].values, steps=forecast_length)
            MSE[i - 1] = np.sqrt(mean_squared_error(t_ho['F_change_in_price'].values, pred[:, 0]))
    
    order_optimal = np.argmin(MSE) + 1
    return order_optimal

In [10]:
train

Unnamed: 0,CADUSD=X_change_in_price,GM_change_in_price,JCI_change_in_price,TM_change_in_price,TRYUSD=X_change_in_price,^IXIC_change_in_price,F_change_in_price
0,-0.000418,0.099998,0.330002,-0.569992,-0.000812,15.439453,0.010000
1,0.001932,0.230000,0.160000,-0.309998,-0.000532,57.250000,0.090000
2,-0.000607,-0.139999,-0.279999,-1.449997,0.000039,-39.709961,-0.040000
3,-0.000238,-0.119999,-0.560001,0.059998,0.000314,-97.479492,-0.050000
4,-0.001081,-0.350002,-0.480000,0.459991,0.000223,-47.350586,-0.120000
...,...,...,...,...,...,...,...
1227,-0.001141,-0.780003,-1.080002,-5.550003,0.000020,-187.099609,-0.080000
1228,0.001146,1.160000,-0.209999,1.949997,0.000049,51.490234,0.190000
1229,0.001377,0.370003,0.860001,-0.300003,0.000026,6.529297,0.040000
1230,-0.002145,-0.200001,0.190002,0.330002,-0.000024,115.941406,0.030001


In [11]:
order_optimal = np.zeros(len(tickers))
order_optimal

array([0., 0., 0., 0., 0., 0.])

In [12]:
order_optimal

array([0., 0., 0., 0., 0., 0.])

## Compute optimal lag

In [13]:
c = ['Ticker', 'Optimal order', 'Accuracy']
accuracy = pd.DataFrame(columns=c)
accuracy

Unnamed: 0,Ticker,Optimal order,Accuracy


In [14]:
for i, t in enumerate(tickers):
    accuracy.loc[i, 'Ticker'] = t.split('_')[0]

accuracy

Unnamed: 0,Ticker,Optimal order,Accuracy
0,CADUSD=X,,
1,GM,,
2,JCI,,
3,TM,,
4,TRYUSD=X,,
5,^IXIC,,


In [None]:
for i in range(len(change_in_price_cols) - 1):
    tr_set = train[[change_in_price_cols[-1], change_in_price_cols[i]]]
    accuracy.loc[i, 'Optimal order'] = best_VAR(tr_set, 5)

In [None]:
accuracy

## Compute VAR predictions

In [None]:
prediction_cols = []
for t in tickers:
    prediction_cols.append(t + '_VAR')

prediction_cols

In [None]:
predictions = pd.DataFrame(columns=prediction_cols, index=test.index)

In [None]:
predictions

In [None]:
VAR_change_in_price = predictions.copy()

In [None]:
for i in predictions.index:
    print(i)

In [None]:
tickers

In [None]:
accuracy.loc[i, 'Optimal order']

In [None]:
train.iloc[-1, 0]

In [None]:
for i in range(6):
    t = prediction_cols[i]
    train_fit = train[[change_in_price_cols[-1], change_in_price_cols[i]]].copy()
    model = VAR(train_fit)
    k = accuracy.loc[i, 'Optimal order']
    results = model.fit(k)
    predictions.loc[:, t] = results.forecast(train_fit.values[-k:], steps=5)[:, 0]
    if i == 0:
        VAR_change_in_price.loc[i, t] = train.loc[-1, t]

In [None]:
predictions

In [None]:
predictions = pd.concat([predictions, test['F_change_in_price']], axis=1)

In [None]:
predictions

In [None]:
predictions

In [None]:
df.iloc[-5:]

## Compute F_close from VAR predictions change_in_price

In [None]:
df_close = df[['F_Close_pred']].copy()
#df_close['VAR'] = df[['F_Close_pred']].copy()
df_close[-5:] 

In [None]:
predictions.loc[1, 'F_change_in_price']

In [None]:
df_close.iloc[-5:] 

In [None]:
forecast = []

In [None]:
for i in predictions.index:
    forecast.append(df_close[i - 1] + )

In [None]:
for t in prediction_cols:
    for i in VAR_change_in_price.index:
        if i == 1232:
        VAR_change_in_price[i] =
for i in VAR_change_in_price.index:
    