# Import necessary libraries and data

In [55]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from math import *
from scipy.stats import jarque_bera
import statistics as st
from statsmodels.tsa.stattools import adfuller
from arch import arch_model
from statsmodels.stats.diagnostic import acorr_ljungbox
from arch.__future__ import reindexing

In [56]:
oil = pd.read_csv("oil.csv")
snp = pd.read_csv("snp.csv")
nasdaq = pd.read_csv("nasdaq.csv")

## Data Analysis

In [57]:
oil.head()

Unnamed: 0,Datetime,Open,High,Low,Close,Adj Close,Volume
0,2023-12-03 18:00:00-05:00,74.474998,74.724998,74.375,74.675003,74.675003,16
1,2023-12-03 18:02:00-05:00,74.675003,74.699997,74.574997,74.574997,74.574997,28
2,2023-12-03 18:04:00-05:00,74.599998,74.599998,74.474998,74.599998,74.599998,21
3,2023-12-03 18:06:00-05:00,74.5,74.525002,74.474998,74.474998,74.474998,12
4,2023-12-03 18:08:00-05:00,74.449997,74.5,74.400002,74.400002,74.400002,15


In [58]:
snp.head()

Unnamed: 0,Datetime,Open,High,Low,Close,Adj Close,Volume
0,2023-12-03 18:00:00-05:00,4603.25,4604.5,4601.0,4601.25,4601.25,1181
1,2023-12-03 18:02:00-05:00,4601.5,4602.0,4600.25,4600.5,4600.5,1120
2,2023-12-03 18:04:00-05:00,4600.75,4600.75,4600.0,4600.25,4600.25,785
3,2023-12-03 18:06:00-05:00,4600.5,4600.75,4598.0,4598.25,4598.25,976
4,2023-12-03 18:08:00-05:00,4598.25,4599.0,4597.5,4599.0,4599.0,784


In [59]:
nasdaq.head()

Unnamed: 0,Datetime,Open,High,Low,Close,Adj Close,Volume
0,2023-12-03 18:00:00-05:00,16022.0,16026.0,16014.5,16015.25,16015.25,627
1,2023-12-03 18:02:00-05:00,16015.75,16017.25,16005.5,16007.0,16007.0,737
2,2023-12-03 18:04:00-05:00,16007.25,16009.5,16003.25,16004.25,16004.25,475
3,2023-12-03 18:06:00-05:00,16004.5,16005.5,15993.0,15993.25,15993.25,579
4,2023-12-03 18:08:00-05:00,15994.0,15995.5,15988.25,15993.75,15993.75,643


### The timestamps in data don't match

In [60]:
df = oil+nasdaq+snp
df.isna().any()

Datetime     True
Open         True
High         True
Low          True
Close        True
Adj Close    True
Volume       True
dtype: bool

In [61]:
df.dropna(inplace = True)
df.isna().any()

Datetime     False
Open         False
High         False
Low          False
Close        False
Adj Close    False
Volume       False
dtype: bool

In [62]:
oil = oil.loc[oil.index.isin(df.index)]
snp = snp.loc[snp.index.isin(df.index)]
nasdaq = nasdaq.loc[nasdaq.index.isin(df.index)]

In [63]:
oil.head()

Unnamed: 0,Datetime,Open,High,Low,Close,Adj Close,Volume
0,2023-12-03 18:00:00-05:00,74.474998,74.724998,74.375,74.675003,74.675003,16
1,2023-12-03 18:02:00-05:00,74.675003,74.699997,74.574997,74.574997,74.574997,28
2,2023-12-03 18:04:00-05:00,74.599998,74.599998,74.474998,74.599998,74.599998,21
3,2023-12-03 18:06:00-05:00,74.5,74.525002,74.474998,74.474998,74.474998,12
4,2023-12-03 18:08:00-05:00,74.449997,74.5,74.400002,74.400002,74.400002,15


In [64]:
snp.head()

Unnamed: 0,Datetime,Open,High,Low,Close,Adj Close,Volume
0,2023-12-03 18:00:00-05:00,4603.25,4604.5,4601.0,4601.25,4601.25,1181
1,2023-12-03 18:02:00-05:00,4601.5,4602.0,4600.25,4600.5,4600.5,1120
2,2023-12-03 18:04:00-05:00,4600.75,4600.75,4600.0,4600.25,4600.25,785
3,2023-12-03 18:06:00-05:00,4600.5,4600.75,4598.0,4598.25,4598.25,976
4,2023-12-03 18:08:00-05:00,4598.25,4599.0,4597.5,4599.0,4599.0,784


In [65]:
nasdaq.head()

Unnamed: 0,Datetime,Open,High,Low,Close,Adj Close,Volume
0,2023-12-03 18:00:00-05:00,16022.0,16026.0,16014.5,16015.25,16015.25,627
1,2023-12-03 18:02:00-05:00,16015.75,16017.25,16005.5,16007.0,16007.0,737
2,2023-12-03 18:04:00-05:00,16007.25,16009.5,16003.25,16004.25,16004.25,475
3,2023-12-03 18:06:00-05:00,16004.5,16005.5,15993.0,15993.25,15993.25,579
4,2023-12-03 18:08:00-05:00,15994.0,15995.5,15988.25,15993.75,15993.75,643


## Adding our necessary features to the dataframes

In [66]:
def param_adder(df):
    df['bas'] = 2*(df['High']-df['Low'])/(df['High']+df['Low'])
    df['voSQ'] = np.log((df['Close']/df['Close'].shift())**2)
    df['voGK'] = 0.5*(np.log(df['High']) - np.log(df['Low']))**2 - (2*log(2) - 1)*(np.log(df['Close']) - np.log(df['Open']))**2
    df['voRS'] = (np.log(df['High']) - np.log(df['Open']))*(np.log(df['High']) - np.log(df['Close'])) + (np.log(df['Low']) - np.log(df['Open']))*(np.log(df['Low']) - np.log(df['Close']))
    df.dropna(inplace = True)

In [67]:
param_adder(oil)
param_adder(snp)
param_adder(nasdaq)

In [68]:
oil.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,bas,voSQ,voGK,voRS
count,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0
mean,72.305013,72.332505,72.277078,72.304672,72.304672,22.464621,0.000767,-7e-06,4.014433e-07,4.072195e-07
std,1.765517,1.765596,1.764994,1.765016,1.765016,199.383988,0.000848,0.001849,1.070805e-06,1.139029e-06
min,67.775002,67.875,67.699997,67.800003,67.800003,0.0,0.0,-0.019035,0.0,0.0
25%,71.0,71.025002,70.974998,71.0,71.0,3.0,0.0,-0.000703,0.0,0.0
50%,72.5,72.525002,72.449997,72.5,72.5,8.0,0.000669,0.0,5.740348e-08,0.0
75%,73.699997,73.724998,73.650002,73.699997,73.699997,19.0,0.001055,0.000702,3.610948e-07,3.748558e-07
max,76.150002,76.199997,76.125,76.150002,76.150002,10411.0,0.010496,0.022409,3.112745e-05,3.090117e-05


In [69]:
snp.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,bas,voSQ,voGK,voRS
count,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0
mean,4717.982311,4718.577349,4717.379003,4717.982081,4717.982081,2045.232,0.000254,4e-06,4.576761e-08,4.674481e-08
std,101.161338,101.123278,101.183342,101.154961,101.154961,16383.0,0.000251,0.000483,2.056193e-07,2.112501e-07
min,4549.5,4549.75,4548.75,4549.25,4549.25,0.0,0.0,-0.005904,0.0,0.0
25%,4605.0,4605.5,4604.5,4605.0,4605.0,149.0,0.000105,-0.000207,4.341655e-09,2.969966e-09
50%,4764.75,4766.0,4763.5,4764.75,4764.75,391.0,0.000164,0.0,1.203597e-08,1.166812e-08
75%,4811.25,4811.75,4810.75,4811.25,4811.25,2066.5,0.000317,0.000207,3.645117e-08,3.809114e-08
max,4841.5,4841.5,4841.0,4841.5,4841.5,1616545.0,0.006609,0.023728,1.847586e-05,1.856061e-05


In [70]:
nasdaq.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,bas,voSQ,voGK,voRS
count,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0
mean,16576.161483,16578.921559,16573.347367,16576.156521,16576.156521,729.347282,0.000338,5e-06,8.366872e-08,8.495606e-08
std,457.917178,457.554274,458.206995,457.891683,457.891683,5488.192193,0.000343,0.000637,2.807153e-07,2.896702e-07
min,15727.5,15737.25,15721.25,15727.5,15727.5,1.0,0.0,-0.008156,0.0,0.0
25%,16069.125,16072.0,16066.25,16069.25,16069.25,77.0,0.000131,-0.000204,6.060041e-09,5.67567e-09
50%,16696.0,16698.5,16694.75,16696.0,16696.0,178.0,0.000222,0.0,1.807406e-08,1.786368e-08
75%,17000.5,17003.25,16997.875,17000.75,17000.75,753.5,0.000422,0.000207,6.659883e-08,6.601616e-08
max,17162.5,17165.25,17159.0,17162.25,17162.25,636602.0,0.006868,0.030804,1.575842e-05,1.520173e-05


## Testing Normality, Presence of Unit Root, Autocorrelation etc.

In [71]:
def stats(df , final):
    
    for index in final.index:

        final['Mean'][index] = np.mean(df[index])
        final['SD'][index] = st.stdev(df[index])
        final['JB'][index] = jarque_bera(df[index]).pvalue
        final['ADF'][index] = adfuller(df[index])[1]
    
        model1 = arch_model(df[index], vol='ARCH', q=1)
        results = model1.fit()
        final['ARCH(1)'][index] = results.pvalues[2]
        
        model2 = arch_model(df[index], vol='ARCH', q=12)
        results = model2.fit()
        final['ARCH(12)'][index] = results.pvalues[2]
        
        final['LB(1)'][index] = acorr_ljungbox(df[index], lags=1)['lb_pvalue'][1]
        final['LB(12)'][index] = acorr_ljungbox(df[index], lags=12)['lb_pvalue'][1]

In [None]:
columns = ['Mean' , 'SD' , 'JB' , 'ADF' , 'ARCH(1)' , 'ARCH(12)' , 'LB(1)' , 'LB(12)' ]
rows = ['Volume' , 'bas' , 'voSQ' , 'voGK' , 'voRS']

params_oil = pd.DataFrame(columns = columns , index = rows)
params_snp = pd.DataFrame(columns = columns , index = rows)
params_nasdaq = pd.DataFrame(columns = columns , index = rows)

stats(oil , params_oil)
stats(snp , params_snp)
stats(nasdaq , params_nasdaq)

In [73]:
params_oil

Unnamed: 0,Mean,SD,JB,ADF,ARCH(1),ARCH(12),LB(1),LB(12)
Volume,22.464621,199.383988,0.0,0.0,0.999999,0.999999,0.710957,0.710957
bas,0.000767,0.000848,0.0,0.0,0.0,0.0,0.0,0.0
voSQ,-7e-06,0.001849,0.0,0.0,0.0,0.0,1e-06,1e-06
voGK,0.0,1e-06,0.0,0.0,1e-06,1e-06,0.0,0.0
voRS,0.0,1e-06,0.0,0.0,7e-06,7e-06,0.0,0.0


In [74]:
params_snp

Unnamed: 0,Mean,SD,JB,ADF,ARCH(1),ARCH(12),LB(1),LB(12)
Volume,2045.231569,16383.003546,0.0,0.0,1.0,1.0,2e-06,2e-06
bas,0.000254,0.000251,0.0,0.0,0.0,0.0,0.0,0.0
voSQ,4e-06,0.000483,0.0,0.0,0.0,0.0,0.415587,0.415587
voGK,0.0,0.0,0.0,0.0,0.005731,0.005731,0.0,0.0
voRS,0.0,0.0,0.0,0.0,0.002082,0.002082,0.0,0.0


In [75]:
params_nasdaq

Unnamed: 0,Mean,SD,JB,ADF,ARCH(1),ARCH(12),LB(1),LB(12)
Volume,729.347282,5488.192193,0.0,0.0,1.0,1.0,2e-06,2e-06
bas,0.000338,0.000343,0.0,0.0,0.225687,0.225687,0.0,0.0
voSQ,5e-06,0.000637,0.0,0.0,0.0,0.0,0.431115,0.431115
voGK,0.0,0.0,0.0,0.0,0.02973,0.02973,0.0,0.0
voRS,0.0,0.0,0.0,0.0,0.030983,0.030983,0.0,0.0


In [None]:
final = pd.DataFrame(columns = ['p' , 'q' , 'AIC' , 'BIC'])

for i in range(1,13):
    
    for j in range(1,13):
        
        model = arch_model(oil['voSQ'] , x = oil[rows] + snp[rows] , p = i , q = j)
        results = model.fit()
        final.loc[len(final)]= [i , j , results.aic , results.bic]
        
final['mult'] = final['BIC']*final['AIC']

In [77]:
final.sort_values(by = 'mult' , ascending = True)

Unnamed: 0,p,q,AIC,BIC,mult
46,4.0,11.0,4173.590955,4302.064339,1.795506e+07
45,4.0,10.0,26196.827141,26317.743267,6.894414e+08
36,4.0,1.0,-116638.860056,-116585.959251,1.359845e+10
135,12.0,4.0,-141300.325905,-141164.295263,1.994656e+10
134,12.0,3.0,-141325.296303,-141196.822919,1.995468e+10
...,...,...,...,...,...
0,1.0,1.0,-141799.451679,-141769.222648,2.010280e+10
2,1.0,3.0,-141836.484831,-141791.141283,2.011116e+10
1,1.0,2.0,-141866.537094,-141828.750805,2.012075e+10
119,10.0,12.0,142576.250002,142757.624191,2.035385e+10


## We choose EGARCH(1,1) based on the Information Criterias

### Correlations

In [78]:
tuples = [
    ('voSQ' , 'Equity'),
    ('voSQ' , 'Crude Oil'),
    ('voGK' , 'Equity'),
    ('voGK' , 'Crude Oil'),
    ('voRS' , 'Equity'),
    ('voRS' , 'Crude Oil'),
]
columns = pd.MultiIndex.from_tuples(tuples)

struct = pd.DataFrame(index = ['bas_E' , 'bas_O' , 'Volume_E' , 'Volume_O' , 'vo_E' , 'vo_O'] , columns = columns)

corr_snp = struct
corr_nasdaq = struct

corr_snp_lagged = struct
corr_nasdaq_lagged = struct

In [79]:
def corr_maker(vo , eq , df):
    
    df.loc['bas_E' , (vo , 'Equity' ) ] = np.corrcoef(eq[vo] , eq['bas'])[0,1]
    df.loc['bas_O' , (vo , 'Equity' ) ] = np.corrcoef(eq[vo] , oil['bas'])[0,1]
    df.loc['Volume_E' , (vo , 'Equity' ) ] = np.corrcoef(eq[vo] , eq['Volume'])[0,1]
    df.loc['Volume_O' , (vo , 'Equity' ) ] = np.corrcoef(eq[vo] , oil['Volume'])[0,1]
    df.loc['vo_E' , (vo , 'Equity' ) ] = np.corrcoef(eq[vo] , eq[vo])[0,1]
    df.loc['vo_O' , (vo , 'Equity' ) ] = np.corrcoef(eq[vo] , oil[vo])[0,1]
    
    df.loc['bas_E' , (vo , 'Crude Oil' ) ] = np.corrcoef(oil[vo] , eq['bas'])[0,1]
    df.loc['bas_O' , (vo , 'Crude Oil' ) ] = np.corrcoef(oil[vo] , oil['bas'])[0,1]
    df.loc['Volume_E' , (vo , 'Crude Oil' ) ] = np.corrcoef(oil[vo] , eq['Volume'])[0,1]
    df.loc['Volume_O' , (vo , 'Crude Oil' ) ] = np.corrcoef(oil[vo] , oil['Volume'])[0,1]
    df.loc['vo_E' , (vo , 'Crude Oil' ) ] = np.corrcoef(oil[vo] , eq[vo])[0,1]
    df.loc['vo_O' , (vo , 'Crude Oil' ) ] = np.corrcoef(oil[vo] , oil[vo])[0,1]


In [80]:
corr_maker('voSQ' , snp , corr_snp)
corr_maker('voGK' , snp , corr_snp)
corr_maker('voRS' , snp , corr_snp)

corr_maker('voSQ' , nasdaq , corr_nasdaq)
corr_maker('voGK' , nasdaq , corr_nasdaq)
corr_maker('voRS' , nasdaq , corr_nasdaq)

In [81]:
corr_snp

Unnamed: 0_level_0,voSQ,voSQ,voGK,voGK,voRS,voRS
Unnamed: 0_level_1,Equity,Crude Oil,Equity,Crude Oil,Equity,Crude Oil
bas_E,-0.046133,0.002481,0.775769,0.005819,0.737901,-0.000847
bas_O,-0.015563,0.001881,-0.024153,0.810131,-0.022934,0.708569
Volume_E,0.392352,0.000114,0.125013,-0.000857,0.124522,-0.003251
Volume_O,0.002372,-0.010406,0.010969,0.0713,0.012021,0.062645
vo_E,1.0,-0.006563,1.0,-0.01019,1.0,-0.009924
vo_O,-0.006563,1.0,-0.01019,1.0,-0.009924,1.0


In [82]:
corr_nasdaq

Unnamed: 0_level_0,voSQ,voSQ,voGK,voGK,voRS,voRS
Unnamed: 0_level_1,Equity,Crude Oil,Equity,Crude Oil,Equity,Crude Oil
bas_E,-0.046133,0.002481,0.775769,0.005819,0.737901,-0.000847
bas_O,-0.015563,0.001881,-0.024153,0.810131,-0.022934,0.708569
Volume_E,0.392352,0.000114,0.125013,-0.000857,0.124522,-0.003251
Volume_O,0.002372,-0.010406,0.010969,0.0713,0.012021,0.062645
vo_E,1.0,-0.006563,1.0,-0.01019,1.0,-0.009924
vo_O,-0.006563,1.0,-0.01019,1.0,-0.009924,1.0


In [83]:
nasdaq.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,bas,voSQ,voGK,voRS
count,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0
mean,16576.161483,16578.921559,16573.347367,16576.156521,16576.156521,729.347282,0.000338,5e-06,8.366872e-08,8.495606e-08
std,457.917178,457.554274,458.206995,457.891683,457.891683,5488.192193,0.000343,0.000637,2.807153e-07,2.896702e-07
min,15727.5,15737.25,15721.25,15727.5,15727.5,1.0,0.0,-0.008156,0.0,0.0
25%,16069.125,16072.0,16066.25,16069.25,16069.25,77.0,0.000131,-0.000204,6.060041e-09,5.67567e-09
50%,16696.0,16698.5,16694.75,16696.0,16696.0,178.0,0.000222,0.0,1.807406e-08,1.786368e-08
75%,17000.5,17003.25,16997.875,17000.75,17000.75,753.5,0.000422,0.000207,6.659883e-08,6.601616e-08
max,17162.5,17165.25,17159.0,17162.25,17162.25,636602.0,0.006868,0.030804,1.575842e-05,1.520173e-05


In [84]:
snp.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,bas,voSQ,voGK,voRS
count,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0,14147.0
mean,4717.982311,4718.577349,4717.379003,4717.982081,4717.982081,2045.232,0.000254,4e-06,4.576761e-08,4.674481e-08
std,101.161338,101.123278,101.183342,101.154961,101.154961,16383.0,0.000251,0.000483,2.056193e-07,2.112501e-07
min,4549.5,4549.75,4548.75,4549.25,4549.25,0.0,0.0,-0.005904,0.0,0.0
25%,4605.0,4605.5,4604.5,4605.0,4605.0,149.0,0.000105,-0.000207,4.341655e-09,2.969966e-09
50%,4764.75,4766.0,4763.5,4764.75,4764.75,391.0,0.000164,0.0,1.203597e-08,1.166812e-08
75%,4811.25,4811.75,4810.75,4811.25,4811.25,2066.5,0.000317,0.000207,3.645117e-08,3.809114e-08
max,4841.5,4841.5,4841.0,4841.5,4841.5,1616545.0,0.006609,0.023728,1.847586e-05,1.856061e-05


## Training the models

In [85]:
oil_train = oil.loc[:int(0.8*len(oil))]
snp_train = snp.loc[:int(0.8*len(snp))]
nasdaq_train = nasdaq.loc[:int(0.8*len(nasdaq))]

In [86]:
tuples1 = [
    ('voSQ' , 'model1'),
    ('voSQ' , 'model2'),
    ('voSQ' , 'model3'),
    ('voGK' , 'model1'),
    ('voGK' , 'model2'),
    ('voGK' , 'model3'),
    ('voRS' , 'model1'),
    ('voRS' , 'model2'),
    ('voRS' , 'model3'),
]

tuples2 = [
    ('Equity' , 'AIC'),
    ('Equity' , 'SIC'),
    ('Equity' , 'ADJR'),
    ('Oil' , 'AIC'),
    ('Oil' , 'SIC'),
    ('Oil' , 'ADJR'),
]

columns = pd.MultiIndex.from_tuples(tuples1)
indices = pd.MultiIndex.from_tuples(tuples2)
struct = pd.DataFrame(index = indices , columns = columns)

eq_snp = struct
eq_nasdaq = struct

In [87]:
def model1(df , vo , instrument , final):
    
    model = arch_model(y = df[[vo]] , x = df[[vo]] , vol = 'EGARCH')
    results = model.fit()
    
    if instrument == 'Equity':
        
        final.loc[('Equity', 'AIC'), (vo, 'model1')] = results.aic
        final.loc[('Equity', 'SIC'), (vo, 'model1')] = results.bic
        
        rsquared = results.rsquared
        n = len(df)
        k = 2
        final.loc[('Equity', 'ADJR'), (vo, 'model1')] = 1 - ((1 - rsquared) * (n - 1) / (n - k - 1))
        
    else:
        
        final.loc[('Oil', 'AIC'), (vo, 'model1')] = results.aic
        final.loc[('Oil', 'SIC'), (vo, 'model1')] = results.bic
        
        rsquared = results.rsquared
        n = len(df)
        k = 2
        final.loc[('Oil', 'ADJR'), (vo, 'model1')] = 1 - ((1 - rsquared) * (n - 1) / (n - k - 1))

        
def model2(df , vo , instrument , final):
    
    model = arch_model(y = df[vo] , x = df[[vo , 'bas' , 'Volume']] , vol = 'EGARCH')
    results = model.fit()
    
    if instrument == 'Equity':
        
        final.loc[('Equity', 'AIC'), (vo, 'model2')] = results.aic
        final.loc[('Equity', 'SIC'), (vo, 'model2')] = results.bic
        
        rsquared = results.rsquared
        n = len(df)
        k = 4
        final.loc[('Equity', 'ADJR'), (vo, 'model2')] = 1 - ((1 - rsquared) * (n - 1) / (n - k - 1))
        
    else:
        
        final.loc[('Oil', 'AIC'), (vo, 'model2')] = results.aic
        final.loc[('Oil', 'SIC'), (vo, 'model2')] = results.bic
        
        rsquared = results.rsquared
        n = len(df)
        k = 4
        final.loc[('Oil', 'ADJR'), (vo, 'model2')] = 1 - ((1 - rsquared) * (n - 1) / (n - k - 1))

        
        
def model3(df1 , df2 , vo , instrument , final):
    
    model = arch_model(y = df1[vo] , x = pd.concat([df1[[vo , 'bas' , 'Volume']], df2[[vo , 'bas' , 'Volume']]], axis=1),vol = 'EGARCH')
    results = model.fit()
    
    if instrument == 'Equity':
        
        final.loc[('Equity', 'AIC'), (vo, 'model3')] = results.aic
        final.loc[('Equity', 'SIC'), (vo, 'model3')] = results.bic
        
        rsquared = results.rsquared
        n = len(df1)
        k = 7
        final.loc[('Equity', 'ADJR'), (vo, 'model3')] = 1 - ((1 - rsquared) * (n - 1) / (n - k - 1))
        
    else:
        
        final.loc[('Oil', 'AIC'), (vo, 'model3')] = results.aic
        final.loc[('Oil', 'SIC'), (vo, 'model3')] = results.bic
        
        rsquared = results.rsquared
        n = len(df1)
        k = 7
        final.loc[('Oil', 'ADJR'), (vo, 'model3')] = 1 - ((1 - rsquared) * (n - 1) / (n - k - 1))

In [None]:
model1(oil_train , 'voSQ' , 'Oil' , eq_snp)
model1(snp_train , 'voSQ' , 'Equity' , eq_snp)

model1(oil_train , 'voGK' , 'Oil' , eq_snp)
model1(snp_train , 'voGK' , 'Equity' , eq_snp)

model1(oil_train , 'voRS' , 'Oil' , eq_snp)
model1(snp_train , 'voRS' , 'Equity' , eq_snp)

model2(oil_train , 'voSQ' , 'Oil' , eq_snp)
model2(snp_train , 'voSQ' , 'Equity' , eq_snp)

model2(oil_train , 'voGK' , 'Oil' , eq_snp)
model2(snp_train , 'voGK' , 'Equity' , eq_snp)

model2(oil_train , 'voRS' , 'Oil' , eq_snp)
model2(snp_train , 'voRS' , 'Equity' , eq_snp)

model3(oil_train, snp_train , 'voSQ' , 'Oil' , eq_snp)
model3(snp_train, oil_train , 'voSQ' , 'Equity' , eq_snp)

model3(oil_train, snp_train , 'voGK' , 'Oil' , eq_snp)
model3(snp_train, oil_train , 'voGK' , 'Equity' , eq_snp)

model3(oil_train, snp_train , 'voRS' , 'Oil' , eq_snp)
model3(snp_train, oil_train , 'voRS' , 'Equity' , eq_snp)


In [None]:
model1(oil_train , 'voSQ' , 'Oil' , eq_nasdaq)
model1(nasdaq_train , 'voSQ' , 'Equity' , eq_nasdaq)

model1(oil_train , 'voGK' , 'Oil' , eq_nasdaq)
model1(nasdaq_train , 'voGK' , 'Equity' , eq_nasdaq)

model1(oil_train , 'voRS' , 'Oil' , eq_nasdaq)
model1(nasdaq_train , 'voRS' , 'Equity' , eq_nasdaq)

model2(oil_train , 'voSQ' , 'Oil' , eq_nasdaq)
model2(nasdaq_train , 'voSQ' , 'Equity' , eq_nasdaq)

model2(oil_train , 'voGK' , 'Oil' , eq_nasdaq)
model2(nasdaq_train , 'voGK' , 'Equity' , eq_nasdaq)

model2(oil_train , 'voRS' , 'Oil' , eq_nasdaq)
model2(nasdaq_train , 'voRS' , 'Equity' , eq_nasdaq)

model3(oil_train, nasdaq_train , 'voSQ' , 'Oil' , eq_nasdaq)
model3(nasdaq_train, oil_train , 'voSQ' , 'Equity' , eq_nasdaq)

model3(oil_train, nasdaq_train , 'voGK' , 'Oil' , eq_nasdaq)
model3(nasdaq_train, oil_train , 'voGK' , 'Equity' , eq_nasdaq)

model3(oil_train, nasdaq_train , 'voRS' , 'Oil' , eq_nasdaq)
model3(nasdaq_train, oil_train , 'voRS' , 'Equity' , eq_nasdaq)


In [90]:
eq_snp

Unnamed: 0_level_0,Unnamed: 1_level_0,voSQ,voSQ,voSQ,voGK,voGK,voGK,voRS,voRS,voRS
Unnamed: 0_level_1,Unnamed: 1_level_1,model1,model2,model3,model1,model2,model3,model1,model2,model3
Equity,AIC,188680345.587983,188680345.587983,188680345.587983,34949610033.05357,34949610033.05357,34949610033.05357,4330105030.848904,4330105030.848904,4330105030.848904
Equity,SIC,188680374.924229,188680374.924229,188680374.924229,34949610062.38982,34949610062.38982,34949610062.38982,4330105060.185149,4330105060.185149,4330105060.185149
Equity,ADJR,-0.000177,-0.000354,-0.000619,-0.000177,-0.000354,-0.000619,-0.000177,-0.000354,-0.000619
Oil,AIC,335183699.363222,335183699.363222,335183699.363222,8265896523.888433,8265896523.888433,8265896523.888433,7074501065.186553,7074501065.186553,7074501065.186553
Oil,SIC,335183728.699467,335183728.699467,335183728.699467,8265896553.224679,8265896553.224679,8265896553.224679,7074501094.522799,7074501094.522799,7074501094.522799
Oil,ADJR,-0.000177,-0.000354,-0.000619,-0.000177,-0.000354,-0.000619,-0.000177,-0.000354,-0.000619


In [91]:
eq_nasdaq

Unnamed: 0_level_0,Unnamed: 1_level_0,voSQ,voSQ,voSQ,voGK,voGK,voGK,voRS,voRS,voRS
Unnamed: 0_level_1,Unnamed: 1_level_1,model1,model2,model3,model1,model2,model3,model1,model2,model3
Equity,AIC,188680345.587983,188680345.587983,188680345.587983,34949610033.05357,34949610033.05357,34949610033.05357,4330105030.848904,4330105030.848904,4330105030.848904
Equity,SIC,188680374.924229,188680374.924229,188680374.924229,34949610062.38982,34949610062.38982,34949610062.38982,4330105060.185149,4330105060.185149,4330105060.185149
Equity,ADJR,-0.000177,-0.000354,-0.000619,-0.000177,-0.000354,-0.000619,-0.000177,-0.000354,-0.000619
Oil,AIC,335183699.363222,335183699.363222,335183699.363222,8265896523.888433,8265896523.888433,8265896523.888433,7074501065.186553,7074501065.186553,7074501065.186553
Oil,SIC,335183728.699467,335183728.699467,335183728.699467,8265896553.224679,8265896553.224679,8265896553.224679,7074501094.522799,7074501094.522799,7074501094.522799
Oil,ADJR,-0.000177,-0.000354,-0.000619,-0.000177,-0.000354,-0.000619,-0.000177,-0.000354,-0.000619


### Correlations for lagged effect

In [92]:
def lagged_corr_maker(vo , eq , df , oil = oil):
    
    df.loc['bas_E' , (vo , 'Equity' ) ] = np.corrcoef(eq[vo][1:] , eq['bas'][:len(eq) - 1])[0,1]
    df.loc['bas_O' , (vo , 'Equity' ) ] = np.corrcoef(eq[vo][1:] , oil['bas'][:len(eq) - 1])[0,1]
    df.loc['Volume_E' , (vo , 'Equity' ) ] = np.corrcoef(eq[vo][1:] , eq['Volume'][:len(eq) - 1])[0,1]
    df.loc['Volume_O' , (vo , 'Equity' ) ] = np.corrcoef(eq[vo][1:] , oil['Volume'][:len(eq) - 1])[0,1]
    df.loc['vo_E' , (vo , 'Equity' ) ] = np.corrcoef(eq[vo][1:] , eq[vo][:len(eq) - 1])[0,1]
    df.loc['vo_O' , (vo , 'Equity' ) ] = np.corrcoef(eq[vo][1:] , oil[vo][:len(eq) - 1])[0,1]
    
    df.loc['bas_E' , (vo , 'Crude Oil' ) ] = np.corrcoef(oil[vo][1:] , eq['bas'][:len(eq) - 1])[0,1]
    df.loc['bas_O' , (vo , 'Crude Oil' ) ] = np.corrcoef(oil[vo][1:] , oil['bas'][:len(eq) - 1])[0,1]
    df.loc['Volume_E' , (vo , 'Crude Oil' ) ] = np.corrcoef(oil[vo][1:] , eq['Volume'][:len(eq) - 1])[0,1]
    df.loc['Volume_O' , (vo , 'Crude Oil' ) ] = np.corrcoef(oil[vo][1:] , oil['Volume'][:len(eq) - 1])[0,1]
    df.loc['vo_E' , (vo , 'Crude Oil' ) ] = np.corrcoef(oil[vo][1:] , eq[vo][:len(eq) - 1])[0,1]
    df.loc['vo_O' , (vo , 'Crude Oil' ) ] = np.corrcoef(oil[vo][1:] , oil[vo][:len(eq) - 1])[0,1]


In [93]:
lagged_corr_maker('voSQ' , snp_train , corr_snp_lagged , oil_train)
lagged_corr_maker('voGK' , snp_train , corr_snp_lagged , oil_train)
lagged_corr_maker('voRS' , snp_train , corr_snp_lagged , oil_train)

corr_snp_lagged

Unnamed: 0_level_0,voSQ,voSQ,voGK,voGK,voRS,voRS
Unnamed: 0_level_1,Equity,Crude Oil,Equity,Crude Oil,Equity,Crude Oil
bas_E,0.031757,-0.00195,0.3298,-0.011098,0.326497,-0.019049
bas_O,-0.017624,-0.001248,-0.01438,0.41257,-0.012713,0.394054
Volume_E,0.0169,0.004199,0.053385,0.041329,0.053406,0.063947
Volume_O,0.011302,-0.00452,-0.002845,0.041154,-0.002387,0.039312
vo_E,0.000531,0.005903,0.216553,-0.008007,0.217474,-0.010958
vo_O,-0.007571,-0.035324,-0.004601,0.397068,-0.0097,0.337863


In [94]:
lagged_corr_maker('voSQ' , nasdaq_train , corr_nasdaq_lagged , oil_train)
lagged_corr_maker('voGK' , nasdaq_train , corr_nasdaq_lagged , oil_train)
lagged_corr_maker('voRS' , nasdaq_train , corr_nasdaq_lagged , oil_train)

corr_nasdaq_lagged

Unnamed: 0_level_0,voSQ,voSQ,voGK,voGK,voRS,voRS
Unnamed: 0_level_1,Equity,Crude Oil,Equity,Crude Oil,Equity,Crude Oil
bas_E,0.051443,-0.003299,0.48581,-0.016366,0.484996,-0.024128
bas_O,-0.016732,-0.001248,-0.036663,0.41257,-0.036118,0.394054
Volume_E,0.022709,-0.00113,0.109359,-0.005664,0.124328,-0.007943
Volume_O,0.008637,-0.00452,0.00641,0.041154,0.006259,0.039312
vo_E,-0.007276,0.001067,0.400283,-0.017433,0.382318,-0.017953
vo_O,-0.009912,-0.035324,-0.015025,0.397068,-0.016192,0.337863


## Testing on next 5%,10% and 20% of the data

In [95]:
tuples = [
    ('2-1' , 'Theil_U'),
    ('2-1' , 'pvalue'),
    ('3-1' , 'Theil_U'),
    ('3-1' , 'pvalue'),
    ('3-2' , 'Theil_U'),
    ('3-2' , 'pvalue'),
]

columns = pd.MultiIndex.from_tuples(tuples)
indices = ['voSQ_E' , 'voSQ_O' ,'voGK_E' , 'voGK_O' ,'voRS_E' , 'voRS_O']

struct = pd.DataFrame(columns = columns , index = indices)

in_sample_snp = struct
in_sample_nasdaq = struct

out_sample_snp_85 = struct
out_sample_snp_90 = struct
out_sample_snp_100 = struct

out_sample_nasdaq_85 = struct
out_sample_nasdaq_90 = struct
out_sample_nasdaq_100 = struct

In [96]:
def dm_test(actual_lst, pred1_lst, pred2_lst, h = 1, crit="MSE", power = 2):
    # Routine for checking errors
    def error_check():
        rt = 0
        msg = ""
        # Check if h is an integer
        if (not isinstance(h, int)):
            rt = -1
            msg = "The type of the number of steps ahead (h) is not an integer."
            return (rt,msg)
        # Check the range of h
        if (h < 1):
            rt = -1
            msg = "The number of steps ahead (h) is not large enough."
            return (rt,msg)
        len_act = len(actual_lst)
        len_p1  = len(pred1_lst)
        len_p2  = len(pred2_lst)
        # Check if lengths of actual values and predicted values are equal
        if (len_act != len_p1 or len_p1 != len_p2 or len_act != len_p2):
            rt = -1
            msg = "Lengths of actual_lst, pred1_lst and pred2_lst do not match."
            return (rt,msg)
        # Check range of h
        if (h >= len_act):
            rt = -1
            msg = "The number of steps ahead is too large."
            return (rt,msg)
        # Check if criterion supported
        if (crit != "MSE" and crit != "MAPE" and crit != "MAD" and crit != "poly"):
            rt = -1
            msg = "The criterion is not supported."
            return (rt,msg)  
        # Check if every value of the input lists are numerical values
        from re import compile as re_compile
        comp = re_compile("^\d+?\.\d+?$")  
        def compiled_regex(s):
            """ Returns True is string is a number. """
            if comp.match(s) is None:
                return s.isdigit()
            return True
#         for actual, pred1, pred2 in zip(actual_lst, pred1_lst, pred2_lst):
#             is_actual_ok = compiled_regex(str(abs(actual)))
#             is_pred1_ok = compiled_regex(str(abs(pred1)))
#             is_pred2_ok = compiled_regex(str(abs(pred2)))
#             if (not (is_actual_ok and is_pred1_ok and is_pred2_ok)):  
#                 msg = "An element in the actual_lst, pred1_lst or pred2_lst is not numeric."
#                 rt = -1
#                 return (rt,msg)
        return (rt,msg)
    
    # Error check
    error_code = error_check()
    # Raise error if cannot pass error check
    if (error_code[0] == -1):
        raise SyntaxError(error_code[1])
        return
    # Import libraries
    from scipy.stats import t
    import collections
    import pandas as pd
    import numpy as np
    
    # Initialise lists
    e1_lst = []
    e2_lst = []
    d_lst  = []
    
    # convert every value of the lists into real values
    actual_lst = pd.Series(actual_lst).apply(lambda x: float(x)).tolist()
    pred1_lst = pd.Series(pred1_lst).apply(lambda x: float(x)).tolist()
    pred2_lst = pd.Series(pred2_lst).apply(lambda x: float(x)).tolist()
    
    # Length of lists (as real numbers)
    T = float(len(actual_lst))
    
    # construct d according to crit
    if (crit == "MSE"):
        for actual,p1,p2 in zip(actual_lst,pred1_lst,pred2_lst):
            e1_lst.append((actual - p1)**2)
            e2_lst.append((actual - p2)**2)
        for e1, e2 in zip(e1_lst, e2_lst):
            d_lst.append(e1 - e2)
    elif (crit == "MAD"):
        for actual,p1,p2 in zip(actual_lst,pred1_lst,pred2_lst):
            e1_lst.append(abs(actual - p1))
            e2_lst.append(abs(actual - p2))
        for e1, e2 in zip(e1_lst, e2_lst):
            d_lst.append(e1 - e2)
    elif (crit == "MAPE"):
        for actual,p1,p2 in zip(actual_lst,pred1_lst,pred2_lst):
            e1_lst.append(abs((actual - p1)/actual))
            e2_lst.append(abs((actual - p2)/actual))
        for e1, e2 in zip(e1_lst, e2_lst):
            d_lst.append(e1 - e2)
    elif (crit == "poly"):
        for actual,p1,p2 in zip(actual_lst,pred1_lst,pred2_lst):
            e1_lst.append(((actual - p1))**(power))
            e2_lst.append(((actual - p2))**(power))
        for e1, e2 in zip(e1_lst, e2_lst):
            d_lst.append(e1 - e2)    
    
    # Mean of d        
    mean_d = pd.Series(d_lst).mean()
    
    # Find autocovariance and construct DM test statistics
    def autocovariance(Xi, N, k, Xs):
        autoCov = 0
        T = float(N)
        for i in np.arange(0, N-k):
              autoCov += ((Xi[i+k])-Xs)*(Xi[i]-Xs)
        return (1/(T))*autoCov
    gamma = []
    for lag in range(0,h):
        gamma.append(autocovariance(d_lst,len(d_lst),lag,mean_d)) # 0, 1, 2
    V_d = (gamma[0] + 2*sum(gamma[1:]))/T
    DM_stat=V_d**(-0.5)*mean_d
    harvey_adj=((T+1-2*h+h*(h-1)/T)/T)**(0.5)
    DM_stat = harvey_adj*DM_stat
    # Find p-value
    p_value = 2*t.cdf(-abs(DM_stat), df = T - 1)
    # Construct named tuple for return
    dm_return = collections.namedtuple('dm_return', 'DM p_value')
    
    rt = dm_return(DM = DM_stat, p_value = p_value)
    
    return rt

  comp = re_compile("^\d+?\.\d+?$")


In [97]:
volatilities = ['voSQ' , 'voGK' , 'voRS']

def in_sample(eq , df):
    for vo in volatilities:
        model1 = arch_model(y = oil_train[vo] , x = oil_train[vo] , vol = 'EGARCH')
        results1 = model1.fit()
        forecast1 = results1.conditional_volatility.to_frame()
        msfe1_oil = ((oil_train[vo] - forecast1['cond_vol']) ** 2).mean()
        
        model2 = arch_model(y = oil_train[vo] , x = [[vo , 'bas' , 'Volume']] , vol = 'EGARCH')
        results2 = model2.fit()
        forecast2 = results2.conditional_volatility.to_frame()
        msfe2_oil = ((oil_train[vo] - forecast2['cond_vol'])**2).mean()
        
        model3 = arch_model(y = oil_train[vo] , x = pd.concat([oil_train[[vo , 'bas' , 'Volume']], eq[[vo , 'bas' , 'Volume']]], axis=1),vol = 'EGARCH')
        results3 = model3.fit()
        forecast3 = results3.conditional_volatility.to_frame()
        msfe3_oil = ((oil_train[vo] - forecast3['cond_vol'])**2).mean()
        
        x,df.loc[f'{vo}_E' , ('2-1' , 'pvalue')] = dm_test(oil_train[vo].to_numpy() , forecast1['cond_vol'].to_numpy() , forecast2['cond_vol'].to_numpy())
        x,df.loc[f'{vo}_E' , ('3-1' , 'pvalue')] = dm_test(oil_train[vo].to_numpy() , forecast1['cond_vol'].to_numpy() , forecast3['cond_vol'].to_numpy())
        x,df.loc[f'{vo}_E' , ('3-2' , 'pvalue')] = dm_test(oil_train[vo].to_numpy() , forecast3['cond_vol'].to_numpy() , forecast2['cond_vol'].to_numpy())
        
        print(dm_test(oil_train[vo].to_list() , forecast1['cond_vol'].to_numpy() , forecast2['cond_vol'].to_numpy()))
        
        df.loc[f'{vo}_E' , ('2-1' , 'Theil_U')] = msfe2_oil/msfe1_oil
        df.loc[f'{vo}_E' , ('3-1' , 'Theil_U')] = msfe3_oil/msfe1_oil
        df.loc[f'{vo}_E' , ('3-2' , 'Theil_U')] = msfe3_oil/msfe2_oil

        model1 = arch_model(y = eq[vo] , x = eq[vo] , vol = 'EGARCH')
        result1 = model1.fit()
        forecast = results1.conditional_volatility.to_frame()
        msfe1_eq = ((eq[vo] - forecast['cond_vol']) ** 2).mean()
        
        model2 = arch_model(y = eq[vo] , x = [[vo , 'bas' , 'Volume']] , vol = 'EGARCH')
        results2 = model2.fit()
        forecast = results2.conditional_volatility.to_frame()
        msfe2_eq = ((eq[vo] - forecast['cond_vol'])**2).mean()
        
        model3 = arch_model(y = eq[vo] , x = pd.concat([oil_train[[vo , 'bas' , 'Volume']], eq[[vo , 'bas' , 'Volume']]], axis=1),vol = 'EGARCH')
        results3 = model3.fit()
        forecast = results3.conditional_volatility.to_frame()
        msfe3_eq = ((eq[vo] - forecast['cond_vol'])**2).mean()
        
        x,df.loc[f'{vo}_O' , ('2-1' , 'pvalue')] = dm_test(oil_train[vo].to_list() , forecast1['cond_vol'].to_numpy() , forecast2['cond_vol'].to_numpy())
        x,df.loc[f'{vo}_O' , ('3-1' , 'pvalue')] = dm_test(oil_train[vo].to_list() , forecast1['cond_vol'].to_numpy() , forecast3['cond_vol'].to_numpy())
        x,df.loc[f'{vo}_O' , ('3-2' , 'pvalue')] = dm_test(oil_train[vo].to_list() , forecast3['cond_vol'].to_numpy() , forecast2['cond_vol'].to_numpy())
        
        df.loc[f'{vo}_O' , ('2-1' , 'Theil_U')] = msfe2_eq/msfe1_eq
        df.loc[f'{vo}_O' , ('3-1' , 'Theil_U')] = msfe3_eq/msfe1_eq
        df.loc[f'{vo}_O' , ('3-2' , 'Theil_U')] = msfe3_eq/msfe2_eq
 

In [None]:
in_sample(snp_train , in_sample_snp)
in_sample(nasdaq_train , in_sample_nasdaq)

In [99]:
in_sample_snp

Unnamed: 0_level_0,2-1,2-1,3-1,3-1,3-2,3-2
Unnamed: 0_level_1,Theil_U,pvalue,Theil_U,pvalue,Theil_U,pvalue
voSQ_E,1.0,,1.0,,1.0,
voSQ_O,1.991145,,1.991145,,1.0,
voGK_E,1.0,,1.0,,1.0,
voGK_O,3.000742,,3.000742,,1.0,
voRS_E,1.0,,1.0,,1.0,
voRS_O,2.922141,,2.922141,,1.0,


In [100]:
in_sample_nasdaq

Unnamed: 0_level_0,2-1,2-1,3-1,3-1,3-2,3-2
Unnamed: 0_level_1,Theil_U,pvalue,Theil_U,pvalue,Theil_U,pvalue
voSQ_E,1.0,,1.0,,1.0,
voSQ_O,1.991145,,1.991145,,1.0,
voGK_E,1.0,,1.0,,1.0,
voGK_O,3.000742,,3.000742,,1.0,
voRS_E,1.0,,1.0,,1.0,
voRS_O,2.922141,,2.922141,,1.0,


In [101]:
volatilities = ['voSQ' , 'voGK' , 'voRS']

def out_sample(eq, eq_fwd , oil_fwd , df ):
    length = len(eq_fwd)
    
    for vo in volatilities:
        model1 = arch_model(y = oil_train[vo] , x = oil_train[vo] , vol = 'EGARCH')
        results1 = model1.fit().forecast(horizon = length , method = 'simulation')
        forecast1 = np.reshape(results1.mean + np.sqrt(results1.variance) , (length,))
        msfe1_oil = ((oil_fwd[vo].tolist() - forecast1) ** 2).mean()
        
        model2 = arch_model(y = oil_train[vo] , x = [[vo , 'bas' , 'Volume']] , vol = 'EGARCH')
        results2 = model2.fit().forecast(horizon = length , method = 'simulation')
        forecast2 = np.reshape(results2.mean + np.sqrt(results2.variance) , (length,))
        msfe2_oil = ((oil_fwd[vo].tolist() - forecast2) ** 2).mean()
        
        model3 = arch_model(y = oil_train[vo] , x = pd.concat([oil_train[[vo , 'bas' , 'Volume']], eq[[vo , 'bas' , 'Volume']]], axis=1),vol = 'EGARCH')
        results3 = model3.fit().forecast(horizon = length , method = 'simulation')
        forecast3 = np.reshape(results3.mean + np.sqrt(results3.variance) , (length,))
        msfe3_oil = ((oil_fwd[vo].tolist() - forecast3) ** 2).mean()
        
        x,df.loc[f'{vo}_E' , ('2-1' , 'pvalue')] = dm_test(oil_fwd[vo].to_list() , forecast1 , forecast2)
        x,df.loc[f'{vo}_E' , ('3-1' , 'pvalue')] = dm_test(oil_fwd[vo].to_list() , forecast1 , forecast3)
        x,df.loc[f'{vo}_E' , ('3-2' , 'pvalue')] = dm_test(oil_fwd[vo].to_list() , forecast3 , forecast2)
        
        df.loc[f'{vo}_E' , ('2-1' , 'Theil_U')] = msfe2_oil/msfe1_oil
        df.loc[f'{vo}_E' , ('3-1' , 'Theil_U')] = msfe3_oil/msfe1_oil
        df.loc[f'{vo}_E' , ('3-2' , 'Theil_U')] = msfe3_oil/msfe2_oil

        model1 = arch_model(y = eq[vo] , x = eq[vo] , vol = 'EGARCH')
        results1 = model1.fit().forecast(horizon = length , method = 'simulation')
        forecast1 = np.reshape(results1.mean + np.sqrt(results1.variance) , (length,))
        msfe1_eq = ((eq_fwd[vo].tolist() - forecast1) ** 2).mean()
        
        model2 = arch_model(y = eq[vo] , x = [[vo , 'bas' , 'Volume']] , vol = 'EGARCH')
        results2 = model2.fit().forecast(horizon = length , method = 'simulation')
        forecast2 = np.reshape(results2.mean + np.sqrt(results2.variance) , (length,))
        msfe2_eq = ((eq_fwd[vo].tolist() - forecast2) ** 2).mean()
        
        model3 = arch_model(y = eq[vo] , x = pd.concat([oil_train[[vo , 'bas' , 'Volume']], eq[[vo , 'bas' , 'Volume']]], axis=1),vol = 'EGARCH')
        results3 = model3.fit().forecast(horizon = length , method = 'simulation')
        forecast3 = np.reshape(results3.mean + np.sqrt(results3.variance) , (length,))
        msfe3_eq = ((eq_fwd[vo].tolist() - forecast3) ** 2).mean()
        
        x,df.loc[f'{vo}_O' , ('2-1' , 'pvalue')] = dm_test(oil_fwd[vo].to_list() , forecast1 , forecast2)
        x,df.loc[f'{vo}_O' , ('3-1' , 'pvalue')] = dm_test(oil_fwd[vo].to_list() , forecast1 , forecast3)
        x,df.loc[f'{vo}_O' , ('3-2' , 'pvalue')] = dm_test(oil_fwd[vo].to_list() , forecast3 , forecast2)
        
        df.loc[f'{vo}_O' , ('2-1' , 'Theil_U')] = msfe2_eq/msfe1_eq
        df.loc[f'{vo}_O' , ('3-1' , 'Theil_U')] = msfe3_eq/msfe1_eq
        df.loc[f'{vo}_O' , ('3-2' , 'Theil_U')] = msfe3_eq/msfe2_eq
 

In [None]:
out_sample(snp_train , snp.loc[len(snp_train) + 1:int(0.85*len(snp))] , oil.loc[len(snp_train) + 1:int(0.85*len(snp))] , out_sample_snp_85 )
out_sample(snp_train , snp.loc[len(snp_train) + 1:int(0.90*len(snp))] , oil.loc[len(snp_train) + 1:int(0.90*len(snp))] , out_sample_snp_90 )
out_sample(snp_train , snp.loc[len(snp_train) + 1:int(len(snp))] , oil.loc[len(snp_train) + 1:int(len(snp))] , out_sample_snp_100 )
out_sample(snp_train , nasdaq.loc[len(snp_train) + 1:int(0.85*len(snp))] , oil.loc[len(snp_train) + 1:int(0.85*len(snp))] , out_sample_nasdaq_85 )
out_sample(snp_train , nasdaq.loc[len(snp_train) + 1:int(0.90*len(snp))] , oil.loc[len(snp_train) + 1:int(0.90*len(snp))] , out_sample_nasdaq_90 )
out_sample(snp_train , nasdaq.loc[len(snp_train) + 1:int(len(snp))] , oil.loc[len(snp_train) + 1:int(len(snp))] , out_sample_nasdaq_100 )

In [103]:
out_sample_snp_85

Unnamed: 0_level_0,2-1,2-1,3-1,3-1,3-2,3-2
Unnamed: 0_level_1,Theil_U,pvalue,Theil_U,pvalue,Theil_U,pvalue
voSQ_E,1.0,0.300042,1.0,0.320934,1.0,0.270575
voSQ_O,0.999997,0.319277,0.999997,0.317141,1.0,0.303291
voGK_E,1.0,0.318052,1.0,0.317381,1.0,0.317379
voGK_O,1.0,0.415238,1.0,0.972055,1.0,0.505238
voRS_E,1.0,0.317369,1.0,0.317373,1.0,0.31737
voRS_O,1.0,0.317396,1.0,0.317396,1.0,0.317396


In [104]:
out_sample_snp_90

Unnamed: 0_level_0,2-1,2-1,3-1,3-1,3-2,3-2
Unnamed: 0_level_1,Theil_U,pvalue,Theil_U,pvalue,Theil_U,pvalue
voSQ_E,1.0,0.300042,1.0,0.320934,1.0,0.270575
voSQ_O,0.999997,0.319277,0.999997,0.317141,1.0,0.303291
voGK_E,1.0,0.318052,1.0,0.317381,1.0,0.317379
voGK_O,1.0,0.415238,1.0,0.972055,1.0,0.505238
voRS_E,1.0,0.317369,1.0,0.317373,1.0,0.31737
voRS_O,1.0,0.317396,1.0,0.317396,1.0,0.317396


In [105]:
out_sample_snp_100

Unnamed: 0_level_0,2-1,2-1,3-1,3-1,3-2,3-2
Unnamed: 0_level_1,Theil_U,pvalue,Theil_U,pvalue,Theil_U,pvalue
voSQ_E,1.0,0.300042,1.0,0.320934,1.0,0.270575
voSQ_O,0.999997,0.319277,0.999997,0.317141,1.0,0.303291
voGK_E,1.0,0.318052,1.0,0.317381,1.0,0.317379
voGK_O,1.0,0.415238,1.0,0.972055,1.0,0.505238
voRS_E,1.0,0.317369,1.0,0.317373,1.0,0.31737
voRS_O,1.0,0.317396,1.0,0.317396,1.0,0.317396


In [106]:
out_sample_nasdaq_85

Unnamed: 0_level_0,2-1,2-1,3-1,3-1,3-2,3-2
Unnamed: 0_level_1,Theil_U,pvalue,Theil_U,pvalue,Theil_U,pvalue
voSQ_E,1.0,0.300042,1.0,0.320934,1.0,0.270575
voSQ_O,0.999997,0.319277,0.999997,0.317141,1.0,0.303291
voGK_E,1.0,0.318052,1.0,0.317381,1.0,0.317379
voGK_O,1.0,0.415238,1.0,0.972055,1.0,0.505238
voRS_E,1.0,0.317369,1.0,0.317373,1.0,0.31737
voRS_O,1.0,0.317396,1.0,0.317396,1.0,0.317396


In [107]:
out_sample_nasdaq_90

Unnamed: 0_level_0,2-1,2-1,3-1,3-1,3-2,3-2
Unnamed: 0_level_1,Theil_U,pvalue,Theil_U,pvalue,Theil_U,pvalue
voSQ_E,1.0,0.300042,1.0,0.320934,1.0,0.270575
voSQ_O,0.999997,0.319277,0.999997,0.317141,1.0,0.303291
voGK_E,1.0,0.318052,1.0,0.317381,1.0,0.317379
voGK_O,1.0,0.415238,1.0,0.972055,1.0,0.505238
voRS_E,1.0,0.317369,1.0,0.317373,1.0,0.31737
voRS_O,1.0,0.317396,1.0,0.317396,1.0,0.317396


In [108]:
out_sample_nasdaq_100

Unnamed: 0_level_0,2-1,2-1,3-1,3-1,3-2,3-2
Unnamed: 0_level_1,Theil_U,pvalue,Theil_U,pvalue,Theil_U,pvalue
voSQ_E,1.0,0.300042,1.0,0.320934,1.0,0.270575
voSQ_O,0.999997,0.319277,0.999997,0.317141,1.0,0.303291
voGK_E,1.0,0.318052,1.0,0.317381,1.0,0.317379
voGK_O,1.0,0.415238,1.0,0.972055,1.0,0.505238
voRS_E,1.0,0.317369,1.0,0.317373,1.0,0.31737
voRS_O,1.0,0.317396,1.0,0.317396,1.0,0.317396
