In [1]:
## takes into account more than just past prices or past residuals --- MAX models

In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import statsmodels.graphics.tsaplots as sgt
import statsmodels.tsa.stattools as sts
from statsmodels.tsa.arima_model import ARIMA
from scipy.stats.distributions import chi2

from math import sqrt
sns.set()

In [30]:
raw_csv = pd.read_csv('Index2018.csv')
df_copy = raw_csv.copy()
df_copy['date'] = pd.to_datetime(df_copy['date'],dayfirst=True)
df_copy.set_index('date',inplace=True)
df_copy = df_copy.asfreq('b')
df_copy = df_copy.fillna(method='ffill')

In [31]:
### Analyzing FTSE column as market_value(price)

df_copy['market_value'] = df_copy['ftse']

In [32]:
df_copy.drop(['spx','dax','ftse','nikkei'],inplace=True,axis=1)

In [33]:
df_copy.head()

Unnamed: 0_level_0,market_value
date,Unnamed: 1_level_1
1994-01-07,3445.98
1994-01-10,3440.58
1994-01-11,3413.77
1994-01-12,3372.02
1994-01-13,3360.01


In [34]:
# split 

size = int(len(df_copy)*0.8)
df = df_copy.iloc[:size]
df_test = df_copy.iloc[size:]

In [35]:
def llr_test(model_1,model_2,df=1):
    ''' can only use this when order of simple model is nested inside the complex model'''
    L1 = model_1.fit().llf
    L2 = model_2.fit().llf
    LR = (2*(L2-L1))
    p = chi2.sf(LR,df).round(3)
    return p
    

In [36]:
df['returns']=df['market_value'].pct_change(1).mul(100)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['returns']=df['market_value'].pct_change(1).mul(100)


In [37]:
df.columns

Index(['market_value', 'returns'], dtype='object')

In [38]:
raw_csv['spx'].iloc[:size]

0        469.900000
1        475.270000
2        474.130000
3        474.170000
4        472.470000
           ...     
5016    1553.278930
5017    1563.071269
5018    1568.607909
5019    1587.731827
5020    1593.369863
Name: spx, Length: 5021, dtype: float64

In [39]:
df['spx'] = raw_csv['spx'].iloc[:size]
# df['dax'] =raw_csv['dax'].iloc[:size]
# df['nikkei'] =raw_csv['nikkei'].iloc[:size]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['spx'] = raw_csv['spx'].iloc[:size]


In [46]:
raw_csv['spx'][:size].values

array([ 469.9     ,  475.27    ,  474.13    , ..., 1568.607909,
       1587.731827, 1593.369863])

In [42]:
df.head()

Unnamed: 0_level_0,market_value,returns,spx
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1994-01-07,3445.98,,
1994-01-10,3440.58,-0.156704,
1994-01-11,3413.77,-0.779229,
1994-01-12,3372.02,-1.222988,
1994-01-13,3360.01,-0.356166,


### ARIMAX(1,1,1)

In [47]:
## including spx as exogenous
model_ar_1_i_1_ma_1_Xspx = ARIMA(df.market_value,exog=raw_csv['spx'][:size].values,order=(1,1,1))
result_ar_1_i_1_ma_1_Xspx=model_ar_1_i_1_ma_1_Xspx.fit()
result_ar_1_i_1_ma_1_Xspx.summary()

0,1,2,3
Dep. Variable:,D.market_value,No. Observations:,5020.0
Model:,"ARIMA(1, 1, 1)",Log Likelihood,-27603.561
Method:,css-mle,S.D. of innovations,59.132
Date:,"Tue, 19 Jan 2021",AIC,55217.122
Time:,13:59:16,BIC,55249.728
Sample:,01-10-1994,HQIC,55228.547
,- 04-05-2013,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.5591,2.651,-0.211,0.833,-5.755,4.636
x1,0.0010,0.002,0.440,0.660,-0.004,0.006
ar.L1.D.market_value,0.7462,0.070,10.609,0.000,0.608,0.884
ma.L1.D.market_value,-0.7923,0.064,-12.317,0.000,-0.918,-0.666

0,1,2,3,4
,Real,Imaginary,Modulus,Frequency
AR.1,1.3402,+0.0000j,1.3402,0.0000
MA.1,1.2622,+0.0000j,1.2622,0.0000


In [49]:
## including nekkei as exogenous
model_ar_1_i_1_ma_1_Xspx = ARIMA(df.market_value,exog=raw_csv['nikkei'][:size].values,order=(1,1,1))
result_ar_1_i_1_ma_1_Xspx=model_ar_1_i_1_ma_1_Xspx.fit()
result_ar_1_i_1_ma_1_Xspx.summary()


0,1,2,3
Dep. Variable:,D.market_value,No. Observations:,5020.0
Model:,"ARIMA(1, 1, 1)",Log Likelihood,-27603.412
Method:,css-mle,S.D. of innovations,59.131
Date:,"Tue, 19 Jan 2021",AIC,55216.823
Time:,14:00:13,BIC,55249.429
Sample:,01-10-1994,HQIC,55228.249
,- 04-05-2013,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-1.3678,2.448,-0.559,0.576,-6.165,3.430
x1,0.0001,0.000,0.815,0.415,-0.000,0.000
ar.L1.D.market_value,0.7472,0.070,10.629,0.000,0.609,0.885
ma.L1.D.market_value,-0.7934,0.064,-12.350,0.000,-0.919,-0.668

0,1,2,3,4
,Real,Imaginary,Modulus,Frequency
AR.1,1.3383,+0.0000j,1.3383,0.0000
MA.1,1.2604,+0.0000j,1.2604,0.0000


In [50]:
## inclusing dax as exog
model_ar_1_i_1_ma_1_Xspx = ARIMA(df.market_value,exog=raw_csv['dax'][:size].values,order=(1,1,1))
result_ar_1_i_1_ma_1_Xspx=model_ar_1_i_1_ma_1_Xspx.fit()
result_ar_1_i_1_ma_1_Xspx.summary()

# not sig because maybe we have not included all the indexes

0,1,2,3
Dep. Variable:,D.market_value,No. Observations:,5020.0
Model:,"ARIMA(1, 1, 1)",Log Likelihood,-27603.664
Method:,css-mle,S.D. of innovations,59.134
Date:,"Tue, 19 Jan 2021",AIC,55217.328
Time:,14:00:37,BIC,55249.934
Sample:,01-10-1994,HQIC,55228.754
,- 04-05-2013,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.6972,2.106,0.331,0.741,-3.431,4.825
x1,-2.642e-05,0.000,-0.066,0.947,-0.001,0.001
ar.L1.D.market_value,0.7478,0.070,10.639,0.000,0.610,0.886
ma.L1.D.market_value,-0.7939,0.064,-12.358,0.000,-0.920,-0.668

0,1,2,3,4
,Real,Imaginary,Modulus,Frequency
AR.1,1.3373,+0.0000j,1.3373,0.0000
MA.1,1.2596,+0.0000j,1.2596,0.0000


### SARIMAX

In [52]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [55]:
model_sarimax = SARIMAX(df.market_value,exog=raw_csv['spx'][:size].values,order=(1,0,1),seasonal_order=(2,0,1,5))
result_sarimax=model_sarimax.fit()
result_sarimax.summary()

0,1,2,3
Dep. Variable:,market_value,No. Observations:,5021.0
Model:,"SARIMAX(1, 0, 1)x(2, 0, 1, 5)",Log Likelihood,-27485.333
Date:,"Tue, 19 Jan 2021",AIC,54984.665
Time:,14:44:44,BIC,55030.315
Sample:,01-07-1994,HQIC,55000.662
,- 04-05-2013,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
x1,1.0907,0.032,33.859,0.000,1.028,1.154
ar.L1,0.9998,0.000,2600.099,0.000,0.999,1.001
ma.L1,-0.1038,0.009,-11.683,0.000,-0.121,-0.086
ar.S.L5,0.9665,0.010,97.642,0.000,0.947,0.986
ar.S.L10,0.0331,0.008,4.038,0.000,0.017,0.049
ma.S.L5,-0.9992,0.008,-128.822,0.000,-1.014,-0.984
sigma2,3319.0237,35.997,92.202,0.000,3248.470,3389.577

0,1,2,3
Ljung-Box (Q):,193.06,Jarque-Bera (JB):,6450.2
Prob(Q):,0.0,Prob(JB):,0.0
Heteroskedasticity (H):,2.26,Skew:,-0.31
Prob(H) (two-sided):,0.0,Kurtosis:,8.52
