In [12]:
%matplotlib inline
import math
import pandas as pd
from tsmom_model import *
import numpy as np
from datetime import datetime
import statsmodels.tsa.vector_ar.var_model as VAR
import PPCA_MR as p
import statsmodels.api as sm

In [13]:
import pylab

# Global plot styling for the notebook: enlarge seaborn's font scale, then set the
# default figure size. The size is applied after sns.set(...), as in the original
# ordering (presumably so the seaborn defaults do not override it).
# NOTE(review): `sns` is not imported explicitly in the visible cells; presumably
# it arrives through `from tsmom_model import *`. Confirm.
sns.set(font_scale=2)
pylab.rcParams.update({'figure.figsize': (24, 18)})

# Hand cleansed
Hand cleaned to remove duplicates.

In [14]:
# Load the cleaned market data; it feeds both calculate_amihud_liquidity and
# calculate_FHT further down.
# NOTE(review): load_amihud_markets_price presumably comes from the
# `from tsmom_model import *` star import. Confirm.
cleansed=load_amihud_markets_price()

# Amihud methodology

Amihud (2002) developed the illiquidity measure expressed by:

$$ \text{Amihud} = \frac{1}{N} \sum_{t=1}^{N} \frac{|r_t|}{\text{dvol}_t} $$

where $r_t$ is the return on day $t$, $\text{dvol}_t$ is the dollar volume on day $t$, and $N$ is the number of days in the averaging period.

Dividing the absolute daily return by that day's dollar volume expresses the price move per dollar traded, so a large price change on thin volume registers as high illiquidity.


In [15]:
# Compute the Amihud illiquidity measure from the cleaned data and keep the rows
# up to and including the label '2016'.
# NOTE(review): the `[:'2016']` label slice assumes the result is indexed by date. Confirm.
amihud=calculate_amihud_liquidity(cleansed)[:'2016']

# Academic papers
Based on Ilin, A. and Raiko, T. (2010), "Practical Approaches to Principal Component Analysis in the Presence of Missing Values".

Tipping and Bishop (1999) introduced a probabilistic formulation of PCA (PPCA).


This notebook uses a flavor of Probabilistic PCA that is robust to missing data (see Ilin and Raiko 2010). Rather than the vanilla one-shot SVD, PPCA uses an iterative EM procedure (a fixed-point algorithm). Starting from an initial guess, it alternately interpolates the missing data and updates the components until convergence.

On a side note, PPCA interpolates using information from all series, making it a multivariate interpolator. Be careful though, fitting the model to too many components will likely lead to overfitting problems in the interpolated data.

Also note that the series are standardized using expanding-window z-scores, as described in Korajczyk and Sadka (2008).


In [16]:
# Alternative standardisation, kept for reference but disabled:
#data=calc_zscore_ew(amihud.dropna(how='all')).dropna(how='all')
# Standardise the Amihud series with expanding-window z-scores; all-NaN rows are
# dropped both before and after the transformation.
# NOTE(review): the meaning of the second argument (3) is defined inside
# calc_zscore_expanding_window; presumably a minimum window length. Confirm.
data=calc_zscore_expanding_window(amihud.dropna(how='all'),3).dropna(how='all')

In [17]:
# Split the standardised series by sector: for every sector in the map, select
# that sector's columns from `data` and drop the rows where all of them are NaN.
d_map = sector_map(amihud)
sector_zscores = {}
for sect, members in d_map.items():
    sector_zscores[sect] = data[members].dropna(how='all')

In [18]:
# Fit a PPCA model (PPCA_MR) to each sector's z-scores and keep the first `no_pc`
# component series per sector, together with each fit's explained variance.
sector_PC = {}                              # sector name -> DataFrame of component series
sector_variance_explained = pd.DataFrame()  # one column per sector, from PPCA.var_exp

no_pc = 3  # number of components requested from every fit
for sect in d_map.keys():
    try:
        PPCA = p.PPCA()
        PPCA.fit(np.array(sector_zscores[sect]), d=no_pc)
        sector_variance_explained[sect] = PPCA.var_exp
        pcs = pd.DataFrame()
        for i in range(no_pc):
            # Weight the model's data columns by row i of C.T and sum across columns.
            # NOTE(review): presumably the projection onto component i; confirm
            # against the PPCA_MR implementation.
            x = pd.DataFrame(PPCA.C.T[i] * PPCA.data).sum(axis=1)
            # Assign the index directly. A bare `x.set_axis(labels)` only relabels
            # in place on older pandas; newer versions return a new object, which
            # would be silently discarded here.
            x.index = sector_zscores[sect].index
            pcs[i] = x
        sector_PC[sect] = pcs
    except Exception as err:
        # Best effort: skip a sector whose fit fails, but report the reason rather
        # than hiding it (and let KeyboardInterrupt propagate).
        print('%s: %r' % (sect, err))

  del sys.path[0]


In [19]:
# Market-wide factor: fit PPCA on every standardised series except the currency
# markets and store the result under the 'Total' key.
no_fx = [i for i in data.columns if i not in d_map['Currencies']]
PPCA = p.PPCA()
PPCA.fit(np.array(data[no_fx]), d=no_pc)
# Record the explained variance under its own 'Total' column. Writing to
# `sector_variance_explained[sect]` would reuse the loop variable leaked from the
# per-sector cell and overwrite that sector's column.
sector_variance_explained['Total'] = PPCA.var_exp
pcs = pd.DataFrame()
for i in range(no_pc):
    # Weight the model's data columns by row i of C.T and sum across columns.
    x = pd.DataFrame(PPCA.C.T[i] * PPCA.data).sum(axis=1)
    # Label rows with the index of the data that was actually fitted (column
    # selection keeps `data`'s index), not with the last sector's index. Direct
    # assignment also avoids the discarded return value of `set_axis` on newer pandas.
    x.index = data.index
    pcs[i] = x
sector_PC['Total'] = pcs

  


In [20]:
# Collapse each listed sector's component series into a single factor by taking
# the row-wise mean, and collect the factors as columns named 'Amihud <sector>'.
# 'Currencies' is not in the list, so it is not part of this factor set.
prin_factors = pd.DataFrame()
for sector in ('Agriculturals', 'Energies', 'Equities',
               'Metals', 'Fixed Income', 'Total'):
    prin_factors['Amihud {}'.format(sector)] = sector_PC[sector].mean(axis=1)

In [21]:
# Fit a vector autoregression to the Amihud sector factors with fit()'s default
# settings (the printed summary shows first-lag, L1, terms estimated by OLS).
res = VAR.VAR(endog=prin_factors).fit()

In [22]:
# Show the fitted VAR's regression summary. The parenthesised call prints the same
# text under Python 2 and is also valid Python 3, unlike the print statement.
print(res.summary())

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Sat, 19, May, 2018
Time:                     13:18:56
--------------------------------------------------------------------
No. of Equations:         6.00000    BIC:                   -6.58698
Nobs:                     208.000    HQIC:                  -6.98840
Log likelihood:          -973.701    FPE:                0.000702580
AIC:                     -7.26090    Det(Omega_mle):     0.000576030
--------------------------------------------------------------------
Results for equation Amihud Agriculturals
                             coefficient       std. error           t-stat            prob
------------------------------------------------------------------------------------------
const                           0.340452         0.111744            3.047           0.003
L1.Amihud Agriculturals         0.819492         0.045768           17.905           0.000
L1.Am

# FHT method

In [36]:
# Compute the FHT liquidity measure from the same cleaned data used for Amihud.
# NOTE(review): "FHT" presumably refers to the Fong-Holden-Trzcinka measure;
# confirm against calculate_FHT.
FHT=calculate_FHT(cleansed)

In [37]:
# Standardise the FHT series with the same expanding-window z-score call used for
# Amihud. This rebinds `data`, so the Amihud z-scores are no longer available
# under that name after this cell runs.
data=calc_zscore_expanding_window(FHT.dropna(how='all'),3).dropna(how='all')

In [38]:
# Split the FHT z-scores by sector: select each sector's columns from `data` and
# drop the rows where all of them are NaN.
# NOTE(review): the sector map is still built from `amihud`, not `FHT`;
# presumably both frames share the same columns. Confirm.
d_map = sector_map(amihud)
sector_zscores = {}
for sect, members in d_map.items():
    sector_zscores[sect] = data[members].dropna(how='all')

In [39]:
# Fit a PPCA model (PPCA_MR) to each sector's FHT z-scores and keep the first
# `no_pc` component series per sector, together with each fit's explained variance.
sector_PC = {}                              # sector name -> DataFrame of component series
sector_variance_explained = pd.DataFrame()  # one column per sector, from PPCA.var_exp

no_pc = 3  # number of components requested from every fit
for sect in d_map.keys():
    try:
        PPCA = p.PPCA()
        PPCA.fit(np.array(sector_zscores[sect]), d=no_pc)
        sector_variance_explained[sect] = PPCA.var_exp
        pcs = pd.DataFrame()
        for i in range(no_pc):
            # Weight the model's data columns by row i of C.T and sum across columns.
            # NOTE(review): presumably the projection onto component i; confirm
            # against the PPCA_MR implementation.
            x = pd.DataFrame(PPCA.C.T[i] * PPCA.data).sum(axis=1)
            # Assign the index directly. A bare `x.set_axis(labels)` only relabels
            # in place on older pandas; newer versions return a new object, which
            # would be silently discarded here.
            x.index = sector_zscores[sect].index
            pcs[i] = x
        sector_PC[sect] = pcs
    except Exception as err:
        # Best effort: skip a sector whose fit fails, but report the reason rather
        # than hiding it (and let KeyboardInterrupt propagate).
        print('%s: %r' % (sect, err))

  del sys.path[0]


In [40]:
# Market-wide FHT factor: fit PPCA on all standardised series and store the result
# under the 'Total' key.
# NOTE(review): unlike the Amihud section, currencies are not excluded here.
# Confirm that this is intended.
PPCA = p.PPCA()
PPCA.fit(np.array(data), d=no_pc)
# Record the explained variance under its own 'Total' column. Writing to
# `sector_variance_explained[sect]` would reuse the loop variable leaked from the
# per-sector cell and overwrite that sector's column.
sector_variance_explained['Total'] = PPCA.var_exp
pcs = pd.DataFrame()
for i in range(no_pc):
    # Weight the model's data columns by row i of C.T and sum across columns.
    x = pd.DataFrame(PPCA.C.T[i] * PPCA.data).sum(axis=1)
    # Label rows with the index of the data that was actually fitted, not with the
    # last sector's index. Direct assignment also avoids the discarded return
    # value of `set_axis` on newer pandas.
    x.index = data.index
    pcs[i] = x
sector_PC['Total'] = pcs


  import sys


In [41]:
# Collapse every entry of sector_PC (all sectors plus 'Total') into a single FHT
# factor by taking the row-wise mean of its component series.
prin_factors_FHT = pd.DataFrame()
for sector in sector_PC:
    prin_factors_FHT[sector] = sector_PC[sector].mean(axis=1)

In [42]:
# Fit a VAR to the FHT factors, using only rows from the label '2000' onwards, and
# show the regression summary.
# NOTE(review): the `['2000':]` label slice assumes a date index. Confirm.
resFHT = VAR.VAR(endog=prin_factors_FHT['2000':]).fit()
# The parenthesised call prints the same text under Python 2 and is also valid
# Python 3, unlike the print statement.
print(resFHT.summary())

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Sat, 19, May, 2018
Time:                     13:22:16
--------------------------------------------------------------------
No. of Equations:         7.00000    BIC:                   -1.91222
Nobs:                     204.000    HQIC:                  -2.45462
Log likelihood:          -1682.29    FPE:                  0.0594395
AIC:                     -2.82308    Det(Omega_mle):       0.0454083
--------------------------------------------------------------------
Results for equation Agriculturals
                      coefficient       std. error           t-stat            prob
-----------------------------------------------------------------------------------
const                   -0.232511         0.088295           -2.633           0.009
L1.Agriculturals         0.360961         0.071676            5.036           0.000
L1.Currencies            0.042838       