# Q1

In [1]:
import pandas as pd
import numpy as np
import datetime
import qrpm_funcs as qf
import matplotlib.pyplot as plt
%matplotlib inline
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
# Pull a single FRED series, DEXUSUK (pound sterling, going by the series
# code), up to the most recent year-end.
lastday = qf.LastYearEnd()
seriesnames = ['DEXUSUK']
cdates, ratematrix = qf.GetFREDMatrix(seriesnames, enddate=lastday)
multipliers = [1]
lgdates, difflgs = qf.levels_to_log_returns(cdates, ratematrix, multipliers)

# Everything up to and including 2020-12-31 is the learning set (T);
# whatever follows is the hold-out set (V).
ind = lgdates.index('2020-12-31')
T, V = np.array(difflgs[:ind+1]), np.array(difflgs[ind+1:])
n = len(V)

In [3]:
from scipy.stats import kurtosis
# Fit GARCH(1,1) on the learning set; the fitted values are read back as a, b, c.
ans = qf.Garch11Fit([0.7,0.2,1],T)
a, b, c = ans[0], ans[1], ans[2]
print(a,b,c)

# sigma[0] is the learning-set standard deviation. Each later entry is
# produced from the previous sigma and the hold-out observation before it,
# so sigma[i] never uses V[i] itself.
sigma = [np.std(T)]
for y in V[:n]:
    sigma.append(np.sqrt(c + a*y**2 + b*sigma[-1]**2))

# Standardised hold-out observations y_i / sigma_i.
res = [V[i]/sigma[i] for i in range(n)]
print('GARCH, Variance is ', np.var(res))
print('GARCH, Kurtosis is', kurtosis(res)[0])

0.08710546019265984 0.9087550165524291 3.521266884224251e-07
GARCH, Variance is  0.8984643876197116
GARCH, Kurtosis is 1.4016648997449623


In [4]:
# TGARCH
def TGarch11Fit(initparams,InputData):
    """Fit a threshold GARCH(1,1) model by maximum likelihood.

    The conditional variance recursion is

        var[i] = c + b*var[i-1] + a*e[i-1]**2 + d*1{e[i-1] < 0}*e[i-1]**2

    where e is the input series minus its sample mean and var[0] is the
    sample variance of the series.

    Parameters
    ----------
    initparams : sequence of four floats
        Starting values [a, b, c, d]. They are mapped through logs, so they
        need a > 0, b > 0, a + b < .999, c > 0 and d > 0.
    InputData : array-like
        One series of observations: either 1-D, or 2-D with a single row or
        column (e.g. the (N, 1) array obtained from a list of 1-element rows).

    Returns
    -------
    list
        Fitted [a, b, c, d] with 0 < a, b, a + b < .999 and c, d > 0 enforced
        by the reparameterisation.

    Raises
    ------
    ValueError
        If InputData is empty or is not a single series.
    """
    import scipy.optimize as scpo
    import numpy as np

    # Work on a flat float vector. Without this, an (N, 1) input makes every
    # observation a 1-element array, and the scalar conversions below would
    # rely on implicit size-1-array -> scalar conversion (deprecated in NumPy).
    array_data=np.asarray(InputData,dtype=float)
    if array_data.ndim==2 and 1 in array_data.shape:
        array_data=array_data.ravel()
    if array_data.ndim!=1 or array_data.size==0:
        raise ValueError("InputData must be a non-empty single series; got shape "
                         + str(array_data.shape))

    # Quantities that do not depend on the parameters are computed once
    # instead of on every objective evaluation.
    t=len(array_data)
    minimal=10**(-20)
    overallmean=np.mean(array_data)
    initial_var=np.var(array_data)
    deviations=array_data-overallmean
    sq_dev=deviations**2
    is_negative=deviations<0

    def GarchMaxLike(params):
        """Return -2 * log-likelihood (up to a constant) for transformed params."""
        xa,xb,xc,xd=params
        # Cap the unconstrained parameters so np.exp cannot overflow
        if xa>10: xa=10
        if xb>10: xb=10
        if xc>10: xc=10
        if xd>10: xd=10
        #Use trick to force a and b between 0 and .999;
        #(a+b) less than .999; and c>0, d>0
        a=.999*np.exp(xa)/(1+np.exp(xa))
        b=(.999-a)*np.exp(xb)/(1+np.exp(xb))
        c=np.exp(xc)
        d=np.exp(xd)

        vargarch=np.zeros(t)
        vargarch[0]=initial_var

        #Compute TGARCH(1,1) var's from data given parameters
        for i in range(1,t):
            #Note offset - i-1 observation of data
            #is used for i estimate of variance
            delta=int(is_negative[i-1])
            vargarch[i]=c+b*vargarch[i-1]+\
            a*sq_dev[i-1] + d*delta*sq_dev[i-1]
            if vargarch[i]<=0:
                vargarch[i]=minimal
        # sum logs of variances
        logsum=np.sum(np.log(vargarch))
        # sum yi^2/sigma^2 (kept as a sequential sum so the optimizer sees
        # the same rounding as the original element-by-element loop)
        othersum=0
        for i in range(t):
            othersum += sq_dev[i]/vargarch[i]
        #Actually -2 times objective since we are minimizing
        return(logsum + othersum)
    #End of GarchMaxLike

    #Transform parameters to the form used in GarchMaxLike
    #This ensures parameters are in bounds 0<a,b<1, 0<c, 0<d
    aparam=np.log(initparams[0]/(.999-initparams[0]))
    bparam=np.log(initparams[1]/(.999-initparams[0]-initparams[1]))
    cparam=np.log(initparams[2])
    dparam=np.log(initparams[3])
    xinit=[aparam,bparam,cparam,dparam]
    #Run the minimization. Constraints are built-in.
    results = scpo.minimize(GarchMaxLike,xinit,method='CG')
    #Map the optimizer's unconstrained solution back to model parameters
    aparam,bparam,cparam,dparam=results.x
    a=.999*np.exp(aparam)/(1+np.exp(aparam))
    b=(.999-a)*np.exp(bparam)/(1+np.exp(bparam))
    c=np.exp(cparam)
    d=np.exp(dparam)
    return([a,b,c,d])

In [8]:
# Fit TGARCH(1,1) on the learning set; the result is read back as a, b, c, d.
ans2 = TGarch11Fit([0.6,0.3,2,2],T)
a, b, c, d = ans2[0], ans2[1], ans2[2], ans2[3]
print(a,b,c,d)

# sigma[0] is the learning-set standard deviation; each later sigma is built
# from the previous sigma and the hold-out observation before it.
sigma = [np.std(T)]
for i in range(n):
    # Elementwise 0/1 indicator of a negative observation. int(V[i] < 0)
    # would need an implicit 1-element-array -> scalar conversion, which
    # NumPy has deprecated.
    delta = np.where(V[i] < 0, 1.0, 0.0)
    sigma.append(np.sqrt(c+a*(V[i])**2+b*(sigma[-1])**2+d*delta*(V[i])**2))

# Standardised hold-out observations y_i / sigma_i.
res = [V[i]/sigma[i] for i in range(n)]
print('TGARCH, Variance is ', np.var(res))
print('TGARCH, Kurtosis is', kurtosis(res)[0])

0.08710831449763716 0.9087542180622161 3.5210641628049344e-07 3.653038921575185e-08
TGARCH, Variance is  0.8984617058728512
TGARCH, Kurtosis is 1.4016847896162554


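#### From the above results, the TGARCH model gives almost the same variance and kurtosis of $y_i/\sigma_i$ on the hold-out set as the GARCH model. The fitted asymmetry coefficient $d \approx 3.65\times10^{-8}$ is negligible next to $a \approx 0.087$, so the fitted TGARCH effectively reduces to the GARCH(1,1) model.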

# Q2

In [1]:
import pandas as pd
import qrpm_funcs as qf
import numpy as np
from tabulate import tabulate
import matplotlib.pyplot as plt
import scipy.stats as spst

plt.rcParams['figure.dpi']= 300

# Pull three FRED exchange-rate series up to the most recent year-end:
# Swiss franc, pound sterling, Japanese yen.
lastday = qf.LastYearEnd()
seriesnames = ['DEXSZUS','DEXUSUK','DEXJPUS']
cdates, ratematrix = qf.GetFREDMatrix(seriesnames, enddate=lastday)
# One multiplier per series, passed through to the return helper
# (presumably -1 flips the quote direction -- confirm against qrpm_funcs).
multipliers = [-1,1,-1]
lgdates, difflgs = qf.levels_to_log_returns(cdates, ratematrix, multipliers)

# Split at 2020-12-31 (inclusive): learning set L, hold-out set V.
ind = lgdates.index('2020-12-31')
L, V = np.array(difflgs[:ind+1]), np.array(difflgs[ind+1:])

In [2]:
from numpy.linalg import inv
# Equal-weighted sample correlation matrix of the learning set.
T = len(L)
# Centre each series by subtracting its column mean directly. This avoids
# building dense T x T identity / all-ones matrices just to remove the mean.
X_c = (L - L.mean(axis=0)).T
# Covariance with divisor T
C = X_c@X_c.T/T
# Diagonal matrix of standard deviations, used to rescale C to a correlation matrix
S = np.diag(np.sqrt(np.diag(C)))
R1 = inv(S)@C@inv(S)
print("Correlation matrix using (equal-weighted) historical data:\n", R1)

Correlation matrix using (equal-weighted) historical data:
 [[1.         0.57254629 0.47623944]
 [0.57254629 1.         0.30082627]
 [0.47623944 0.30082627 1.        ]]


In [3]:
# EWMA correlation matrix of the learning set.
# Rebuild the reversed learning set from difflgs rather than flipping L in
# place: `L = L[::-1]` would flip the order back every time this cell is
# re-run. Row 0 is then the last learning-set observation (the most recent
# one, assuming difflgs is in chronological order).
L = np.array(difflgs[:ind+1])[::-1]
h = 1/252
# Per-observation decay factor: weights halve every 1/h = 252 observations
labda = 2**(-h)
# Normalised geometric weights, largest on row 0; kept as a vector so no
# T x T diagonal matrix has to be materialised.
p_list = (1-labda)*labda**np.arange(T)/(1-labda**T)
# Subtract the weighted mean of each series
X_l = (L - p_list@L).T
# Weighted covariance: sum_i p_i * x_i x_i'
C_l = (X_l*p_list)@X_l.T
S = np.diag(np.sqrt(np.diag(C_l)))
R2 = inv(S)@C_l@inv(S)
print("Correlation matrix using EWMA:\n", R2)

Correlation matrix using EWMA:
 [[1.         0.41162977 0.5366284 ]
 [0.41162977 1.         0.25379383]
 [0.5366284  0.25379383 1.        ]]


In [4]:
# Sample correlation matrix of the hold-out set, computed column-against-column by pandas.
holdout_frame = pd.DataFrame(V)
R3 = np.array(holdout_frame.corr())
print("Correlation matrix on V:\n", R3)

Correlation matrix on V:
 [[1.         0.49770195 0.59386854]
 [0.49770195 1.         0.24884904]
 [0.59386854 0.24884904 1.        ]]


In [5]:
# Box M test
def BoxM(T1,T2,s1,s2):
    """Box's M test for equality of two covariance matrices.

    Parameters
    ----------
    T1, T2 : int
        Numbers of observations behind s1 and s2.
    s1, s2 : array-like
        Square p x p matrices (arrays or nested lists).

    Returns
    -------
    (test_value, p_value)
        The chi-square or F test value and its upper-tail probability.
        If s1 and s2 have different dimensions, a message is printed and
        (0, 0) is returned.
    """
    # One local handle for scipy.stats serves both branches, so the function
    # does not depend on a module-level alias being defined elsewhere.
    from scipy import stats

    # Coerce to float arrays so nested lists are scaled elementwise
    # (T1*list would repeat the list instead).
    s1=np.asarray(s1,dtype=float)
    s2=np.asarray(s2,dtype=float)

    #Make sure dimension is common
    if len(s1)!=len(s2):
        print("Error: different dimensions in Box M Test:",len(s1),len(s2))
        return(0,0)

    #Matrices are pxp
    p=len(s1)

    #Form the combined matrix
    scomb=(T1*s1+T2*s2)/(T1+T2)

    #Box M statistic
    Mstat=(T1+T2-2)*np.log(np.linalg.det(scomb))-(T1-1)* \
        np.log(np.linalg.det(s1))-(T2-1)*np.log(np.linalg.det(s2))

    #Multipliers from equation (49) in Box 1949.
    A1=(2*p**2+3*p-1)/(6*(p+1))
    A1*=(1/(T1-1)+1/(T2-1)-1/(T1+T2-2))

    A2=(p-1)*(p+2)/6
    A2*=(1/(T1-1)**2+1/(T2-1)**2-1/(T1+T2-2)**2)

    discrim=A2-A1**2

    #Degrees of freedom
    df1=p*(p+1)/2

    # sf(x) is the upper-tail probability 1-cdf(x), evaluated directly so
    # very small p-values are not lost to cancellation.
    if discrim <= 0:
        #Use chi-square (Box 1949 top p. 329)
        test_value=Mstat*(1-A1)
        p_value=stats.chi2.sf(test_value,df1)
    else:
        #Use F Test (Box 1949 equation (68))
        df2=(df1+2)/discrim
        b=df1/(1-A1-(df1/df2))
        test_value=Mstat/b
        p_value=stats.f.sf(test_value,df1,df2)

    return(test_value,p_value)


# Box M p-value of each learning-set correlation estimate against the
# hold-out correlation matrix R3; only the p-value of each result is kept.
n_holdout = len(V)
p_value1 = BoxM(T, n_holdout, R1, R3)[1]
print("p-value of equal-weighted and R_V:",p_value1)
p_value2 = BoxM(T, n_holdout, R2, R3)[1]
print("p-value of EWMA and R_V:",p_value2)
print(p_value1<p_value2)

p-value of equal-weighted and R_V: 0.024969986917747544
p-value of EWMA and R_V: 0.24283562356226296
True


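#### The EWMA correlation matrix has the higher Box M p-value (about 0.243, versus about 0.025 for the equal-weighted matrix). At the 5% level, equality with the hold-out correlation matrix $R_V$ is therefore rejected for the equal-weighted estimate but not for the EWMA estimate.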

In [8]:
# (b)
def _ewma_corr(newest_first, halflife):
    """Return the EWMA correlation matrix of the rows of `newest_first`.

    Row 0 gets the largest weight; the weight halves every `halflife` rows,
    and the weights are normalised to sum to one.
    """
    obs = np.asarray(newest_first, dtype=float)
    n_obs = obs.shape[0]
    decay = 2**(-1/halflife)
    weights = (1-decay)*decay**np.arange(n_obs)/(1-decay**n_obs)
    # Remove the weighted mean, then form sum_i w_i * x_i x_i'
    centered = obs - weights@obs
    cov = (centered*weights[:, None]).T@centered
    vol = np.sqrt(np.diag(cov))
    return cov/np.outer(vol, vol)


def find_p(h):
    """Box M p-value of the EWMA correlation matrix with half-life h against R3.

    h is the half-life in observations (days). The learning set is rebuilt
    from difflgs and reversed here, so the result does not depend on whether
    another cell has already flipped the global L, and no T x T matrices are
    allocated on each call.
    """
    newest_first = np.array(difflgs[:ind+1])[::-1]
    R_ewma = _ewma_corr(newest_first, h)
    _, p_value = BoxM(len(newest_first), len(V), R_ewma, R3)
    return p_value

# Try half-lives of 10, 20, ..., 500 days and keep the one whose EWMA
# correlation matrix gets the largest Box M p-value against the hold-out set.
h_list = list(range(10,501,10))
pValue = [find_p(days) for days in h_list]
best_position = pValue.index(max(pValue))
ans = h_list[best_position]
print("Optimal days so that EWMA correlation matrix has the highest p-value with R_V is:\n", ans)

Optimal days so that EWMA correlation matrix has the highest p-value with R_V is:
 180
