Om de cel hieronder te kunnen runnen, moet er eerst een package geïnstalleerd worden via de Anaconda Prompt. Dit package zorgt ervoor dat we functies uit andere scripts kunnen gebruiken in dit script.
<br>
Tik het volgende in de Anaconda Prompt:
<br />
**pip install ipynb**

In [1]:
%%capture
import pandas as pd
import numpy as np
from numpy.linalg import inv
from numpy.linalg import det
from pandas_datareader import data as wb
import matplotlib.pyplot as plt
from ipynb.fs.full.Dataset import getdata
from ipynb.fs.full.Dataset import getreturns
from scipy.optimize import minimize
import warnings
warnings.filterwarnings('ignore')
import scipy.stats as st
from scipy.special import ndtri

###### Hieronder wordt de dataset verkregen. Ik gebruik 2010 tot en met 2015 als mijn in-sample data.
<br>
Hiervoor gebruik ik een functie die in een ander script (Dataset) is geschreven.

In [2]:
# Load the return series with getreturns (imported from the Dataset notebook).
# NOTE(review): the label slices below assume df has a date-like index so
# that the year strings select by calendar year — confirm against Dataset.
df = getreturns()
# In-sample window: every row up to the label "2015".
mInSampleReturns = df.loc[:"2015"]
# Out-of-sample window: every row from the label "2016" onwards.
mOutSampleReturns = df.loc["2016":]

In [3]:
def Parameters_GARCH_Normal(vReturns):
    """Fit a GARCH(1,1) model with normal innovations by maximum likelihood.

    The conditional variance follows
    ``h[t] = omega + alpha * r[t-1]**2 + beta * h[t-1]`` and ``h[0]`` is set
    to the sample variance of the returns. The density is centred at zero
    (no mean is subtracted) and the first observation is left out of the
    likelihood because its variance is only an initialisation.

    Parameters
    ----------
    vReturns : array-like
        One-dimensional sequence of returns. Lists and other sequences are
        converted to a float ndarray.

    Returns
    -------
    tuple
        ``(theta, log_likelihood, success)``: ``theta`` is the array
        ``[omega, alpha, beta]`` returned by SLSQP, ``log_likelihood`` the
        log-likelihood at ``theta`` and ``success`` the optimiser's flag.
    """
    # Positional indexing and element-wise powers below need a plain ndarray.
    vReturns = np.asarray(vReturns, dtype=float)
    iT = len(vReturns)

    # Starting values for omega, alpha and beta; each is kept inside [0, 1].
    vStart = np.array([0.1, 0.1, 0.8])
    bounds = ((0, 1), (0, 1), (0, 1))

    # The initial variance does not depend on the parameters, so compute it
    # once instead of on every objective evaluation.
    dInitialVariance = np.var(vReturns) if iT > 0 else 0.0

    def negative_log_likelihood(vTheta, returns):
        """Return minus the Gaussian GARCH(1,1) log-likelihood at vTheta."""
        dOmega, dAlpha, dBeta = vTheta[0], vTheta[1], vTheta[2]
        vH = np.zeros(iT)
        if iT > 0:
            vH[0] = dInitialVariance
        for t in range(1, iT):
            vH[t] = dOmega + dAlpha * returns[t - 1] ** 2 + dBeta * vH[t - 1]

        vLogPdf = (-0.5 * np.log(2 * np.pi * vH[1:])
                   - 0.5 * (returns[1:] ** 2 / vH[1:]))
        return -np.sum(vLogPdf)

    result = minimize(
        negative_log_likelihood,
        vStart,
        args=(vReturns,),  # a real one-element tuple
        method='SLSQP',
        bounds=bounds,
        options={'eps': 1e-09, 'disp': True, 'maxiter': 200},
    )
    # The optimiser minimises the negative log-likelihood; flip the sign back.
    return result.x, -result.fun, result.success

In [4]:
# Fit the GARCH(1,1) model to the first in-sample return column and show the
# estimates, the log-likelihood and the optimiser's success flag, one per line.
parameter1_N, likelihood1_N, success1_N = Parameters_GARCH_Normal(
    np.array(mInSampleReturns.iloc[:, 0])
)
print(parameter1_N, likelihood1_N, success1_N, sep="\n")

Optimization terminated successfully.    (Exit mode 0)
            Current function value: 2516.193369258651
            Iterations: 12
            Function evaluations: 68
            Gradient evaluations: 12
[ 0.05195353  0.09350152  0.87921127]
-2516.193369258651
True


In [5]:
# Fit the GARCH(1,1) model to the second in-sample return column and show the
# estimates, the log-likelihood and the optimiser's success flag, one per line.
parameter2_N, likelihood2_N, success2_N = Parameters_GARCH_Normal(
    np.array(mInSampleReturns.iloc[:, 1])
)
print(parameter2_N, likelihood2_N, success2_N, sep="\n")

Optimization terminated successfully.    (Exit mode 0)
            Current function value: 2238.592048513497
            Iterations: 12
            Function evaluations: 69
            Gradient evaluations: 12
[ 0.03347424  0.09929269  0.8768056 ]
-2238.592048513497
True


#### Hier schat ik de conditionele varianties ($\sigma_t^2$) over de tijd voor beide tijdreeksen

In [6]:
def ComputeSigma2GARCH(vTheta1_N,vTheta2_N,mReturns):
    """Filter the GARCH(1,1) conditional variances of two return series.

    Parameters
    ----------
    vTheta1_N, vTheta2_N : sequence
        ``[omega, alpha, beta]`` for the first and the second series.
    mReturns : ndarray
        Returns with one series per row and one time point per column.

    Returns
    -------
    ndarray
        Array shaped like ``mReturns``. Rows 0 and 1 hold the variance paths:
        column 0 is the sample variance of the row and every later column is
        ``omega + alpha * r[t-1]**2 + beta * h[t-1]``.
    """
    iSeries = mReturns.shape[0]
    iObs = mReturns.shape[1]
    mVariance = np.zeros((iSeries, iObs))

    # One (omega, alpha, beta) triple per row of mReturns.
    tParameters = (
        (vTheta1_N[0], vTheta1_N[1], vTheta1_N[2]),
        (vTheta2_N[0], vTheta2_N[1], vTheta2_N[2]),
    )

    if iObs == 0:
        return mVariance

    # The two recursions are independent, so run them one row at a time.
    for iRow, (dOmega, dAlpha, dBeta) in enumerate(tParameters):
        mVariance[iRow, 0] = np.var(mReturns[iRow, :])
        for t in range(1, iObs):
            mVariance[iRow, t] = (dOmega + dAlpha * mReturns[iRow, t - 1]**2
                                  + dBeta * mVariance[iRow, t - 1])
    return mVariance

In [7]:
# Filter the in-sample conditional variances of both series with the fitted
# GARCH parameters. The returns are transposed because ComputeSigma2GARCH
# reads rows as series and columns as time points.
mSigma2=ComputeSigma2GARCH(parameter1_N,parameter2_N,np.array(mInSampleReturns).T)

## Copula part

Vanaf hieronder zal ik de copula LL gaan uitwerken. Eerst moet ik de PIT transformatie toepassen op de returns. Hierbij moet ik de kans berekenen dat:
$$ P(Y_t<y_t)=u_t \quad Y_t\sim \text{N}(0,\sigma_t^2) $$
Waarbij $Y_t$ een stochast is en $y_t$ de realisatie. We gaan een ingebouwde functie (st.norm.cdf) gebruiken om deze kans te berekenen. 
<br>
De log-pdf op een bepaald tijdstip ziet er als volgt uit:
$$-\frac{1}{2}\log\left(\left|R\right|\right)-\frac{1}{2}x_t'\left(R^{-1}-I\right)x_t$$

- x is gedefinieerd als:
$$x_t=\begin{pmatrix} \Phi^{-1}(u_{1t})\\ \Phi^{-1}(u_{2t})\end{pmatrix}$$
$\Phi^{-1}(u_{it})$ wordt verkregen met de functie st.norm.ppf(), waarbij $u_{it}$ de PIT is van een tijdreeks

- $R$ is als volgt gedefinieerd:

$$R=\begin{pmatrix} 1 & \rho \\ \rho & 1 \end{pmatrix}$$
<br>

- de log-likelihood ziet er dan als volgt uit:
$$\sum_{t=1}^T\left[-\frac{1}{2}\log\left(\left|R\right|\right)-\frac{1}{2}x_t'\left(R^{-1}-I\right)x_t\right]$$
waarbij $T$ het totale aantal in-sample returns is

- de log-likelihood wordt gemaximaliseerd met betrekking tot de parameter $\rho$

In [17]:
def Parameters_Copula_Normal(mReturns,mH):
    """Estimate the correlation of a bivariate Gaussian copula by maximum likelihood.

    Each return is mapped to a probability with the normal CDF (mean zero,
    standard deviation ``sqrt(mH)``) and back to a standard-normal quantile.
    The copula log-likelihood
    ``sum_t [-0.5 * log|R| - 0.5 * x_t' (R^-1 - I) x_t]`` is then maximised
    over the single off-diagonal element ``rho`` of ``R``.

    Parameters
    ----------
    mReturns : array-like, shape (2, T)
        Returns with one series per row.
    mH : array-like, shape (2, T)
        Conditional variances matching ``mReturns``.

    Returns
    -------
    tuple
        ``(rho, log_likelihood, success)``: ``rho`` is a length-1 array,
        ``log_likelihood`` the copula log-likelihood at ``rho`` and
        ``success`` the optimiser's flag.

    Raises
    ------
    ValueError
        If ``mReturns`` does not contain exactly two series; the correlation
        matrix built here has a single free parameter.
    """
    mReturns = np.asarray(mReturns, dtype=float)
    mH = np.asarray(mH, dtype=float)
    if mReturns.ndim != 2 or mReturns.shape[0] != 2:
        raise ValueError("mReturns must have shape (2, T)")

    vStart = np.array([0.2])
    # Keep rho strictly inside (-1, 1) so that R stays invertible.
    bounds = ((-0.9999999, 0.9999999),)

    # The probability integral transform and the quantiles do not depend on
    # rho, so they are computed once rather than on every evaluation.
    mUt = st.norm.cdf(mReturns, loc=0.0, scale=np.sqrt(mH))
    # The first observation is skipped, like in the GARCH likelihood of this
    # file, which also leaves out t = 0.
    mX = st.norm.ppf(mUt)[:, 1:]
    iObs = mX.shape[1]
    mI = np.identity(2)

    def negative_log_likelihood(vTheta):
        """Return minus the Gaussian-copula log-likelihood at vTheta."""
        dRho12 = vTheta[0]
        mR = np.array([[1.0, dRho12], [dRho12, 1.0]])
        mA = inv(mR) - mI
        # x_t' A x_t for every column t at once.
        vQuadratic = np.sum(mX * (mA @ mX), axis=0)
        dLogLikelihood = (-0.5 * iObs * np.log(det(mR))
                          - 0.5 * np.sum(vQuadratic))
        # float() replaces np.asscalar, which no longer exists in NumPy.
        return -float(dLogLikelihood)

    result = minimize(
        negative_log_likelihood,
        vStart,
        method='SLSQP',
        bounds=bounds,
        options={'eps': 1e-09, 'disp': True, 'maxiter': 200},
    )
    # The optimiser minimises the negative log-likelihood; flip the sign back.
    return result.x, -result.fun, result.success


In [18]:
# Estimate the copula correlation from the in-sample returns (one series per
# row) and their filtered variances, then show the estimate, the copula
# log-likelihood and the optimiser's success flag, one per line.
dRho, LL_Copula_N, success_Copula_N = Parameters_Copula_Normal(
    np.array(mInSampleReturns).T, mSigma2
)
print(dRho, LL_Copula_N, success_Copula_N, sep="\n")

Optimization terminated successfully.    (Exit mode 0)
            Current function value: -1489.8786033474553
            Iterations: 21
            Function evaluations: 71
            Gradient evaluations: 19
[ 0.92608295]
1489.8786033474553
True


In [19]:
def AIC(vTheta1, vTheta2, vTheta3, dLL1, dLL2, dLL3):
    """Compute, print and return the Akaike information criterion.

    The criterion is ``2 * k - 2 * LL``, where ``k`` is the total number of
    parameters in the three parameter vectors and ``LL`` is the sum of the
    three log-likelihoods.

    Parameters
    ----------
    vTheta1, vTheta2, vTheta3 : sized sequences
        Parameter vectors; only their lengths are used.
    dLL1, dLL2, dLL3 : float
        Log-likelihood contributions of the three model parts.

    Returns
    -------
    float
        The AIC value. It is still printed, as before, and is now also
        returned so that it can be stored or compared between models.
    """
    iK = sum(len(vTheta) for vTheta in (vTheta1, vTheta2, vTheta3))
    dLL = dLL1 + dLL2 + dLL3
    dAIC = 2 * iK - 2 * dLL
    print(dAIC)
    return dAIC

In [20]:
# AIC of the full in-sample model: the two GARCH parameter vectors plus the
# copula correlation, with their three log-likelihood contributions.
AIC(parameter1_N,parameter2_N,dRho, likelihood1_N, likelihood2_N , LL_Copula_N)

6543.813628849384


In [21]:
def LL_GARCH_Normal(vTheta,returns):
    """Return the Gaussian GARCH(1,1) log-likelihood of a return series.

    Parameters
    ----------
    vTheta : sequence
        ``[omega, alpha, beta]``.
    returns : ndarray
        One-dimensional array of returns.

    Returns
    -------
    float
        Sum of the normal log-densities (mean zero, variance ``h[t]``) over
        all observations except the first. ``h[0]`` is the sample variance
        of ``returns`` and
        ``h[t] = omega + alpha * returns[t-1]**2 + beta * h[t-1]``.
    """
    dOmega, dAlpha, dBeta = vTheta[0], vTheta[1], vTheta[2]
    iObs = len(returns)
    vVariance = np.zeros(iObs)

    # Seed the recursion with the sample variance, then roll it forward.
    if iObs > 0:
        vVariance[0] = np.var(returns)
    for t in range(1, iObs):
        vVariance[t] = dOmega + dAlpha*returns[t-1]**2 + dBeta * vVariance[t-1]

    # The first observation only initialises the variance and is not scored.
    vTail = vVariance[1:]
    vNormalisation = -0.5 * np.log( 2 * np.pi * vTail )
    vKernel = 0.5 * ( returns[1:]**2 / vTail )
    return np.sum(vNormalisation - vKernel)

In [22]:
    def LL_Copula_Normal(vTheta,mReturns,mH):
        """Return the Gaussian-copula log-likelihood of two return series.

        The returns are standardised with ``sqrt(mH)``, mapped to
        probabilities with the standard-normal CDF and back to quantiles, and
        the copula log-density
        ``-0.5 * log|R| - 0.5 * x_t' (R^-1 - I) x_t`` is summed over every
        column, including the first one.

        Parameters
        ----------
        vTheta : sequence
            Its first element is the correlation ``rho`` placed off the
            diagonal of ``R``.
        mReturns : ndarray
            Returns with one series per row and one time point per column.
        mH : ndarray
            Conditional variances with the same shape as ``mReturns``.

        Returns
        -------
        float
            The copula log-likelihood (not negated).
        """
        dRho12 = vTheta[0]
        iT = mReturns.shape[1]
        iDimension = len(mReturns)
        mEpsilon = mReturns/np.sqrt(mH)

        # Correlation matrix with rho in the two off-diagonal positions.
        mR = np.ones((iDimension,iDimension))
        mR[1,0] = dRho12
        mR[0,1] = dRho12
        # R does not change over time, so invert it and take its determinant
        # once rather than inside a loop over t.
        mA = inv(mR) - np.identity(iDimension)
        dLogDet = np.log(det(mR))

        # Probability integral transform followed by the normal quantile.
        mQuantiles = st.norm.ppf(st.norm.cdf(mEpsilon))

        # x_t' A x_t for every column t at once.
        vQuadratic = np.sum(mQuantiles * (mA @ mQuantiles), axis=0)
        # float() replaces np.asscalar, which no longer exists in NumPy.
        return float(-0.5 * iT * dLogDet - 0.5 * np.sum(vQuadratic))

In [23]:
def LogarithmicScore(vTheta1,vTheta2,vTheta3,mOutSampleReturns):
    """Return the logarithmic score of the GARCH-copula model on new data.

    Parameters
    ----------
    vTheta1, vTheta2 : sequence
        GARCH parameters of the first and the second return column.
    vTheta3 : sequence
        Copula parameter vector passed on to ``LL_Copula_Normal``.
    mOutSampleReturns : DataFrame
        Returns to score; columns 0 and 1 are read by position.

    Returns
    -------
    float
        The two marginal GARCH log-likelihoods plus the copula
        log-likelihood, all evaluated on ``mOutSampleReturns``.
    """
    # Marginal contributions, one per return column.
    lMarginalScores = [
        LL_GARCH_Normal(vTheta, np.array(mOutSampleReturns.iloc[:, iColumn]))
        for iColumn, vTheta in enumerate((vTheta1, vTheta2))
    ]

    # Dependence contribution: filter the variances on the same data, then
    # evaluate the copula with one series per row.
    mReturnsByRow = np.array(mOutSampleReturns).T
    mVariance = ComputeSigma2GARCH(vTheta1, vTheta2, mReturnsByRow)
    dCopulaScore = LL_Copula_Normal(vTheta3, mReturnsByRow, mVariance)

    return lMarginalScores[0] + lMarginalScores[1] + dCopulaScore
    

In [24]:
# Score the out-of-sample returns with the parameters estimated in-sample.
dLogScore = LogarithmicScore(parameter1_N,parameter2_N,dRho,mOutSampleReturns)

In [25]:
# Show the out-of-sample logarithmic score.
print(dLogScore)

-1612.09475464
