# Transformation
Here we consider the computational efficiency of using the new parameterization of correlation matrices

In [3]:
import time
import numpy as np
import pandas as pd
from scipy.linalg import logm, expm

In [4]:
df = pd.read_csv("/Users/luuk/Documenten/School/Master/Research Assistantship/Data/FinalData5.csv")
df = df[~((df["DATE"] == "2020-11-27") & (df["TIME"] == "13:05:00"))]          #Some stocks contain an observation at 13:05 on 2022-11-27, while other stock do not. These observations are removed

data = df[["SYMBOL", "PRICE", "DATE"]]          #Select the relevant information from the dataset
data

Unnamed: 0,SYMBOL,PRICE,DATE
0,MMM,177.68,2020-01-02
1,MMM,177.54,2020-01-02
2,MMM,178.43,2020-01-02
3,MMM,179.19,2020-01-02
4,MMM,178.85,2020-01-02
...,...,...,...
1787248,DIS,85.96,2022-12-30
1787249,DIS,86.16,2022-12-30
1787250,DIS,86.41,2022-12-30
1787251,DIS,86.75,2022-12-30


In [5]:
dates = np.unique(data["DATE"])
stocks = np.unique(data["SYMBOL"])

lmRCorr = []         #Initialize a list for the realized correlation matrices for all dates
for date in dates:
    nobsDate = len(data[(data["SYMBOL"] == "AAPL") & (data["DATE"] == date)])

    mRCovDate = np.zeros((len(stocks), len(stocks)))           #Initialize an empty realized covariance matrix, which will be used to compute the realized correlation matrix
    mRAllStocksOnDate = np.zeros((nobsDate - 1, len(stocks)))           #Initialize a matrix of returns for all the stocks on the date at hand

    s = 0
    for stock in stocks:
        dfStockOnDate = data[(data["SYMBOL"] == stock) & (data["DATE"] == date)]
        vRStocksOnDate = np.diff(dfStockOnDate["PRICE"]) / dfStockOnDate["PRICE"][:-1]
        mRAllStocksOnDate[:, s] = vRStocksOnDate.reset_index(drop=True)         #Store the return vector in the column of the corresponding stock
        s += 1

    for j in range(nobsDate - 1):
        mRCovDate += np.outer(mRAllStocksOnDate[j, :].T, mRAllStocksOnDate[j, :])

    diagonalElements = np.diag(mRCovDate)
    mRVarDateNegSqrt = np.diag(1 / np.sqrt(diagonalElements))
    mRCorrDate = mRVarDateNegSqrt @ mRCovDate @ mRVarDateNegSqrt         #Compute the realized correlation matrix from the realized covariance matrix

    lmRCorr.append(mRCorrDate)
    print(date)         #Print the date to indicate how fast the process is going

2020-01-02
2020-01-03
2020-01-06
2020-01-07
2020-01-08
2020-01-09
2020-01-10
2020-01-13
2020-01-14
2020-01-15
2020-01-16
2020-01-17
2020-01-21
2020-01-22
2020-01-23
2020-01-24
2020-01-27
2020-01-28
2020-01-29
2020-01-30
2020-01-31
2020-02-03
2020-02-04
2020-02-05
2020-02-06
2020-02-07
2020-02-10
2020-02-11
2020-02-12
2020-02-13
2020-02-14
2020-02-18
2020-02-19
2020-02-20
2020-02-21
2020-02-24
2020-02-25
2020-02-26
2020-02-27
2020-02-28
2020-03-02
2020-03-03
2020-03-04
2020-03-05
2020-03-06
2020-03-09
2020-03-10
2020-03-11
2020-03-12
2020-03-13
2020-03-16
2020-03-17
2020-03-18
2020-03-19
2020-03-20
2020-03-23
2020-03-24
2020-03-25
2020-03-26
2020-03-27
2020-03-30
2020-03-31
2020-04-01
2020-04-02
2020-04-03
2020-04-06
2020-04-07
2020-04-08
2020-04-09
2020-04-13
2020-04-14
2020-04-15
2020-04-16
2020-04-17
2020-04-20
2020-04-21
2020-04-22
2020-04-23
2020-04-24
2020-04-27
2020-04-28
2020-04-29
2020-04-30
2020-05-01
2020-05-04
2020-05-05
2020-05-06
2020-05-07
2020-05-08
2020-05-11
2020-05-12

In [6]:
lmRCorr[100]

array([[ 1.00000000e+00,  1.03837762e-01,  2.38038526e-01,
         4.33484395e-01,  5.82801307e-01,  4.12172285e-01,
         6.09202284e-01,  2.59893633e-01,  5.65063547e-01,
         4.18814933e-01,  4.84528860e-01,  4.16969521e-01,
         4.67519010e-01,  3.06034358e-01,  6.19356895e-01,
        -1.33150820e-01,  3.77134172e-01,  1.71209109e-01,
         2.58168284e-02,  1.72907384e-01, -9.55666418e-02,
         6.63658101e-01,  5.22308031e-01, -1.05301816e-01,
         1.30805910e-01,  1.74284175e-01,  3.62430411e-01,
         4.74403845e-02,  3.21083521e-01,  2.84744045e-01],
       [ 1.03837762e-01,  1.00000000e+00,  2.80972296e-01,
         9.97655086e-02, -1.74210129e-01,  4.61373288e-01,
         4.17535504e-01,  2.78516706e-01,  2.78391293e-01,
        -2.82913248e-01,  1.39583460e-01,  5.23585923e-01,
        -2.08576567e-01, -5.98352724e-02,  1.96047598e-01,
         5.00036962e-01,  1.13964879e-01,  1.71750076e-01,
         5.09129556e-01,  8.90254835e-02,  3.83585269e-

In [7]:
stocks

array(['AAPL', 'AMGN', 'AXP', 'BA', 'CAT', 'CRM', 'CSCO', 'CVX', 'DIS',
       'DOW', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'JPM', 'KO', 'MCD',
       'MMM', 'MRK', 'MSFT', 'NKE', 'PG', 'TRV', 'UNH', 'V', 'VZ', 'WBA',
       'WMT'], dtype=object)

In [8]:
startTime = time.time()

lgammamRCorr = []
for i in range(len(lmRCorr)):
    logmCorr = logm(lmRCorr[i])           #Take the matrix logarithm
    vLowerDiagonal = logmCorr[np.tril_indices(logmCorr.shape[0], k=-1)]           #Take the lower diagonal entries
    lgammamRCorr.append(vLowerDiagonal)

timeSpent = time.time() - startTime
timeSpent

1.1392271518707275

In [9]:
lgammamRCorr[100]          #Length should be (30 x 29) / 2 = 435

array([-1.07868917e-02, -1.54394711e-01,  2.43186746e-01,  2.05251204e-01,
        3.56393369e-03,  2.33965092e-01,  3.71466056e-01, -1.96007879e-01,
        3.13942479e-01,  2.74159806e-01,  3.54675204e-01,  1.67943480e-01,
        2.15007159e-01,  1.37271161e-01, -8.16745112e-02,  3.55960361e-01,
        2.64288741e-01,  1.71381599e-02,  3.31831523e-01,  2.55928309e-01,
        2.74633922e-01, -1.03737234e-01,  3.64156947e-02,  2.72250102e-01,
        4.26110350e-01, -1.37399693e-01,  2.10714610e-02,  1.63312473e-01,
        2.46722216e-01,  1.12762007e-01,  1.62395664e-01,  1.08187116e-01,
        2.12115918e-01,  2.04379786e-01,  3.44649233e-01,  3.19993993e-01,
        1.20133392e-01, -3.00209854e-01,  6.19056596e-02,  4.87307841e-01,
        5.63920687e-01, -2.15734900e-01, -1.17413494e-01,  2.34227113e-01,
        2.05810239e-01,  2.80897498e-01, -1.92338353e-01,  1.06622406e-01,
        6.33197609e-01,  3.96212149e-01,  4.61770129e-01, -9.40646155e-02,
        6.59748818e-02,  

In [10]:
def AlgorithmDiagonal(A, nStocks):
    x = np.random.normal(0, 1, size=nStocks)        #Randomly initialize the diagonal of the correlation matrix
    np.fill_diagonal(A, x)

    eps = 100
    while(eps > 10e-15):
        xnew = x - np.log(np.diag(expm(A)))         #Update the diagonal using Corollary 1 from Archakov and Hansen (2021)

        eps = abs(sum(xnew - x))
        x = xnew
        np.fill_diagonal(A, x)

    return x

In [11]:
startTime2 = time.time()

nStocks = len(stocks)

lmRCorrMappedBack = []
for i in range(len(lgammamRCorr)):
    A = np.zeros((nStocks, nStocks))        #Initialize A
    A[np.tril_indices(nStocks, k=-1)] = lgammamRCorr[i]     #Plug the entries of the new parameterization into the upper diagonal of A
    A = A + A.T     #Plug the entries of the new parameterization into the lower diagonal of A

    x = AlgorithmDiagonal(A, nStocks)       #Retrieve the entries on the diagonal
    np.fill_diagonal(A, x)

    lmRCorrMappedBack.append(expm(A))       #Take the exponent of A

timeSpent2 = time.time() - startTime2
timeSpent2

1.3623087406158447

In [12]:
error = 0
for i in range(len(lmRCorrMappedBack)):
    error += np.sum(np.abs(lmRCorrMappedBack[i] - lmRCorr[i]))      #Compute the sum of absolute errors for each observation for each realized correlation matrix

error

0.000462140339876366

In [13]:
lmRCorrMappedBack[100]

array([[ 1.00000000e+00,  1.03837763e-01,  2.38038526e-01,
         4.33484396e-01,  5.82801307e-01,  4.12172287e-01,
         6.09202284e-01,  2.59893633e-01,  5.65063548e-01,
         4.18814933e-01,  4.84528859e-01,  4.16969522e-01,
         4.67519010e-01,  3.06034358e-01,  6.19356896e-01,
        -1.33150820e-01,  3.77134173e-01,  1.71209109e-01,
         2.58168283e-02,  1.72907384e-01, -9.55666421e-02,
         6.63658100e-01,  5.22308032e-01, -1.05301816e-01,
         1.30805910e-01,  1.74284175e-01,  3.62430410e-01,
         4.74403844e-02,  3.21083522e-01,  2.84744046e-01],
       [ 1.03837763e-01,  1.00000000e+00,  2.80972296e-01,
         9.97655089e-02, -1.74210129e-01,  4.61373289e-01,
         4.17535504e-01,  2.78516706e-01,  2.78391293e-01,
        -2.82913248e-01,  1.39583460e-01,  5.23585925e-01,
        -2.08576567e-01, -5.98352724e-02,  1.96047598e-01,
         5.00036962e-01,  1.13964879e-01,  1.71750076e-01,
         5.09129556e-01,  8.90254836e-02,  3.83585269e-