In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
file_paths = [
    './Data/BIIB.OQ.csv',
    './Data/BIO.N.csv',
    './Data/LKQ.OQ.csv',
    './Data/TXT.N.csv',
    './Data/VRSK.OQ.csv',
    './Data/MKTX.OQ.csv'
]

# Load the data
dataframes = [pd.read_csv(file) for file in file_paths]


In [3]:
processed_dfs = []
for df in dataframes:
    # Convert 'Date' to datetime
    df['Date'] = pd.to_datetime(df['Date'])

    # Set the date and ticker symbol as a multi-index
    df.set_index(['Date', 'Instrument'], inplace=True)

    # Keep only the '1 Month Total Return' column
    df = df[['1 Month Total Return']]

    processed_dfs.append(df)

In [4]:
# Merge all dataframes on the multi-index
merged_data = pd.concat(processed_dfs, axis=0)

Estimate historical mean and variance covariance matrix of the stocks in your dataset. Estimate the variance covariance matrix with a Shrinkage toward Constant Correlation Approach (consider k=0.2). Estimate the vector of means with an exponential average (lambda=0.005). Comment on the correlation observed in your dataset.

In [5]:
# Define lambda parameter for exponential average
lambda_param = 0.005

# Calculate the exponential average of returns
exp_avg_returns = merged_data.groupby(level='Instrument').apply(lambda x: x.ewm(alpha=lambda_param).mean())
exp_avg_returns

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,1 Month Total Return
Instrument,Date,Instrument,Unnamed: 3_level_1
BIIB.OQ,2004-01-30 00:00:00+00:00,BIIB.OQ,16.340402
BIIB.OQ,2004-02-27 00:00:00+00:00,BIIB.OQ,21.744004
BIIB.OQ,2004-03-31 00:00:00+00:00,BIIB.OQ,14.550265
BIIB.OQ,2004-04-30 00:00:00+00:00,BIIB.OQ,12.220466
BIIB.OQ,2004-05-28 00:00:00+00:00,BIIB.OQ,10.830338
...,...,...,...
VRSK.OQ,2023-07-31 00:00:00+00:00,VRSK.OQ,1.503041
VRSK.OQ,2023-08-31 00:00:00+00:00,VRSK.OQ,1.540937
VRSK.OQ,2023-09-29 00:00:00+00:00,VRSK.OQ,1.508839
VRSK.OQ,2023-10-31 00:00:00+00:00,VRSK.OQ,1.462740


In [7]:
# Aggregate data to ensure unique entries for each stock per date
aggregated_data = merged_data.groupby(level=['Date', 'Instrument']).mean()

# Unstack the data after aggregation
unstacked_data = aggregated_data.unstack(level='Instrument')

# Calculate the historical variance-covariance matrix
var_cov_matrix = unstacked_data.cov()
var_cov_matrix

Unnamed: 0_level_0,Unnamed: 1_level_0,1 Month Total Return,1 Month Total Return,1 Month Total Return,1 Month Total Return,1 Month Total Return,1 Month Total Return
Unnamed: 0_level_1,Instrument,BIIB.OQ,BIO.N,LKQ.OQ,MKTX.OQ,TXT.N,VRSK.OQ
Unnamed: 0_level_2,Instrument,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
1 Month Total Return,BIIB.OQ,98.491028,14.066107,13.585296,17.417318,17.396215,-0.871177
1 Month Total Return,BIO.N,14.066107,62.431415,21.535464,29.516425,33.41023,12.890438
1 Month Total Return,LKQ.OQ,13.585296,21.535464,74.616264,22.850067,38.492269,12.322917
1 Month Total Return,MKTX.OQ,17.417318,29.516425,22.850067,111.27821,38.155277,9.155613
1 Month Total Return,TXT.N,17.396215,33.41023,38.492269,38.155277,147.855852,13.887012
1 Month Total Return,VRSK.OQ,-0.871177,12.890438,12.322917,9.155613,13.887012,32.816518


In [12]:
# Define the shrinkage parameter
k = 0.2

# Calculate the simple average of the variances
mean_variance = np.mean(np.diag(var_cov_matrix))

# Create a shrinkage target matrix (constant correlation matrix)
n_assets = var_cov_matrix.shape[0]
target = np.full_like(var_cov_matrix, mean_variance * (1 - k))
np.fill_diagonal(target, np.diag(var_cov_matrix))

# Apply shrinkage
shrunk_cov_matrix = k * target + (1 - k) * var_cov_matrix
shrunk_cov_matrix

Unnamed: 0_level_0,Unnamed: 1_level_0,1 Month Total Return,1 Month Total Return,1 Month Total Return,1 Month Total Return,1 Month Total Return,1 Month Total Return
Unnamed: 0_level_1,Instrument,BIIB.OQ,BIO.N,LKQ.OQ,MKTX.OQ,TXT.N,VRSK.OQ
Unnamed: 0_level_2,Instrument,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
1 Month Total Return,BIIB.OQ,98.491028,25.319267,24.934618,28.000236,27.983353,13.36944
1 Month Total Return,BIO.N,25.319267,62.431415,31.294752,37.679521,40.794565,24.378732
1 Month Total Return,LKQ.OQ,24.934618,31.294752,74.616264,32.346434,44.860196,23.924715
1 Month Total Return,MKTX.OQ,28.000236,37.679521,32.346434,111.27821,44.590603,21.390871
1 Month Total Return,TXT.N,27.983353,40.794565,44.860196,44.590603,147.855852,25.175991
1 Month Total Return,VRSK.OQ,13.36944,24.378732,23.924715,21.390871,25.175991,32.816518


In [13]:
# Calculate the correlation matrix
corr_matrix = unstacked_data.corr()
corr_matrix

Unnamed: 0_level_0,Unnamed: 1_level_0,1 Month Total Return,1 Month Total Return,1 Month Total Return,1 Month Total Return,1 Month Total Return,1 Month Total Return
Unnamed: 0_level_1,Instrument,BIIB.OQ,BIO.N,LKQ.OQ,MKTX.OQ,TXT.N,VRSK.OQ
Unnamed: 0_level_2,Instrument,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
1 Month Total Return,BIIB.OQ,1.0,0.17938,0.158473,0.166496,0.144157,-0.015779
1 Month Total Return,BIO.N,0.17938,1.0,0.315527,0.350803,0.347743,0.286779
1 Month Total Return,LKQ.OQ,0.158473,0.315527,1.0,0.247446,0.366469,0.271829
1 Month Total Return,MKTX.OQ,0.166496,0.350803,0.247446,1.0,0.291964,0.176924
1 Month Total Return,TXT.N,0.144157,0.347743,0.366469,0.291964,1.0,0.263755
1 Month Total Return,VRSK.OQ,-0.015779,0.286779,0.271829,0.176924,0.263755,1.0
