In [21]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from scipy.linalg import cholesky
from time import time
file = 'DailyReturn.csv'
data = pd.read_csv(file)
returns_data = data.values

In [23]:
import time
# Step 1: Generate Pearson and Exponentially Weighted Covariance Matrices
def pearson_correlation(returns):
    return np.corrcoef(returns.T)

def exponentially_weighted_covariance_matrix(returns, lambd=0.97):
    T, N = returns.shape
    cov_matrix = np.zeros((N, N))
    weights = np.array([(1 - lambd) * lambd**(T - t - 1) for t in range(T)])
    weights /= weights.sum()  # Normalize the weights
    mean_returns = np.average(returns, axis=0, weights=weights)
    
    for t in range(T):
        diff = (returns[t, :] - mean_returns).reshape(-1, 1)
        cov_matrix += weights[t] * (diff @ diff.T)
    
    return cov_matrix

# Step 2: PCA Simulation
def pca_simulation(cov_matrix, mean_returns, explained_variance=None, num_simulations=25000):
    if explained_variance is not None:
        pca = PCA()
        pca.fit(cov_matrix)
        cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
        components_to_keep = np.searchsorted(cumulative_variance, explained_variance) + 1
        eigvals = pca.explained_variance_[:components_to_keep]
        eigvecs = pca.components_[:components_to_keep]
        L = eigvecs.T @ np.diag(np.sqrt(eigvals))
    else:
        L = cholesky(cov_matrix, lower=True)
    
    Z = np.random.normal(size=(L.shape[1], num_simulations))
    simulated_returns = L @ Z + mean_returns[:, np.newaxis]
    
    return simulated_returns

# Step 3: Frobenius Norm calculation
def frobenius_norm(matrix1, matrix2):
    return np.linalg.norm(matrix1 - matrix2, 'fro')

# Step 4: Run all simulations and compare
def run_simulation_and_compare(returns_data):
    pearson_corr = pearson_correlation(returns_data)
    ew_cov_matrix = exponentially_weighted_covariance_matrix(returns_data)
    
    pearson_var = np.var(returns_data, axis=0)
    ew_var = np.diag(ew_cov_matrix)
    
    cov_matrices = {
        'Pearson Corr + Standard Var': np.diag(pearson_var) @ pearson_corr @ np.diag(pearson_var),
        'Pearson Corr + EW Var': np.diag(ew_var) @ pearson_corr @ np.diag(ew_var),
        'EW Corr + Standard Var': np.diag(pearson_var) @ ew_cov_matrix @ np.diag(pearson_var),
        'EW Corr + EW Var': ew_cov_matrix
    }
    
    flattened_results = []
    for name, cov_matrix in cov_matrices.items():
        mean_returns = np.mean(returns_data, axis=0)
        
        start_time = time.time()
        direct_sim = pca_simulation(cov_matrix, mean_returns)
        direct_time = time.time() - start_time
        direct_sim_cov = np.cov(direct_sim)
        direct_frobenius = frobenius_norm(cov_matrix, direct_sim_cov)
        
        flattened_results.append({
            'Matrix Type': name,
            'Method': 'Direct',
            'Time (s)': direct_time,
            'Frobenius Norm': direct_frobenius
        })
        
        pca_variances = [1.0, 0.75, 0.5]
        for var_explained in pca_variances:
            start_time = time.time()
            pca_sim = pca_simulation(cov_matrix, mean_returns, explained_variance=var_explained)
            pca_time = time.time() - start_time
            pca_sim_cov = np.cov(pca_sim)
            pca_frobenius = frobenius_norm(cov_matrix, pca_sim_cov)
            
            flattened_results.append({
                'Matrix Type': name,
                'Method': f'PCA {int(var_explained * 100)}%',
                'Time (s)': pca_time,
                'Frobenius Norm': pca_frobenius
            })
    
    return pd.DataFrame(flattened_results)

simulation_results_df = run_simulation_and_compare(returns_data)

print(simulation_results_df)

                    Matrix Type    Method  Time (s)  Frobenius Norm
0   Pearson Corr + Standard Var    Direct  0.117008    7.866287e-08
1   Pearson Corr + Standard Var  PCA 100%  0.118999    4.507415e-06
2   Pearson Corr + Standard Var   PCA 75%  0.018006    4.507415e-06
3   Pearson Corr + Standard Var   PCA 50%  0.026003    4.507415e-06
4         Pearson Corr + EW Var    Direct  0.099995    1.800771e-07
5         Pearson Corr + EW Var  PCA 100%  0.106998    1.132394e-05
6         Pearson Corr + EW Var   PCA 75%  0.024995    1.132394e-05
7         Pearson Corr + EW Var   PCA 50%  0.020982    1.132394e-05
8        EW Corr + Standard Var    Direct  0.085996    6.882756e-11
9        EW Corr + Standard Var  PCA 100%  0.113998    6.365738e-09
10       EW Corr + Standard Var   PCA 75%  0.025997    6.365738e-09
11       EW Corr + Standard Var   PCA 50%  0.021002    6.365738e-09
12             EW Corr + EW Var    Direct  0.114001    3.073945e-04
13             EW Corr + EW Var  PCA 100%  0.118