In [1]:
import numpy as np

def simulate_pca(a, nsim, nval=None):
    # Eigenvalue decomposition
    vals, vecs = np.linalg.eigh(a)

    # sort values in descending order and corresponding vectors
    sorted_index = np.argsort(vals)[::-1]
    vals = vals[sorted_index]
    vecs = vecs[:, sorted_index]

    tv = sum(vals)

    posv = np.where(vals >= 1e-8)[0]
    if nval is not None:
        if nval < posv.shape[0]:
            posv = posv[:nval]
       
    vals = vals[posv]
    vecs = vecs[:, posv]

    print(f"Simulating with {posv.shape[0]} PC Factors: {sum(vals) / tv * 100}% total variance explained")
    B = vecs * np.diag(np.sqrt(vals))

    m = vals.shape[0]
    r = np.random.randn(m, nsim)

    return (B @ r).T




def multivariate_normal_simulation_pca(mean, cov, n_samples, explained_variance=None):
    # Perform PCA on the covariance matrix
    eigenvalues, eigenvectors = np.linalg.eigh(cov)

    # Sort the eigenvectors in decreasing order of eigenvalues
    idx = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[idx]
    eigenvectors = eigenvectors[:, idx]

    # Determine the number of components to include based on the explained_variance
    if explained_variance is not None:
        explained_variance = explained_variance / 100
        cum_variance = np.cumsum(eigenvalues) / np.sum(eigenvalues)
        num_components = np.argmax(cum_variance >= explained_variance) + 1
    else:
        num_components = len(eigenvalues)

    # Use the top num_components eigenvectors to transform the standard normal samples
    samples = np.random.normal(size=(num_components, n_samples))
    samples = np.dot(eigenvectors[:, :num_components], np.sqrt(eigenvalues[:num_components])


SyntaxError: unexpected EOF while parsing (1742035362.py, line 50)

In [2]:

import numpy as np
import pandas as pd

def generate_correlation_matrix_variance_vector(data):

    # Calculate the covariance matrix
    cov_matrix = np.cov(data, rowvar=False)

    # Calculate the standard deviation vector
    std_vector = np.sqrt(np.diag(cov_matrix))

    # Calculate the correlation matrix
    corr_matrix = np.zeros_like(cov_matrix)
    for i in range(cov_matrix.shape[0]):
        for j in range(cov_matrix.shape[1]):
            corr_matrix[i, j] = cov_matrix[i, j] / (std_vector[i] * std_vector[j])

    return corr_matrix, np.diag(cov_matrix)


def generate_ew_correlation_matrix_variance_vector(data, lambda_value=0.97):
    # Calculate the weighted covariance matrix
    weight = lambda_value ** np.arange(data.shape[0])
    cov_matrix = np.cov(data, rowvar=False, aweights=weight)

    # Calculate the standard deviation vector
    std_vector = np.sqrt(np.diag(cov_matrix))

    # Calculate the weighted correlation matrix
    corr_matrix = np.zeros_like(cov_matrix)
    for i in range(cov_matrix.shape[0]):
        for j in range(cov_matrix.shape[1]):
            corr_matrix[i, j] = cov_matrix[i, j] / (std_vector[i] * std_vector[j])

    return corr_matrix, np.diag(cov_matrix)


In [3]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

def simulate_from_cov_matrix(cov_matrix, num_draws):
    
    # Calculate the eigenvalues and eigenvectors of the covariance matrix
    eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

    # Create a diagonal matrix from the eigenvalues
    eigenvalue_matrix = np.diag(np.sqrt(eigenvalues))

    # Simulate data from the covariance matrix
    return np.dot(np.dot(eigenvectors, eigenvalue_matrix), np.random.normal(size=(cov_matrix.shape[0], num_draws)))

def simulate_from_pca(data, num_draws, explained_variance=1.0):
    # Fit the PCA model to the data
    pca = PCA(n_components=data.shape[0], explained_variance=explained_variance)
    pca.fit(data)

    # Simulate data from the PCA model
    return pca.inverse_transform(np.random.normal(size=(num_draws, data.shape[0])))

def simulate_cov_matrix_direct_pca(cov_matrix, num_draws):
    # Directly simulate data from the covariance matrix
    direct = simulate_from_cov_matrix(cov_matrix, num_draws)

    # Simulate data from the PCA model with 100% explained variance
    pca_100 = simulate_from_pca(direct, num_draws, 1.0)

    # Simulate data from the PCA model with 75% explained variance
    pca_75 = simulate_from_pca(direct, num_draws, 0.75)

    #


In [5]:
df = pd.read_csv('DailyReturn.csv', sep=',')
new_data = data.drop(columns=['Unnamed: 0', 'SPY'])
new_data = new_data.iloc[:, 0:10]
means = new_data.mean().values

norm = (new_data - new_data.mean()).to_numpy()
cov = norm.T @ norm

alpha = 0.97
weights = (1-alpha)**np.arange(len(new_data))[::-1]
norm_new_data = (new_data - new_data.mean()).fillna(0).to_numpy()
EW_cov = ((weights * norm_new_data.T)@norm_new_data)/(weights.sum())

scaler = StandardScaler()
pca = PCA()
new_data_scale = scaler.fit_transform(new_data)
new_data_pca = pca.fit(new_data_scale)

NameError: name 'data' is not defined