In [1]:
import pandas as pd
import numpy as np
from scipy import stats

## Test 5.1

In [2]:
def ew_cov(mat, lam=0.97):
    """Exponentially weighted covariance matrix of the columns of ``mat``.

    Row ``i`` of the ``m`` rows receives a weight proportional to
    ``lam ** (m - 1 - i)``, so the last row carries the largest weight.
    Weights are normalised to sum to one.

    Parameters
    ----------
    mat : array-like, shape (m, n)
        Observations in rows, variables in columns.
    lam : float, default 0.97
        Decay factor. ``lam = 1`` yields equal weights.

    Returns
    -------
    numpy.ndarray, shape (n, n)
        ``sum_i w_i (x_i - mu)(x_i - mu)^T`` with ``mu = sum_i w_i x_i``.
    """
    mat = np.asarray(mat, dtype=float)
    m, _ = mat.shape
    # The (1 - lam) prefactor cancels in the normalisation below, so it is
    # omitted; keeping it turns lam == 1 into a 0 / 0 division.
    w = lam ** np.arange(m - 1, -1, -1, dtype=float)
    w = w / w.sum()
    # Weighted mean of each column.
    mu = w @ mat
    # Centre before forming the products: algebraically equal to
    # R^T W R - mu mu^T (weights sum to one) but avoids the m x m diag(w)
    # matrix and the cancellation between two large terms.
    centered = mat - mu
    return (centered * w[:, None]).T @ centered

In [29]:
# Read quiz2.csv and use its "Date" column as the index, then display the frame.
df_ab = pd.read_csv("quiz2.csv").set_index("Date")
df_ab

Unnamed: 0_level_0,A,B
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-05-15,100.000000,200.000000
2025-05-16,100.991133,201.275067
2025-05-17,100.340799,196.508266
2025-05-18,104.793398,204.723773
2025-05-19,105.657050,203.539489
...,...,...
2025-08-18,91.177604,179.033530
2025-08-19,91.396797,175.914463
2025-08-20,91.224166,171.807953
2025-08-21,92.166683,170.885450


In [30]:
# Percentage changes of columns A and B; rows containing NaN (such as the
# first row produced by pct_change) are dropped before converting to an array.
ret_mat = (
    df_ab[['A', 'B']]
    .pct_change()
    .dropna()
    .to_numpy()
)
# Exponentially weighted covariance of the two return series, decay 0.94.
ew_cov_ab = ew_cov(ret_mat, lam=0.94)
ew_cov_ab

array([[1.85565163e-04, 9.78667339e-05],
       [9.78667339e-05, 2.22637107e-04]])

In [5]:
# Normal Simulation PD Input 0 mean - 100,000 simulations, compare input vs output covariance
# Read test5_1.csv into `df` and display it; a later cell passes df.values to the simulator.
df = pd.read_csv("test5_1.csv")
df

Unnamed: 0,x1,x2,x3,x4,x5
0,0.084979,0.087586,0.042304,0.008984,0.003876
1,0.087586,0.160485,0.058136,0.012345,0.005326
2,0.042304,0.058136,0.03744,0.005963,0.002573
3,0.008984,0.012345,0.005963,0.001688,0.000546
4,0.003876,0.005326,0.002573,0.000546,0.000314


In [6]:
seed = 42
np.random.seed(seed)
def monte_carlo_normal_var(mat, N = 100000):
    """Sample covariance of ``N`` zero-mean normal draws generated from ``mat``.

    ``mat`` is factorised with a Cholesky decomposition, so it must be
    positive definite. Draws come from NumPy's global random state.
    """
    chol = np.linalg.cholesky(mat)
    # One row per simulation, one standard-normal column per variable.
    shocks = np.random.normal(0, 1, size=(N, mat.shape[0]))
    # Correlate the independent shocks with the Cholesky factor.
    simulated = shocks @ chol.T
    return np.cov(simulated, rowvar=False)
# Simulate from the matrix held in `df` and display the resulting sample covariance.
output_cov = monte_carlo_normal_var(df.to_numpy())
output_cov

array([[0.08529697, 0.08758956, 0.04248895, 0.00900416, 0.00389711],
       [0.08758956, 0.1600576 , 0.05799045, 0.0123159 , 0.0053234 ],
       [0.04248895, 0.05799045, 0.03751011, 0.00595618, 0.00258059],
       [0.00900416, 0.0123159 , 0.00595618, 0.00168902, 0.00054694],
       [0.00389711, 0.0053234 , 0.00258059, 0.00054694, 0.00031474]])

In [7]:
# Read testout_5.1.csv into `ans` and display it
# (presumably the reference result to compare with output_cov - confirm).
ans = pd.read_csv("testout_5.1.csv")
ans

Unnamed: 0,x1,x2,x3,x4,x5
0,0.085347,0.08772,0.042334,0.00902,0.003888
1,0.08772,0.160529,0.058067,0.012351,0.005324
2,0.042334,0.058067,0.037468,0.005974,0.002574
3,0.00902,0.012351,0.005974,0.001692,0.000549
4,0.003888,0.005324,0.002574,0.000549,0.000315


## 5.2

In [8]:
# Read test5_2.csv into `df` and display it; a later cell passes df.values to the simulator.
df = pd.read_csv("test5_2.csv")
df

Unnamed: 0,x1,x2,x3,x4,x5
0,0.084979,0.116781,0.042304,0.008984,0.003876
1,0.116781,0.160485,0.058136,0.012345,0.005326
2,0.042304,0.058136,0.03744,0.005963,0.002573
3,0.008984,0.012345,0.005963,0.001688,0.000546
4,0.003876,0.005326,0.002573,0.000546,0.000314


In [9]:
# Normal Simulation PSD Input 0 mean - 100,000 simulations, compare input vs output covariance
seed = 42
np.random.seed(seed)
def monte_carlo_normal_var_psd(mat, N = 100000):
    """Sample covariance of ``N`` zero-mean normal draws generated from ``mat``.

    Uses a symmetric eigendecomposition instead of Cholesky, with negative
    eigenvalues replaced by zero, so a singular ``mat`` is accepted.
    Draws come from NumPy's global random state.
    """
    spectrum, basis = np.linalg.eigh(mat)
    # Negative eigenvalues cannot be square-rooted; treat them as zero.
    spectrum = np.where(spectrum < 0, 0, spectrum)
    # Scale each eigenvector column by the root of its eigenvalue.
    root = basis * np.sqrt(spectrum)
    shocks = np.random.normal(0, 1, size=(N, mat.shape[0]))
    simulated = shocks @ root.T
    return np.cov(simulated, rowvar=False)
# Simulate from the matrix held in `df` and display the resulting sample covariance.
output_cov = monte_carlo_normal_var_psd(df.to_numpy())
output_cov

array([[0.08456043, 0.1162058 , 0.04212658, 0.00894799, 0.00386328],
       [0.1162058 , 0.15969394, 0.05789177, 0.01229664, 0.00530905],
       [0.04212658, 0.05789177, 0.03744169, 0.00596924, 0.00257242],
       [0.00894799, 0.01229664, 0.00596924, 0.00168987, 0.00054781],
       [0.00386328, 0.00530905, 0.00257242, 0.00054781, 0.00031453]])

In [10]:
# Read testout_5.2.csv into `ans` and display it
# (presumably the reference result to compare with output_cov - confirm).
ans = pd.read_csv("testout_5.2.csv")
ans

Unnamed: 0,x1,x2,x3,x4,x5
0,0.085347,0.117287,0.042374,0.009028,0.003892
1,0.117287,0.16118,0.058232,0.012406,0.005349
2,0.042374,0.058232,0.037524,0.005989,0.002581
3,0.009028,0.012406,0.005989,0.001695,0.00055
4,0.003892,0.005349,0.002581,0.00055,0.000316


## 5.3

In [11]:
# Read test5_3.csv into `df` and display it; a later cell passes df.values to the simulator.
df = pd.read_csv("test5_3.csv")
df

Unnamed: 0,x1,x2,x3,x4,x5
0,0.084979,0.0,0.042304,0.008984,0.003876
1,0.0,0.160485,0.058136,0.012345,0.005326
2,0.042304,0.058136,0.03744,0.005963,0.002573
3,0.008984,0.012345,0.005963,0.001688,0.000546
4,0.003876,0.005326,0.002573,0.000546,0.000314


In [12]:
# Normal Simulation nonPSD Input, 0 mean, near_psd fix - 100,000 simulations, compare input vs output covariance
def near_psd_cov(A, epsilon = 0):
    """Return a positive semi-definite matrix close to the covariance ``A``.

    ``A`` is converted to a correlation matrix, eigenvalues below ``epsilon``
    are raised to ``epsilon``, the matrix is rebuilt and rescaled to a unit
    diagonal, and the original standard deviations are applied again. The
    diagonal of ``A`` is therefore preserved.

    Parameters
    ----------
    A : array-like, shape (n, n)
        Symmetric matrix with a strictly positive diagonal.
    epsilon : float, default 0
        Floor applied to the eigenvalues of the correlation matrix.

    Returns
    -------
    numpy.ndarray, shape (n, n)
    """
    A = np.asarray(A, dtype=float)
    # get correlation matrix
    d = np.sqrt(np.diag(A))
    scale = np.outer(d, d)
    corr = A / scale
    # Remove round-off asymmetry so the symmetric solver sees one matrix.
    corr = (corr + corr.T) / 2
    # eigh is the solver for symmetric input: it always returns real
    # eigenvalues and orthonormal eigenvectors, which V diag(l) V^T needs.
    # The general solver (eig) guarantees neither for repeated eigenvalues.
    eig_vals, eig_vecs = np.linalg.eigh(corr)
    eig_vals = np.maximum(eig_vals, epsilon)
    corr_psd = (eig_vecs * eig_vals) @ eig_vecs.T
    # Flooring eigenvalues moves the diagonal away from one; rescale it back.
    unit = np.sqrt(np.diag(corr_psd))
    corr_psd = corr_psd / np.outer(unit, unit)
    return scale * corr_psd

seed = 42
np.random.seed(seed)
def monte_carlo_normal_var_near_psd(mat, N = 100000):
    """Repair ``mat`` with near_psd_cov, then simulate ``N`` draws from the result."""
    return monte_carlo_normal_var_psd(near_psd_cov(mat), N)
# Run the simulation on the matrix held in `df` and display the sample covariance.
output_cov = monte_carlo_normal_var_near_psd(df.to_numpy())
output_cov

array([[0.08524667, 0.00825875, 0.0377202 , 0.00803968, 0.00346664],
       [0.00825875, 0.15992577, 0.05168932, 0.01096439, 0.00472551],
       [0.0377202 , 0.05168932, 0.03729618, 0.00598983, 0.00257676],
       [0.00803968, 0.01096439, 0.00598983, 0.00168348, 0.00054886],
       [0.00346664, 0.00472551, 0.00257676, 0.00054886, 0.00031341]])

In [13]:
# Read testout_5.3.csv into `ans` and display it
# (presumably the reference result to compare with output_cov - confirm).
ans = pd.read_csv("testout_5.3.csv")
ans

Unnamed: 0,x1,x2,x3,x4,x5
0,0.085347,0.008391,0.037693,0.008108,0.003462
1,0.008391,0.160773,0.051755,0.011086,0.004753
2,0.037693,0.051755,0.037418,0.006061,0.002535
3,0.008108,0.011086,0.006061,0.00169,0.000558
4,0.003462,0.004753,0.002535,0.000558,0.000315


## 5.4

In [14]:
# Read test5_3.csv into `df` and display it (the same file section 5.3 reads).
df = pd.read_csv("test5_3.csv")
df

Unnamed: 0,x1,x2,x3,x4,x5
0,0.084979,0.0,0.042304,0.008984,0.003876
1,0.0,0.160485,0.058136,0.012345,0.005326
2,0.042304,0.058136,0.03744,0.005963,0.002573
3,0.008984,0.012345,0.005963,0.001688,0.000546
4,0.003876,0.005326,0.002573,0.000546,0.000314


In [15]:
# Normal Simulation nonPSD Input, 0 mean, higham fix - 100,000 simulations, compare input vs output covariance
def higham_corr(A, max_iter=100, tol=1e-6):
    """Approximate the nearest correlation matrix to ``A`` by alternating projections.

    Each iteration projects onto the positive semi-definite cone (negative
    eigenvalues set to zero) and then onto the unit-diagonal matrices, carrying
    a correction term ``Y`` between iterations. Because the unit-diagonal
    projection is applied last, the result has an exact unit diagonal and is
    positive semi-definite only up to the convergence tolerance.

    Parameters
    ----------
    A : array-like, shape (n, n)
        Approximate correlation matrix; it is symmetrised first.
    max_iter : int, default 100
        Maximum number of iterations.
    tol : float, default 1e-6
        Stop once the Frobenius norm of the change between successive
        iterates falls below this value.

    Returns
    -------
    numpy.ndarray, shape (n, n)
        The most recent iterate (``A`` symmetrised if ``max_iter`` is 0).
    """
    A = np.asarray(A, dtype=float)
    # symmetric
    A = (A + A.T) / 2
    X = A.copy()
    # Correction carried from the previous PSD projection.
    Y = np.zeros_like(A)
    for _ in range(max_iter):
        R = X - Y
        # Symmetric solver: real eigenvalues and orthonormal eigenvectors,
        # which the reconstruction V diag(l) V^T relies on.
        eig_vals, eig_vecs = np.linalg.eigh(R)
        X_new = (eig_vecs * np.maximum(eig_vals, 0)) @ eig_vecs.T
        # The correction must be taken before the diagonal is overwritten.
        Y = X_new - R
        np.fill_diagonal(X_new, 1)
        converged = np.linalg.norm(X_new - X, ord='fro') < tol
        # Keep the newest iterate even when stopping, so the converged
        # matrix (not its predecessor) is what gets returned.
        X = X_new
        if converged:
            break
    return X
def higham_cov(A, max_iter=100, tol=1e-6):
    """Apply higham_corr to the correlation form of ``A`` and rescale the result.

    The standard deviations taken from the diagonal of ``A`` are divided out
    before the call and multiplied back in afterwards.
    """
    std = np.sqrt(np.diag(A))
    scale = np.outer(std, std)
    # Work on the correlation matrix, then restore the original scale.
    return scale * higham_corr(A / scale, max_iter, tol)

seed = 42
np.random.seed(seed)
def monte_carlo_normal_var_higham(mat, N = 100000):
    """Repair ``mat`` with higham_cov, then simulate ``N`` draws from the result."""
    return monte_carlo_normal_var_psd(higham_cov(mat), N)
# Run the simulation on the matrix held in `df` and display the sample covariance.
output_cov = monte_carlo_normal_var_higham(df.to_numpy())
output_cov

array([[0.08519654, 0.01262892, 0.03876062, 0.00825836, 0.00356093],
       [0.01262892, 0.1599542 , 0.05314819, 0.01127356, 0.00485942],
       [0.03876062, 0.05314819, 0.03729039, 0.00619781, 0.00266747],
       [0.00825836, 0.01127356, 0.00619781, 0.00168296, 0.00056791],
       [0.00356093, 0.00485942, 0.00266747, 0.00056791, 0.00031327]])

In [16]:
# Read testout_5.4.csv into `ans` and display it
# (presumably the reference result to compare with output_cov - confirm).
ans = pd.read_csv("testout_5.4.csv")
ans

Unnamed: 0,x1,x2,x3,x4,x5
0,0.085347,0.01281,0.038889,0.008288,0.003568
1,0.01281,0.160737,0.05338,0.011333,0.004898
2,0.038889,0.05338,0.037418,0.006219,0.002681
3,0.008288,0.011333,0.006219,0.00169,0.000571
4,0.003568,0.004898,0.002681,0.000571,0.000315


## 5.5

In [17]:
# Read test5_2.csv into `df` and display it (the same file section 5.2 reads).
df = pd.read_csv("test5_2.csv")
df

Unnamed: 0,x1,x2,x3,x4,x5
0,0.084979,0.116781,0.042304,0.008984,0.003876
1,0.116781,0.160485,0.058136,0.012345,0.005326
2,0.042304,0.058136,0.03744,0.005963,0.002573
3,0.008984,0.012345,0.005963,0.001688,0.000546
4,0.003876,0.005326,0.002573,0.000546,0.000314


In [18]:
# PCA Simulation, 99% explained, 0 mean - 100,000 simulations compare input vs output covariance
seed = 42
np.random.seed(seed)

def pca_cov(mat, var_explained=0.99):
    """Reduced-rank reconstruction of ``mat`` from its leading principal components.

    Keeps the smallest number of largest-eigenvalue components whose
    cumulative eigenvalue sum reaches ``var_explained`` times the total, and
    returns ``L @ L.T`` for the corresponding scaled eigenvectors.

    Parameters
    ----------
    mat : numpy.ndarray, shape (n, n)
        Symmetric matrix (only handled through ``np.linalg.eigh``).
    var_explained : float, default 0.99
        Fraction of the total (non-negative) eigenvalue sum to retain.

    Returns
    -------
    numpy.ndarray, shape (n, n)
    """
    eigvals, eigvecs = np.linalg.eigh(mat)
    # eigh returns eigenvalues in ascending order; flip to descending.
    eigvals = eigvals[::-1]
    eigvecs = eigvecs[:, ::-1]
    # Negative eigenvalues (round-off on a singular matrix, or a non-PSD
    # input) carry no variance. Left in, they shrink the total, make the
    # cumulative sum non-monotonic (searchsorted needs it sorted) and can
    # reach the square root below.
    eigvals = np.maximum(eigvals, 0.0)
    cum_var = np.cumsum(eigvals)
    # Take the total from the same running sum so var_explained == 1 compares
    # like with like instead of two differently rounded sums.
    total_var = cum_var[-1]
    num_components = np.searchsorted(cum_var, var_explained * total_var) + 1
    num_components = min(int(num_components), len(eigvals))
    L = eigvecs[:, :num_components] * np.sqrt(eigvals[:num_components])
    return L @ L.T

def monte_carlo_pca_var(mat, N = 100000, var_explained=0.99):
    """Reduce ``mat`` with pca_cov, then simulate ``N`` draws from the reduced matrix."""
    reduced = pca_cov(mat, var_explained)
    return monte_carlo_normal_var_psd(reduced, N)
# Run the simulation on the matrix held in `df` and display the sample covariance.
output_cov = monte_carlo_pca_var(df.to_numpy())
output_cov

array([[0.08456047, 0.11620586, 0.04212642, 0.00895495, 0.00386062],
       [0.11620586, 0.15969403, 0.05789155, 0.01230619, 0.0053054 ],
       [0.04212642, 0.05789155, 0.03743916, 0.0060116 , 0.00257978],
       [0.00895495, 0.01230619, 0.0060116 , 0.00109443, 0.0004707 ],
       [0.00386062, 0.0053054 , 0.00257978, 0.0004707 , 0.00020245]])

In [19]:
# Read testout_5.5.csv into `ans` and display it
# (presumably the reference result to compare with output_cov - confirm).
ans = pd.read_csv("testout_5.5.csv")
ans

Unnamed: 0,x1,x2,x3,x4,x5
0,0.085227,0.117122,0.042212,0.009002,0.003881
1,0.117122,0.160953,0.058009,0.012371,0.005334
2,0.042212,0.058009,0.03719,0.005993,0.002572
3,0.009002,0.012371,0.005993,0.001095,0.000471
4,0.003881,0.005334,0.002572,0.000471,0.000203


## 8.1

In [49]:
# Read test7_1.csv into `df` and display it; a later cell passes df.values to var_normal.
df = pd.read_csv("test7_1.csv")
df

Unnamed: 0,x1
0,0.071476
1,0.042703
2,0.046470
3,0.078857
4,0.071511
...,...
95,0.025379
96,0.006113
97,-0.020492
98,0.007548


In [59]:
# VaR from Normal Distribution
def var_normal(mat, alpha=0.05):
    """Lower-tail quantile of a normal distribution fitted to ``mat``.

    The mean and the sample standard deviation (``ddof=1``) are taken over
    all elements of ``mat``.

    Parameters
    ----------
    mat : array-like
        Observations.
    alpha : float, default 0.05
        Tail probability of the quantile.

    Returns
    -------
    tuple of float
        ``(mu + sigma * z, sigma * z)`` with ``z = norm.ppf(alpha)``: the
        quantile itself and its offset from the mean. Neither value is
        sign-flipped, so both are negative for a typical loss tail; the
        calling cell takes ``abs`` to report them as VaR figures.
    """
    mu = np.mean(mat)
    sigma = np.std(mat, ddof=1)
    # Offset of the alpha-quantile from the mean; evaluated once and reused.
    var_diff_from_mean = sigma * stats.norm.ppf(alpha)
    var_absolute = mu + var_diff_from_mean
    return var_absolute, var_diff_from_mean
# var_normal returns unsigned-flipped quantiles; abs() reports them as magnitudes.
var_absolute, var_diff_from_mean = var_normal(df.to_numpy())
abs(var_absolute), abs(var_diff_from_mean)

(0.030920416807415675, 0.07694615326028394)

In [43]:
# Read testout8_1.csv into `ans` and display it
# (presumably the reference VaR values to compare with the cell above - confirm).
ans = pd.read_csv("testout8_1.csv")
ans

Unnamed: 0,VaR Absolute,VaR Diff from Mean
0,0.03092,0.076946


## 8.2

In [23]:
# Read test7_2.csv into `df` and display it; a later cell passes df.values to t_var_full_fit.
df = pd.read_csv("test7_2.csv")
df

Unnamed: 0,x1
0,0.062695
1,-0.001343
2,0.058816
3,0.074756
4,0.014312
...,...
95,0.083073
96,0.125152
97,0.046132
98,0.036900


In [24]:
def t_var_full_fit(data, alpha=0.05):
    """VaR from a Student-t distribution fitted to ``data`` with ``stats.t.fit``.

    Returns ``(var_abs, var_diff)``: the negated ``alpha``-quantile of the
    fitted distribution, and the negated offset of that quantile from the
    fitted location.
    """
    dof, location, spread = stats.t.fit(data)
    # Offset of the alpha-quantile from the fitted location.
    tail_offset = spread * stats.t.ppf(alpha, dof)
    return -(location + tail_offset), -(tail_offset)
# Fit a Student-t to the data held in `df` and report both VaR figures as magnitudes.
var_absolute, var_diff_from_mean = t_var_full_fit(df.to_numpy())
abs(var_absolute), abs(var_diff_from_mean)

(0.041529702716233574, 0.08747008276358771)

In [25]:
# Read testout8_2.csv into `ans` and display it
# (presumably the reference VaR values to compare with the cell above - confirm).
ans = pd.read_csv("testout8_2.csv")
ans

Unnamed: 0,VaR Absolute,VaR Diff from Mean
0,0.04153,0.08747


## 8.3

In [26]:
# Read test7_2.csv into `df` and display it (the same file section 8.2 reads).
df = pd.read_csv("test7_2.csv")
df

Unnamed: 0,x1
0,0.062695
1,-0.001343
2,0.058816
3,0.074756
4,0.014312
...,...
95,0.083073
96,0.125152
97,0.046132
98,0.036900


In [27]:
seed = 42
np.random.seed(seed)
def var_t_simulation(data, N=100000, alpha=0.05):
    """Simulated lower-tail quantile of a Student-t distribution fitted to ``data``.

    A single t distribution is fitted to ``data`` with ``stats.t.fit``; ``N``
    draws per column are taken from NumPy's global random state, shifted and
    scaled by the fitted location and scale, and the empirical
    ``alpha``-quantile of each simulated column is returned.

    Parameters
    ----------
    data : array-like, shape (n, k) or (n,)
        Observations. One-dimensional input is treated as a single column.
    N : int, default 100000
        Number of simulated draws per column.
    alpha : float, default 0.05
        Tail probability of the quantile.

    Returns
    -------
    tuple of numpy.ndarray, each of shape (k,)
        ``(quantile, quantile - loc_hat)``. Neither value is sign-flipped;
        the calling cell takes ``abs`` to report them as VaR figures.
    """
    data = np.asarray(data)
    nu_hat, loc_hat, scale_hat = stats.t.fit(data)
    # Number of columns to simulate; a flat vector counts as one column.
    k = data.shape[1] if data.ndim > 1 else 1
    Z = np.random.standard_t(nu_hat, size=(N, k))
    R = loc_hat + scale_hat * Z
    # np.quantile takes the probability directly (0.05 is the 5th percentile).
    var_absolute = np.quantile(R, alpha, axis=0)
    var_diff_from_mean = var_absolute - loc_hat
    return var_absolute, var_diff_from_mean
# Simulate from a Student-t fitted to the data held in `df`; abs() reports the quantiles as magnitudes.
var_absolute, var_diff_from_mean = var_t_simulation(df.to_numpy())
abs(var_absolute), abs(var_diff_from_mean)

(array([0.04171311]), array([0.08765349]))

In [28]:
# Read testout8_3.csv into `ans` and display it
# (presumably the reference VaR values to compare with the cell above - confirm).
ans = pd.read_csv("testout8_3.csv")
ans

Unnamed: 0,VaR Absolute,VaR Diff from Mean
0,0.040212,0.086586
