In [1]:
import numpy as np
import pandas as pd

# ---------- helpers ----------
def _symm(A: np.ndarray) -> np.ndarray:
    return 0.5 * (A + A.T)

def _to_corr_from_cov(S: np.ndarray):
    std = np.sqrt(np.diag(S))
    std = np.where(std <= 0.0, 1.0, std)
    D_inv = np.diag(1.0 / std)
    R = D_inv @ S @ D_inv
    return R, std

def _to_cov_from_corr(R: np.ndarray, std: np.ndarray):
    D = np.diag(std)
    return D @ R @ D

# ---------- Rebonato–Jäckel (near-PSD) ----------
def near_psd_correlation(R_df: pd.DataFrame, eps: float = 0.0) -> pd.DataFrame:
    C = _symm(R_df.values)
    eigvals, S = np.linalg.eigh(C)
    lam_p = np.maximum(eigvals, eps)           # clip eigenvalues

    # scaling t_i = 1 / sum_j S_{ij}^2 * lam'_j
    Si2 = S**2
    denom = Si2 @ lam_p
    denom = np.where(denom <= 0, 1.0, denom)
    t = 1.0 / denom

    # B = sqrt(T) S sqrt(Lam')
    B = (np.sqrt(t)[:, None]) * S * (np.sqrt(lam_p)[None, :])
    C_hat = _symm(B @ B.T)

    # normalize to diag=1
    d = np.sqrt(np.diag(C_hat))
    C_hat = C_hat / np.outer(d, d)
    np.fill_diagonal(C_hat, 1.0)

    return pd.DataFrame(C_hat, index=R_df.index, columns=R_df.columns)

def near_psd_covariance(S_df: pd.DataFrame, eps: float = 0.0) -> pd.DataFrame:
    S = _symm(S_df.values)
    R, std = _to_corr_from_cov(S)
    R_psd = near_psd_correlation(pd.DataFrame(R, index=S_df.index, columns=S_df.columns), eps=eps).values
    S_psd = _to_cov_from_corr(R_psd, std)
    return pd.DataFrame(_symm(S_psd), index=S_df.index, columns=S_df.columns)

# ---------- Higham (nearest correlation) ----------
def higham_correlation(R_df: pd.DataFrame, tol: float = 1e-8, max_iter: int = 200) -> pd.DataFrame:
    X = _symm(R_df.values.copy())
    for _ in range(max_iter):
        # PSD projection
        w, V = np.linalg.eigh(_symm(X))
        w = np.maximum(w, 0.0)
        X_psd = V @ np.diag(w) @ V.T
        # set diag=1
        np.fill_diagonal(X_psd, 1.0)
        if np.linalg.norm(X_psd - X, ord='fro') < tol:
            X = X_psd
            break
        X = X_psd
    return pd.DataFrame(X, index=R_df.index, columns=R_df.columns)

def higham_covariance(S_df: pd.DataFrame, tol: float = 1e-8, max_iter: int = 200) -> pd.DataFrame:
    S = _symm(S_df.values)
    R, std = _to_corr_from_cov(S)
    R_h = higham_correlation(pd.DataFrame(R, index=S_df.index, columns=S_df.columns),
                             tol=tol, max_iter=max_iter).values
    S_h = _to_cov_from_corr(R_h, std)
    return pd.DataFrame(_symm(S_h), index=S_df.index, columns=S_df.columns)

In [2]:
cov_df = pd.read_csv("../testfiles/data/testout_1.3.csv")   
corr_df = pd.read_csv("../testfiles/data/testout_1.4.csv")   

In [3]:
## Test3.1
print("\nnear_psd COV:")
print(near_psd_covariance(cov_df))


near_psd COV:
         x1        x2        x3        x4        x5
0  1.173986 -0.617989 -0.284559 -0.065152 -0.688287
1 -0.617989  1.318197  0.017092  0.445696  0.139176
2 -0.284559  0.017092  0.918102  0.354147  0.246056
3 -0.065152  0.445696  0.354147  0.894764 -0.218717
4 -0.688287  0.139176  0.246056 -0.218717  0.522607


In [4]:
## Test3.2
print("near_psd CORR:")
print(near_psd_correlation(corr_df))

near_psd CORR:
         x1        x2        x3        x4        x5
0  1.000000 -0.483199 -0.241787 -0.067767 -0.714761
1 -0.483199  1.000000  0.015446  0.405660  0.178286
2 -0.241787  0.015446  1.000000  0.488250  0.336248
3 -0.067767  0.405660  0.488250  1.000000 -0.322136
4 -0.714761  0.178286  0.336248 -0.322136  1.000000


In [5]:
## Test3.3
print("\nHigham COV:")
print(higham_covariance(cov_df))


Higham COV:
         x1        x2        x3        x4        x5
0  1.173986 -0.624301 -0.294868 -0.057170 -0.694612
1 -0.624301  1.318197  0.016488  0.448489  0.143391
2 -0.294868  0.016488  0.918102  0.353517  0.246406
3 -0.057170  0.448489  0.353517  0.894764 -0.216594
4 -0.694612  0.143391  0.246406 -0.216594  0.522607


In [6]:
## Test3.4
print("\nHigham CORR:")
print(higham_correlation(corr_df))


Higham CORR:
         x1        x2        x3        x4        x5
0  1.000000 -0.483199 -0.241787 -0.067767 -0.714761
1 -0.483199  1.000000  0.015446  0.405660  0.178286
2 -0.241787  0.015446  1.000000  0.488250  0.336248
3 -0.067767  0.405660  0.488250  1.000000 -0.322136
4 -0.714761  0.178286  0.336248 -0.322136  1.000000
