# Experimenting with parametric estimation of a varying correlation coefficient.

In [1]:
%load_ext autoreload
%autoreload 2
%load_ext lab_black

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from scipy.stats import multivariate_normal
from tqdm import tqdm

# Seed NumPy's global random state so the random draws in this notebook are
# reproducible from a fresh kernel.
np.random.seed(1234)

In [2]:
# Number of simulated observations (one covariate value and one bivariate
# draw are generated per observation).
n = 3_000

In [3]:
# Covariate: one standard-normal value per observation.  The true correlation
# is linear in the covariate (intercept 0.3, slope 0.1), clipped to [-1, 1] so
# it is always a valid correlation coefficient.
covar = np.random.normal(size=n)
covar_rho = np.clip(0.3 + covar * 0.1, -1, 1)

# Each observation is a single draw from a zero-mean, unit-variance bivariate
# normal with its own correlation.  One draw per call, in covariate order.
data = np.array(
    [
        np.random.multivariate_normal(mean=[0, 0], cov=[[1, rho], [rho, 1]])
        for rho in covar_rho
    ]
)

In [4]:
def negloglik(params, obs=None, z=None):
    """Negative log-likelihood of a bivariate normal with covariate-driven correlation.

    Every observation is modelled as a zero-mean, unit-variance bivariate
    normal whose correlation is ``rho_i = clip(a + b * z_i, -1, 1)``.

    Parameters
    ----------
    params : sequence of two floats
        ``(a, b)``: intercept and slope of the linear model for the correlation.
    obs : array-like of shape (n, 2), optional
        Observations, one row per sample.  When omitted, the notebook-level
        ``data`` array is used, so the one-argument call made by
        ``scipy.optimize.minimize`` keeps working unchanged.
    z : array-like of shape (n,), optional
        Covariate value for each observation.  When omitted, the
        notebook-level ``covar`` array is used.

    Returns
    -------
    float
        Sum over all observations of the negative log-density
        (``0`` when there are no observations).
    """
    a, b = params[0], params[1]

    # Fall back to the notebook globals only when nothing was passed explicitly;
    # the globals are looked up at call time, not at definition time.
    obs = np.asarray(data if obs is None else obs)
    z = np.asarray(covar if z is None else z)

    zero_mean = [0, 0]
    nll = 0
    for i in range(obs.shape[0]):
        # Clip so the implied 2x2 matrix is always a valid correlation matrix.
        rho = np.clip(a + b * z[i], -1, 1)
        nll -= multivariate_normal.logpdf(
            obs[i, :],
            mean=zero_mean,
            cov=np.array([[1, rho], [rho, 1]]),
            # rho can hit exactly +/-1 after clipping, making the matrix singular.
            allow_singular=True,
        )
    return nll


# Starting point for the search: the pooled sample correlation as the
# intercept and a zero slope (i.e. "no covariate effect").
avg_coef = np.corrcoef(data[:, 0], data[:, 1])[0, 1]

# No `method` is given, so SciPy uses its default solver for an unconstrained
# problem (BFGS per the SciPy docs); `disp` prints the convergence summary.
model = minimize(
    fun=negloglik,
    x0=np.array([avg_coef, 0]),
    tol=1e-4,
    options={"disp": True},
)

         Current function value: 8354.291551
         Iterations: 14
         Function evaluations: 81
         Gradient evaluations: 27
