In [1]:
import pandas as pd
import numpy as np

# Directory that holds one pseudo-observation text file per series.
PSEUDO_OBS_DIR = '../database/pseudo_observations'


def load_pseudo_obs(name):
    """Load one pseudo-observation series as a 1-D float array.

    Reads ``<PSEUDO_OBS_DIR>/<name>.txt`` with ``pd.read_csv``, keeps every
    second row (rows 0, 2, 4, ...) and returns the first column converted
    to ``float``.

    NOTE(review): ``read_csv`` is called with its default header handling,
    so the first line of the file is consumed as column names rather than
    data -- confirm the files really start with a header row.

    Parameters
    ----------
    name : str
        File stem of the series, e.g. ``'LQD'``.

    Returns
    -------
    numpy.ndarray
        1-D array of floats.
    """
    frame = pd.read_csv(f'{PSEUDO_OBS_DIR}/{name}.txt')
    return frame.iloc[::2, 0].astype(float).to_numpy()


# The variable names are kept because later cells refer to data1..data4.
data1 = load_pseudo_obs('vas')
data2 = load_pseudo_obs('LQD')
data3 = load_pseudo_obs('HYG')
data4 = load_pseudo_obs('IEAC')

In [2]:
import pyvinecopulib as pv
# Candidate pair-copula families the vine fit may choose from.
controls = pv.FitControlsVinecop(family_set=[pv.BicopFamily.gaussian,
                                             pv.BicopFamily.clayton,
                                             pv.BicopFamily.frank,
                                             pv.BicopFamily.gumbel,
                                             pv.BicopFamily.bb1])

# One column per series: shape (n_observations, 4).
u = np.stack([data1, data2, data3, data4]).T
print(u)

# Fit a vine copula to the stacked pseudo-observations.
cop = pv.Vinecop(u, controls=controls)

# Draw 100 samples from the fitted copula. Explicit seeds make the draw
# reproducible across kernel restarts (same convention as the Gaussian-vine
# example further down, which also passes `seeds`).
u_sim = cop.simulate(100, seeds=[1, 2, 3, 4])
print(u_sim)

[[0.71928319 0.27945011 0.47239207 0.38955118]
 [0.4704488  0.52143145 0.50406523 0.55198975]
 [0.71220833 0.60157051 0.5348449  0.58271997]
 ...
 [0.52357397 0.37263047 0.53923004 0.53012607]
 [0.20885459 0.47613649 0.61418096 0.40347559]
 [0.71366527 0.51651115 0.34158869 0.62289767]]
[[0.06990019 0.25933776 0.08234936 0.0449144 ]
 [0.21709277 0.18871288 0.19145011 0.10341235]
 [0.36710131 0.55911429 0.54640769 0.53697323]
 [0.09648489 0.15427268 0.21804557 0.19491533]
 [0.89854131 0.9057658  0.89537353 0.890319  ]
 [0.61595475 0.34256354 0.29326    0.20305027]
 [0.68183736 0.57635795 0.61222625 0.63531488]
 [0.34592301 0.20066095 0.45868716 0.18171461]
 [0.28007387 0.22394797 0.32108443 0.28963696]
 [0.98655277 0.99306627 0.99572201 0.99546586]
 [0.97782651 0.61425574 0.64059123 0.55457261]
 [0.51231828 0.2706462  0.37743129 0.25298777]
 [0.24101145 0.36675364 0.46083358 0.40663656]
 [0.71708116 0.49060674 0.58952723 0.59625717]
 [0.59941511 0.54397941 0.65293624 0.57382592]
 [0.403

In [3]:
# Display the structure of the vine copula currently bound to `cop`;
# as the cell's last expression, its repr becomes the cell output.
cop.structure

<pyvinecopulib.RVineStructure>
3 2 4 4 
2 4 2 
4 3 
1 

In [4]:
# Simulate some data
np.random.seed(1234)  # seed for the random generator
n = 1000  # number of observations
d = 5  # the dimension
mean = np.random.normal(size=d)  # mean vector
cov = np.random.normal(size=(d, d))  # covariance matrix
cov = np.dot(cov.transpose(), cov)  # make it non-negative definite
x = np.random.multivariate_normal(mean, cov, n)

# Transform copula data using the empirical distribution
u = pv.to_pseudo_obs(x)
print(u)

# Fit a Gaussian vine
# (i.e., properly specified since the data is multivariate normal)
controls = pv.FitControlsVinecop(family_set=[pv.BicopFamily.gaussian])
cop = pv.Vinecop(u, controls=controls)

# Sample from the copula (seeded so the draw is reproducible)
n_sim = 1000
u_sim = cop.simulate(n_sim, seeds=[1, 2, 3, 4])

# Transform back simulations to the original scale: for each dimension, map
# the simulated uniforms through the empirical quantiles of the original
# data. The result has shape (d, n_sim), i.e. one row per dimension.
x_sim = np.asarray([np.quantile(x[:, i], u_sim[:, i]) for i in range(d)])

# Compare the true moments with the ones recovered from the simulation.
# Only the last expression of a cell is displayed, so the mean comparison
# must be printed explicitly -- as a bare expression it was silently dropped.
print([mean, np.mean(x_sim, axis=1)])
[cov, np.cov(x_sim)]

[[0.62937063 0.75924076 0.22377622 0.23376623 0.34765235]
 [0.3976024  0.37862138 0.66133866 0.38361638 0.38761239]
 [0.31668332 0.67832168 0.52747253 0.74025974 0.46853147]
 ...
 [0.36563437 0.36563437 0.81518482 0.2997003  0.7042957 ]
 [0.47652348 0.53746254 0.11588412 0.51148851 0.37662338]
 [0.59340659 0.37962038 0.52147852 0.15984016 0.92007992]]


[array([[ 2.37095772,  1.72011592,  1.34581349, -2.33400987, -3.1412032 ],
        [ 1.72011592,  2.77391072, -0.8386675 , -1.94255384, -3.0356469 ],
        [ 1.34581349, -0.8386675 ,  4.73656299, -1.11520579, -1.00737741],
        [-2.33400987, -1.94255384, -1.11520579,  6.17099976, -0.86804342],
        [-3.1412032 , -3.0356469 , -1.00737741, -0.86804342, 10.39309954]]),
 array([[ 2.22422017,  1.5946634 ,  1.44467283, -2.24840034, -2.86432409],
        [ 1.5946634 ,  2.56490065, -0.64236225, -1.88381331, -2.55280137],
        [ 1.44467283, -0.64236225,  4.48378773, -1.21625326, -1.35825765],
        [-2.24840034, -1.88381331, -1.21625326,  6.11239488, -1.14967754],
        [-2.86432409, -2.55280137, -1.35825765, -1.14967754,  9.93512784]])]

In [5]:
from database.database_parser import create_connection
from database.database_parser import select_curr_solution
from distributions.student_poisson_mixture import spm


# Open the project database and look up the current solution for 'HYG'.
# NOTE(review): the recorded run of this cell ended with
# "OperationalError: no such table: asset_model_student_solutions".
# Presumably it is raised by select_curr_solution because the relative path
# 'abacus_database.db' resolved to a database file that lacks that table --
# TODO confirm the intended database location relative to this notebook.
test_connection = create_connection('abacus_database.db')
# NOTE(review): opt_sol is not used again in the visible part of the notebook,
# and test_connection is never explicitly closed here -- confirm whether either
# is needed further down.
opt_sol = select_curr_solution(test_connection, 'HYG')

# Test running inverse CDF. ~ 5 seconds.
# The first argument (0.5) is presumably the probability level; the meaning of
# the remaining positional arguments is defined by spm and is not visible
# here -- TODO document them once confirmed.
spm.ppf(0.5, 0, 1, 10, 0.15, 4)

OperationalError: no such table: asset_model_student_solutions