In [1]:
import warnings
warnings.filterwarnings('ignore')

import copulas
from copulas.datasets import sample_trivariate_xyz
from copulas.multivariate import GaussianMultivariate
from copulas.univariate.student_t import StudentTUnivariate
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Read the log-return table from its CSV file into a DataFrame; the bare name
# on the last line makes the notebook render the frame as the cell output.
dataset = pd.read_csv(filepath_or_buffer="../data/log_return_data.csv")
dataset

Unnamed: 0,log_return_vkco,log_return_tcs_il,log_return_poly_il,log_return_five_il
0,-0.012110,-0.004162,-0.003712,-0.013377
1,-0.035292,0.000000,-0.029352,-0.024070
2,0.011583,0.002083,-0.013279,0.007894
3,-0.006740,0.004154,0.025964,-0.002916
4,-0.009709,0.015425,-0.010984,-0.007034
...,...,...,...,...
248,-0.007648,0.018653,0.030041,0.010193
249,0.019012,0.044184,0.029452,-0.006216
250,0.018657,0.038540,0.008584,0.036732
251,0.046940,0.035287,0.042393,0.000000


### Получаем параметры GaussianCopula, т.е. оценённую корреляционную матрицу копулы (по умолчанию `fit` использует метод максимального правдоподобия, а не тау Кендалла)

In [2]:
from copulae import GaussianCopula

# The copula needs one dimension per data column. Index `.shape` directly
# instead of unpacking into `_`: in IPython `_` is the "last output" variable,
# and once user code assigns to it the kernel stops updating it.
ndim = dataset.shape[1]
g_cop = GaussianCopula(dim=ndim)  # Gaussian copula with `ndim` dimensions
g_cop.fit(dataset)  # estimate the copula parameters from the data
g_cop.summary()  # last expression of the cell, rendered as its output

       log_return_vkco  log_return_tcs_il  log_return_poly_il  \
count       253.000000         253.000000          253.000000   
mean          0.500000           0.500000            0.500000   
std           0.288087           0.288105            0.288106   
min           0.003937           0.003937            0.003937   
25%           0.251969           0.251969            0.251969   
50%           0.480315           0.500000            0.500000   
75%           0.748031           0.748031            0.748031   
max           0.996063           0.996063            0.996063   

       log_return_five_il  
count          253.000000  
mean             0.500000  
std              0.288106  
min              0.003937  
25%              0.251969  
50%              0.500000  
75%              0.748031  
max              0.996063  


0,1,2,3
1.0,0.404077,0.271203,0.405806
0.404077,1.0,0.256553,0.469956
0.271203,0.256553,1.0,0.22744
0.405806,0.469956,0.22744,1.0


In [3]:
from copulae import StudentCopula

# Degrees of freedom handed to the Student copula constructor. The value is
# arbitrary: passing `df` is optional and is done here only to demonstrate
# the argument.
degrees_of_freedom = 5.5

# Student copula with the same dimension (`ndim`) as the Gaussian copula
# built earlier in the notebook, fitted to the same data.
t_cop = StudentCopula(df=degrees_of_freedom, dim=ndim)
t_cop.fit(dataset)
t_cop.summary()  # last expression of the cell, rendered as its output


       log_return_vkco  log_return_tcs_il  log_return_poly_il  \
count       253.000000         253.000000          253.000000   
mean          0.500000           0.500000            0.500000   
std           0.288087           0.288105            0.288106   
min           0.003937           0.003937            0.003937   
25%           0.251969           0.251969            0.251969   
50%           0.480315           0.500000            0.500000   
75%           0.748031           0.748031            0.748031   
max           0.996063           0.996063            0.996063   

       log_return_five_il  
count          253.000000  
mean             0.500000  
std              0.288106  
min              0.003937  
25%              0.251969  
50%              0.500000  
75%              0.748031  
max              0.996063  


0,1,2,3
1.0,0.385422,0.232127,0.369613
0.385422,1.0,0.213583,0.45354
0.232127,0.213583,1.0,0.207374
0.369613,0.45354,0.207374,1.0


##### Разные методы

In [4]:
# Fit a Gaussian copula (copulas package) to the data and draw a synthetic
# sample of 253 rows (the copula summaries earlier in the notebook report
# count = 253 observations). Sampling is random and no seed is set here, so
# the printed R^2 values are expected to vary from run to run.
copula = GaussianMultivariate()
copula.fit(dataset)
sample = copula.sample(253)

# For every column of the synthetic sample, regress it (OLS) on all remaining
# columns and print the R^2 of that fit.
# The number of columns is taken from the sample instead of being hard-coded
# as 4, so the loop keeps working (each target regressed on *all* other
# columns, never on itself) when the data has a different number of columns.
columns = list(sample.columns)
n_cols = len(columns)
for i, target in enumerate(columns):
    # Same predictor order as before: walk backwards from the target, cyclically.
    predictors = [columns[(i - k) % n_cols] for k in range(1, n_cols)]
    # With a single column there are no predictors: fall back to an
    # intercept-only model ("1") so the formula stays valid.
    rhs = " + ".join(predictors) or "1"
    model = ols(f"{target} ~ {rhs}", data=sample).fit()
    print(model.rsquared)

# Fully qualified paths of the copulas classes imported at the top of the
# notebook (kept as a reference note):
# copulas.univariate.student_t.StudentTUnivariate
# copulas.multivariate.gaussian.GaussianMultivariate


0.1357904279205423
0.1377387620670204
0.03860937348950377
0.19956543874530985
