# Iterative Construction of a Penalised Vine Structure
This notebook iteratively estimates the quantile.

#### Libraries

In [1]:
import openturns as ot
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

%load_ext autoreload
%autoreload 2

# Seed value: it seeds NumPy's global random generator here and is also passed
# as the `random_state` argument of the estimation calls further down.
random_state = 123
np.random.seed(random_state)

#### Model function
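This notebook can use either the simple additive model (`func_sum`) or the overflow model (`func_overflow`); the cell below selects the overflow model.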

In [2]:
from dependence.tests import func_overflow, margins_overflow, var_names_overflow, func_sum

# Model under study. Later cells compare `test_func` against `func_overflow`
# and `func_sum` to decide which margins to use and whether to draw the
# additive-model scatter plot.
test_func = func_overflow

#### Dimension and margins
We first define the problem dimension and the margins.

In [3]:
# Pick the margins and the problem dimension that match the selected model.
if test_func == func_overflow:
    # The overflow model comes with its own margins; the dimension follows from them.
    margins = margins_overflow
    dim = len(margins)
else:
    # Any other model: a 6-dimensional problem with default `ot.Normal()` margins.
    dim = 6
    # Build one distribution object per margin. `[ot.Normal()]*dim` would repeat
    # a single shared instance `dim` times, so modifying one margin in place
    # would silently modify all of them.
    margins = [ot.Normal() for _idx in range(dim)]

We choose the coefficients of the variables in the additive function (this cell only runs when `func_sum` is the selected model).

In [4]:
# Only relevant when the additive model is the selected one.
if test_func == func_sum:
    # `dim` log-spaced coefficients: dim+1 points are generated from 10**0
    # (endpoint excluded) and the first one is dropped.
    coeficients = np.logspace(0., 5., dim+1, endpoint=False)[1:]

    # Sample the joint distribution built on the margins and evaluate the model.
    n_plot = 10000
    input_distribution = ot.ComposedDistribution(margins)
    x = np.asarray(input_distribution.getSample(n_plot))
    y = test_func(x, coeficients)

    # One scatter panel per input variable, stacked vertically with shared axes.
    fig, axes = plt.subplots(nrows=dim, ncols=1, sharex=True, sharey=True,
                             figsize=(4, 2*dim))
    for i, ax in enumerate(axes):
        ax.plot(x[:, i], y, '.')
        ax.set_xlabel(r'$X_%d$' % (i+1), fontsize=12)
        ax.set_ylabel(r'$y$', fontsize=12)
    fig.tight_layout()

#### Copula families
We now consider only Gaussian dependencies for this example

In [5]:
# Integer (dim x dim) matrix of copula family codes: code 1 on every entry
# strictly below the diagonal (one entry per pair (i, j) with j < i), 0 elsewhere.
# The notebook text describes these as Gaussian dependencies.
families = np.tril(np.ones((dim, dim), dtype=int), k=-1)

## Estimations
We create an instance of the main class for the conservative estimate, and we define a `q_func` object that uses the quantile as the quantity of interest.

In [13]:
from dependence import ConservativeEstimate, quantile_func

alpha = 0.99

# Choose the quantity of interest from the probability level.
if not alpha > 0.5:
    # Lower level: the quantile function is used as is (minimizing).
    q_func = quantile_func(alpha)
else:
    # Upper level: the quantile is negated (maximizing the quantile).
    def q_func(x, axis=1):
        """Return the negation of `quantile_func(alpha)` evaluated on `x` along `axis`."""
        quantile_at_alpha = quantile_func(alpha)
        return -quantile_at_alpha(x, axis=axis)

# Estimator built from the selected model, its margins and the family matrix.
quant_estimate = ConservativeEstimate(
    model_func=test_func,
    margins=margins,
    families=families)

First, we compute the quantile at independence

In [14]:
# Input sample size, also reused by the grid-search cost report further down.
n = 10000
# `independence` returns an indexable result; only its first element is kept
# (presumably the single result at independence -- TODO confirm).
indep_result = quant_estimate.independence(
    q_func=q_func,
    n_input_sample=n,
    random_state=random_state)[0]

In [15]:
# Bootstrap the independence result (argument: 1000) and summarise the
# bootstrap sample by its mean and its coefficient of variation in percent.
# NOTE(review): q_func negates the quantile when alpha > 0.5, so the value
# printed here is presumably the negated quantile -- confirm.
indep_result.compute_bootstrap(1000)
boot_mean = indep_result.bootstrap_sample.mean()
boot_std = indep_result.bootstrap_sample.std()
cov_percent = abs(boot_std/boot_mean)*100.
print('Quantile at independence: %.2f with a C.O.V at %.1f %%' % (boot_mean, cov_percent))

Quantile at independence: 1.91 with a C.O.V at 8.6 %


### Grid Search Approach
First, we consider a grid search approach in order to compare its performance with that of the iterative algorithm.

In [16]:
# Grid-search settings: K is passed as `n_dep_param`, with a 'rand' grid type.
K = 100
grid_type = 'rand'

# A fresh estimator is created for the grid search.
quant_estimate = ConservativeEstimate(
    model_func=test_func,
    margins=margins,
    families=families)

grid_result = quant_estimate.gridsearch_minimize(
    n_dep_param=K,
    n_input_sample=n,
    grid_type=grid_type,
    q_func=q_func,
    random_state=random_state)

In [17]:
# Bootstrap the minimum result of the grid search (argument: 1000) and report
# the bootstrap mean with its coefficient of variation in percent.
min_result = grid_result.min_result
min_result.compute_bootstrap(1000)
boot_mean = min_result.bootstrap_sample.mean()
boot_std = min_result.bootstrap_sample.std()
cov_percent = abs(boot_std/boot_mean)*100.
print('Worst Quantile: %.2f with a C.O.V at %.1f %%' % (boot_mean, cov_percent))
# Cost reported as K times n, the two values passed to the grid search above.
print('Cost = %d' % (K*n))

Worst Quantile: -5.51 with a C.O.V at 7.1 %
Cost = 1000000


In [18]:
# Only in the two-dimensional case: plot the grid-search quantities against
# the dependence parameters of the grid.
if dim == 2:
    fig, ax = plt.subplots()
    ax.plot(grid_result.dep_params, grid_result.quantities)

### Iterative Approach
Now let's see how well we can do with the iterative approach.

In [24]:
from dependence import iterative_vine_minimize

# Settings of the iterative search. `n` is passed as `n_input_sample`, `K` as
# `n_dep_param_init`; the others are forwarded under their own names.
n = 10000
p_max = 4
K = 10
verbose = True
grid_type = 'lhs'
n_add_pairs = 1
n_remove_pairs = 0

# A fresh estimator is created for the iterative search.
quant_estimate = ConservativeEstimate(model_func=test_func, margins=margins, families=families)

# Forward the `verbose` setting defined above instead of a hard-coded True, so
# that changing the setting actually changes the amount of printed output.
worst_quantities, selected_pairs, removed_pairs = iterative_vine_minimize(
    quant_estimate,
    n_input_sample=n,
    n_dep_param_init=K,
    p_max=p_max,
    grid_type=grid_type,
    q_func=q_func,
    n_add_pairs=n_add_pairs,
    n_remove_pairs=n_remove_pairs,
    verbose=verbose)

Worst quantile of [(1, 0)] at -3.55081259631
Worst quantile of [(2, 0)] at 0.114800959491
Worst quantile of [(2, 1)] at 0.84447906533
Worst quantile of [(3, 0)] at 0.926509429814
Worst quantile of [(3, 1)] at 1.27318988501
Worst quantile of [(3, 2)] at 1.51270271385
Worst quantile of [(4, 0)] at 0.824718056818
Worst quantile of [(4, 1)] at 0.921730369952
Worst quantile of [(4, 2)] at 1.6503433085
Worst quantile of [(4, 3)] at 1.43820287692
Worst quantile of [(5, 0)] at 1.59998930135
Worst quantile of [(5, 1)] at 1.53700817735
Worst quantile of [(5, 2)] at 1.58402731769
Worst quantile of [(5, 3)] at 1.30270047423
Worst quantile of [(5, 4)] at 1.48465181558
Worst quantile of [(6, 0)] at 1.38187076904
Worst quantile of [(6, 1)] at 1.74354210064
Worst quantile of [(6, 2)] at 1.57852089157
Worst quantile of [(6, 3)] at 1.77599417614
Worst quantile of [(6, 4)] at 1.49271250996
Worst quantile of [(6, 5)] at 1.61158597862
Worst quantile of [(7, 0)] at 1.51770493733
Worst quantile of [(7, 1)] a

In [23]:
# Display the first value returned by iterative_vine_minimize.
worst_quantities

[-4.1228913083545775,
 -4.0679089837627203,
 -4.4797643563633596,
 -4.5652428429638112]