# Iterative Construction of a Penalised Vine Structure
This notebook iteratively estimates the worst-case quantile of a model output.

#### Libraries

In [1]:
import openturns as ot
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

%load_ext autoreload
%autoreload 2

# Single seed reused by every stochastic step of the notebook.
random_state = 123
np.random.seed(random_state)
# OpenTURNS samples (e.g. `Distribution.getSample`) come from its own random
# generator, which NumPy's seed does not control, so seed it explicitly too.
ot.RandomGenerator.SetSeed(random_state)

#### Model function
This example considers either the overflow model (`func_overflow`) or the simple additive model (`func_sum`); `test_func` selects which one is used.

In [143]:
from dependence.tests import func_overflow, margins_overflow, var_names_overflow, func_sum

# Model function under study. Later cells compare `test_func` against
# `func_overflow` and `func_sum` to decide which margins and plots to use.
test_func = func_overflow

#### Dimension and margins
We first define the problem dimension and the margins

In [144]:
# The overflow model ships its own margins; any other model falls back to
# seven standard normal margins. The dimension always follows the margins.
margins = margins_overflow if test_func == func_overflow else [ot.Normal()] * 7
dim = len(margins)

We choose the coefficients of the variables in the additive function.

In [145]:
if test_func == func_sum:
    # Size of the sample used only for the scatter plots below.
    n_plot = 10000

    # dim + 1 log-spaced values between 10^0 and 10^3 (end excluded); the
    # first one is dropped so that exactly `dim` coefficients remain.
    coeficients = np.logspace(0., 3., dim+1, endpoint=False)[1:]

    # Sample the inputs from the margins alone and evaluate the model.
    x = np.asarray(ot.ComposedDistribution(margins).getSample(n_plot))
    y = test_func(x, coeficients)

    # One panel per input variable, all sharing the same axes.
    fig, axes = plt.subplots(nrows=dim, ncols=1, sharex=True, sharey=True,
                             figsize=(4, 2*dim))
    for i, ax in enumerate(axes):
        ax.plot(x[:, i], y, '.')
        ax.set_xlabel(r'$X_%d$' % (i+1), fontsize=12)
        ax.set_ylabel(r'$y$', fontsize=12)
    fig.tight_layout()

#### Copula families
We now consider only Gaussian dependencies for this example

In [146]:
# Integer (dim, dim) matrix with family code 1 on every entry strictly below
# the diagonal (one entry per pair of variables) and 0 everywhere else.
families = np.tril(np.ones((dim, dim), dtype=int), k=-1)

## Estimations
We create an instance of `ConservativeEstimate`, the main class for conservative estimation, and define `q_func`, the function that computes the quantile used as the quantity of interest.

In [147]:
from dependence import ConservativeEstimate, quantile_func

alpha = 0.99

# The searches below are minimisations (`gridsearch_minimize`,
# `iterative_vine_minimize`), so an upper quantile is negated to turn its
# maximisation into a minimisation; a lower quantile is used as is.
if alpha <= 0.5:  # Minimizing
    q_func = quantile_func(alpha)
else:  # Maximizing the quantile
    def q_func(x, axis=1):
        """Return the negated `alpha`-quantile of `x` along `axis`."""
        alpha_quantile = quantile_func(alpha)
        return - alpha_quantile(x, axis=axis)

quant_estimate = ConservativeEstimate(
    model_func=test_func,
    margins=margins,
    families=families,
)

First, we compute the quantile at independence

In [148]:
# Input sample size for the independence estimate.
n = 5000
indep_result = quant_estimate.independence(
    n_input_sample=n,
    q_func=q_func,
    random_state=random_state,
)

In [149]:
# Bootstrap the independence estimate (1000 replications) and report the
# bootstrap mean with its coefficient of variation in percent.
indep_result.compute_bootstrap(1000)
boot_mean = indep_result.bootstrap_sample.mean()
boot_std = indep_result.bootstrap_sample.std()
print('Quantile at independence: %.2f with a C.O.V at %.1f %%'
      % (boot_mean, abs(boot_std/boot_mean)*100.))

Quantile at independence: 1.58 with a C.O.V at 16.3 %


### Iterative Approach
Now let's see how well the iterative approach performs.

In [152]:
from dependence import iterative_vine_minimize

# Settings of the iterative search. Each value is forwarded unchanged to
# `iterative_vine_minimize` through the keyword of the matching name
# (`n` -> `n_input_sample`, `K` -> `n_dep_param_init`).
n = 1000
p_max = 4
K = None
verbose = True
grid_type = 'lhs'
n_add_pairs = 1
n_remove_pairs = 0

# Start from a fresh estimator so the search does not reuse the state of the
# instance used for the independence estimate.
quant_estimate = ConservativeEstimate(model_func=test_func, margins=margins, families=families)

# `verbose` is passed through instead of a hard-coded True, so that changing
# the setting above actually changes the amount of output.
worst_quantities, selected_pairs, removed_pairs = iterative_vine_minimize(
    quant_estimate,
    n_input_sample=n,
    n_dep_param_init=K,
    p_max=p_max,
    grid_type=grid_type,
    q_func=q_func,
    n_add_pairs=n_add_pairs,
    n_remove_pairs=n_remove_pairs,
    verbose=verbose,
)

[[2 0 0 0 0 0 0 0]
 [8 1 0 0 0 0 0 0]
 [7 8 3 0 0 0 0 0]
 [6 7 8 4 0 0 0 0]
 [5 6 7 8 5 0 0 0]
 [4 5 6 7 8 6 0 0]
 [3 4 5 6 7 8 7 0]
 [1 3 4 5 6 7 8 8]]
Worst quantile of [(1, 0)] at 1.03156347653
[[3 0 0 0 0 0 0 0]
 [8 1 0 0 0 0 0 0]
 [7 8 2 0 0 0 0 0]
 [6 7 8 4 0 0 0 0]
 [5 6 7 8 5 0 0 0]
 [4 5 6 7 8 6 0 0]
 [2 4 5 6 7 8 7 0]
 [1 2 4 5 6 7 8 8]]
Worst quantile of [(2, 0)] at 1.32530863498
[[3 0 0 0 0 0 0 0]
 [8 1 0 0 0 0 0 0]
 [7 8 2 0 0 0 0 0]
 [6 7 8 4 0 0 0 0]
 [5 6 7 8 5 0 0 0]
 [4 5 6 7 8 6 0 0]
 [1 4 5 6 7 8 7 0]
 [2 2 4 5 6 7 8 8]]
Worst quantile of [(2, 1)] at 1.95692478502
[[4 0 0 0 0 0 0 0]
 [8 1 0 0 0 0 0 0]
 [7 8 2 0 0 0 0 0]
 [6 7 8 3 0 0 0 0]
 [5 6 7 8 5 0 0 0]
 [3 5 6 7 8 6 0 0]
 [2 3 5 6 7 8 7 0]
 [1 2 3 5 6 7 8 8]]
Worst quantile of [(3, 0)] at 0.738953251491
[[4 0 0 0 0 0 0 0]
 [8 1 0 0 0 0 0 0]
 [7 8 2 0 0 0 0 0]
 [6 7 8 3 0 0 0 0]
 [5 6 7 8 5 0 0 0]
 [3 5 6 7 8 6 0 0]
 [1 3 5 6 7 8 7 0]
 [2 2 3 5 6 7 8 8]]
Worst quantile of [(3, 1)] at 2.42479156401
[[4 0 0 0 0 0 

### Grid Search Approach
For comparison, we consider a grid search approach and compare its performance with that of the iterative algorithm.

In [12]:
# Number of dependence parameters evaluated by the grid search.
K = 1000
grid_type = 'lhs'

# NOTE(review): `n` is not assigned in this cell; it keeps whatever value the
# last executed earlier cell gave it. Confirm it is the intended sample size.
quant_estimate = ConservativeEstimate(
    model_func=test_func,
    margins=margins,
    families=families,
)
grid_result = quant_estimate.gridsearch_minimize(
    n_dep_param=K,
    n_input_sample=n,
    grid_type=grid_type,
    q_func=q_func,
    random_state=random_state,
)

In [13]:
# Bootstrap the minimum found by the grid search (1000 replications) and
# report the bootstrap mean with its coefficient of variation in percent.
min_result = grid_result.min_result
min_result.compute_bootstrap(1000)
boot_mean = min_result.bootstrap_sample.mean()
boot_std = min_result.bootstrap_sample.std()
print('Worst Quantile: %.2f with a C.O.V at %.1f %%'
      % (boot_mean, abs(boot_std/boot_mean)*100.))
# Cost is reported as grid size times input sample size.
print('Cost = %d' % (K*n))

Worst Quantile: -6.87 with a C.O.V at 7.2 %
Cost = 10000000


In [10]:
# Only drawn for a two-dimensional problem: plot the grid-search quantities
# against the dependence parameters.
if dim == 2:
    fig, ax = plt.subplots()
    ax.plot(grid_result.dep_params, grid_result.quantities)