# Iterative Construction of a Penalised Vine Structure
This notebook iteratively estimate the quantile.

#### Libraries

In [1]:
import openturns as ot
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

%load_ext autoreload
%autoreload 2

random_state = 123
np.random.seed(random_state)

#### Model function
This example consider the simple additive example.

In [2]:
from dependence.tests import func_overflow, margins_overflow, var_names_overflow, func_sum

test_func = func_overflow

#### Dimension and margins
We first define the problem dimension and the margins

In [3]:
if test_func == func_overflow:
    margins = margins_overflow
    dim = len(margins)
else:
    dim = 6
    margins = [ot.Normal()]*dim

We chose the coefficients of the variables throught the additive function.

In [4]:
if test_func == func_sum:
    coeficients = np.logspace(0., 5., dim+1, endpoint=False)[1:]

    n_plot = 10000
    x = np.asarray(ot.ComposedDistribution(margins).getSample(n_plot))
    y = test_func(x, coeficients)

    fig, axes = plt.subplots(dim, 1, sharex=True, sharey=True, figsize=(4, 2*dim))
    for i in range(dim):
        ax = axes[i]
        ax.plot(x[:, i], y, '.')
        ax.set_xlabel(r'$X_%d$' % (i+1), fontsize=12)
        ax.set_ylabel(r'$y$', fontsize=12)
    fig.tight_layout()

#### Copula families
We now consider only Gaussian dependencies for this example

In [37]:
families = np.zeros((dim, dim), dtype=int)
for i in range(1, dim):
    for j in range(i):
        families[i, j] = 1

## Estimations
We create an instance of the main class for conservative estimate, and we define a q_func object for the quantile as a quantity of interest

In [6]:
from dependence import ConservativeEstimate, quantile_func

alpha = 0.95

if alpha > 0.5: # Maximizing the quantile
    def q_func(x, axis=1):
        return - quantile_func(alpha)(x, axis=axis)
else: # Minimizing
    q_func = quantile_func(alpha)

quant_estimate = ConservativeEstimate(model_func=test_func, margins=margins, families=families)

First, we compute the quantile at independence

In [7]:
n = 10000
indep_result = quant_estimate.independence(n_input_sample=n, q_func=q_func, random_state=random_state)[0]

In [8]:
indep_result.compute_bootstrap(1000)
boot_std = indep_result.bootstrap_sample.std()
boot_mean = indep_result.bootstrap_sample.mean()
print('Quantile at independence: %.2f with a C.O.V at %.1f %%' % (boot_mean, abs(boot_std/boot_mean)*100.))

Quantile at independence: 4.86 with a C.O.V at 2.1 %


### Grid Search Approach
Firstly, we consider a grid search approach in order to compare the perfomance with the iterative algorithm.

In [21]:
K = 1000
n = 100000
grid_type = 'rand'
grid_result = quant_estimate.gridsearch_minimize(n_dep_param=K, n_input_sample=n, grid_type=grid_type, 
                                                 q_func=q_func, random_state=random_state)

In [22]:
min_result = grid_result.min_result
min_result.compute_bootstrap(1000)
boot_std = min_result.bootstrap_sample.std()
boot_mean = min_result.bootstrap_sample.mean()
print('Worst Quantile: %.2f with a C.O.V at %.1f %%' % (boot_mean, abs(boot_std/boot_mean)*100.))
print('Cost = %d' % (K*n))

Worst Quantile: 1.42 with a C.O.V at 5.3 %
Cost = 25000000


In [11]:
if dim == 2:
    fig, ax = plt.subplots()
    ax.plot(grid_result.dep_params, grid_result.quantities)

### Iterative Approach
Now lets see how good we can be with the iterative appraoch.

In [None]:
from dependence import iterative_vine_minimize

n = 50000
p_max = 3
K = None
verbose = True
grid_type = 'vertices'
n_add_pairs = 1
n_remove_pairs = 0

quant_estimate = ConservativeEstimate(model_func=test_func, margins=margins, families=families)

worst_quantities, selected_pairs, removed_pairs, results = iterative_vine_minimize(quant_estimate, 
                                                                          n_input_sample=n,
                                                                          n_dep_param_init=K,
                                                                          p_max=p_max, 
                                                                          grid_type=grid_type, 
                                                                          q_func=q_func,
                                                                          n_add_pairs=n_add_pairs,
                                                                          n_remove_pairs=n_remove_pairs,
                                                                          verbose=True)

1 0
[[-0.99], [0.99]]
Worst quantile of [(1, 0)] at 1.60902727112
2 0
[[-0.99], [0.99]]
Worst quantile of [(2, 0)] at 3.96596923106
2 1
[[-0.99], [0.99]]
Worst quantile of [(2, 1)] at 4.42975419304
3 0
[[-0.99], [0.99]]
Worst quantile of [(3, 0)] at 4.47842362434
3 1
[[-0.99], [0.99]]
Worst quantile of [(3, 1)] at 4.67123035214
3 2
[[-0.99], [0.99]]
Worst quantile of [(3, 2)] at 4.9474457846
4 0
[[-0.99], [0.99]]
Worst quantile of [(4, 0)] at 4.28146163178
4 1
[[-0.99], [0.99]]
Worst quantile of [(4, 1)] at 4.54567149463
4 2
[[-0.99], [0.99]]
Worst quantile of [(4, 2)] at 4.86520241496
4 3
[[-0.99], [0.99]]
Worst quantile of [(4, 3)] at 4.86018863383
5 0
[[-0.99], [0.99]]
Worst quantile of [(5, 0)] at 4.6150993915
5 1
[[-0.99], [0.99]]
Worst quantile of [(5, 1)] at 4.7491114966
5 2
[[-0.99], [0.99]]
Worst quantile of [(5, 2)] at 4.81021758141
5 3
[[-0.99], [0.99]]
Worst quantile of [(5, 3)] at 4.95646229592
5 4
[[-0.99], [0.99]]
Worst quantile of [(5, 4)] at 4.9453574818
6 0
[[-0.99], 

In [34]:
%debug

> [1;32mc:\users\nazih\google drive\github\impact-of-dependence\dependence\utils.py[0m(258)[0;36mto_copula_params[1;34m()[0m
[1;32m    256 [1;33m        [0mkendalls[0m [1;33m=[0m [0mnp[0m[1;33m.[0m[0masarray[0m[1;33m([0m[1;33m[[0m[0mkendalls[0m[1;33m][0m[1;33m)[0m[1;33m[0m[0m
[0m[1;32m    257 [1;33m[1;33m[0m[0m
[0m[1;32m--> 258 [1;33m    [0mn_params[0m[1;33m,[0m [0mn_pairs[0m [1;33m=[0m [0mkendalls[0m[1;33m.[0m[0mshape[0m[1;33m[0m[0m
[0m[1;32m    259 [1;33m    [0mparams[0m [1;33m=[0m [0mnp[0m[1;33m.[0m[0mzeros[0m[1;33m([0m[0mkendalls[0m[1;33m.[0m[0mshape[0m[1;33m)[0m[1;33m[0m[0m
[0m[1;32m    260 [1;33m    [1;32mfor[0m [0mk[0m [1;32min[0m [0mrange[0m[1;33m([0m[0mn_pairs[0m[1;33m)[0m[1;33m:[0m[1;33m[0m[0m
[0m
ipdb> u
> [1;32mc:\users\nazih\google drive\github\impact-of-dependence\dependence\dependence.py[0m(168)[0;36mgridsearch_minimize[1;34m()[0m
[1;32m    166 [1;33m          

In [25]:
families = np.zeros((dim, dim))
families[1, 0] = 1
families[6, 2] = 1
families[7, 6] = 1

quant_estimate = ConservativeEstimate(model_func=test_func, margins=margins, families=families)
res = quant_estimate.gridsearch_minimize(None, 20000, 'vertices', q_func=q_func)
res.min_quantity

1.4710180342208781