# Iterative Construction of a Penalised Vine Structure
This notebook iteratively estimates the quantile.

#### Libraries

In [1]:
import openturns as ot
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

%load_ext autoreload
%autoreload 2

# Fixed seed for reproducibility: it seeds NumPy's global RNG here and is also
# passed as `random_state` to the estimator calls further down.
random_state = 123
np.random.seed(random_state)

#### Model function
This example considers a simple additive model.

In [2]:
from dependence.tests.test_functions import func_overflow, margins_overflow, var_names_overflow

def sum_func(x, a=None):
    """Additive example: weighted sum of the input variables.

    Parameters
    ----------
    x : numpy.ndarray, shape (n, dim)
        Input sample, one observation per row.
    a : array-like or None, optional
        Coefficients of the sum. ``None`` (default) uses a ``(dim, 1)`` column
        of ones, i.e. a plain row sum. A 1-D sequence (array, list or tuple)
        of length ``dim`` is reshaped into a column vector. Scalars and 2-D
        arrays are handed to ``np.dot`` unchanged.

    Returns
    -------
    numpy.ndarray
        ``np.dot(x, a)``; shape ``(n, 1)`` for the default and 1-D cases.

    Raises
    ------
    AssertionError
        If a 1-D ``a`` does not have exactly ``dim`` entries.
    AttributeError
        If ``a`` has more than two dimensions.
    """
    # Unpacking also enforces that x is two-dimensional.
    _, dim = x.shape

    if a is None:
        a = np.ones((dim, 1))
    else:
        # Accept lists/tuples as well as arrays: plain sequences have no
        # `.ndim`/`.reshape`, so they used to fail before any validation ran.
        a = np.asarray(a)

    if a.ndim == 1:
        a = a.reshape(-1, 1)
        assert a.shape[0] == dim, "Shape not good"
    elif a.ndim > 2:
        raise AttributeError('Dimension problem for constant a')

    return np.dot(x, a)

# Model under study. The cells below compare `test_func` against `sum_func`
# to decide which dimension/margins to use and whether to draw the additive plots.
test_func = func_overflow

#### Dimension and margins
We first define the problem dimension and the margins.

In [3]:
# Pick the input dimension and the marginal distributions for the selected model.
if test_func != sum_func:
    # Any model other than the additive one uses the margins imported
    # alongside the overflow test function.
    margins = margins_overflow
    dim = len(margins)
else:
    # Additive example: six inputs, each with an `ot.Normal()` margin.
    dim = 6
    margins = [ot.Normal()] * dim

We choose the coefficients of the variables in the additive function.

In [4]:
# Additive example only: draw a sample and scatter every input against the output.
if test_func == sum_func:
    # dim + 1 log-spaced points are generated and the first one is dropped,
    # which leaves exactly `dim` coefficients.
    coeficients = np.logspace(0., 3., num=dim + 1, endpoint=False)[1:]

    n_plot = 10000
    input_sample = ot.ComposedDistribution(margins).getSample(n_plot)
    x = np.asarray(input_sample)
    y = sum_func(x, coeficients)

    # One row per input variable, all panels sharing both axes.
    fig, axes = plt.subplots(nrows=dim, ncols=1, sharex=True, sharey=True,
                             figsize=(4, 2 * dim))
    for i, ax in enumerate(axes):
        ax.plot(x[:, i], y, '.')
        ax.set_xlabel(r'$X_%d$' % (i + 1), fontsize=12)
        ax.set_ylabel(r'$y$', fontsize=12)
    fig.tight_layout()

#### Copula families
We now consider only Gaussian dependencies for this example

In [5]:
# Family matrix: code 1 on every entry strictly below the diagonal (one entry
# per pair of variables), 0 on the diagonal and above.
families = np.tril(np.ones((dim, dim), dtype=int), k=-1)

## Estimations
We create an instance of the main class for conservative estimation, and we define a `q_func` object that uses the quantile as the quantity of interest.

In [105]:
from dependence import ConservativeEstimate, quantile_func

# Level handed to `quantile_func` to build the quantity of interest.
alpha = 0.99


def q_func(x, axis=1):
    """Return the negated result of ``quantile_func(alpha)`` applied to ``x`` along ``axis``."""
    quantile_at_alpha = quantile_func(alpha)
    return -quantile_at_alpha(x, axis=axis)


# Estimator built from the selected model, its margins and the family matrix.
quant_estimate = ConservativeEstimate(
    model_func=test_func,
    margins=margins,
    families=families,
)

First, we compute the quantile at independence

In [7]:
# Input sample size used for the independence run.
n = 5000
indep_result = quant_estimate.independence(
    n_input_sample=n,
    q_func=q_func,
    random_state=random_state,
)

In [8]:
# Bootstrap the independence result (1000 replications requested), then report
# the bootstrap mean together with its coefficient of variation in percent.
indep_result.compute_bootstrap(1000)
boot_mean = indep_result.bootstrap_sample.mean()
boot_std = indep_result.bootstrap_sample.std()
cov_percent = abs(boot_std / boot_mean) * 100.
print('Quantile at independence: %.2f with a C.O.V at %.1f %%' % (boot_mean, cov_percent))

Quantile at independence: 1.69 with a C.O.V at 16.5 %


### Iterative Approach
Now let's see how well we can do with the iterative approach.

In [99]:
from dependence.iterative_vines import iterative_vine_minimize

# Settings of the iterative search. Each value is forwarded to the keyword of
# `iterative_vine_minimize` shown in the call below; their exact semantics are
# defined by that function.
# Note that `n` is rebound here, so later cells that read `n` see 1000.
n = 1000
p_max = 6
K = None
verbose = True
grid_type = 'lhs'
n_add_pairs = 3
n_remove_pairs = 2

# A fresh estimator is built for this run.
quant_estimate = ConservativeEstimate(model_func=test_func, margins=margins, families=families)
worst_quantities, selected_pairs, removed_pairs = iterative_vine_minimize(
    quant_estimate,
    n_input_sample=n,
    n_dep_param_init=K,
    p_max=p_max,
    grid_type=grid_type,
    q_func=q_func,
    n_add_pairs=n_add_pairs,
    n_remove_pairs=n_remove_pairs,
    # Previously hard-coded to True, which left the `verbose` setting above unused.
    verbose=verbose,
)
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: 'dict' object has no attribute 'dep_full_params'".
# The cause is not visible from this notebook -- check `iterative_vine_minimize`.

AttributeError: 'dict' object has no attribute 'dep_full_params'

### Grid Search Approach
First, we consider a grid search approach in order to compare its performance with the iterative algorithm.

In [145]:
from dependence.dependence import ListDependenceResult

# Copy the entries of `grid_result` into a fresh ListDependenceResult.
# NOTE(review): `grid_result` is not defined in any cell above this one, so
# this presumably relies on leftover kernel state -- confirm on a fresh kernel.
test = ListDependenceResult()
test.extend(grid_result)
# NOTE(review): this cell originally ended with a dangling `test.` (a
# SyntaxError, probably an abandoned tab-completion). The recorded output
# `(7, 28)` suggests some shape attribute was displayed; restore the intended
# expression once it is identified.



(7, 28)

In [87]:
# Grid-search settings: `K` is passed as `n_dep_param`, and `n` keeps whatever
# value an earlier cell assigned to it.
K = 5
grid_type = 'lhs'

# A fresh estimator is built for the grid search.
quant_estimate = ConservativeEstimate(
    model_func=test_func,
    margins=margins,
    families=families,
)
grid_result2 = quant_estimate.gridsearch_minimize(
    n_dep_param=K,
    n_input_sample=n,
    grid_type=grid_type,
    q_func=q_func,
    random_state=random_state,
)

In [89]:
# Add the entries of `grid_result2` to `grid_result`. The return value is
# discarded, so this presumably mutates `grid_result` in place and re-running
# the cell would append the same results again -- TODO confirm.
grid_result.extend(grid_result2)

In [69]:
# Combine both result collections with `+`; `a` is not read by any other
# visible cell.
a = grid_result2 + grid_result

In [103]:
# NOTE(review): the recorded run of this cell raised
# "AttributeError: 'ListDependenceResult' object has no attribute 'output_samples'".
# Replace with the correct attribute or remove the cell.
grid_result.output_samples

AttributeError: 'ListDependenceResult' object has no attribute 'output_samples'

In [11]:
# Bootstrap the `min_result` entry of the grid search (1000 replications
# requested), then report its mean, coefficient of variation and the cost K*n.
min_result = grid_result.min_result
min_result.compute_bootstrap(1000)
boot_mean = min_result.bootstrap_sample.mean()
boot_std = min_result.bootstrap_sample.std()
cov_percent = abs(boot_std / boot_mean) * 100.
print('Worst Quantile: %.2f with a C.O.V at %.1f %%' % (boot_mean, cov_percent))
print('Cost = %d' % (K * n))

Worst Quantile: -6.83 with a C.O.V at 8.8 %
Cost = 10000000


In [10]:
if dim == 2:
    fig, ax = plt.subplots()
    ax.plot(grid_result.dep_params, grid_result.quantities)