# Notebook for Testing different confidence bands

In [1]:
import numpy as np
import math
import sys
import os
import itertools
import time
from tqdm import tqdm
from IPython.display import display, HTML
from scipy import stats
from scipy.sparse import csr_matrix
from scipy import special
from numpy.linalg import cholesky
import matplotlib.pyplot as plt
from scipy.sparse import csc_matrix
from scipy.sparse.linalg import splu
src_path = os.path.abspath(os.path.join(os.getcwd(), "../src"))
sys.path.append(src_path)
from forest_v2 import RegressionTreeModel
from forest_v2 import RandomForestModel
from forest_v2 import HistogramEstimator
import functions as fcts

In [2]:
# --- Regression model ---
# Number of features, i.e. the dimension of the feature space.
p = 2

# True regression function m, taken from the project's `functions` module.
regression_fct = fcts.m_p2_01

# Error levels beta; the confidence bands below target the levels 1 - beta.
beta = np.array([0.1, 0.05, 0.01])

The section below approximates the quantiles needed for the CPRF confidence bands. The quantiles for the histogram are computed in the Histogram section further below.

# RF


## Covariance estimation and covariance matrix construction

In [3]:
# Depth of the trees (values used: 5, 6).
k = 5

# Smaller depth for the evaluation grid (values used: 4, 5); the choice is justified further below.
k2 = 4

# Ehrenfest parameters, kept constant throughout the notebook.
B, delta = 12, 7

# Print small floats in fixed-point instead of scientific notation.
np.set_printoptions(suppress=True)

In [4]:
# Monte Carlo estimation of every possible covariance entry, using the same
# parameters as the partition construction.
np.random.seed(0)
n_parts = 50000  # number of split pairs generated per forest type

# Every combination of closeness relations between two points in the feature
# space, as non-decreasing tuples with entries in 0..k.
comb_list = list(itertools.combinations_with_replacement(range(0, k + 1), p))
combs = np.array(comb_list)


def _mean_intersection_volumes(draw_splits):
    """Average `fcts.vol_intersec_2` over `n_parts` independent pairs of splits.

    `draw_splits` is called twice per repetition (first split, then second
    split), so the order in which random numbers are consumed is fixed.
    Returns one average per row of `combs`.
    """
    totals = np.zeros(len(combs))
    for _ in range(n_parts):
        first, second = draw_splits(), draw_splits()
        for idx, closeness in enumerate(combs):
            totals[idx] += fcts.vol_intersec_2(closeness, first, second)
    return totals / n_parts


# Uniform CPRF: each of the k splits picks one of the p coordinates uniformly;
# the bincount gives the number of splits per coordinate.
av_vol = _mean_intersection_volumes(
    lambda: np.bincount(np.random.choice(p, k, replace=True), minlength=p)
)
# The last combination is (k, ..., k); its average volume normalises the others.
V_cap_uni = av_vol[-1]
cov_entries_uni = av_vol / V_cap_uni

# Ehrenfest CPRF: splits are drawn by the project helper `fcts.ehr_splits`.
av_vol = _mean_intersection_volumes(
    lambda: fcts.ehr_splits(np.zeros(p), np.ones(p)*B, delta, k)
)
V_cap_ehr = av_vol[-1]
cov_entries_ehr = av_vol / V_cap_ehr

print("Uniform V_cap:", V_cap_uni,", Ehrenfest V_cap:", V_cap_ehr,", 2^-k:", 1/2**k,", 2^-2k:",1/2**(2*k))

Uniform V_cap: 0.0163240625 , Ehrenfest V_cap: 0.017990234375 , 2^-k: 0.03125 , 2^-2k: 0.0009765625


Justification for the choice of $k_2$. For any pair of points in the feature space with closeness $(k_2,k_2)$ or larger, the Gaussian process' covariance should be close to one.

In [23]:
# Covariance threshold above which two points count as (almost) perfectly correlated.
min_prob = 0.99  # alternative used: 0.9

# np.argmax on a boolean array returns the first index at which the condition holds.
first_ehr = np.argmax(cov_entries_ehr > min_prob)
first_uni = np.argmax(cov_entries_uni > min_prob)

print("Smallest closeness with cov > min_prob, Ehr.:", comb_list[first_ehr], "and Uni:", comb_list[first_uni])
print("Corresponding covariances: ", cov_entries_ehr[first_ehr], "and", cov_entries_uni[first_uni])

Smallest closeness with cov > min_prob, Ehr.: (4, 4) and Uni: (4, 4)
Corresponding covariances:  0.9993746607317338 and 0.9964775925110555


In [6]:
# Filling in the covariance entries in the matrices.
#
# For every ordered pair of grid points we determine, per coordinate, the
# largest level t in 0..k at which both points have the same integer part of
# x * 2**t. The sorted tuple of these levels is looked up in `combs` to fetch
# the estimated covariance.
X_test = fcts.test_grid(p, k2)  # grid for the cov matrices
start_time = time.time()

# All ordered pairs of grid points, one pair per row: (x_1..x_p, y_1..y_p).
# The reshape assumes that the grid holds 2**(k2*p) points.
X_test_M = np.array(list(itertools.product(X_test, repeat=2)))
X_test_V = X_test_M.reshape(2**(k2*p*2), 2*p)

t_values = np.arange(0, k + 1)
v_t = (2**t_values).reshape(-1, 1)
fin_com_cell = np.empty((2**(k2*p*2), p))
for i in range(p):
    X_temp_1 = X_test_V[:, i].reshape(-1, 1)
    X_temp_2 = X_test_V[:, i + p].reshape(-1, 1)
    # Integer part of x * 2**t for every level t (one column per level).
    ints1 = np.dot(X_temp_1, v_t.T).astype(int)
    ints2 = np.dot(X_temp_2, v_t.T).astype(int)
    equal_mask = ints1 == ints2
    # Largest level at which the two integer parts still agree.
    fin_com_cell[:, i] = np.max(np.where(equal_mask, t_values, -np.inf), axis=1)
M = fin_com_cell.astype(int)
M = np.sort(M, axis=1)  # `combs` holds non-decreasing tuples only

# Encode each closeness tuple as a single integer in base k+1. A fixed base of
# 10 would produce collisions and lose the sort order as soon as k >= 10.
base = k + 1
weights = base**np.arange(p - 1, -1, -1)
M_int = np.sum(M*weights, axis=1)
int_combs = np.sum(combs*weights, axis=1)
indices = np.searchsorted(int_combs, M_int)

# searchsorted only returns insertion points; make sure every pattern was
# really found instead of silently picking a neighbouring covariance entry.
if not np.array_equal(int_combs[np.clip(indices, 0, len(int_combs) - 1)], M_int):
    raise ValueError("Some closeness patterns of the test grid are not contained in `combs`.")

cov_uni = cov_entries_uni[indices].reshape((2**(k2*p), 2**(k2*p)))
cov_ehr = cov_entries_ehr[indices].reshape((2**(k2*p), 2**(k2*p)))
end_time = time.time()
print(f"Run time: {end_time - start_time} seconds")
print("Size of the cov matrix: ",cov_uni.shape) #, np.info(cov_uni), np.info(cov_ehr)

Laufzeit: 0.07515311241149902 Sekunden
Size of the cov matrix:  (256, 256)


The next two cells perform the Cholesky decomposition of the adjusted covariance matrices. We first try to decompose the matrix after adding a small multiple of the identity, $\epsilon I$. If this fails because of the estimation error, we instead rebuild the matrix from its eigendecomposition, raising every eigenvalue smaller than $\epsilon$ to $\epsilon$, and decompose the rebuilt matrix.

In [17]:
# Cholesky factor L_uni of the (regularised) uniform-CPRF covariance matrix.
epsilon = 1e-6
try:
    # First attempt: shift the spectrum by epsilon.
    L_uni = cholesky(cov_uni + epsilon * np.eye(2**(k2*p)))
except np.linalg.LinAlgError as e:
    # Only a failed decomposition triggers the fallback. Other errors (e.g. a
    # shape mismatch caused by stale kernel state) must surface instead of
    # being reported as "not positive definite".
    eig_values, eig_vectors = np.linalg.eigh(cov_uni)
    # Raise every eigenvalue below epsilon to epsilon and rebuild the matrix.
    corrected_eig_values = np.maximum(eig_values, epsilon)
    fixed_cov_uni = eig_vectors @ np.diag(corrected_eig_values) @ eig_vectors.T
    L_uni = cholesky(fixed_cov_uni)
    print(f"An error occured: {e}.", "We used reconstruction of the covariance matrix via eigenvectors before the cholesky decomposition.")
else:
    print("We used the cholesky decomposition of the original covariance matrix.")
#stats.describe((L_uni @ L_uni.T)[:,0] -cov_uni[:,0])

An error occured: Matrix is not positive definite. We used reconstruction of the covariance matrix via eigenvectors before the cholesky decomposition.


In [18]:
# Cholesky factor L_ehr of the (regularised) Ehrenfest-CPRF covariance matrix.
epsilon = 1e-6
try:
    # First attempt: shift the spectrum by epsilon.
    L_ehr = cholesky(cov_ehr + epsilon * np.eye(2**(k2*p)))
except np.linalg.LinAlgError as e:
    # Only a failed decomposition triggers the fallback. Other errors (e.g. a
    # shape mismatch caused by stale kernel state) must surface instead of
    # being reported as "not positive definite".
    eig_values, eig_vectors = np.linalg.eigh(cov_ehr)
    # Raise every eigenvalue below epsilon to epsilon and rebuild the matrix.
    corrected_eig_values = np.maximum(eig_values, epsilon)
    fixed_cov_ehr = eig_vectors @ np.diag(corrected_eig_values) @ eig_vectors.T
    L_ehr = cholesky(fixed_cov_ehr)
    print(f"An error occured: {e}.", "We used reconstruction of the covariance matrix via eigenvectors before the cholesky decomposition.")
else:
    print("We used the cholesky decomposition of the original covariance matrix.")
#stats.describe((L_ehr @ L_ehr.T)[:,0] -cov_ehr[:,0])

An error occured: Matrix is not positive definite. We used reconstruction of the covariance matrix via eigenvectors before the cholesky decomposition.


## Quantile estimation of the Gaussian processes

In [19]:
# Simulate suprema of both Gaussian processes on the test grid; their
# empirical quantiles are computed in the next cell.
np.random.seed(0)

n_sups = 100000         # number of simulated suprema
n_grid = 2**(k2*p)      # number of grid points = dimension of the Gaussian vector

sups_GP_uni, sups_GP_ehr = [], []
for _ in range(n_sups):
    # One standard Gaussian vector is shared by both processes; multiplying
    # with the respective Cholesky factor yields the desired covariance.
    z = np.random.normal(0, 1, n_grid)
    sups_GP_uni.append(np.max(np.abs(L_uni @ z)))
    sups_GP_ehr.append(np.max(np.abs(L_ehr @ z)))

In [20]:
# Empirical (1 - beta)-quantiles of the simulated suprema.
levels = 1 - beta
quants_uni, quants_ehr = (np.quantile(sups, levels) for sups in (sups_GP_uni, sups_GP_ehr))

print("Quantiles for p = ", p, " and k = ", k, " at ", levels)
print("Uniform CPRF:   ", quants_uni)
print("Ehrenfest CPRF: ", quants_ehr)

Quantiles at:    [0.9  0.95 0.99]
Uniform CPRF:    [3.32514131 3.54033604 3.97369232]
Ehrenfest CPRF:  [3.27505637 3.49751455 3.9256088 ]


## RF CB Tests

In [24]:
# Make sure the text file collecting the simulation results exists;
# an existing file is left untouched.
if os.path.isfile('Simulation results.txt'):
    print("The file 'Simulation results.txt' already existed.")
else:
    with open('Simulation results.txt', 'w') as f:
        f.write("Simulation results of asymptotic confidence bands" + '\n')
    print("The file 'Simulation results.txt' was created.")

The file 'Simulation results.txt' already existed.


In [25]:
# Training sample size (alternative used: 4000).
n_samples = 2000

# Factor for the subsample size.
r = 0.75

# Bandwidth of the kernel estimator for sigma: n^(-1/2).
h_g = 1 / n_samples**(1/2)

In [29]:
# Pointwise asymptotic standard deviations: sqrt(2^(2k) * V_cap / n).
var_scale = 2**(2*k)
as_std_uni = np.sqrt(var_scale*V_cap_uni/n_samples)
as_std_ehr = np.sqrt(var_scale*V_cap_ehr/n_samples)

# Confidence band radii: standard deviation times supremum quantile.
cb_rad_uni = as_std_uni * quants_uni
cb_rad_ehr = as_std_ehr * quants_ehr

print("Confidence band radii for n = ", n_samples, " p = ", p, " and k = ", k, "with confidence levels :", 1 - beta)
print("Uniform CPRF:   ", cb_rad_uni)
print("Ehrenfest CPRF: ", cb_rad_ehr)

Confidence band radii for n =  2000  p =  2  and k =  5 with confidence levels : [0.9  0.95 0.99]
Uniform CPRF:    [0.30398994 0.3236634  0.36328155]
Ehrenfest CPRF:  [0.31432011 0.3356703  0.3767562 ]


The next cell is for the simulation of asymptotic confidence bands for the two random forests.

In [27]:
# Grid on which the supremum of the RF estimation error is evaluated:
# per axis, two points just inside the left and right border of each of the
# 2**k2 dyadic cells.
eps = 1/2**20
g = 2**k2
splits = np.linspace(0, 1, num=g, endpoint=False)  # left cell borders
xl, xr = splits + eps, splits + 1/g - eps
axis_grid = np.sort(np.concatenate((xl, xr)))

# Cartesian product of the axis grid over all p coordinates.
grid = np.array(list(itertools.product(axis_grid, repeat=p)))
grid.shape

(1024, 2)

In [28]:
# Simulation of asymptotic confidence bands for the uniform and the Ehrenfest
# CPRF. For every parameter combination, n_tests data sets are generated, both
# forests are fitted, and we count how often the supremum of the estimation
# error over the grid stays below sigma_hat * radius. A summary is appended to
# 'Simulation results.txt'.

# parameters to vary:

# number of confidence bands
n_tests = 1000

# random seed for both data and tree construction (two generators for replicable results)
rand_seed = 0

# vector of sample sizes, used: [250,500,1000,2000,4000,8000]
sample_sizes = [2000]

# vector of error std, used: [0.75,1,1.25]
sigs = [0.5]

# vector of error distributions, possible entries: 'norm', 'uni', 'tdx'
# where x is a placeholder for the degrees of freedom (e.g. 'td4', 'td6')
distributions = ['norm']

# vector with number of trees, e.g. [50, 100]
n_trees_vec = [50]

# multiplier to in/exclude the regression function
# 1 for the normal regression model, 0 to set m=0 to test the asymptotic distribution without approximation error
m_factor = 1

#_____________________________________________________________________________________________________

X_test = grid
m_true_grid = m_factor*regression_fct(X_test)

# Simulation run
for sigma in sigs:
    for dist_name in distributions:
        # Build the error distribution; every variant is scaled to standard deviation sigma.
        if dist_name == "norm":
            error_dist = stats.norm(loc=0, scale=sigma)
        elif dist_name == "uni":
            # uniform distribution on an interval of length sigma*sqrt(12), centred at 0
            error_dist = stats.uniform(loc=-0.5*sigma*np.sqrt(12), scale=sigma*np.sqrt(12))
        elif dist_name[:2] == "td":
            try:
                df = int(dist_name[2:])
            except ValueError as e:
                print(f"An error occured: {e}.", "Distribution not known. Skipped to next distribution.")
                continue
            if df <= 2:
                # The variance of a t-distribution is only finite for df > 2;
                # otherwise the rescaling below would give a zero or NaN scale.
                print("A t-distribution needs more than 2 degrees of freedom to be scaled to a given std. Skipped to next distribution.")
                continue
            error_dist = stats.t(df=df, loc=0, scale=sigma*np.sqrt((df-2)/df))
        else:
            print("Distribution not known. Skipped to next distribution.")
            continue

        for n_trees in n_trees_vec:

            for n_samples in sample_sizes:

                # asymptotic standard deviations (pointwise)
                as_std_uni = np.sqrt(2**(2*k)*V_cap_uni/n_samples)
                as_std_ehr = np.sqrt(2**(2*k)*V_cap_ehr/n_samples)

                # confidence band radii based on the quantiles and the standard deviations
                cb_rad_uni = as_std_uni*quants_uni
                cb_rad_ehr = as_std_ehr*quants_ehr

                # bandwidth for the variance estimator
                h_g = 1/n_samples**(1/2)

                sups_uni = []
                sups_ehr = []
                sigma_hats = []

                print("Simulation progress for error distribution "+dist_name+" with n_samples =",n_samples,"n_trees = ",n_trees," and sigma =",sigma, ":")

                # Reseed both generators so that every parameter combination is reproducible on its own.
                np.random.seed(rand_seed)
                data_rng = np.random.default_rng(rand_seed)

                model_uni = RandomForestModel(n_trees=n_trees, max_depth=k, sample_size_fct=r, tree_type="Uni")
                model_ehr = RandomForestModel(n_trees=n_trees, max_depth=k, sample_size_fct=r, tree_type="Ehr", delta=delta, B=B)

                for _ in tqdm(range(n_tests)):
                    # data from a different random generator than the one used for the random forests
                    e = error_dist.rvs(size=n_samples, random_state=data_rng)
                    X = data_rng.random(n_samples*p).reshape(n_samples, p)

                    m = m_factor*regression_fct(X)
                    Y = m + e

                    # clear and refit both forests on the new sample
                    model_uni.clear()
                    model_uni.train(X, Y)
                    model_ehr.clear()
                    model_ehr.train(X, Y)

                    # supremum of the absolute estimation error over the grid
                    sups_uni.append(np.max(np.abs(model_uni.predict(X_test) - m_true_grid)))
                    sups_ehr.append(np.max(np.abs(model_ehr.predict(X_test) - m_true_grid)))

                    # square root of the project's kernel-based estimate `fcts.sigma_hat_gauss`
                    sigma_hats.append(np.sqrt(fcts.sigma_hat_gauss(X, Y, h_g)))

                # Number of bands covering m, one count per confidence level. The
                # length follows the radii instead of being hard-coded to 3, so
                # that `beta` may hold any number of levels.
                sups_uni_arr = np.asarray(sups_uni)
                sups_ehr_arr = np.asarray(sups_ehr)
                sigma_hats_arr = np.asarray(sigma_hats)
                cover_num_uni = np.zeros(len(cb_rad_uni))
                cover_num_ehr = np.zeros(len(cb_rad_ehr))
                for j in range(len(cb_rad_uni)):
                    cover_num_uni[j] = np.sum(sups_uni_arr < sigma_hats_arr*cb_rad_uni[j])
                    cover_num_ehr[j] = np.sum(sups_ehr_arr < sigma_hats_arr*cb_rad_ehr[j])
                n_CB = len(sups_uni)

                avg_cb_rad_uni = cb_rad_uni*np.mean(sigma_hats)
                avg_cb_rad_ehr = cb_rad_ehr*np.mean(sigma_hats)

                if m_factor == 1:
                    m_info = " and m = "+regression_fct.__name__
                elif m_factor == 0:
                    m_info = " and m set to zero."
                else:
                    m_info = " and m = "+str(m_factor)+"*"+regression_fct.__name__

                result_txt = ["","","Results  at "+time.ctime(),
                              "for Random Seed = "+str(rand_seed),
                              "and Regression model with:",
                              "p = "+str(p)+m_info,
                              "Parameters:",
                              "Sample size: "+str(n_samples),
                              "k="+str(k),
                              "r="+str(r*n_samples)]
                if error_dist.dist.name == 't':
                    result_txt.append("Error distribution: t-Distribution with "+str(df)+" degrees of freedom")
                else:
                    result_txt.append("Error distribution: " +str(error_dist.dist.name))

                result_txt.append("Error std: "+str(sigma))
                result_txt.append("Number of trees: "+str(n_trees))
                result_txt.append("Number of CBS: "+str(n_CB))
                result_txt+=["","Empirical Coverage for confidence bands with theoretical coverage "+ str(1-beta)+":"]
                result_txt.append("Uniform CPRF, number: "+str(cover_num_uni)+", percentage: "+ str(cover_num_uni/n_CB))
                result_txt.append("Ehrenfest CPRF, number: "+str(cover_num_ehr)+", percentage: "+ str(cover_num_ehr/n_CB))
                result_txt+=["","Average confidence band radius for theoretical coverage "+ str(1-beta)+":"]
                result_txt.append("Uniform CPRF: "+str(avg_cb_rad_uni))
                result_txt.append("Ehrenfest CPRF: "+str(avg_cb_rad_ehr))

                # save results of combination in txt file
                with open('Simulation results.txt', 'a') as f:
                    for line in result_txt:
                        f.write(line + '\n')

                print("Simulation for error distribution "+dist_name+" with sigma =",sigma," and n_trees =",n_trees, " complete.")

Simulation progress for error distribution norm with n_samples = 2000 n_trees =  50  and sigma = 0.5 :


100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [20:01<00:00,  1.20s/it]

Simulation for error distribution norm with sigma = 0.5  and n_trees = 50  complete.





# Histogram
This section is for the simulation of asymptotic confidence bands for the histogram.

In [47]:
# Histogram parameter: n_h is the number of intervals into which each axis
# is divided (values used: 5, 7).
n_h = 7

## Quantile estimation

In [51]:
# Simulate the supremum distribution for the histogram estimator: the maximum
# absolute value of n_h**p independent standard normal variables.
n_sups = 100000
np.random.seed(0)
sups_histo = [
    np.max(np.abs(np.random.normal(0, 1, n_h**p)))
    for _ in range(n_sups)
]

In [52]:
# Empirical (1 - beta)-quantiles of the simulated histogram suprema.
levels = 1 - beta
quants_histo = np.quantile(sups_histo, levels)

print("Quantiles at:   ", levels)
print("Histogram:      ", quants_histo)

Quantiles at:    [0.9  0.95 0.99]
Histogram:       [3.06892471 3.2777789  3.70602254]


In [53]:
# Training sample size (alternative used: 4000).
n_samples = 2000

# Factor for the subsample size.
r = 0.75

# Bandwidth of the kernel estimator for sigma: n^(-1/2).
h_g = 1 / n_samples**(1/2)

# Pointwise asymptotic standard deviation: sqrt(n_h^p / n).
as_std_histo = np.sqrt(n_h**p/n_samples)

# Confidence band radii: standard deviation times supremum quantile.
cb_rad_histo = as_std_histo * quants_histo

print("Confidence band radii for n = ", n_samples, " and n_h = ", n_h, "with confidence levels :", 1 - beta)
print("Histogram:      ", cb_rad_histo)

Confidence band radii for n =  2000  and n_h =  7 with confidence levels : [0.9  0.95 0.99]
Histogram:       [0.4803627  0.51305355 0.58008428]


In [54]:
# Grid on which the supremum of the histogram estimation error is evaluated:
# per axis, three points in each of the n_h cells - just inside the left
# border, just inside the right border, and the midpoint.
eps = 1/2**20
g = n_h
splits = np.linspace(0, 1, num=g, endpoint=False)  # left cell borders
xl, xr = splits + eps, splits + 1/g - eps
xm = splits + 1/(2*n_h)
axis_grid = np.sort(np.concatenate((xl, xr, xm)))

# Cartesian product of the axis grid over all p coordinates.
histo_grid = np.array(list(itertools.product(axis_grid, repeat=p)))
histo_grid.shape

(441, 2)

In [55]:
# Simulation of asymptotic confidence bands for the histogram estimator. For
# every parameter combination, n_tests data sets are generated, the histogram
# is fitted, and we count how often the supremum of the estimation error over
# the grid stays below sigma_hat * radius. A summary is appended to
# 'Simulation results.txt'.

# parameters to vary:

# number of confidence bands
n_tests = 1000

# random seed for the data generation (the legacy global generator is seeded as well)
rand_seed = 0

# vector of sample sizes
sample_sizes = [250,500,1000,2000,4000,8000]

# vector of error std, used: [0.75,1,1.25]
sigs = [1]

# vector of error distributions, possible entries: 'norm', 'uni', 'tdx'
# where x is a placeholder for the degrees of freedom (e.g. 'td4', 'td6')
distributions = ['norm']

# multiplier to in/exclude the regression function
# 1 for the normal regression model, 0 to set m=0 to test the asymptotic distribution without approximation error
m_factor = 1

#_____________________________________________________________________________________________________

X_test = histo_grid
m_true_grid = m_factor*regression_fct(X_test)

for sigma in sigs:
    for dist_name in distributions:
        # Build the error distribution; every variant is scaled to standard deviation sigma.
        if dist_name == "norm":
            error_dist = stats.norm(loc=0, scale=sigma)
        elif dist_name == "uni":
            # uniform distribution on an interval of length sigma*sqrt(12), centred at 0
            error_dist = stats.uniform(loc=-0.5*sigma*np.sqrt(12), scale=sigma*np.sqrt(12))
        elif dist_name[:2] == "td":
            try:
                df = int(dist_name[2:])
            except ValueError as e:
                print(f"An error occured: {e}.", "Distribution not known. Skipped to next distribution.")
                continue
            if df <= 2:
                # The variance of a t-distribution is only finite for df > 2;
                # otherwise the rescaling below would give a zero or NaN scale.
                print("A t-distribution needs more than 2 degrees of freedom to be scaled to a given std. Skipped to next distribution.")
                continue
            error_dist = stats.t(df=df, loc=0, scale=sigma*np.sqrt((df-2)/df))
        else:
            print("Distribution not known. Skipped to next distribution.")
            continue

        for n_samples in sample_sizes:
            sups_histo = []
            sigma_hats = []

            # asymptotic standard deviation (pointwise)
            as_std_histo = np.sqrt(n_h**p/n_samples)

            # confidence band radii based on the quantiles and the standard deviations
            cb_rad_histo = as_std_histo*quants_histo

            # bandwidth for the variance estimator
            h_g = 1/n_samples**(1/2)

            print("Simulation progress for error distribution "+dist_name+" with n_h = ", n_h, ", n_samples =",n_samples," and sigma =",sigma, ":")

            # Reseed both generators so that every parameter combination is reproducible on its own.
            np.random.seed(rand_seed)
            data_rng = np.random.default_rng(rand_seed)

            model_histo = HistogramEstimator(n_cells=n_h)

            for _ in tqdm(range(n_tests)):
                # data drawn from the dedicated generator `data_rng`
                e = error_dist.rvs(size=n_samples, random_state=data_rng)
                X = data_rng.random(n_samples*p).reshape(n_samples, p)

                m = m_factor*regression_fct(X)
                Y = m + e

                # clear and refit the histogram on the new sample
                model_histo.clear()
                model_histo.train(X, Y)

                # supremum of the absolute estimation error over the grid
                sups_histo.append(np.max(np.abs(model_histo.predict(X_test) - m_true_grid)))

                # square root of the project's kernel-based estimate `fcts.sigma_hat_gauss`
                sigma_hats.append(np.sqrt(fcts.sigma_hat_gauss(X, Y, h_g)))

            # Number of bands covering m, one count per confidence level. The
            # length follows the radii instead of being hard-coded to 3, so
            # that `beta` may hold any number of levels.
            sups_histo_arr = np.asarray(sups_histo)
            sigma_hats_arr = np.asarray(sigma_hats)
            cover_num_histo = np.zeros(len(cb_rad_histo))
            for j in range(len(cb_rad_histo)):
                cover_num_histo[j] = np.sum(sups_histo_arr < sigma_hats_arr*cb_rad_histo[j])
            n_CB = len(sups_histo)

            avg_cb_rad_histo = cb_rad_histo*np.mean(sigma_hats)

            #_________________________________________________________________________
            if m_factor == 1:
                m_info = " and m = "+regression_fct.__name__
            elif m_factor == 0:
                m_info = " and m set to zero."
            else:
                m_info = " and m = "+str(m_factor)+"*"+regression_fct.__name__

            result_txt = ["","","Results  at "+time.ctime(),
                          "for Random Seed = "+str(rand_seed),
                          "and Regression model with:",
                          "p = "+str(p)+m_info,
                          "Parameters:",
                          "Sample size: "+str(n_samples),
                          "n_h="+str(n_h)]

            if error_dist.dist.name == 't':
                result_txt.append("Error distribution: t-Distribution with "+str(df)+" degrees of freedom")
            else:
                result_txt.append("Error distribution: " +str(error_dist.dist.name))

            result_txt.append("Error std: "+str(sigma))
            result_txt.append("Number of CBS: "+str(n_CB))
            result_txt+=["","Empirical Coverage for confidence bands with theoretical coverage "+ str(1-beta)+":"]
            result_txt.append("Histogram, number: "+str(cover_num_histo)+", percentage: "+ str(cover_num_histo/n_CB))
            result_txt+=["","Average confidence band radius for theoretical coverage "+ str(1-beta)+":"]
            result_txt.append("Histogram: "+str(avg_cb_rad_histo))

            # save results of combination in txt file
            with open('Simulation results.txt', 'a') as f:
                for line in result_txt:
                    f.write(line + '\n')

            print("Simulation for error distribution "+dist_name+" with n_h = ", n_h, ", n_samples =",n_samples," and sigma =",sigma, " complete.")

Simulation progress for error distribution norm with n_h =  7 , n_samples = 250  and sigma = 1 :


100%|█████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:05<00:00, 183.06it/s]


Simulation for error distribution norm with n_h =  7 , n_samples = 250  and sigma = 1  complete.
Simulation progress for error distribution norm with n_h =  7 , n_samples = 500  and sigma = 1 :


100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:25<00:00, 39.78it/s]


Simulation for error distribution norm with n_h =  7 , n_samples = 500  and sigma = 1  complete.
Simulation progress for error distribution norm with n_h =  7 , n_samples = 1000  and sigma = 1 :


100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [01:39<00:00, 10.08it/s]


Simulation for error distribution norm with n_h =  7 , n_samples = 1000  and sigma = 1  complete.
Simulation progress for error distribution norm with n_h =  7 , n_samples = 2000  and sigma = 1 :


100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [07:24<00:00,  2.25it/s]


Simulation for error distribution norm with n_h =  7 , n_samples = 2000  and sigma = 1  complete.
Simulation progress for error distribution norm with n_h =  7 , n_samples = 4000  and sigma = 1 :


100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [34:29<00:00,  2.07s/it]


Simulation for error distribution norm with n_h =  7 , n_samples = 4000  and sigma = 1  complete.
Simulation progress for error distribution norm with n_h =  7 , n_samples = 8000  and sigma = 1 :


100%|████████████████████████████████████████████████████████████████████████████| 1000/1000 [2:31:04<00:00,  9.06s/it]

Simulation for error distribution norm with n_h =  7 , n_samples = 8000  and sigma = 1  complete.





In [23]:
# If necessary, create a text file recording the Python version and the
# versions of all installed packages (for reproducibility).
import sys

try:
    # Standard-library replacement for the deprecated pkg_resources API (Python >= 3.8).
    from importlib import metadata as importlib_metadata
except ImportError:
    importlib_metadata = None

if not os.path.isfile('Version control.txt'):
    # Collect the package list BEFORE opening the file, so that a failure here
    # cannot leave behind a partial file that would never be regenerated.
    if importlib_metadata is not None:
        installed_packages = {
            f"{dist.metadata['Name']}=={dist.version}"
            for dist in importlib_metadata.distributions()
        }
    else:
        # Fallback for older interpreters without importlib.metadata.
        import pkg_resources
        installed_packages = {
            f"{dist.project_name}=={dist.version}"
            for dist in pkg_resources.working_set
        }
    packages_sorted = sorted(installed_packages)

    with open('Version control.txt', 'w') as f:
        f.write(f"Python Version: {sys.version}\n")
        f.write("\n".join(packages_sorted))
    print("The file 'Version control.txt' was created.")
else:
    print("The file 'Version control.txt' already existed.")

The file 'Version control.txt' was created.
