In [632]:
import pingouin as pg
import pandas as pd
import scipy.stats as stats
import numpy as np
from scipy.stats import ttest_1samp

In [633]:
# Import all the text files for the probabilistic sampling methods for the statistical tests

# Define constants
SIGNIFICANCE_LEVEL = 0.05
TRUE_VALUE = 1.506484
PAIRS = [('Latin', 'Random'), ('Latin', 'Orthogonal'), ('Random', 'Orthogonal')]

# Latin hypercube
df_l = pd.read_csv("latin.txt", header=1)
df_l.columns = ["grid_size", "max_iterations", "run", "total_points", "points_inside"]
df_l['fraction'] = (1 - df_l['points_inside'] / df_l['total_points']) * 9
df_l['method'] = 'Latin'
df_l["error"] = np.abs(df_l['fraction'] - TRUE_VALUE)

# Random
df_r = pd.read_csv("random.txt", header=1)
df_r.columns = ["grid_size", "max_iterations", "run", "total_points", "points_inside"]
df_r['fraction'] = (1 - df_r['points_inside'] / df_r['total_points']) * 9
df_r['method'] = 'Random'
df_r["error"] = np.abs(df_r['fraction'] - TRUE_VALUE)

# Orthogonal
df_o = pd.read_csv("orthogonal.txt", header=1)
df_o.columns = ["grid_size", "max_iterations", "run", "total_points", "points_inside"]
df_o['fraction'] = (1 - df_o['points_inside'] / df_o['total_points']) * 9
df_o['method'] = 'Orthogonal'
df_o["error"] = np.abs(df_o['fraction'] - TRUE_VALUE)

data_combined = pd.concat([df_l, df_r, df_o], ignore_index=True)
methods = data_combined['method'].unique()
grid_sizes = data_combined['grid_size'].unique()
iteration_bounds = data_combined['max_iterations'].unique()

In [634]:
# Hypotheses_0^1: for Deterministic data

# Deterministic
df_d = pd.read_csv("question2.txt", header=1)
df_d.columns = ["grid_size", "max_iterations", "total_points", "points_inside"]
df_d['fraction'] = (1 - df_d['points_inside'] / df_d['total_points']) * 9
df_d['method'] = 'Deterministic'
df_d["error"] = np.abs(df_d['fraction'] - TRUE_VALUE)

print("Deterministic sampling method\n")

# Calculate the smallest mean error for the largest grid size and iteration bound
mean_error_largest_df = df_d[(df_d['grid_size'] == 5000) & (df_d['max_iterations'] == 2000)]
mean_error_smallest = mean_error_largest_df['error'].mean()
print(f"Mean error for grid size = 5000 and iteration bound = 2000: {mean_error_smallest}\n")

# Grid sizes and iteration bounds
grid_sizes = [100, 500, 1000, 5000]
iteration_bounds = [50, 500, 2000]

# Loop over grid sizes and iteration bounds
for i, j in [(x, y) for x in iteration_bounds for y in iteration_bounds if x > y]:  # i > j
    for z, s in [(x, y) for x in grid_sizes for y in grid_sizes if x > y]:  # z > s

        # Filter for the current grid size and iteration conditions
        filtered_d = df_d[(df_d['grid_size'] == z) & (df_d['max_iterations'] == i)]

        # Calculate mean error for the filtered data
        mean_error = filtered_d['error'].mean()

        # Print results for the current combination
        print(f"Grid sizes: {z} > {s}")
        print(f"Iterations {i} > {j}")
        print(f"Mean error: {mean_error}")
        print(40*"-")

Deterministic sampling method

Mean error for grid size = 5000 and iteration bound = 2000: 0.0013306400000001606

Grid sizes: 500 > 100
Iterations 500 > 50
Mean error: 0.001196000000000197
----------------------------------------
Grid sizes: 1000 > 100
Iterations 500 > 50
Mean error: 0.004913000000000167
----------------------------------------
Grid sizes: 1000 > 500
Iterations 500 > 50
Mean error: 0.004913000000000167
----------------------------------------
Grid sizes: 5000 > 100
Iterations 500 > 50
Mean error: 0.007230680000000156
----------------------------------------
Grid sizes: 5000 > 500
Iterations 500 > 50
Mean error: 0.007230680000000156
----------------------------------------
Grid sizes: 5000 > 1000
Iterations 500 > 50
Mean error: 0.007230680000000156
----------------------------------------
Grid sizes: 500 > 100
Iterations 2000 > 50
Mean error: 0.004455999999999571
----------------------------------------
Grid sizes: 1000 > 100
Iterations 2000 > 50
Mean error: 0.001009000

In [635]:
# One-sample T-test to test H_0^(1a): mu(2000,5000)=0 (done) (for every method seperately)

# Criteria for variables: grid size and iteration bound
grid_size = 5000
max_iter = 2000

# Subset the data for the specific grid size and iteration bound
subset = data_combined[
    (data_combined['grid_size'] == grid_size) & 
    (data_combined['max_iterations'] == max_iter)
]

# Get the sampling methods
methods = subset['method'].unique()

# Iterate over each sampling method
for method in methods:

    method_subset = subset[subset['method'] == method]

    # Perform 1-sample t-test for each method to evaluate with 0
    t_stat, p_value = ttest_1samp(method_subset['error'], 0)

    # Print results
    print(f"1-Sample t-test results for method: {method}")
    print(f"Mean error: {method_subset['error'].mean()}")
    print(f"t-statistic: {t_stat}")
    print(f"p-value: {p_value}")

    # Check significance
    if p_value < SIGNIFICANCE_LEVEL:
        print("Statistically significant difference")
    else:
        print("Statistically NOT significant difference")
    print(80*"-")

1-Sample t-test results for method: Latin
Mean error: 0.0018002960000000235
t-statistic: 13.126121291762377
p-value: 3.573944715828477e-07
Statistically significant difference
--------------------------------------------------------------------------------
1-Sample t-test results for method: Random
Mean error: 0.002144384000000077
t-statistic: 7.534387036390897
p-value: 3.562106019150578e-05
Statistically significant difference
--------------------------------------------------------------------------------
1-Sample t-test results for method: Orthogonal
Mean error: 0.0019087279999998152
t-statistic: 200.80984621488287
p-value: 9.580966296302393e-18
Statistically significant difference
--------------------------------------------------------------------------------


In [636]:
# Welch's t-test to test H_0^(1b): mu(i,s)=mu(j,s), where i > j (mean error) (within every method) (done)

grid_sizes = [100, 500, 1000, 5000]
iteration_bounds = [50, 2000]

# Loop through grid sizes
for grid_size in grid_sizes:
    # Subset the data for the current grid size
    grid_subset = data_combined[data_combined['grid_size'] == grid_size]
    
    # Loop through all pairs of iteration bounds
    for i in iteration_bounds:
        for j in iteration_bounds:
            if i > j:  # Only compare when i > j (iteration bounds)

                for method in data_combined['method'].unique():
                    method_subset = grid_subset[grid_subset['method'] == method]

                    data_i = method_subset[method_subset['max_iterations'] == i]['error']
                    data_j = method_subset[method_subset['max_iterations'] == j]['error']

                    # Check if either dataset is empty
                    if data_i.empty or data_j.empty:
                        print(f"Skipping test for {method}, grid size {grid_size}, iterations {i} > {j} due to empty data.")
                        continue
                    
                    # Perform Welch's t-test
                    welch_result = pg.ttest(data_i, data_j, paired=False, alternative='two-sided')
                    p_val = welch_result['p-val'].values[0]

                    # Print results
                    print(f"Welch's t-test for method {method}")
                    print(f"Iteration bounds {i} > {j}")
                    print(f"Grid Size {grid_size}")
                    print(welch_result)
                    
                    # Check significance
                    if p_val < SIGNIFICANCE_LEVEL:
                        print("Statistically significant difference")
                    else:
                        print("Statistically NOT significant difference")
                    print(80*"-")

Welch's t-test for method Latin
Iteration bounds 2000 > 50
Grid Size 100
               T        dof alternative     p-val           CI95%   cohen-d  \
T-test -6.440685  13.699255   two-sided  0.000017  [-0.09, -0.04]  3.023952   

            BF10     power  
T-test  2149.688  0.999988  
Statistically significant difference
--------------------------------------------------------------------------------
Welch's t-test for method Random
Iteration bounds 2000 > 50
Grid Size 100
               T        dof alternative     p-val           CI95%   cohen-d  \
T-test -3.755352  13.833088   two-sided  0.002172  [-0.09, -0.02]  1.761897   

          BF10     power  
T-test  20.863  0.950563  
Statistically significant difference
--------------------------------------------------------------------------------
Welch's t-test for method Orthogonal
Iteration bounds 2000 > 50
Grid Size 100
                T        dof alternative         p-val           CI95%  \
T-test -35.443386  11.535376   two-

In [637]:
# Welch's t-test to test H_0^(1c): mu(i,s) = mu(i,s'), where s' > s (z denotes s') (mean error) (within every method) (done)

grid_sizes = [100, 5000]
iteration_bounds = [50, 500, 2000]

# Loop through iteration bounds
for max_iter in iteration_bounds:

    # Subset the data for the current iteration bound
    max_iter_subset = data_combined[data_combined['max_iterations'] == max_iter]
    
    # Loop through all pairs of grid sizes
    for s in grid_sizes:
        for z in grid_sizes:
            if z > s:  # Only compare when z > s (grid sizes)

                for method in data_combined['method'].unique():
                    method_subset = max_iter_subset[max_iter_subset['method'] == method]

                    data_s = method_subset[method_subset['grid_size'] == s]['error']
                    data_z = method_subset[method_subset['grid_size'] == z]['error']

                    # Perform Welch's t-test
                    welch_result = pg.ttest(data_z, data_s, paired=False, alternative='two-sided')
                    p_val = welch_result['p-val'].values[0]

                    # Print results
                    print(f"Welch's t-test for method: {method}")
                    print(f"Iteration bound: {max_iter}")
                    print(f"Grid sizes: {z} > {s}")
                    print(welch_result)
                    
                    # Check significance
                    if p_val < SIGNIFICANCE_LEVEL:
                        print("Statistically significant difference")
                    else:
                        print("Statistically NOT significant difference")
                    print(80 * "-")

Welch's t-test for method: Latin
Iteration bound: 50
Grid sizes: 5000 > 100
               T      dof alternative     p-val          CI95%   cohen-d  \
T-test -0.562197  8.00273   two-sided  0.589377  [-0.02, 0.02]  0.273173   

         BF10     power  
T-test  0.452  0.086848  
Statistically NOT significant difference
--------------------------------------------------------------------------------
Welch's t-test for method: Random
Iteration bound: 50
Grid sizes: 5000 > 100
               T       dof alternative     p-val          CI95%   cohen-d  \
T-test  0.086635  8.008287   two-sided  0.933089  [-0.03, 0.03]  0.042094   

         BF10     power  
T-test  0.406  0.050859  
Statistically NOT significant difference
--------------------------------------------------------------------------------
Welch's t-test for method: Orthogonal
Iteration bound: 50
Grid sizes: 5000 > 100
              T       dof alternative     p-val         CI95%   cohen-d  \
T-test  0.63842  8.003098   two-sid

In [638]:
# # Welch's t-test to test H_0^(1d): mu_Orth = mu_Rand = mu_LHS (mean error) (between method pairs) (done)

# Loop through the grid sizes and iteration bounds
for grid_size in grid_sizes:
    for max_iter in iteration_bounds:

        # Subset the data for the current grid size and max iterations
        subset = data_combined[
            (data_combined['grid_size'] == grid_size) & 
            (data_combined['max_iterations'] == max_iter)
        ]
        
        # Perform pairwise Welch's t-tests for each pair of methods
        for method1, method2 in PAIRS:
            
            data1 = subset[subset['method'] == method1]['error']
            data2 = subset[subset['method'] == method2]['error']
            
            # Perform Welch's t-test using Pingouin's ttest function
            welch_result = pg.ttest(data1, data2, paired=False, alternative='two-sided')
            p_val = welch_result['p-val'].values[0]

            # Print results
            print(f"Welch's t-test for method: {method1} and {method2}")
            print(f"Iteration bound: {max_iter}")
            print(f"Grid size: {grid_size}")
            print(welch_result)
            
            # Check significance
            if p_val < SIGNIFICANCE_LEVEL:
                print("Statistically significant difference (p < 0.05)")
            else:
                print("Statistically NOT significant difference (p >= 0.05)")
            print(80 * "-")

Welch's t-test for method: Latin and Random
Iteration bound: 50
Grid size: 100
               T  dof alternative     p-val          CI95%   cohen-d  BF10  \
T-test -0.295275   18   two-sided  0.771162  [-0.02, 0.02]  0.132051  0.41   

          power  
T-test  0.05902  
Statistically NOT significant difference (p >= 0.05)
--------------------------------------------------------------------------------
Welch's t-test for method: Latin and Orthogonal
Iteration bound: 50
Grid size: 100
               T  dof alternative     p-val        CI95%   cohen-d   BF10  \
T-test  2.591406   18   two-sided  0.018429  [0.0, 0.03]  1.158912  3.352   

           power  
T-test  0.688438  
Statistically significant difference (p < 0.05)
--------------------------------------------------------------------------------
Welch's t-test for method: Random and Orthogonal
Iteration bound: 50
Grid size: 100
               T  dof alternative     p-val         CI95%   cohen-d   BF10  \
T-test  2.084271   18   two

In [639]:
# Fischer's f-test to test H_0^(2a): Var(X_LHS) = Var(X_Rand) = Var(X_Orth) (variance) (between method pairs) (done)

# Loop through grid sizes and iteration bounds
for grid_size in grid_sizes:
    for max_iter in iteration_bounds:
        
        # Subset the data for the current grid size and max iterations
        subset = data_combined[
            (data_combined['grid_size'] == grid_size) &
            (data_combined['max_iterations'] == max_iter)
        ]
        
        # Perform pairwise variance comparison using Fischer's F-test
        for method1, method2 in PAIRS:
            
            subset1 = subset[subset['method'] == method1]['fraction']
            subset2 = subset[subset['method'] == method2]['fraction']

            # Calculate variances
            var1 = subset1.var(ddof=1)  # Variance of method1
            var2 = subset2.var(ddof=1)  # Variance of method2

            # Ensure F-statistic is always the ratio of the larger variance to the smaller variance
            f_statistic = var1 / var2 if var1 >= var2 else var2 / var1
            
            # Degrees of freedom
            df1 = len(subset1) - 1
            df2 = len(subset2) - 1
            
            # Calculate p-value from F-distribution
            p_val = 1 - stats.f.cdf(f_statistic, df1, df2)
    
            # Print the results
            print(f"Fischer's F-test between {method1} and {method2}")
            print(f"Iteration bound: {max_iter}")
            print(f"Grid size: {grid_size}")
            print(f"F-statistic: {f_statistic}")
            print(f"p-value: {p_val}")
            
            # Check significance
            if p_val < SIGNIFICANCE_LEVEL:
                print("Statistically significant difference")
            else:
                print("Statistically NOT significant difference")
            print(80*"-")   

Fischer's F-test between Latin and Random
Iteration bound: 50
Grid size: 100
F-statistic: 2.2048356605088104
p-value: 0.1421644946584777
Statistically NOT significant difference
--------------------------------------------------------------------------------
Fischer's F-test between Latin and Orthogonal
Iteration bound: 50
Grid size: 100
F-statistic: 20.967315716271695
p-value: 0.00013449440241197763
Statistically significant difference
--------------------------------------------------------------------------------
Fischer's F-test between Random and Orthogonal
Iteration bound: 50
Grid size: 100
F-statistic: 46.229485396382664
p-value: 6.683028238474442e-06
Statistically significant difference
--------------------------------------------------------------------------------
Fischer's F-test between Latin and Random
Iteration bound: 2000
Grid size: 100
F-statistic: 1.6663814023282235
p-value: 0.22929635341379417
Statistically NOT significant difference
----------------------------------

In [640]:
# Fischer's f-test to test H_0^(2b): Var(X(i,s)) = Var(X(j,s)), where i > j (variance) (within every method)

grid_sizes = [100, 500,5000]
iteration_bounds = [50, 500, 2000]

grid_sizes = [100, 5000]
iteration_bounds = [50, 2000]

# Loop through grid sizes
for grid_size in grid_sizes:
    
    # Subset the data for the current grid size
    grid_subset = data_combined[data_combined['grid_size'] == grid_size]
    
    # Loop through all pairs of iteration bounds
    for i in iteration_bounds:
        for j in iteration_bounds:
            if i > j:  # Only compare when i > j (iteration bounds)

                for method in data_combined['method'].unique():
                    method_subset = grid_subset[grid_subset['method'] == method]

                    data_i = method_subset[method_subset['max_iterations'] == i]['fraction']
                    data_j = method_subset[method_subset['max_iterations'] == j]['fraction']

                    # Calculate variances
                    var1 = data_i.var(ddof=1)  # Variance of i
                    var2 = data_j.var(ddof=1)  # Variance of j

                    # Ensure F-statistic is always the ratio of the larger variance to the smaller variance
                    f_statistic = var1 / var2 if var1 >= var2 else var2 / var1
                    
                    # Degrees of freedom
                    df1 = len(data_i) - 1
                    df2 = len(data_j) - 1
                    
                    # Calculate p-value from F-distribution
                    p_val = 1 - stats.f.cdf(f_statistic, df1, df2)
            
                    # Print the results
                    print(f"Fischer's F-test for method: {method}")
                    print(f"Iteration bound: {max_iter}")
                    print(f"Grid size: {grid_size}")
                    print(f"F-statistic: {f_statistic}")
                    print(f"p-value: {p_val}")
                    
                    # Check significance
                    if p_val < SIGNIFICANCE_LEVEL:
                        print("Statistically significant difference")
                    else:
                        print("Statistically NOT significant difference")
                    print(80*"-")

Fischer's F-test for method: Latin
Iteration bound: 2000
Grid size: 100
F-statistic: 1.1316506915193785
p-value: 0.4361146748488486
Statistically NOT significant difference
--------------------------------------------------------------------------------
Fischer's F-test for method: Random
Iteration bound: 2000
Grid size: 100
F-statistic: 1.169201654660058
p-value: 0.41809215881238715
Statistically NOT significant difference
--------------------------------------------------------------------------------
Fischer's F-test for method: Orthogonal
Iteration bound: 2000
Grid size: 100
F-statistic: 1.421140472878985
p-value: 0.3155880538907827
Statistically NOT significant difference
--------------------------------------------------------------------------------
Fischer's F-test for method: Latin
Iteration bound: 2000
Grid size: 5000
F-statistic: 1.4625393968951221
p-value: 0.290110622004227
Statistically NOT significant difference
------------------------------------------------------------

In [641]:
# Fischer's f-test to test H_0^(2c): Var(X(i,s)) = Var(X(i,s')), where s' > s (z denotes s') (variance) (within every method) (done)

grid_sizes = [100, 5000]
iteration_bounds = [50, 2000]

# Loop through iteration bounds
for max_iter in iteration_bounds:

    # Subset the data for the current iteration bound
    max_iter_subset = data_combined[data_combined['max_iterations'] == max_iter]
    
    # Loop through all pairs of grid sizes
    for s in grid_sizes:
        for z in grid_sizes:
            if z > s:  # Only compare when z > s (grid sizes)

                for method in data_combined['method'].unique():
                    method_subset = max_iter_subset[max_iter_subset['method'] == method]
        
                    data_s = method_subset[method_subset['grid_size'] == s]['fraction']
                    data_z = method_subset[method_subset['grid_size'] == z]['fraction']

                    # Calculate variances
                    var1 = data_s.var(ddof=1)  # Variance of s
                    var2 = data_z.var(ddof=1)  # Variance of z

                    # Ensure F-statistic is always the ratio of the larger variance to the smaller variance
                    f_statistic = var1 / var2 if var1 >= var2 else var2 / var1
                    
                    # Degrees of freedom
                    df1 = len(data_s) - 1
                    df2 = len(data_z) - 1
                    
                    # Calculate p-value from F-distribution
                    p_val = 1 - stats.f.cdf(f_statistic, df1, df2)
            
                    # Print the results
                    print(f"Fischer's F-test for method: {method}")
                    print(f"Iteration bound: {max_iter}")
                    print(f"Grid size: {grid_size}")
                    print(f"F-statistic: {f_statistic}")
                    print(f"p-value: {p_val}")
                    
                    # Check significance
                    if p_val < SIGNIFICANCE_LEVEL:
                        print("Statistically significant difference")
                    else:
                        print("Statistically NOT significant difference")
                    print(80*"-")   

Fischer's F-test for method: Latin
Iteration bound: 50
Grid size: 5000
F-statistic: 5274.445454210034
p-value: 1.3322676295501878e-15
Statistically significant difference
--------------------------------------------------------------------------------
Fischer's F-test for method: Random
Iteration bound: 50
Grid size: 5000
F-statistic: 1737.629578789304
p-value: 1.9884094371036554e-13
Statistically significant difference
--------------------------------------------------------------------------------
Fischer's F-test for method: Orthogonal
Iteration bound: 50
Grid size: 5000
F-statistic: 4647.95424852433
p-value: 2.3314683517128287e-15
Statistically significant difference
--------------------------------------------------------------------------------
Fischer's F-test for method: Latin
Iteration bound: 2000
Grid size: 5000
F-statistic: 4081.141238526273
p-value: 3.774758283725532e-15
Statistically significant difference
-------------------------------------------------------------------