# Latin Hypercube Sampling (LHS) is not available on TS machines and will be performed elsewhere

## PORO/PERMX pairs may be sampled more than once

In [15]:
import numpy as np
import pandas as pd
from scipy.stats import qmc
from pathlib import Path
import sys

# --------------------------
# Setup for sampling
# --------------------------
# One row per sampled parameter: (column name, lower bound, upper bound).
# The three "*_MPa/km" rows span 0.9x to 1.1x of their base value.
_param_ranges = [
    ('E_GPa', 15, 25),
    ('PR', 0.2, 0.4),
    ('SH_MPa/km', 25 * 0.9, 25 * 1.1),
    ('Sh_MPa/km', 14.6 * 0.9, 14.6 * 1.1),
    ('Sv_MPa/km', 22.7 * 0.9, 22.7 * 1.1),
    ('SH_azi_deg', 290, 310),
]
# Split the table into the three parallel lists used by the sampling code.
params = [name for name, _, _ in _param_ranges]
l_bounds = [low for _, low, _ in _param_ranges]
u_bounds = [high for _, _, high in _param_ranges]
num_samples = 5  # Change as needed

# --------------------------
# Load PORO and PERMX files
# --------------------------
data_dir = Path('..') / 'data' / 'properties'
# Or use Path('data/properties') if running from repo root
if not data_dir.is_dir():
    # is_dir() also rejects a regular file at this path, which exists() would
    # accept and iterdir() would then fail on.
    # sys.exit(<str>) writes the message to stderr and exits with status 1.
    sys.exit(f"Error: Directory '{data_dir}' does not exist.")

# Scan the directory once and keep regular files only. Sorting by path makes
# the PORO and PERMX lists line up position by position.
_property_files = sorted(f for f in data_dir.iterdir() if f.is_file())
poro_files = [f for f in _property_files if "PORO" in f.name]
permx_files = [f for f in _property_files if "PERMX" in f.name]

if not poro_files or not permx_files:
    sys.exit("Error: PORO or PERMX files not found.")

if len(poro_files) != len(permx_files):
    raise ValueError(f"Number of PORO files ({len(poro_files)}) does not match number of PERMX files ({len(permx_files)})")

# The i-th PORO file is later paired with the i-th PERMX file purely by
# position. Flag pairs whose names differ by more than the PORO/PERMX token so
# a stray or misnamed file cannot silently shift the pairing.
# NOTE(review): assumes partner files share a name apart from that token -
# confirm against the naming convention used in the data directory.
_mismatched_pairs = [
    (poro.name, permx.name)
    for poro, permx in zip(poro_files, permx_files)
    if poro.name.replace("PORO", "PERMX") != permx.name
]
if _mismatched_pairs:
    print(
        "Warning: PORO/PERMX files paired by sort order have non-matching "
        f"names: {_mismatched_pairs}",
        file=sys.stderr,
    )

# --------------------------
# Latin Hypercube Sampling
# --------------------------
# Set to an int to make the sampled design reproducible; None keeps the
# previous behaviour of drawing a fresh design on every run.
seed = None

# Sample 6 parameters + 1 index for matched poro/permx
n_params = len(params)
n_pairs = len(poro_files)
sampler = qmc.LatinHypercube(d=n_params + 1, seed=seed)
sample = sampler.random(n=num_samples)

# Scale physical parameters from the unit hypercube to [l_bounds, u_bounds]
sample_params = qmc.scale(sample[:, :n_params], l_bounds, u_bounds)
df_params = pd.DataFrame(np.round(sample_params, 2), columns=params)

# Scale indices: the last column is mapped onto 0 .. n_pairs - 1 by
# truncation, so the same pair can be drawn for more than one sample.
file_indices = (sample[:, -1] * n_pairs).astype(int)
# Keep every index inside the valid range of the file lists.
file_indices = np.clip(file_indices, 0, n_pairs - 1)

# Get matched file paths (same index into both lists keeps the pair together)
df_params["PORO_file"] = [str(poro_files[i]) for i in file_indices]
df_params["PERMX_file"] = [str(permx_files[i]) for i in file_indices]

# --------------------------
# Calculate stress state parameters
# --------------------------
# Half-sum and half-difference of the two sampled horizontal gradients are
# shared by all three stress components below.
stress_SH = df_params['SH_MPa/km']
stress_Sh = df_params['Sh_MPa/km']
half_sum = (stress_SH + stress_Sh) / 2
half_diff = (stress_SH - stress_Sh) / 2

df_params['beta'] = df_params['SH_azi_deg'] - 90  # Rotate from SH to x-axis
two_beta_rad = np.radians(2 * df_params['beta'])
df_params['cos_2beta'] = np.cos(two_beta_rad)
df_params['sin_2beta'] = np.sin(two_beta_rad)

# Components in the x/y frame, built from the half-sum, the half-difference
# and the double angle 2*beta.
df_params['sigma_x'] = half_sum + half_diff * df_params['cos_2beta']
df_params['sigma_y'] = half_sum - half_diff * df_params['cos_2beta']
df_params['tau_xy'] = half_diff * df_params['sin_2beta']

# --------------------------
# Output
# --------------------------
# df_params.to_csv("sampled_parameters_and_files.csv", index=False)
df_params


Unnamed: 0,E_GPa,PR,SH_MPa/km,Sh_MPa/km,Sv_MPa/km,SH_azi_deg,PORO_file,PERMX_file,beta,cos_2beta,sin_2beta,sigma_x,sigma_y,tau_xy
0,16.27,0.23,26.32,14.79,24.87,309.22,../data/properties/JD_BASECASE_1_PORO.dat,../data/properties/JD_BASECASE_1_PERMX.dat,219.22,0.200394,0.979715,21.710271,19.399729,5.648059
1,19.86,0.31,24.69,14.17,22.72,296.87,../data/properties/JD_BASECASE_4_PORO.dat,../data/properties/JD_BASECASE_4_PERMX.dat,206.87,0.59145,0.806341,22.541029,16.318971,4.241356
2,23.4,0.33,23.19,16.05,22.01,290.23,../data/properties/JD_BASECASE_3_PORO.dat,../data/properties/JD_BASECASE_3_PERMX.dat,200.23,0.760859,0.648917,22.336267,16.903733,2.316634
3,18.02,0.26,27.14,15.29,23.35,302.75,../data/properties/JD_BASECASE_6_PORO.dat,../data/properties/JD_BASECASE_6_PERMX.dat,212.75,0.414693,0.909961,23.672057,18.757943,5.391521
4,22.53,0.36,23.9,13.17,20.76,299.02,../data/properties/JD_BASECASE_3_PORO.dat,../data/properties/JD_BASECASE_3_PERMX.dat,209.02,0.529327,0.848418,21.37484,15.69516,4.551762


## PORO/PERMX pairs are NOT sampled more than once

Make sure the number of samples (`num_samples`) does not exceed the number of available PORO/PERMX pairs; otherwise the cell raises a `ValueError`.

In [14]:
import numpy as np
import pandas as pd
from scipy.stats import qmc
from pathlib import Path
import sys

# --------------------------
# Setup for sampling
# --------------------------
# One row per sampled parameter: (column name, lower bound, upper bound).
# The three "*_MPa/km" rows span 0.9x to 1.1x of their base value.
_param_ranges = [
    ('E_GPa', 15, 25),
    ('PR', 0.2, 0.4),
    ('SH_MPa/km', 25 * 0.9, 25 * 1.1),
    ('Sh_MPa/km', 14.6 * 0.9, 14.6 * 1.1),
    ('Sv_MPa/km', 22.7 * 0.9, 22.7 * 1.1),
    ('SH_azi_deg', 290, 310),
]
# Split the table into the three parallel lists used by the sampling code.
params = [name for name, _, _ in _param_ranges]
l_bounds = [low for _, low, _ in _param_ranges]
u_bounds = [high for _, _, high in _param_ranges]
num_samples = 5  # Change as needed

# --------------------------
# Load PORO and PERMX files
# --------------------------
data_dir = Path('..') / 'data' / 'properties'
# Or use Path('data/properties') if running from repo root
if not data_dir.is_dir():
    # is_dir() also rejects a regular file at this path, which exists() would
    # accept and iterdir() would then fail on.
    # sys.exit(<str>) writes the message to stderr and exits with status 1.
    sys.exit(f"Error: Directory '{data_dir}' does not exist.")

# Scan the directory once and keep regular files only. Sorting by path makes
# the PORO and PERMX lists line up position by position.
_property_files = sorted(f for f in data_dir.iterdir() if f.is_file())
poro_files = [f for f in _property_files if "PORO" in f.name]
permx_files = [f for f in _property_files if "PERMX" in f.name]

if not poro_files or not permx_files:
    sys.exit("Error: PORO or PERMX files not found.")

if len(poro_files) != len(permx_files):
    raise ValueError(f"Number of PORO files ({len(poro_files)}) does not match number of PERMX files ({len(permx_files)})")

# The i-th PORO file is later paired with the i-th PERMX file purely by
# position. Flag pairs whose names differ by more than the PORO/PERMX token so
# a stray or misnamed file cannot silently shift the pairing.
# NOTE(review): assumes partner files share a name apart from that token -
# confirm against the naming convention used in the data directory.
_mismatched_pairs = [
    (poro.name, permx.name)
    for poro, permx in zip(poro_files, permx_files)
    if poro.name.replace("PORO", "PERMX") != permx.name
]
if _mismatched_pairs:
    print(
        "Warning: PORO/PERMX files paired by sort order have non-matching "
        f"names: {_mismatched_pairs}",
        file=sys.stderr,
    )

num_pairs = len(poro_files)

# Sampling without replacement needs at least as many pairs as samples.
if num_samples > num_pairs:
    raise ValueError(f"Cannot sample {num_samples} unique poro/permx pairs: only {num_pairs} available.")

# --------------------------
# Latin Hypercube Sampling for parameters
# --------------------------
# Set to an int to make the whole draw (LHS design and pair selection)
# reproducible; None keeps the previous behaviour of a fresh draw every run.
seed = None
# One Generator feeds both the LHS sampler and the pair selection, instead of
# NumPy's legacy global random state.
rng = np.random.default_rng(seed)

sampler = qmc.LatinHypercube(d=len(params), seed=rng)
sample = sampler.random(n=num_samples)
# Scale from the unit hypercube to [l_bounds, u_bounds]
sample_params = qmc.scale(sample, l_bounds, u_bounds)
df_params = pd.DataFrame(np.round(sample_params, 2), columns=params)

# --------------------------
# Random unique selection of poro/permx pairs
# --------------------------
# replace=False guarantees that no pair index is drawn twice.
unique_indices = rng.choice(num_pairs, size=num_samples, replace=False)
# The same index into both lists keeps each PORO file with its PERMX partner.
df_params["PORO_file"] = [str(poro_files[i]) for i in unique_indices]
df_params["PERMX_file"] = [str(permx_files[i]) for i in unique_indices]

# --------------------------
# Calculate stress state parameters
# --------------------------
# Half-sum and half-difference of the two sampled horizontal gradients are
# shared by all three stress components below.
stress_SH = df_params['SH_MPa/km']
stress_Sh = df_params['Sh_MPa/km']
half_sum = (stress_SH + stress_Sh) / 2
half_diff = (stress_SH - stress_Sh) / 2

df_params['beta'] = df_params['SH_azi_deg'] - 90  # Rotate from SH to x-axis
two_beta_rad = np.radians(2 * df_params['beta'])
df_params['cos_2beta'] = np.cos(two_beta_rad)
df_params['sin_2beta'] = np.sin(two_beta_rad)

# Components in the x/y frame, built from the half-sum, the half-difference
# and the double angle 2*beta.
df_params['sigma_x'] = half_sum + half_diff * df_params['cos_2beta']
df_params['sigma_y'] = half_sum - half_diff * df_params['cos_2beta']
df_params['tau_xy'] = half_diff * df_params['sin_2beta']

# --------------------------
# Output
# --------------------------
# df_params.to_csv("sampled_parameters_and_files.csv", index=False)
df_params



Unnamed: 0,E_GPa,PR,SH_MPa/km,Sh_MPa/km,Sv_MPa/km,SH_azi_deg,PORO_file,PERMX_file,beta,cos_2beta,sin_2beta,sigma_x,sigma_y,tau_xy
0,15.44,0.34,23.8,15.53,21.1,294.77,../data/properties/JD_BASECASE_2_PORO.dat,../data/properties/JD_BASECASE_2_PERMX.dat,204.77,0.648917,0.760859,22.348272,16.981728,3.146153
1,22.09,0.39,24.86,15.37,23.43,299.86,../data/properties/JD_BASECASE_4_PORO.dat,../data/properties/JD_BASECASE_4_PERMX.dat,209.86,0.504226,0.863572,22.507553,17.722447,4.097647
2,17.97,0.25,27.34,13.17,22.42,309.31,../data/properties/JD_BASECASE_5_PORO.dat,../data/properties/JD_BASECASE_5_PERMX.dat,219.31,0.197315,0.98034,21.652978,18.857022,6.94571
3,20.19,0.21,26.19,13.83,22.07,292.28,../data/properties/JD_BASECASE_6_PORO.dat,../data/properties/JD_BASECASE_6_PERMX.dat,202.28,0.712516,0.701656,24.413349,15.606651,4.336233
4,23.85,0.29,22.5,14.69,24.95,303.15,../data/properties/JD_BASECASE_1_PORO.dat,../data/properties/JD_BASECASE_1_PERMX.dat,213.15,0.401948,0.915663,20.164606,17.025394,3.575662
