# Latin Hypercube Sampling (LHS) is not available on TS machines and will be performed elsewhere

## PORO/PERMX pairs may be sampled more than once

In [17]:
import numpy as np
import pandas as pd
from scipy.stats import qmc
from pathlib import Path
import sys

# --------------------------
# Setup for sampling
# --------------------------
# One entry per sampled input: column name -> (lower bound, upper bound).
# The three stress-gradient ranges are -10 % / +10 % around 25, 14.6 and 22.7.
# NOTE(review): the 15e6-25e6 range for 'E_GPa' does not look like GPa
# magnitudes -- confirm the intended unit of this column.
_param_ranges = {
    'E_GPa': (15e6, 25e6),
    'PR': (0.2, 0.4),
    'SH_MPa/km': (25 * 0.9, 25 * 1.1),
    'Sh_MPa/km': (14.6 * 0.9, 14.6 * 1.1),
    'Sv_MPa/km': (22.7 * 0.9, 22.7 * 1.1),
    'SH_azi_deg': (290, 310),
}
params = list(_param_ranges)
l_bounds = [low for low, _high in _param_ranges.values()]
u_bounds = [high for _low, high in _param_ranges.values()]
num_samples = 5  # Change as needed

# --------------------------
# Load PORO and PERMX files
# --------------------------
# Collect the PORO and PERMX property files from data_dir and verify that they
# form matched pairs, because later steps pick both files by the same list
# position.
data_dir = Path('..') / 'data' / 'properties'
# Or use Path('data/properties') if running from repo root
if not data_dir.exists():
    raise FileNotFoundError(f"Error: Directory '{data_dir}' does not exist.")

# List the directory once. Sub-directories are skipped so that a folder whose
# name happens to contain "PORO"/"PERMX" is never treated as a property file.
property_files = [f for f in data_dir.iterdir() if f.is_file()]
poro_files = sorted(f for f in property_files if "PORO" in f.name)
permx_files = sorted(f for f in property_files if "PERMX" in f.name)

if not poro_files or not permx_files:
    raise FileNotFoundError("Error: PORO or PERMX files not found.")

if len(poro_files) != len(permx_files):
    raise ValueError(f"Number of PORO files ({len(poro_files)}) does not match number of PERMX files ({len(permx_files)})")

# The two lists are sorted independently, so equal counts alone do not
# guarantee that entry i of each list belongs to the same case. Compare the
# file names with the property keyword removed and fail loudly on a mismatch.
mismatched_pairs = [
    (poro.name, permx.name)
    for poro, permx in zip(poro_files, permx_files)
    if poro.name.replace("PORO", "") != permx.name.replace("PERMX", "")
]
if mismatched_pairs:
    raise ValueError(f"PORO/PERMX files do not form matching pairs: {mismatched_pairs}")

# --------------------------
# Latin Hypercube Sampling
# --------------------------
# One LHS dimension per physical parameter, plus a trailing dimension that is
# converted into the index of a matched PORO/PERMX pair.
n_physical = len(params)
n_pairs = len(poro_files)
sampler = qmc.LatinHypercube(d=n_physical + 1)
sample = sampler.random(n=num_samples)

# Physical parameters: scale the leading columns onto [l_bounds, u_bounds]
# and round to two decimals.
sample_params = qmc.scale(sample[:, :n_physical], l_bounds, u_bounds)
df_params = pd.DataFrame(np.round(sample_params, 2), columns=params)

# File index: multiply the last column by the number of pairs, truncate to an
# integer and clip into the valid range 0 .. n_pairs - 1.
file_indices = np.clip((sample[:, -1] * n_pairs).astype(int), 0, n_pairs - 1)

# Attach the selected pair to every row; the same index is used for both lists.
for column, files in (("PORO_file", poro_files), ("PERMX_file", permx_files)):
    df_params[column] = [str(files[idx]) for idx in file_indices]

# --------------------------
# Calculate stress state parameters
# --------------------------
# Resolve the horizontal gradients SH/Sh into sigma_x, sigma_y and tau_xy using
# the double angle 2*beta, where beta = SH azimuth - 90 degrees.
# NOTE(review): the sign of tau_xy depends on how the x/y axes are oriented
# relative to the azimuth reference -- confirm against the consumer of this table.
df_params['beta'] = df_params['SH_azi_deg'] - 90  # Rotate from SH to x-axis
two_beta_rad = np.radians(2 * df_params['beta'])
df_params['cos_2beta'] = np.cos(two_beta_rad)
df_params['sin_2beta'] = np.sin(two_beta_rad)

# Mean and half-difference of the two horizontal gradients; every component
# below is built from these two terms.
mean_horizontal = (df_params['SH_MPa/km'] + df_params['Sh_MPa/km']) / 2
half_difference = (df_params['SH_MPa/km'] - df_params['Sh_MPa/km']) / 2

df_params['sigma_x'] = mean_horizontal + half_difference * df_params['cos_2beta']
df_params['sigma_y'] = mean_horizontal - half_difference * df_params['cos_2beta']
df_params['tau_xy'] = half_difference * df_params['sin_2beta']

# --------------------------
# Output
# --------------------------
# The CSV export is left disabled in this cell, so nothing is written to disk
# here; re-enable the line below to save the table.
# df_params.to_csv("sampled_parameters_and_files.csv", index=False)
df_params  # bare trailing expression: shows the sampled table as the cell result


Unnamed: 0,E_GPa,PR,SH_MPa/km,Sh_MPa/km,Sv_MPa/km,SH_azi_deg,PORO_file,PERMX_file,beta,cos_2beta,sin_2beta,sigma_x,sigma_y,tau_xy
0,23563783.76,0.4,27.03,14.04,22.76,297.77,../data/properties/JD_BASECASE_21_PORO.dat,../data/properties/JD_BASECASE_21_PERMX.dat,207.77,0.565831,0.824521,24.210071,16.859929,5.355267
1,16594595.76,0.26,23.63,14.6,21.1,290.72,../data/properties/JD_BASECASE_26_PORO.dat,../data/properties/JD_BASECASE_26_PERMX.dat,200.72,0.749649,0.661835,22.499666,15.730334,2.988187
2,17506773.47,0.29,23.45,13.34,23.32,304.47,../data/properties/JD_BASECASE_28_PORO.dat,../data/properties/JD_BASECASE_28_PERMX.dat,214.47,0.359345,0.933205,20.211491,16.578509,4.717349
3,21741935.18,0.34,25.18,15.2,21.8,300.59,../data/properties/JD_BASECASE_22_PORO.dat,../data/properties/JD_BASECASE_22_PERMX.dat,210.59,0.48206,0.876138,22.595477,17.784523,4.371931
4,20092696.4,0.22,26.09,15.8,24.96,308.85,../data/properties/JD_BASECASE_27_PORO.dat,../data/properties/JD_BASECASE_27_PERMX.dat,218.85,0.21303,0.977046,22.041041,19.848959,5.026899


## PORO/PERMX pairs are NOT sampled more than once

Make sure the number of samples (`num_samples`) does not exceed the number of available PORO/PERMX pairs; otherwise the cell raises a `ValueError`.

In [18]:
import numpy as np
import pandas as pd
from scipy.stats import qmc
from pathlib import Path
import sys

# --------------------------
# Setup for sampling
# --------------------------
# Each row: (column name, lower bound, upper bound) of one sampled input.
# The three stress-gradient rows span -10 % / +10 % around 25, 14.6 and 22.7.
# NOTE(review): the 15e6-25e6 range for 'E_GPa' does not look like GPa
# magnitudes -- confirm the intended unit of this column.
_bounds_table = [
    ('E_GPa', 15e6, 25e6),
    ('PR', 0.2, 0.4),
    ('SH_MPa/km', 25 * 0.9, 25 * 1.1),
    ('Sh_MPa/km', 14.6 * 0.9, 14.6 * 1.1),
    ('Sv_MPa/km', 22.7 * 0.9, 22.7 * 1.1),
    ('SH_azi_deg', 290, 310),
]
params = [name for name, _lo, _hi in _bounds_table]
l_bounds = [lo for _name, lo, _hi in _bounds_table]
u_bounds = [hi for _name, _lo, hi in _bounds_table]
num_samples = 5  # Change as needed

# --------------------------
# Load PORO and PERMX files
# --------------------------
# Collect the PORO and PERMX property files from data_dir, verify that they
# form matched pairs (later steps pick both files by the same list position),
# and check that enough pairs exist for sampling without repetition.
data_dir = Path('..') / 'data' / 'properties'
# Or use Path('data/properties') if running from repo root
if not data_dir.exists():
    raise FileNotFoundError(f"Error: Directory '{data_dir}' does not exist.")

# List the directory once. Sub-directories are skipped so that a folder whose
# name happens to contain "PORO"/"PERMX" is never treated as a property file.
property_files = [f for f in data_dir.iterdir() if f.is_file()]
poro_files = sorted(f for f in property_files if "PORO" in f.name)
permx_files = sorted(f for f in property_files if "PERMX" in f.name)

if not poro_files or not permx_files:
    raise FileNotFoundError("Error: PORO or PERMX files not found.")

if len(poro_files) != len(permx_files):
    raise ValueError(f"Number of PORO files ({len(poro_files)}) does not match number of PERMX files ({len(permx_files)})")

# The two lists are sorted independently, so equal counts alone do not
# guarantee that entry i of each list belongs to the same case. Compare the
# file names with the property keyword removed and fail loudly on a mismatch.
mismatched_pairs = [
    (poro.name, permx.name)
    for poro, permx in zip(poro_files, permx_files)
    if poro.name.replace("PORO", "") != permx.name.replace("PERMX", "")
]
if mismatched_pairs:
    raise ValueError(f"PORO/PERMX files do not form matching pairs: {mismatched_pairs}")

num_pairs = len(poro_files)

# Unique selection needs at least as many pairs as requested samples.
if num_samples > num_pairs:
    raise ValueError(f"Cannot sample {num_samples} unique poro/permx pairs: only {num_pairs} available.")

# --------------------------
# Latin Hypercube Sampling for parameters
# --------------------------
# Only the physical parameters are drawn here, one LHS dimension per entry of
# `params`; the unit-cube sample is scaled to the bounds and rounded to two
# decimals before it becomes the parameter table.
n_dims = len(params)
sampler = qmc.LatinHypercube(d=n_dims)
sample = sampler.random(num_samples)
sample_params = qmc.scale(sample, l_bounds=l_bounds, u_bounds=u_bounds)
df_params = pd.DataFrame(data=np.round(sample_params, decimals=2), columns=params)

# --------------------------
# Random unique selection of poro/permx pairs
# --------------------------
# Draw num_samples distinct list positions (replace=False) so that no pair is
# used twice; each position picks the PORO and PERMX file at that same index.
unique_indices = np.random.choice(num_pairs, size=num_samples, replace=False)
selected_pairs = [(str(poro_files[k]), str(permx_files[k])) for k in unique_indices]
df_params["PORO_file"] = [poro for poro, _permx in selected_pairs]
df_params["PERMX_file"] = [permx for _poro, permx in selected_pairs]

# --------------------------
# Calculate stress state parameters
# --------------------------
# Derive sigma_x, sigma_y and tau_xy from the SH/Sh gradients with the double
# angle 2*beta, where beta is the SH azimuth minus 90 degrees.
# NOTE(review): the sign of tau_xy depends on how the x/y axes are oriented
# relative to the azimuth reference -- confirm against the consumer of this table.
df_params['beta'] = df_params['SH_azi_deg'] - 90  # Rotate from SH to x-axis
double_angle = np.radians(2 * df_params['beta'])
df_params['cos_2beta'] = np.cos(double_angle)
df_params['sin_2beta'] = np.sin(double_angle)

# Centre (mean) and radius (half-difference) terms shared by all components.
centre = (df_params['SH_MPa/km'] + df_params['Sh_MPa/km']) / 2
radius = (df_params['SH_MPa/km'] - df_params['Sh_MPa/km']) / 2

df_params['sigma_x'] = centre + radius * df_params['cos_2beta']
df_params['sigma_y'] = centre - radius * df_params['cos_2beta']
df_params['tau_xy'] = radius * df_params['sin_2beta']

# --------------------------
# Output
# --------------------------
# Write the sampled table to a CSV file given by a relative path, so it is
# created relative to the current working directory; index=False leaves the
# row index out of the file.
df_params.to_csv("sampled_parameters_and_files.csv", index=False)
df_params  # bare trailing expression: shows the sampled table as the cell result



Unnamed: 0,E_GPa,PR,SH_MPa/km,Sh_MPa/km,Sv_MPa/km,SH_azi_deg,PORO_file,PERMX_file,beta,cos_2beta,sin_2beta,sigma_x,sigma_y,tau_xy
0,19696975.3,0.32,25.47,15.89,23.58,290.67,../data/properties/JD_BASECASE_26_PORO.dat,../data/properties/JD_BASECASE_26_PERMX.dat,200.67,0.750803,0.660526,24.276347,17.083653,3.163919
1,21482532.67,0.34,27.1,13.7,24.89,295.68,../data/properties/JD_BASECASE_21_PORO.dat,../data/properties/JD_BASECASE_21_PERMX.dat,205.68,0.624425,0.781085,24.583648,16.216352,5.233268
2,15686406.35,0.24,22.93,14.93,22.75,300.7,../data/properties/JD_BASECASE_28_PORO.dat,../data/properties/JD_BASECASE_28_PERMX.dat,210.7,0.478692,0.877983,20.844767,17.015233,3.511932
3,24698140.61,0.37,26.32,14.12,21.14,306.32,../data/properties/JD_BASECASE_22_PORO.dat,../data/properties/JD_BASECASE_22_PERMX.dat,216.32,0.298375,0.954449,22.040085,18.399915,5.822138
4,17375768.62,0.21,23.72,14.64,21.94,303.64,../data/properties/JD_BASECASE_27_PORO.dat,../data/properties/JD_BASECASE_27_PERMX.dat,213.64,0.386228,0.922403,20.933475,17.426525,4.187711
