# This notebook generates CMG dat files for running CMG simulations

# Step 1: set up a base CMG model
Prepare a base CMG dat file and add it to `wrtcmgdat.py`.

# Step 2: sample uncertain parameters

## 1. CMG requires initializing the stress state using a reference block. The JD_Sula_2005_gmc grid has 10 k layers, and the reservoir starts at k=6. Block (50, 1, 6) is used as the reference block for the *STRESSGRAD calculation. Its grid top is at 670.7188 m and its bottom at 671.9521 m.
## 2. Each PORO/PERMX pair is used at most once. Pairs are selected from the list of file names; the property files themselves are not read here.

## Monte Carlo sampling

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import qmc
from pathlib import Path
import sys
import re

# ---- Sampling configuration ----
random_seed = 11
name_prefix = '251023'
property_file_names_csv = 'property_file_names_seed2&3.csv'
num_samples = 90  # Change as needed

# Reference values kept for context (six-parameter order, before A_m2 was added):
#   OMV_values  = [20e6, 0.3, x,  14.6, 22.7, 300]
#   base_values = [20e6, 0.3, 28, 16.5, 22.7, 310]
# Notes on the stress gradients:
#   1) they are effective gradients (required by CMG), i.e. after subtracting 10;
#   2) they are negative because of the CMG DIR DOWN convention.
# NOTE(review): the E_GPa range (15e6-25e6) looks like kPa rather than GPa - confirm.
# One (lower, upper) range per uncertain parameter; order defines the column order.
param_ranges = {
    'E_GPa':      (15e6, 25e6),
    'PR':         (0.2, 0.4),
    'SH_MPa/km':  (-18 * 1.1, -18 * 0.9),
    'Sh_MPa/km':  (-6.5 * 1.1, -6.5 * 0.9),
    'Sv_MPa/km':  (-12.7 * 1.1, -12.7 * 0.9),
    'SH_azi_deg': (300, 320),
    'A_m2':       (16985344.51*0.9, 16985344.51*1.1),
}
params = list(param_ranges)
l_bounds = [rng[0] for rng in param_ranges.values()]
u_bounds = [rng[1] for rng in param_ranges.values()]

# ---- Load the PORO and PERMX file names (comma-delimited text, read as strings) ----
property_file_names = np.loadtxt(
    property_file_names_csv,
    delimiter=",",
    dtype=str,
)

# sort the file names by the number in the name
def extract_number(filename):
    """Return the first run of digits in ``filename`` as an int.

    Used as a sort key: names without any digit map to ``float('inf')`` so
    they sort after every numbered name.
    """
    hit = re.search(r"(\d+)", filename)
    if hit is None:
        return float('inf')
    return int(hit.group(1))

# Split the loaded names into PORO and PERMX lists, each ordered by the number in the name
poro_file_names = sorted(
    [name for name in property_file_names if "PORO" in name.upper()],
    key=extract_number
)

permx_file_names = sorted(
    [name for name in property_file_names if "PERMX" in name.upper()],
    key=extract_number
)

# Sanity checks. Every failure raises ValueError so the cell stops with a clear
# traceback; sys.exit() inside a notebook kernel only raises SystemExit.
if not poro_file_names or not permx_file_names:
    raise ValueError("Error: PORO or PERMX file names not found.")

if len(poro_file_names) != len(permx_file_names):
    raise ValueError(f"Number of PORO file names ({len(poro_file_names)}) does not match number of PERMX file names ({len(permx_file_names)})")

# PORO and PERMX names are paired by position after sorting, so the numbers in
# the two lists must line up; otherwise different realizations would be mixed.
mismatched_pairs = [
    (str(poro_name), str(permx_name))
    for poro_name, permx_name in zip(poro_file_names, permx_file_names)
    if extract_number(poro_name) != extract_number(permx_name)
]
if mismatched_pairs:
    raise ValueError(f"PORO/PERMX file names are not paired by number, first mismatch: {mismatched_pairs[0]}")

num_pairs = len(poro_file_names)

# Each pair is used at most once, so the sample count cannot exceed the pair count
if num_samples > num_pairs:
    raise ValueError(f"Cannot sample {num_samples} unique poro/permx pairs: only {num_pairs} available.")

# Draw a Latin Hypercube design in the unit cube and stretch each column to its bounds
lhs_engine = qmc.LatinHypercube(d=len(params), seed=random_seed)
unit_design = lhs_engine.random(n=num_samples)
df_params = pd.DataFrame(
    qmc.scale(unit_design, l_bounds, u_bounds),
    columns=params,
)

# Attach the first num_samples PORO/PERMX names (paired by position),
# each prefixed with the folder that holds the property files
prefix = "data_properties/"
df_params["PORO_file"] = [f"{prefix}{poro_file_names[i]}" for i in range(num_samples)]
df_params["PERMX_file"] = [f"{prefix}{permx_file_names[i]}" for i in range(num_samples)]

# ---- Rotate the horizontal principal stress gradients into the grid x/y frame ----
# beta is the SH azimuth rotated by 90 degrees (from SH to the x-axis)
df_params['beta'] = df_params['SH_azi_deg'] - 90
two_beta_rad = np.radians(2 * df_params['beta'])
df_params['cos_2beta'] = np.cos(two_beta_rad)
df_params['sin_2beta'] = np.sin(two_beta_rad)

# Gradients stay in the units of the sampled columns (MPa/km, numerically equal to kPa/m)
mean_horizontal = (df_params['SH_MPa/km'] + df_params['Sh_MPa/km']) / 2
half_difference = (df_params['SH_MPa/km'] - df_params['Sh_MPa/km']) / 2
df_params['sigma_x_grad'] = mean_horizontal + half_difference * df_params['cos_2beta']
df_params['sigma_y_grad'] = mean_horizontal - half_difference * df_params['cos_2beta']
# Sign chosen so tau_xy_grad comes out positive, as checked against the
# directions of maximum stress in the CMG Results
df_params['tau_xy_grad'] = -half_difference * df_params['sin_2beta']

# ---- Stress state at the reference block, in kPa ----
# For the JD_Sula_2005_gmc grid the reference block is (50, 1, 6); its mid-depth
# is the average of the block top and bottom.
grid_top = 670.7188
grid_bottom = 671.9521
grid_ave = (grid_top + grid_bottom)/2
# gradient * depth, with the sign flipped by the trailing (-1)
for ref_col, grad_col in [
    ('sigma_x_ref', 'sigma_x_grad'),
    ('sigma_y_ref', 'sigma_y_grad'),
    ('sigma_z_ref', 'Sv_MPa/km'),
    ('tau_xy_ref', 'tau_xy_grad'),
]:
    df_params[ref_col] = df_params[grad_col] * grid_ave * (-1)

# ---- Save the samples (two decimals) and display them ----
df_params.to_csv(
    f"{name_prefix}_sampled_params.csv",
    index=False,
    float_format='%.2f',
)
df_params.round(2)


Unnamed: 0,E_GPa,PR,SH_MPa/km,Sh_MPa/km,Sv_MPa/km,SH_azi_deg,A_m2,PORO_file,PERMX_file,beta,cos_2beta,sin_2beta,sigma_x_grad,sigma_y_grad,tau_xy_grad,sigma_x_ref,sigma_y_ref,sigma_z_ref,tau_xy_ref
0,22541269.98,0.38,-19.78,-5.85,-11.46,317.13,15472878.07,data_properties/JD_BASECASE_5_PORO.dat,data_properties/JD_BASECASE_5_PERMX.dat,227.13,-0.07,1.00,-12.30,-13.33,6.95,8257.72,8951.61,7695.11,-4664.19
1,21207802.89,0.31,-17.02,-6.23,-11.98,311.41,18031818.80,data_properties/JD_BASECASE_6_PORO.dat,data_properties/JD_BASECASE_6_PERMX.dat,221.41,0.13,0.99,-12.30,-10.95,5.35,8259.28,7353.14,8043.04,-3594.75
2,24206892.44,0.33,-16.67,-6.72,-12.19,307.66,17250281.40,data_properties/JD_BASECASE_7_PORO.dat,data_properties/JD_BASECASE_7_PERMX.dat,217.66,0.25,0.97,-12.96,-10.43,4.81,8697.82,7005.31,8181.45,-3228.39
3,20421721.17,0.23,-19.22,-6.85,-13.34,307.06,15407511.59,data_properties/JD_BASECASE_8_PORO.dat,data_properties/JD_BASECASE_8_PERMX.dat,217.06,0.27,0.96,-14.73,-11.34,5.95,9887.32,7615.15,8953.99,-3992.92
4,24459185.16,0.32,-18.02,-6.52,-13.07,303.58,18592160.04,data_properties/JD_BASECASE_9_PORO.dat,data_properties/JD_BASECASE_9_PERMX.dat,213.58,0.39,0.92,-14.50,-10.04,5.30,9734.88,6737.76,8773.84,-3557.52
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,16183813.30,0.34,-19.13,-5.93,-11.53,300.68,18000124.95,data_properties/JD_BASECASE_194_PORO.dat,data_properties/JD_BASECASE_194_PERMX.dat,210.68,0.48,0.88,-15.69,-9.36,5.79,10533.85,6286.67,7741.70,-3888.02
86,18435631.15,0.33,-17.61,-7.05,-11.77,315.29,17052403.98,data_properties/JD_BASECASE_197_PORO.dat,data_properties/JD_BASECASE_197_PERMX.dat,225.29,-0.01,1.00,-12.28,-12.38,5.28,8242.11,8313.06,7902.16,-3543.66
87,17724494.55,0.37,-18.17,-6.56,-12.90,305.05,18413215.01,data_properties/JD_BASECASE_198_PORO.dat,data_properties/JD_BASECASE_198_PERMX.dat,215.05,0.34,0.94,-14.34,-10.39,5.46,9629.31,6976.01,8660.38,-3665.89
88,19613218.28,0.38,-19.19,-6.40,-11.43,319.54,17874780.55,data_properties/JD_BASECASE_199_PORO.dat,data_properties/JD_BASECASE_199_PERMX.dat,229.54,-0.16,0.99,-11.79,-13.80,6.31,7912.47,9267.33,7674.04,-4238.33


## Monte Carlo sampling + importance sampling

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import qmc
from pathlib import Path
import sys
import re

from importance_sampling import IS_SH_azi_SH_uniform

# ---- Sampling configuration ----
random_seed = 13
name_prefix = '251104'
property_file_names_path = 'data/property_file_names_seed4&5.csv'
n_samples = 86 

# Reference values kept for context (six-parameter order, before A_m2 was added):
#   OMV_values  = [20e6, 0.3, x,  14.6, 22.7, 300]
#   base_values = [20e6, 0.3, 28, 16.5, 22.7, 310]
# Notes on the stress gradients:
#   1) they are effective gradients (required by CMG), i.e. after subtracting 10;
#   2) they are negative because of the CMG DIR DOWN convention.
# NOTE(review): the E_GPa range (15e6-25e6) looks like kPa rather than GPa - confirm.
# One (lower, upper) range per uncertain parameter; order defines the column order.
param_ranges = {
    'E_GPa':      (15e6, 25e6),
    'PR':         (0.2, 0.4),
    'SH_MPa/km':  (-18 * 1.1, -18 * 0.9),
    'Sh_MPa/km':  (-6.5 * 1.1, -6.5 * 0.9),
    'Sv_MPa/km':  (-12.7 * 1.1, -12.7 * 0.9),
    'SH_azi_deg': (300, 320),
    'A_m2':       (16985344.51*0.9, 16985344.51*1.1),
}
params = list(param_ranges)
l_bounds = [rng[0] for rng in param_ranges.values()]
u_bounds = [rng[1] for rng in param_ranges.values()]

# ---- Load the PORO and PERMX file names (comma-delimited text, read as strings) ----
property_file_names = np.loadtxt(
    property_file_names_path,
    delimiter=",",
    dtype=str,
)

# sort the file names by the number in the name
def extract_number(filename):
    """Sort key: the first run of digits in ``filename`` as an int.

    Names that contain no digit return ``float('inf')`` and therefore sort last.
    """
    hit = re.search(r"(\d+)", filename)
    if hit is None:
        return float('inf')
    return int(hit.group(1))

# Split the loaded names into PORO and PERMX lists, each ordered by the number in the name
poro_file_names = sorted(
    [name for name in property_file_names if "PORO" in name.upper()],
    key=extract_number
)

permx_file_names = sorted(
    [name for name in property_file_names if "PERMX" in name.upper()],
    key=extract_number
)

# Sanity checks. Every failure raises ValueError so the cell stops with a clear
# traceback; sys.exit() inside a notebook kernel only raises SystemExit.
if not poro_file_names or not permx_file_names:
    raise ValueError("Error: PORO or PERMX file names not found.")

if len(poro_file_names) != len(permx_file_names):
    raise ValueError(f"Number of PORO file names ({len(poro_file_names)}) does not match number of PERMX file names ({len(permx_file_names)})")

# PORO and PERMX names are paired by position after sorting, so the numbers in
# the two lists must line up; otherwise different realizations would be mixed.
mismatched_pairs = [
    (str(poro_name), str(permx_name))
    for poro_name, permx_name in zip(poro_file_names, permx_file_names)
    if extract_number(poro_name) != extract_number(permx_name)
]
if mismatched_pairs:
    raise ValueError(f"PORO/PERMX file names are not paired by number, first mismatch: {mismatched_pairs[0]}")

num_pairs = len(poro_file_names)

# Each pair is used at most once, so the sample count cannot exceed the pair count
if n_samples > num_pairs:
    raise ValueError(f"Cannot sample {n_samples} unique poro/permx pairs: only {num_pairs} available.")

# Draw a Latin Hypercube design in the unit cube and stretch each column to its bounds
lhs_engine = qmc.LatinHypercube(d=len(params), seed=random_seed)
unit_design = lhs_engine.random(n=n_samples)
df_params = pd.DataFrame(
    qmc.scale(unit_design, l_bounds, u_bounds),
    columns=params,
)

# Attach the first n_samples PORO/PERMX names (paired by position),
# each prefixed with the folder that holds the property files
prefix = "data_properties/"
df_params["PORO_file"] = [f"{prefix}{poro_file_names[i]}" for i in range(n_samples)]
df_params["PERMX_file"] = [f"{prefix}{permx_file_names[i]}" for i in range(n_samples)]

########################################## add importance sampling ###############
# Run the importance sampling; the samples are then read back from
# "<name_prefix>_importance_sampling.csv" below.
is_settings = dict(
    random_seed = random_seed,
    name_prefix = name_prefix,
    n_samples = n_samples,
    alpha = 0.9,
    beta = 0.9,
    proposal_SH_azi_low = 319,
    proposal_SH_low = 18*1.05,
    show_summary = True,
)
IS_SH_azi_SH_uniform(**is_settings)

# Load the importance samples and overwrite the two LHS columns they replace
importance_samples = pd.read_csv(f'{name_prefix}_importance_sampling.csv')
for is_col in ('SH_azi_deg', 'SH_MPa/km'):
    df_params[is_col] = importance_samples[is_col].values

# ---- Rotate the horizontal principal stress gradients into the grid x/y frame ----
# beta is the SH azimuth rotated by 90 degrees (from SH to the x-axis)
df_params['beta'] = df_params['SH_azi_deg'] - 90
two_beta_rad = np.radians(2 * df_params['beta'])
df_params['cos_2beta'] = np.cos(two_beta_rad)
df_params['sin_2beta'] = np.sin(two_beta_rad)

# Gradients stay in the units of the sampled columns (MPa/km, numerically equal to kPa/m)
mean_horizontal = (df_params['SH_MPa/km'] + df_params['Sh_MPa/km']) / 2
half_difference = (df_params['SH_MPa/km'] - df_params['Sh_MPa/km']) / 2
df_params['sigma_x_grad'] = mean_horizontal + half_difference * df_params['cos_2beta']
df_params['sigma_y_grad'] = mean_horizontal - half_difference * df_params['cos_2beta']
# Sign chosen so tau_xy_grad comes out positive, as checked against the
# directions of maximum stress in the CMG Results
df_params['tau_xy_grad'] = -half_difference * df_params['sin_2beta']

# ---- Stress state at the reference block, in kPa ----
# For the JD_Sula_2005_gmc grid the reference block is (50, 1, 6); its mid-depth
# is the average of the block top and bottom.
grid_top = 670.7188
grid_bottom = 671.9521
grid_ave = (grid_top + grid_bottom)/2
# gradient * depth, with the sign flipped by the trailing (-1)
for ref_col, grad_col in [
    ('sigma_x_ref', 'sigma_x_grad'),
    ('sigma_y_ref', 'sigma_y_grad'),
    ('sigma_z_ref', 'Sv_MPa/km'),
    ('tau_xy_ref', 'tau_xy_grad'),
]:
    df_params[ref_col] = df_params[grad_col] * grid_ave * (-1)

# ---- Save the samples (two decimals) and display them ----
df_params.to_csv(
    f"{name_prefix}_sampled_params.csv",
    index=False,
    float_format='%.2f',
)
df_params.round(2)


SH_azi
Target distribution: U[300, 320], 86 samples
Proposal distribution: 0.1 * U[300, 319] + 0.9 * U[319])
Importance samples min: 301.68, max: 319.96, number of alpha samples: 77
SH
Target distribution: U[16.2, 19.8], 86 samples
Proposal distribution: 0.1 * U[16.2, 18.90] + 0.9 * U[18.90, 19.8]
Importance samples min: 16.32, max: 19.79, number of beta samples: 77


Unnamed: 0,E_GPa,PR,SH_MPa/km,Sh_MPa/km,Sv_MPa/km,SH_azi_deg,A_m2,PORO_file,PERMX_file,beta,cos_2beta,sin_2beta,sigma_x_grad,sigma_y_grad,tau_xy_grad,sigma_x_ref,sigma_y_ref,sigma_z_ref,tau_xy_ref
0,15364558.42,0.40,-19.64,-6.46,-12.05,319.19,15776075.21,data_properties/JD_BASECASE_405_PORO.dat,data_properties/JD_BASECASE_405_PERMX.dat,229.19,-0.15,0.99,-12.09,-14.01,6.52,8114.98,9405.94,8091.28,-4377.23
1,20929926.66,0.20,-16.35,-6.56,-13.34,319.08,16928531.30,data_properties/JD_BASECASE_408_PORO.dat,data_properties/JD_BASECASE_408_PERMX.dat,229.08,-0.14,0.99,-10.76,-12.15,4.84,7226.34,8159.52,8958.48,-3252.46
2,17230499.56,0.32,-19.33,-5.85,-11.84,319.27,15395505.24,data_properties/JD_BASECASE_409_PORO.dat,data_properties/JD_BASECASE_409_PERMX.dat,229.27,-0.15,0.99,-11.59,-13.59,6.67,7781.99,9125.85,7947.27,-4475.25
3,16188757.42,0.24,-19.13,-6.12,-13.02,308.31,15342151.56,data_properties/JD_BASECASE_410_PORO.dat,data_properties/JD_BASECASE_410_PERMX.dat,218.31,0.23,0.97,-14.13,-11.12,6.33,9484.88,7462.60,8743.92,-4247.74
4,24538686.60,0.32,-19.66,-6.96,-13.31,306.46,17161213.86,data_properties/JD_BASECASE_411_PORO.dat,data_properties/JD_BASECASE_411_PERMX.dat,216.46,0.29,0.96,-15.17,-11.45,6.07,10187.13,7684.83,8936.84,-4074.51
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,23503676.74,0.38,-18.92,-6.44,-11.54,319.10,16537221.82,data_properties/JD_BASECASE_595_PORO.dat,data_properties/JD_BASECASE_595_PERMX.dat,229.10,-0.14,0.99,-11.79,-13.57,6.17,7915.64,9110.20,7745.16,-4145.47
82,19857299.60,0.29,-18.95,-7.05,-12.42,319.91,16130034.34,data_properties/JD_BASECASE_596_PORO.dat,data_properties/JD_BASECASE_596_PERMX.dat,229.91,-0.17,0.99,-11.99,-14.02,5.86,8048.60,9411.94,8339.73,-3935.55
83,19331005.76,0.36,-17.73,-5.92,-12.28,319.58,17211436.78,data_properties/JD_BASECASE_599_PORO.dat,data_properties/JD_BASECASE_599_PERMX.dat,229.58,-0.16,0.99,-10.89,-12.76,5.83,7307.68,8568.64,8242.86,-3911.72
84,17469723.38,0.25,-19.65,-6.37,-12.76,319.90,18652938.23,data_properties/JD_BASECASE_601_PORO.dat,data_properties/JD_BASECASE_601_PERMX.dat,229.90,-0.17,0.99,-11.88,-14.14,6.55,7976.06,9494.18,8566.49,-4394.23


In [24]:
# Reload the saved importance samples for inspection and display them
importance_csv = f'{name_prefix}_importance_sampling.csv'
importance_samples = pd.read_csv(importance_csv)
importance_samples

Unnamed: 0,SH_azi_deg,q_SH_azi,SH_MPa/km,q_SH,weights
0,313.4371,0.08,-17.0925,0.2857,0.0068
1,317.0930,0.08,-16.8042,0.2500,0.0077
2,319.7397,0.08,-17.2510,0.2857,0.0068
3,316.2522,0.08,-16.6455,0.2500,0.0077
4,312.6930,0.08,-16.6137,0.2500,0.0077
...,...,...,...,...,...
85,310.5295,0.08,-19.6167,0.2857,0.0068
86,300.9141,0.02,-17.3129,0.2857,0.0271
87,303.0098,0.02,-19.7089,0.2857,0.0271
88,318.3166,0.08,-17.8907,0.2857,0.0068


In [24]:
# Round trip: write the loaded property file names to a plain CSV,
# read them back as strings, and show the resulting shape
roundtrip_csv = "property_file_names.csv"
np.savetxt(roundtrip_csv, property_file_names, delimiter=",", fmt="%s")
property_file_names_txt = np.loadtxt(roundtrip_csv, delimiter=",", dtype=str)
property_file_names_txt.shape

(180,)

# Step 3: generate CMG dat files based on the sampled parameters

In [3]:
import pandas as pd
from generate_dat_files import generate_dat_files

# Reload the sampled parameters from disk and generate the CMG dat files from the template.
name_prefix = '251104'
# The sampling cell saves "<name_prefix>_sampled_params.csv" (no seed suffix).
# Reading that same name keeps this cell runnable on a fresh kernel, without
# depending on a `random_seed` variable left over from an earlier cell.
df_params = pd.read_csv(f"{name_prefix}_sampled_params.csv")
generate_dat_files(
    df_parameters = df_params,
    template_file_path = "dat_file_templates/250913.dat",
    save_folder_path = f"{name_prefix}_dat_files"
)

Generated 86 dat files successfully.
