# This notebook generates CMG dat files for running CMG simulations

# Step 1: set up a base CMG model
Prepare a base CMG dat file and add it to `wrtcmgdat.py`.

# Step 2: sample uncertain parameters

## 1. CMG requires initializing the stress state using a reference block. The JD_Sula_2005_gmc grid has 10 k-layers, and the reservoir starts at k=6. Block (50, 1, 6) is used as the reference block for the *STRESSGRAD calculation. Its grid top = 670.7188 m and bottom = 671.9521 m.
## 2. PORO/PERMX pairs are NOT sampled more than once (pairs are selected from the list of file names, not by reading the files themselves)

## Monte Carlo sampling

In [28]:
import numpy as np
import pandas as pd
from scipy.stats import qmc
from pathlib import Path
import sys
import re

# Setup for sampling
random_seed = 11  # seed passed to the Latin Hypercube sampler below
name_prefix = '251023'  # prefix of the output CSV file name
# Note: 1) stress gradients are effective ones (required by CMG) after subtracting 10;
#       2) stress gradients are negative due to CMG DIR DOWN convention
#       3) NOTE(review): the E bounds (15e6 to 25e6) look like kPa rather than GPa
#          despite the 'E_GPa' label -- confirm the unit expected downstream
# Column names of the sampled parameters; entry i is bounded by l_bounds[i]
# and u_bounds[i]. The commented-out lists are the 6-parameter variant
# (without 'A_m2'), kept for reference.
# params = ['E_GPa', 'PR', 'SH_MPa/km', 'Sh_MPa/km', 'Sv_MPa/km', 'SH_azi_deg']
params = ['E_GPa', 'PR', 'SH_MPa/km', 'Sh_MPa/km', 'Sv_MPa/km', 'SH_azi_deg', 'A_m2']
# OMV_values = [20e6, 0.3, x, 14.6, 22.7, 300]
# base_values = [20e6, 0.3, 28, 16.5, 22.7, 310]
# l_bounds = [15e6, 0.2, -18 * 1.1, -6.5 * 1.1, -12.7 * 1.1, 300]
# u_bounds = [25e6, 0.4, -18 * 0.9, -6.5 * 0.9, -12.7 * 0.9, 320]
# The stress-gradient and A_m2 bounds are +/-10% around a central value; because
# the gradients are negative, the 1.1 factor gives the lower bound.
l_bounds = [15e6, 0.2, -18 * 1.1, -6.5 * 1.1, -12.7 * 1.1, 300, 16985344.51*0.9]
u_bounds = [25e6, 0.4, -18 * 0.9, -6.5 * 0.9, -12.7 * 0.9, 320, 16985344.51*1.1]
num_samples = 90  # Change as needed

# Load PORO and PERMX file names (a flat array of strings, one per file)
# property_file_names = np.load('property_file_names.npy')
property_file_names = np.loadtxt("property_file_names.csv",delimiter=",",dtype=str)

# sort the file names by the number in the name
def extract_number(filename):
    """Return the first run of digits in *filename* as an int (sort key).

    Names that contain no digits yield ``float('inf')`` so that they sort
    after every numbered name.
    """
    found = re.search(r"(\d+)", filename)
    if found is None:
        return float('inf')
    return int(found.group(1))

# Split the property file names into PORO and PERMX lists, each ordered by the
# first number embedded in the file name, so that entry i of one list is meant
# to pair with entry i of the other.
poro_file_names = sorted(
    (name for name in property_file_names if "PORO" in name.upper()),
    key=extract_number,
)

permx_file_names = sorted(
    (name for name in property_file_names if "PERMX" in name.upper()),
    key=extract_number,
)

# Sanity checks. Every failure raises ValueError (instead of print + sys.exit)
# so that all checks in this cell report problems the same way.
if not poro_file_names or not permx_file_names:
    raise ValueError("PORO or PERMX file names not found.")

if len(poro_file_names) != len(permx_file_names):
    raise ValueError(f"Number of PORO file names ({len(poro_file_names)}) does not match number of PERMX file names ({len(permx_file_names)})")

# The pairing below is purely positional, so make sure both sorted lists carry
# the same number at every position; otherwise a PORO file would silently be
# combined with the PERMX file of a different case.
misaligned_pairs = [
    (str(poro_name), str(permx_name))
    for poro_name, permx_name in zip(poro_file_names, permx_file_names)
    if extract_number(poro_name) != extract_number(permx_name)
]
if misaligned_pairs:
    raise ValueError(f"PORO and PERMX file names are not aligned by number; first mismatch: {misaligned_pairs[0]}")

num_pairs = len(poro_file_names)

# Each pair is used at most once, so there must be at least num_samples pairs.
if num_samples > num_pairs:
    raise ValueError(f"Cannot sample {num_samples} unique poro/permx pairs: only {num_pairs} available.")

# Draw a Latin Hypercube design in the unit cube and rescale every column to
# its [l_bounds, u_bounds] interval; one row per simulation case.
sampler = qmc.LatinHypercube(d=len(params), seed=random_seed)
sample = sampler.random(n=num_samples)
sample_params = qmc.scale(sample, l_bounds, u_bounds)
df_params = pd.DataFrame(sample_params, columns=params)

# Attach the first num_samples PORO/PERMX file names (already sorted by
# number), prefixed with the property folder, so that no pair is used twice.
prefix = "data_properties/"
df_params["PORO_file"] = [f"{prefix}{name}" for name in poro_file_names[:num_samples]]
df_params["PERMX_file"] = [f"{prefix}{name}" for name in permx_file_names[:num_samples]]

# Rotate the horizontal principal stress gradients into the grid x/y axes.
df_params['beta'] = df_params['SH_azi_deg'] - 90  # Rotate from SH to x-axis
two_beta_rad = np.radians(2 * df_params['beta'])
df_params['cos_2beta'] = np.cos(two_beta_rad)
df_params['sin_2beta'] = np.sin(two_beta_rad)

# Stress gradients in the grid frame, in the same unit as the sampled
# gradients (MPa/km, which is numerically equal to kPa/m).
mean_grad = (df_params['SH_MPa/km'] + df_params['Sh_MPa/km']) / 2
half_diff_grad = (df_params['SH_MPa/km'] - df_params['Sh_MPa/km']) / 2
df_params['sigma_x_grad'] = mean_grad + half_diff_grad * df_params['cos_2beta']
df_params['sigma_y_grad'] = mean_grad - half_diff_grad * df_params['cos_2beta']
# tau_xy_grad should be positive after checking the directions of maximum stress in the CMG Results
df_params['tau_xy_grad'] = -half_diff_grad * df_params['sin_2beta']

# Stress state (kPa) at the mid-depth of the reference block: gradient times
# depth, with the sign flipped because the gradients are stored as negative.
# for the JD_Sula_2005_gmc grid, the reference block is (50, 1, 6)
grid_top = 670.7188
grid_bottom = 671.9521
grid_ave = (grid_top + grid_bottom) / 2
ref_sources = {
    'sigma_x_ref': 'sigma_x_grad',
    'sigma_y_ref': 'sigma_y_grad',
    'sigma_z_ref': 'Sv_MPa/km',
    'tau_xy_ref': 'tau_xy_grad',
}
for ref_column, grad_column in ref_sources.items():
    df_params[ref_column] = df_params[grad_column] * grid_ave * (-1)

# Output: write the table to CSV (two decimals) and display it rounded.
df_params.to_csv(f"{name_prefix}_sampled_params.csv", index=False, float_format='%.2f')
df_params.round(2)


Unnamed: 0,E_GPa,PR,SH_MPa/km,Sh_MPa/km,Sv_MPa/km,SH_azi_deg,A_m2,PORO_file,PERMX_file,beta,cos_2beta,sin_2beta,sigma_x_grad,sigma_y_grad,tau_xy_grad,sigma_x_ref,sigma_y_ref,sigma_z_ref,tau_xy_ref
0,22541269.98,0.38,-19.78,-5.85,-11.46,317.13,15472878.07,data_properties/JD_BASECASE_5_PORO.dat,data_properties/JD_BASECASE_5_PERMX.dat,227.13,-0.07,1.00,-12.30,-13.33,6.95,8257.72,8951.61,7695.11,-4664.19
1,21207802.89,0.31,-17.02,-6.23,-11.98,311.41,18031818.80,data_properties/JD_BASECASE_6_PORO.dat,data_properties/JD_BASECASE_6_PERMX.dat,221.41,0.13,0.99,-12.30,-10.95,5.35,8259.28,7353.14,8043.04,-3594.75
2,24206892.44,0.33,-16.67,-6.72,-12.19,307.66,17250281.40,data_properties/JD_BASECASE_7_PORO.dat,data_properties/JD_BASECASE_7_PERMX.dat,217.66,0.25,0.97,-12.96,-10.43,4.81,8697.82,7005.31,8181.45,-3228.39
3,20421721.17,0.23,-19.22,-6.85,-13.34,307.06,15407511.59,data_properties/JD_BASECASE_8_PORO.dat,data_properties/JD_BASECASE_8_PERMX.dat,217.06,0.27,0.96,-14.73,-11.34,5.95,9887.32,7615.15,8953.99,-3992.92
4,24459185.16,0.32,-18.02,-6.52,-13.07,303.58,18592160.04,data_properties/JD_BASECASE_9_PORO.dat,data_properties/JD_BASECASE_9_PERMX.dat,213.58,0.39,0.92,-14.50,-10.04,5.30,9734.88,6737.76,8773.84,-3557.52
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,16183813.30,0.34,-19.13,-5.93,-11.53,300.68,18000124.95,data_properties/JD_BASECASE_194_PORO.dat,data_properties/JD_BASECASE_194_PERMX.dat,210.68,0.48,0.88,-15.69,-9.36,5.79,10533.85,6286.67,7741.70,-3888.02
86,18435631.15,0.33,-17.61,-7.05,-11.77,315.29,17052403.98,data_properties/JD_BASECASE_197_PORO.dat,data_properties/JD_BASECASE_197_PERMX.dat,225.29,-0.01,1.00,-12.28,-12.38,5.28,8242.11,8313.06,7902.16,-3543.66
87,17724494.55,0.37,-18.17,-6.56,-12.90,305.05,18413215.01,data_properties/JD_BASECASE_198_PORO.dat,data_properties/JD_BASECASE_198_PERMX.dat,215.05,0.34,0.94,-14.34,-10.39,5.46,9629.31,6976.01,8660.38,-3665.89
88,19613218.28,0.38,-19.19,-6.40,-11.43,319.54,17874780.55,data_properties/JD_BASECASE_199_PORO.dat,data_properties/JD_BASECASE_199_PERMX.dat,229.54,-0.16,0.99,-11.79,-13.80,6.31,7912.47,9267.33,7674.04,-4238.33


## Add importance sampling

In [29]:
import numpy as np
import pandas as pd
from scipy.stats import qmc
from pathlib import Path
import sys
import re

# Setup for sampling
random_seed = 11  # seed passed to the Latin Hypercube sampler below
name_prefix = '251023'  # prefix of the importance-sampling input and of the output CSV
# Note: 1) stress gradients are effective ones (required by CMG) after subtracting 10;
#       2) stress gradients are negative due to CMG DIR DOWN convention
#       3) NOTE(review): the E bounds (15e6 to 25e6) look like kPa rather than GPa
#          despite the 'E_GPa' label -- confirm the unit expected downstream
# Column names of the sampled parameters; entry i is bounded by l_bounds[i]
# and u_bounds[i]. 'SH_MPa/km' and 'SH_azi_deg' are sampled here but are
# overwritten later in this cell with the importance-sampling values.
params = ['E_GPa', 'PR', 'SH_MPa/km', 'Sh_MPa/km', 'Sv_MPa/km', 'SH_azi_deg']
# 7-parameter variant (with 'A_m2'), kept for reference:
# params = ['E_GPa', 'PR', 'SH_MPa/km', 'Sh_MPa/km', 'Sv_MPa/km', 'SH_azi_deg', 'A_m2']
# OMV_values = [20e6, 0.3, x, 14.6, 22.7, 300]
# base_values = [20e6, 0.3, 28, 16.5, 22.7, 310]
# The stress-gradient bounds are +/-10% around a central value; because the
# gradients are negative, the 1.1 factor gives the lower bound.
l_bounds = [15e6, 0.2, -18 * 1.1, -6.5 * 1.1, -12.7 * 1.1, 300]
u_bounds = [25e6, 0.4, -18 * 0.9, -6.5 * 0.9, -12.7 * 0.9, 320]
# l_bounds = [15e6, 0.2, -18 * 1.1, -6.5 * 1.1, -12.7 * 1.1, 300, 16985344.51*0.9]
# u_bounds = [25e6, 0.4, -18 * 0.9, -6.5 * 0.9, -12.7 * 0.9, 320, 16985344.51*1.1]
num_samples = 90  # Change as needed

# Load PORO and PERMX file names (a flat array of strings, one per file)
# property_file_names = np.load('property_file_names.npy')
property_file_names = np.loadtxt("property_file_names.csv",delimiter=",",dtype=str)

# sort the file names by the number in the name
def extract_number(filename):
    """Sort key: the first group of digits found in *filename*, as an int.

    When the name has no digits at all, ``float('inf')`` is returned, which
    places the name behind every numbered one.
    """
    digits = re.search(r"(\d+)", filename)
    if digits is None:
        return float('inf')
    return int(digits.group(1))

# Split the property file names into PORO and PERMX lists, each ordered by the
# first number embedded in the file name, so that entry i of one list is meant
# to pair with entry i of the other.
poro_file_names = sorted(
    (name for name in property_file_names if "PORO" in name.upper()),
    key=extract_number,
)

permx_file_names = sorted(
    (name for name in property_file_names if "PERMX" in name.upper()),
    key=extract_number,
)

# Sanity checks. Every failure raises ValueError (instead of print + sys.exit)
# so that all checks in this cell report problems the same way.
if not poro_file_names or not permx_file_names:
    raise ValueError("PORO or PERMX file names not found.")

if len(poro_file_names) != len(permx_file_names):
    raise ValueError(f"Number of PORO file names ({len(poro_file_names)}) does not match number of PERMX file names ({len(permx_file_names)})")

# The pairing below is purely positional, so make sure both sorted lists carry
# the same number at every position; otherwise a PORO file would silently be
# combined with the PERMX file of a different case.
misaligned_pairs = [
    (str(poro_name), str(permx_name))
    for poro_name, permx_name in zip(poro_file_names, permx_file_names)
    if extract_number(poro_name) != extract_number(permx_name)
]
if misaligned_pairs:
    raise ValueError(f"PORO and PERMX file names are not aligned by number; first mismatch: {misaligned_pairs[0]}")

num_pairs = len(poro_file_names)

# Each pair is used at most once, so there must be at least num_samples pairs.
if num_samples > num_pairs:
    raise ValueError(f"Cannot sample {num_samples} unique poro/permx pairs: only {num_pairs} available.")

# Draw a Latin Hypercube design in the unit cube and rescale every column to
# its [l_bounds, u_bounds] interval; one row per simulation case.
sampler = qmc.LatinHypercube(d=len(params), seed=random_seed)
sample = sampler.random(n=num_samples)
sample_params = qmc.scale(sample, l_bounds, u_bounds)
df_params = pd.DataFrame(sample_params, columns=params)

# Attach the first num_samples PORO/PERMX file names (already sorted by
# number), prefixed with the property folder, so that no pair is used twice.
prefix = "data_properties/"
df_params["PORO_file"] = [f"{prefix}{name}" for name in poro_file_names[:num_samples]]
df_params["PERMX_file"] = [f"{prefix}{name}" for name in permx_file_names[:num_samples]]

########################################## add importance sampling ###############
# Replace the LHS draws of the SH azimuth and SH gradient with the values from
# the importance-sampling file (assigned positionally, row by row).
importance_samples = pd.read_csv(f'{name_prefix}_importance_sampling.csv')
for overridden_column in ('SH_azi_deg', 'SH_MPa/km'):
    df_params[overridden_column] = importance_samples[overridden_column].values

# Rotate the horizontal principal stress gradients into the grid x/y axes.
df_params['beta'] = df_params['SH_azi_deg'] - 90  # Rotate from SH to x-axis
two_beta_rad = np.radians(2 * df_params['beta'])
df_params['cos_2beta'] = np.cos(two_beta_rad)
df_params['sin_2beta'] = np.sin(two_beta_rad)

# Stress gradients in the grid frame, in the same unit as the sampled
# gradients (MPa/km, which is numerically equal to kPa/m).
mean_grad = (df_params['SH_MPa/km'] + df_params['Sh_MPa/km']) / 2
half_diff_grad = (df_params['SH_MPa/km'] - df_params['Sh_MPa/km']) / 2
df_params['sigma_x_grad'] = mean_grad + half_diff_grad * df_params['cos_2beta']
df_params['sigma_y_grad'] = mean_grad - half_diff_grad * df_params['cos_2beta']
# tau_xy_grad should be positive after checking the directions of maximum stress in the CMG Results
df_params['tau_xy_grad'] = -half_diff_grad * df_params['sin_2beta']

# Stress state (kPa) at the mid-depth of the reference block: gradient times
# depth, with the sign flipped because the gradients are stored as negative.
# for the JD_Sula_2005_gmc grid, the reference block is (50, 1, 6)
grid_top = 670.7188
grid_bottom = 671.9521
grid_ave = (grid_top + grid_bottom) / 2
ref_sources = {
    'sigma_x_ref': 'sigma_x_grad',
    'sigma_y_ref': 'sigma_y_grad',
    'sigma_z_ref': 'Sv_MPa/km',
    'tau_xy_ref': 'tau_xy_grad',
}
for ref_column, grad_column in ref_sources.items():
    df_params[ref_column] = df_params[grad_column] * grid_ave * (-1)

# Output: write the table to CSV (two decimals) and display it rounded.
# NOTE(review): this is the same file name the Monte Carlo cell writes, so
# running both cells leaves only the later result on disk -- confirm intended.
df_params.to_csv(f"{name_prefix}_sampled_params.csv", index=False, float_format='%.2f')
df_params.round(2)


Unnamed: 0,E_GPa,PR,SH_MPa/km,Sh_MPa/km,Sv_MPa/km,SH_azi_deg,PORO_file,PERMX_file,beta,cos_2beta,sin_2beta,sigma_x_grad,sigma_y_grad,tau_xy_grad,sigma_x_ref,sigma_y_ref,sigma_z_ref,tau_xy_ref
0,17430158.87,0.32,-19.50,-6.66,-12.37,319.58,data_properties/JD_BASECASE_5_PORO.dat,data_properties/JD_BASECASE_5_PERMX.dat,229.58,-0.16,0.99,-12.06,-14.10,6.34,8095.42,9466.45,8301.40,-4255.45
1,18547731.05,0.23,-18.24,-6.08,-12.80,319.32,data_properties/JD_BASECASE_6_PORO.dat,data_properties/JD_BASECASE_6_PERMX.dat,229.32,-0.15,0.99,-11.24,-13.07,6.01,7548.63,8774.85,8589.79,-4036.65
2,21148573.01,0.27,-19.59,-6.54,-12.92,319.22,data_properties/JD_BASECASE_7_PORO.dat,data_properties/JD_BASECASE_7_PERMX.dat,229.22,-0.15,0.99,-12.11,-14.02,6.45,8127.92,9412.98,8671.29,-4332.35
3,15242584.84,0.31,-17.70,-6.13,-11.98,319.84,data_properties/JD_BASECASE_8_PORO.dat,data_properties/JD_BASECASE_8_PERMX.dat,229.84,-0.17,0.99,-10.94,-12.89,5.71,7345.30,8653.36,8043.84,-3830.35
4,18738525.02,0.32,-19.64,-6.38,-13.77,319.30,data_properties/JD_BASECASE_9_PORO.dat,data_properties/JD_BASECASE_9_PERMX.dat,229.30,-0.15,0.99,-12.02,-14.00,6.55,8068.97,9400.71,9243.42,-4400.56
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,23335499.95,0.31,-19.74,-6.26,-12.77,319.36,data_properties/JD_BASECASE_194_PORO.dat,data_properties/JD_BASECASE_194_PERMX.dat,229.36,-0.15,0.99,-11.98,-14.02,6.66,8042.41,9414.46,8575.10,-4472.15
86,24592116.34,0.30,-19.00,-5.92,-12.21,319.25,data_properties/JD_BASECASE_197_PORO.dat,data_properties/JD_BASECASE_197_PERMX.dat,229.25,-0.15,0.99,-11.49,-13.43,6.47,7716.92,9013.89,8197.62,-4342.21
87,16276499.15,0.30,-19.77,-5.99,-13.53,319.05,data_properties/JD_BASECASE_198_PORO.dat,data_properties/JD_BASECASE_198_PERMX.dat,229.05,-0.14,0.99,-11.91,-13.85,6.82,7995.91,9300.13,9083.68,-4578.54
88,20519907.95,0.39,-19.19,-6.28,-12.94,319.83,data_properties/JD_BASECASE_199_PORO.dat,data_properties/JD_BASECASE_199_PERMX.dat,229.83,-0.17,0.99,-11.65,-13.82,6.36,7820.68,9275.19,8689.28,-4271.03


In [24]:
# Reload the importance-sampling table and display it for inspection.
importance_samples = pd.read_csv(f'{name_prefix}_importance_sampling.csv')
importance_samples

Unnamed: 0,SH_azi_deg,q_SH_azi,SH_MPa/km,q_SH,weights
0,313.4371,0.08,-17.0925,0.2857,0.0068
1,317.0930,0.08,-16.8042,0.2500,0.0077
2,319.7397,0.08,-17.2510,0.2857,0.0068
3,316.2522,0.08,-16.6455,0.2500,0.0077
4,312.6930,0.08,-16.6137,0.2500,0.0077
...,...,...,...,...,...
85,310.5295,0.08,-19.6167,0.2857,0.0068
86,300.9141,0.02,-17.3129,0.2857,0.0271
87,303.0098,0.02,-19.7089,0.2857,0.0271
88,318.3166,0.08,-17.8907,0.2857,0.0068


In [24]:
# Write the property file names to CSV (one string per line), then read the
# file back and show its shape to confirm that the round trip works.
np.savetxt("property_file_names.csv",property_file_names,delimiter=",",fmt="%s")
property_file_names_txt = np.loadtxt("property_file_names.csv",delimiter=",",dtype=str)
property_file_names_txt.shape

(180,)

# Step 3: generate CMG dat files based on the sampled parameters

In [30]:
import pandas as pd
from generate_dat_files import generate_dat_files

name_prefix = '251023'
# Read the sampled-parameter table and pass it, together with the template dat
# file and the output folder, to generate_dat_files.
# NOTE(review): the sampling cells write f"{name_prefix}_sampled_params.csv",
# whereas this cell reads the "_seed11" variant -- confirm that the file was
# renamed by hand, or align the two names.
df_params = pd.read_csv(f"{name_prefix}_sampled_params_seed11.csv")
generate_dat_files(
    df_parameters = df_params,
    template_file_path = "dat_file_templates/250913.dat",
    save_folder_path = f"{name_prefix}_dat_files"
)

Generated 90 dat files successfully.


# Step 4: run CMG simulations

## Option 1: on TS machines

In [7]:
# Record the start time
start_time = time.time()

from utils.pyCMG_Control import pycmgcontrol

for nn in range(df_input.shape[0]):
    pycmg_ctrl = pycmgcontrol(exp_name=f'case{nn+1}.dat', simfolder=os.path.join(folder_path, 'datfiles'))
    # Available optoins: 'ese-win32-v2022.30', 'ese-ts1win-v2023.20', 'stf-sherlock-v2020.10', 'ese-ts2win-v2024.20'
    pycmg_ctrl.cmg_version = 'ese-ts2win-v2024.20'
    pycmg_ctrl.run_gem_simulation(case_name_suffix=f'case{nn+1}.dat')
    
# Record the end time
end_time = time.time()

# Calculate the running time
elapsed_time = (end_time - start_time)/60
print(f"Elapsed time: {elapsed_time:.2f} minutes")

Elapsed time: 52.20 minutes


## Option 2: on Sherlock using the generated dat files