# Extract finit files from simulation cases

In [1]:
from pathlib import Path
import shutil
import re
from tqdm import tqdm

data_folder_path = Path('../data/sim_cases')
results_folder_path = Path('../results/finit_files')

# Create destination directory if it doesn't exist
results_folder_path.mkdir(parents=True, exist_ok=True)

finit_file_names = []

# Iterate through each simulation case folder
for case_path in tqdm(data_folder_path.iterdir(), desc='Extracting finit files'):
    if case_path.is_dir():
        for src_file in case_path.iterdir():
            if src_file.is_file() and src_file.suffix.upper() == '.FINIT':
                dst_file = results_folder_path / src_file.name
                shutil.copy2(src_file, dst_file)
                finit_file_names.append(src_file.name)

# Function to extract the trailing number
def extract_number(filename):
    match = re.search(r'(\d+)(?=\.FINIT$)', filename.upper())
    return int(match.group(1)) if match else float('inf')

# Sort numerically by extracted number
finit_file_names = sorted(finit_file_names, key=extract_number)

# Print 
print(f"Total FINIT files copied: {len(finit_file_names)}")
print("finite_file_names = [")
for fname in finit_file_names:
    print(f"    '{fname}',")
print("]")


Extracting finit files: 4it [00:01,  3.63it/s]

Total FINIT files copied: 3
finite_file_names = [
    'JD_BASECASE_10.FINIT',
    'JD_BASECASE_25.FINIT',
    'JD_BASECASE_72.FINIT',
]





# Extract properties (porosity and perm)

In [2]:
import sys
from pathlib import Path
repo_root = Path.cwd().parent
sys.path.append(str(repo_root / "src"))

from extract_properties_from_finit import extract_properties_from_finit
from generate_full_properties import generate_full_properties
from CMG_format_compress import CMG_format_compress

finit_dir = Path('../results/finit_files')
save_dir = Path('../results/properties')
save_dir.mkdir(exist_ok=True)

################# extract finit file names from its folder ########## 
# Collect all file names (just the name, not full path)
finit_file_names = [f.name for f in finit_dir.iterdir() if f.is_file()]

# Function to extract number inside filename
def extract_number(filename):
    match = re.search(r'(\d+)(?=\.FINIT$)', filename.upper())
    return int(match.group(1)) if match else float('inf')

# Sort numerically
finit_file_names = sorted(finit_file_names, key=extract_number)

################## extract propertiesr ############################## 
for finit_file_name in finit_file_names:
    finit_file_path = finit_dir / finit_file_name
    # some finit files contains PORO cells < active cells, so use try-except-continue to avoid interuption
    try:
        # STEP 1: Extract properties from FINIT file for active cells only
        extracted_property_dict = extract_properties_from_finit(
            finit_file_path = finit_file_path,
            keywords = ['PORO', 'PERMX', 'ACTID'],
            is_save = False,
            save_dir = save_dir,
            save_name = finit_file_name.split('.')[0],
            show_summary = False
        )

        # STEP 2: Generate full properties for all cells (filled inactive cells with zeros)
        full_property_dict = generate_full_properties(
            property_dict = extracted_property_dict,
            property_list = ['PORO', 'PERMX'], 
            total_cells = 989001,
            is_save = False,
            save_dir = save_dir,
            save_name = finit_file_name.split('.')[0],
            # save_name = finit_file_name.split('.')[0]+'_jreversed',
            show_summary = False,
            reverse_j = True,
            grid_shape = (107, 117, 79),
            )


        # STEP 3: Compress full properties to CMG format (repeated values as N*value)
        for key in ['PORO', 'PERMX']:
            CMG_format_compress(
                array = full_property_dict[key], 
                keyword = key, 
                max_line_length = 80,
                show_summary = False,
                save_dir = save_dir,
                save_name = finit_file_name.split('.')[0]
                # save_name = finit_file_name.split('.')[0]+'_jreversed'
            )
    except ValueError as e:
        print(f"Warning: Skipping '{finit_file_name}'. Reason: {e}")
        continue


Saved compressed PORO data to: ../results/properties/JD_BASECASE_10_PORO.dat
Saved compressed PERMX data to: ../results/properties/JD_BASECASE_10_PERMX.dat
Saved compressed PORO data to: ../results/properties/JD_BASECASE_25_PORO.dat
Saved compressed PERMX data to: ../results/properties/JD_BASECASE_25_PERMX.dat
Saved compressed PORO data to: ../results/properties/JD_BASECASE_72_PORO.dat
Saved compressed PERMX data to: ../results/properties/JD_BASECASE_72_PERMX.dat


# Collect property file names

In [None]:
import os
import re
from pathlib import Path
import numpy as np

"""
Extract file names from the results/properties folder.
Returns a list of file names (excluding hidden files like .DS_Store),
sorted numerically by the number inside the file name.
"""

# Define the path to the properties folder
properties_folder_path = Path('../results/properties')
save_path = Path('../results/property_file_names')

# Check if the directory exists
if not os.path.exists(properties_folder_path):
    print(f"Error: Directory '{properties_folder_path}' does not exist.")

# Use pathlib for better cross-platform compatibility
properties_dir = Path(properties_folder_path)

# Extract number from filename helper
def extract_number(filename):
    match = re.search(r"(\d+)", filename)
    return int(match.group(1)) if match else float('inf')

# Get all files, excluding hidden files
property_file_names = [
    file_path.name for file_path in properties_dir.iterdir()
    if file_path.is_file() and not file_path.name.startswith('.')
]

# Sort numerically by the number inside the filename
property_file_names.sort(key=extract_number)

# Save the list in a csv file
np.savetxt(save_path/'property_file_names.csv', np.array(property_file_names), delimiter=',', fmt='%s')

print("Saved property file names in order:")
for fname in property_file_names:
    print(fname)


Saved property file names in order:
JD_BASECASE_10_PORO.dat
JD_BASECASE_10_PERMX.dat
JD_BASECASE_25_PORO.dat
JD_BASECASE_25_PERMX.dat
JD_BASECASE_72_PERMX.dat
JD_BASECASE_72_PORO.dat


# Assemble

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import sys
import re
repo_root = Path.cwd().parent
sys.path.append(str(repo_root / "src"))

# set up path
base_path = Path('..')
# Setup for sampling
name_prefix = 'test'
property_file_names_path = base_path/'results'/'property_file_names'/'property_file_names.csv'

# Load PORO and PERMX file names
property_file_names = np.loadtxt(property_file_names_path,delimiter=",",dtype=str)

# sort the file names by the number in the name
def extract_number(filename):
    match = re.search(r"(\d+)", filename)
    return int(match.group(1)) if match else float('inf')

poro_file_names = sorted(
    [name for name in property_file_names if "PORO" in name.upper()],
    key=extract_number
)

permx_file_names = sorted(
    [name for name in property_file_names if "PERMX" in name.upper()],
    key=extract_number
)

# check a few things
if not poro_file_names or not permx_file_names:
    print("Error: PORO or PERMX file names not found.")
    sys.exit(1)

if len(poro_file_names) != len(permx_file_names):
    raise ValueError(f"Number of PORO file names ({len(poro_file_names)}) does not match number of PERMX file names ({len(permx_file_names)})")

n_samples = len(poro_file_names)

# Store poro/permx pairs
df_params = pd.DataFrame({'case_num': range(1, n_samples + 1)})
df_params["PORO_file"] = [str(poro_file_names[i]) for i in range(n_samples)]
df_params["PERMX_file"] = [str(permx_file_names[i]) for i in range(n_samples)]

# add prefix to file names
prefix = "data_properties/"
df_params["PORO_file"] = df_params["PORO_file"].apply(lambda x: f"{prefix}{x}")
df_params["PERMX_file"] = df_params["PERMX_file"].apply(lambda x: f"{prefix}{x}")

# Output
df_params.to_csv(base_path/'results'/'sim_files'/f"{name_prefix}_sampled_params.csv", index=False,float_format='%.2f')
df_params.round(2)


Unnamed: 0,case_num,PORO_file,PERMX_file
0,1,data_properties/JD_BASECASE_10_PORO.dat,data_properties/JD_BASECASE_10_PERMX.dat
1,2,data_properties/JD_BASECASE_25_PORO.dat,data_properties/JD_BASECASE_25_PERMX.dat
2,3,data_properties/JD_BASECASE_72_PORO.dat,data_properties/JD_BASECASE_72_PERMX.dat


# generate CMG dat files

In [8]:
import pandas as pd
from pathlib import Path
import sys
repo_root = Path.cwd().parent
sys.path.append(str(repo_root / "src"))

from generate_dat_files import generate_dat_files

name_prefix = 'test'
# set up path
base_path = Path('..')
df_params = pd.read_csv(base_path/'results'/'sim_files'/f"{name_prefix}_sampled_params.csv")
generate_dat_files(
    df_parameters = df_params,
    template_file_path = base_path/'data'/'dat_file_templates'/'250913_no_gmc.dat',
    save_folder_path = base_path/'results'/'sim_files'/f"{name_prefix}_dat_files"
)

Generated 3 dat files successfully.
