# extract finit files from simulation cases

In [2]:
from pathlib import Path
import shutil
import re

src_root = Path('data/sim_cases')
dst_root = Path('data/finit_files')

# Create destination directory if it doesn't exist
dst_root.mkdir(parents=True, exist_ok=True)

finit_file_names = []

# Iterate through each simulation case folder
for case_path in src_root.iterdir():
    if case_path.is_dir():
        for src_file in case_path.iterdir():
            if src_file.is_file() and src_file.suffix.upper() == '.FINIT':
                dst_file = dst_root / src_file.name
                shutil.copy2(src_file, dst_file)
                finit_file_names.append(src_file.name)

# Function to extract the trailing number
def extract_number(filename):
    match = re.search(r'(\d+)(?=\.FINIT$)', filename.upper())
    return int(match.group(1)) if match else float('inf')

# Sort numerically by extracted number
finit_file_names = sorted(finit_file_names, key=extract_number)

# Print 
print(f"Total FINIT files copied: {len(finit_file_names)}")
print("finite_file_names = [")
for fname in finit_file_names:
    print(f"    '{fname}',")
print("]")


Total FINIT files copied: 6
finite_file_names = [
    'JD_BASECASE_1.FINIT',
    'JD_BASECASE_2.FINIT',
    'JD_BASECASE_10.FINIT',
    'JD_BASECASE_25.FINIT',
    'JD_BASECASE_72.FINIT',
    'JD_BASECASE_110.FINIT',
]


# extract finit file names from its folder

In [3]:
from pathlib import Path
import re

# Folder containing the .FINIT files
folder = Path("data/finit_files")

# Collect all file names (just the name, not full path)
finit_file_names = [f.name for f in folder.iterdir() if f.is_file()]

# Function to extract number inside filename
def extract_number(filename):
    match = re.search(r'(\d+)(?=\.FINIT$)', filename.upper())
    return int(match.group(1)) if match else float('inf')

# Sort numerically
finit_file_names = sorted(finit_file_names, key=extract_number)

# Print in the desired format
print("finite_file_names = [")
for fname in finit_file_names:
    print(f"    '{fname}',")
print("]")


finite_file_names = [
    'JD_BASECASE_1.FINIT',
    'JD_BASECASE_2.FINIT',
    'JD_BASECASE_10.FINIT',
    'JD_BASECASE_25.FINIT',
    'JD_BASECASE_72.FINIT',
    'JD_BASECASE_110.FINIT',
    '.DS_Store',
]


# extract properties

In [None]:
from pathlib import Path
from extract_properties_from_finit import extract_properties_from_finit
from generate_full_properties import generate_full_properties
from CMG_format_compress import CMG_format_compress

finit_dir = 'data/finit_files'
save_dir = 'results/properties'

finit_dir = Path(finit_dir)
save_dir = Path(save_dir)
save_dir.mkdir(exist_ok=True)

for finit_file_name in finit_file_names:
    finit_file_path = finit_dir / finit_file_name
    
    # STEP 1: Extract properties from FINIT file for active cells only
    extracted_property_dict = extract_properties_from_finit(
        finit_file_path = finit_file_path,
        keywords = ['PORO', 'PERMX', 'ACTID'],
        is_save = False,
        save_dir = save_dir,
        save_name = finit_file_name.split('.')[0],
        show_summary = False
    )

    # STEP 2: Generate full properties for all cells (filled inactive cells with zeros)
    full_property_dict = generate_full_properties(
        property_dict = extracted_property_dict,
        property_list = ['PORO', 'PERMX'], 
        total_cells = 989001,
        is_save = False,
        save_dir = save_dir,
        save_name = finit_file_name.split('.')[0],
        # save_name = finit_file_name.split('.')[0]+'_jreversed',
        show_summary = False,
        reverse_j = True,
        grid_shape = (107, 117, 79),
        )


    # STEP 3: Compress full properties to CMG format (repeated values as N*value)
    for key in ['PORO', 'PERMX']:
        CMG_format_compress(
            array = full_property_dict[key], 
            keyword = key, 
            max_line_length = 80,
            show_summary = False,
            save_dir = save_dir,
            save_name = finit_file_name.split('.')[0]
            # save_name = finit_file_name.split('.')[0]+'_jreversed'
        )



COMPRESSED PORO SUMMARY
Compressed 989,001 values into 54434 lines
Compression ratio: 18.2:1
Total cells: 989,001
PORO - Mean: 0.083393
PORO - Min: 0.000000
PORO - Max: 0.396792
Saved compressed PORO data to: results/properties/JD_BASECASE_38_PORO.dat

COMPRESSED PERMX SUMMARY
Compressed 989,001 values into 39151 lines
Compression ratio: 25.3:1
Total cells: 989,001
PERMX - Mean: 86.587621
PERMX - Min: 0.000000
PERMX - Max: 5000.000000
Saved compressed PERMX data to: results/properties/JD_BASECASE_38_PERMX.dat


# collect property file names to be used for LSH sampling

In [None]:
import os
from pathlib import Path
import numpy as np
import re

"""
Extract file names from the results/properties folder.
Returns a list of file names (excluding hidden files like .DS_Store),
sorted numerically by the number inside the file name.
"""

# Define the path to the properties folder
properties_path = "results/properties"

# Check if the directory exists
if not os.path.exists(properties_path):
    print(f"Error: Directory '{properties_path}' does not exist.")

# Use pathlib for better cross-platform compatibility
properties_dir = Path(properties_path)

# Extract number from filename helper
def extract_number(filename):
    match = re.search(r"(\d+)", filename)
    return int(match.group(1)) if match else float('inf')

# Get all files, excluding hidden files
property_file_names = [
    file_path.name for file_path in properties_dir.iterdir()
    if file_path.is_file() and not file_path.name.startswith('.')
]

# Sort numerically by the number inside the filename
property_file_names.sort(key=extract_number)

# Save the list as .npy
np.save('property_file_names.npy', property_file_names)

print("Saved property file names in order:")
for fname in property_file_names:
    print(fname)


In [6]:
property_file_names

['JD_BASECASE_38_jreversed_PORO.dat',
 'JD_BASECASE_38_PORO.dat',
 'JD_BASECASE_38_jreversed_PERMX.dat',
 'JD_BASECASE_38_jreversed_PORO.npy',
 'JD_BASECASE_38_PERMX.dat',
 'JD_BASECASE_38_jreversed_PERMX.npy']