# Read simulations

In [159]:
from pathlib import Path

# Folder whose entries are treated as individual simulations.
DATA_DIRECTORY = Path('data/')
assert DATA_DIRECTORY.exists()

# Every entry of the data folder except the one named "raw", in sorted order.
simulations = sorted(
    entry
    for entry in DATA_DIRECTORY.iterdir()
    if entry.name != "raw"
)
for s in simulations:
    print(f'Found Simulation: {s.name}')

Found Simulation: HIST_SG1_2_SG5_7
Found Simulation: model
Found Simulation: Вынгаяхинское
Found Simulation: Д3
Found Simulation: Царичанское13Р
Found Simulation: Царичанское7Р
Found Simulation: Ягодное


# Read filenames

In [160]:
import os

# File extension used to recognise simulation input files.
# NOTE: this name shadows the built-in `format()`. It is shared mutable state:
# sort_filenames and read_dat_filenames read it, and read_dat_filenames
# reassigns it when it falls back to the ".DATA" extension.
format = ".dat"

def iterate_files_recursively(directory):
    """Yield a `Path` for every file below `directory`, descending into subfolders.

    The traversal is done by `os.walk`, so files are produced in whatever
    order `os.walk` reports them.
    """
    for current_dir, _subdirs, names in os.walk(directory):
        yield from (Path(os.path.join(current_dir, name)) for name in names)

def find_common_prefix(filenames):
    """Return the longest leading string shared by the base names of `filenames`.

    Only the final path component of each entry is compared; directory parts
    are ignored. An empty input yields "".
    """
    if filenames:
        # Strip directories so the comparison is on file names only.
        names_only = [Path(entry).name for entry in filenames]
        return os.path.commonprefix(names_only)
    return ""

def sort_filenames(filenames, extension=None):
    """Order simulation files: main file first, then numbered files ascending.

    The "main" file is the one whose name equals the common prefix of all
    base names plus the extension (e.g. ``run.dat`` next to ``run_1.dat``,
    ``run_2.dat``). Numbered files are those whose stem ends in ``_<int>``
    (or is just an integer); they are sorted by that integer.

    Args:
        filenames: iterable of ``Path`` or ``str`` entries.
        extension: file extension to look for, including the dot. When
            omitted, the notebook-level ``format`` global is used.

    Returns:
        A list of ``Path`` objects: the main file (if any), then entries that
        do not carry the extension (in input order), then the numbered files.
        Files with the extension but without an integer suffix are skipped,
        apart from the main file.
    """
    if not filenames:
        return []

    # Default to the shared notebook setting so existing callers keep working.
    ext = format if extension is None else extension

    paths = [str(filename) for filename in filenames]
    names = [Path(path).name for path in paths]

    prefix = os.path.commonprefix(names)
    # With a single file (or identical names) the common prefix is the whole
    # name, extension included; drop it so the file is recognised as the main one.
    if ext and prefix.endswith(ext):
        prefix = prefix[:-len(ext)]
    base_name = prefix + ext

    # Compare against the file *name*: a suffix match on the full path would
    # accept any file when the common prefix is empty.
    base_index = next(
        (index for index, name in enumerate(names) if name == base_name),
        None,
    )

    sorted_files = []
    if base_index is not None:
        sorted_files.append(paths[base_index])

    numbered = []
    for index, path in enumerate(paths):
        if index == base_index:
            continue
        if not path.endswith(ext):
            # Entries without the extension are passed through untouched.
            sorted_files.append(path)
            continue
        try:
            number = int(Path(path).stem.split('_')[-1])
        except ValueError:
            # Deliberately skip files whose suffix is not an integer.
            continue
        numbered.append((number, path))

    numbered.sort()
    sorted_files.extend(path for _, path in numbered)

    return [Path(path) for path in sorted_files]

def read_dat_filenames(simulation: Path, verbose: bool = True):
    """Collect and order the simulation input files found under `simulation`.

    The folder is searched recursively for ".dat" files first and, if none
    are found, for ".DATA" files. The extension being tried is written to the
    notebook-level `format` global before each attempt, because
    `sort_filenames` reads it; after the call `format` holds the extension
    of the last attempt.

    Args:
        simulation: root folder of one simulation.
        verbose: when True, print the main file name and the range of the
            numbered files, and assert that the numbers are 1..N without gaps.

    Returns:
        The list produced by `sort_filenames` (empty when nothing was found).

    Raises:
        AssertionError: in verbose mode, when the numbered files are not
            consecutive starting at 1.
    """
    global format

    dat_files = []
    # Always start from ".dat" so a previous ".DATA" simulation cannot leave
    # the shared `format` global in a state that hides ".dat" files.
    for candidate in (".dat", ".DATA"):
        format = candidate
        matching = [f for f in iterate_files_recursively(simulation) if f.name.endswith(candidate)]
        dat_files = sort_filenames(matching)
        if dat_files:
            break

    if verbose:
        if not dat_files:
            print(f'No .dat or .DATA files found in {simulation}')
            return dat_files

        main_dat_file = dat_files[0]
        print(f'Main Simulation: {main_dat_file.name}')
        other_dat_files = [int(f.name.removesuffix(format).split('_')[-1]) for f in dat_files[1:]]
        assert other_dat_files == list(range(1, len(other_dat_files) + 1)), ".dat (or .DATA) files do not form a list of consecutive numbers. Probably you are dealing with the Вынгаяхинское folder. If so, delete all .dat files from data/Вынгаяхинское/300_kust_07052015/WELL"
        if other_dat_files:
            print(f'Other Simulations: [{other_dat_files[0]}...{other_dat_files[-1]}]')
        else:
            # Only the main file exists; there is no range to report.
            print('Other Simulations: []')

    return dat_files

# Index 4 selects the fifth entry of the sorted `simulations` list; which
# folder that is depends on the current contents of the data directory.
dat_files = read_dat_filenames(simulations[4])
# read_dat_filenames prints its first entry as the "Main Simulation";
# the remaining entries are the numbered runs.
main_dat_file = dat_files[0]
other_dat_files = dat_files[1:]

Main Simulation: carich_DI.dat
Other Simulations: [1...1000]


# Read dat files

In [161]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [162]:
from dat_parser import extract_key_values

def read_dat_file(file: Path):
    """Return whatever `dat_parser.extract_key_values` produces for `file`.

    NOTE(review): presumably a dict of RUNCTRL parameter names to values
    (an empty dict when the file has none) - confirm against dat_parser.
    """
    key_values = extract_key_values(file)
    return key_values

# Test how the function works.
# Since dat_files[0] doesn't have any RUNCTRL parameters, the function should return {}
read_dat_file(dat_files[0])

{}

In [163]:
import pandas as pd

# One parsed result per input file, in the same order as `dat_files`.
dicts_list = [read_dat_file(filename) for filename in dat_files]

# Each list entry becomes one DataFrame row; keys absent from an entry show up as NaN.
df = pd.DataFrame(dicts_list)

df

Unnamed: 0,MBERRLIM,TOLLIN,TOLVARNEWT,CHECKP,MBERRCTRL,MINNEWTIT
0,,,,,,
1,0.0,0.0001,0.00001,1.0,1.000000e-07,
2,0.0,0.0001,0.00001,1.0,1.000000e-07,0.0
3,0.0,0.0001,0.00001,1.0,1.000000e-06,
4,0.0,0.0001,0.00001,1.0,1.000000e-06,0.0
...,...,...,...,...,...,...
996,2.0,0.0050,0.10000,2.0,1.000000e-05,0.0
997,2.0,0.0050,0.10000,2.0,1.000000e-04,
998,2.0,0.0050,0.10000,2.0,1.000000e-04,0.0
999,2.0,0.0050,0.10000,2.0,1.000000e-03,


# Parse result files

In [164]:
from table_parser import parse_table
from block_finder import find_block_with_min_lines

# parse result.log file
def read_res_file(file: Path):
    """Parse a result file into a table.

    `file` is passed to `find_block_with_min_lines`, and that call's result
    is passed to `parse_table`; the value returned by `parse_table` is
    returned unchanged.
    """
    block = find_block_with_min_lines(file)
    return parse_table(block)