In [21]:
import os
import glob
import re
import time
import numpy as np
from tqdm import tqdm

In [24]:
def find_outmol_files(directory):
    """
    Find all .outmol files in the specified directory and its subdirectories.

    The directory is treated as a literal path: any glob metacharacters it
    contains (``*``, ``?``, ``[``) are escaped so that only the ``**`` and
    ``*.outmol`` parts of the pattern act as wildcards.

    :param directory: The path of the directory to search.
    :return: A list of paths to .outmol files.
    """
    print("Searching for .outmol files...")
    # Without escaping, a directory such as "run[1]" would be read as a
    # character class and silently match nothing.
    pattern = os.path.join(glob.escape(directory), "**", "*.outmol")
    return glob.glob(pattern, recursive=True)


def extract_hessian_data(file_path, timeout=30):
    """
    Extract the first number from the line immediately before the first line
    containing 'Hessian diagonal elements will be replaced with constant'.

    :param file_path: The path to the .outmol file.
    :param timeout: The maximum time (in seconds) to process a single file.
    :return: The first number on the preceding line as a float, or None if
             the marker line is absent, the preceding line holds no number,
             the file cannot be read, or the timeout is exceeded.
    """
    target_line = "Hessian diagonal elements will be replaced with constant"
    # Decimal number with an optional exponent; the exponent letter may be
    # upper or lower case so values like "1.5e-03" are not truncated to 1.5.
    number_pattern = re.compile(r'-?\d+\.?\d*(?:[eE][-+]?\d+)?')
    previous_line = ""

    try:
        # A monotonic clock keeps the elapsed-time check immune to
        # wall-clock adjustments.
        start_time = time.monotonic()
        with open(file_path, 'r', errors='ignore') as file:
            for line in file:
                # Check if the timeout has been exceeded
                if time.monotonic() - start_time > timeout:
                    print(f"Processing file {file_path} timed out, skipping.")
                    return None

                if target_line in line:
                    # Only the first number on the previous line is used.
                    match = number_pattern.search(previous_line)
                    return float(match.group()) if match else None
                previous_line = line
    except FileNotFoundError:
        print(f"Error: File {file_path} not found.")
    except Exception as e:
        # Best-effort scan: report the problem and treat the file as having no data.
        print(f"An error occurred while processing file {file_path}: {str(e)}")

    return None


def extract_cm1_data(file_path, timeout=30):
    """
    Extract non-zero numbers from the cm-1 column (third whitespace-separated
    field) of the table that starts after the header line
    'mode     au_amu        cm-1         meV         THz      km/mol'
    and ends at 'Frequencies (cm-1) and normal modes'.

    The header is matched on its column names regardless of the amount of
    whitespace between them. The end marker is only honoured once the header
    has been seen, so an earlier occurrence of that text does not abort the
    scan. Lines inside the table whose third field is not a number are skipped.

    :param file_path: The path to the .outmol file.
    :param timeout: The maximum time (in seconds) to process a single file.
    :return: A list of extracted numerical values, or None if none were found,
             the file cannot be read, or the timeout is exceeded.
    """
    cm1_data = []
    start_flag = False
    end_line = "Frequencies (cm-1) and normal modes"
    start_line = "mode     au_amu        cm-1         meV         THz      km/mol"
    # Collapse runs of whitespace so the header comparison does not depend on
    # exact column spacing.
    normalized_header = " ".join(start_line.split())

    try:
        # A monotonic clock keeps the elapsed-time check immune to
        # wall-clock adjustments.
        start_time = time.monotonic()
        with open(file_path, 'r', errors='ignore') as file:
            for line in file:
                if time.monotonic() - start_time > timeout:
                    print(f"Processing file {file_path} timed out, skipping.")
                    return None

                if not start_flag:
                    # Still looking for the table header; everything before
                    # it (including the end-marker text) is ignored.
                    if normalized_header in " ".join(line.split()):
                        start_flag = True
                    continue

                if end_line in line:
                    break

                values = line.split()
                if len(values) > 2:
                    try:
                        num = float(values[2])
                    except ValueError:
                        # Not a data row (e.g. a repeated header); skip it.
                        continue
                    if num != 0:
                        cm1_data.append(num)

    except FileNotFoundError:
        print(f"Error: File {file_path} not found.")
    except Exception as e:
        # Best-effort scan: report the problem and return whatever was collected.
        print(f"An error occurred while processing file {file_path}: {str(e)}")

    return cm1_data if cm1_data else None


def main(directory=None):
    """
    Scan a directory tree for .outmol files and print the extracted data.

    For every file found, the Hessian value and the cm-1 values are extracted
    and stored as NumPy arrays keyed by file path; the collected results are
    then printed, followed by a per-category count of files with data.

    :param directory: The directory to search. When omitted, the project
                      directory hard-coded below is used.
    :return: None. All results are reported on standard output.
    """
    if directory is None:
        # Default project location, used when no directory is supplied.
        directory = r"D:\Materials studio\MS2020\project\Newthing_Files\Documents\layer\NiO(200) Cu2O(111)\Layer-S DMol3 suc\Layer-S OO SUC-tixi1"

    if not os.path.exists(directory):
        print(f"Error: Directory '{directory}' does not exist.")
        return

    # Find all .outmol files
    outmol_files = find_outmol_files(directory)

    if not outmol_files:
        print(f"No .outmol files were found in the directory {directory}.")
        return

    print(f"Found {len(outmol_files)} .outmol files, starting processing...")

    # Process each file; only files that yielded data get an entry.
    hessian_results = {}
    cm1_results = {}
    for file_path in tqdm(outmol_files, desc="Processing files"):
        hessian_data = extract_hessian_data(file_path)
        # Compare against None so that a legitimate value of 0.0 is kept.
        if hessian_data is not None:
            hessian_results[file_path] = np.array([hessian_data])
        cm1_data = extract_cm1_data(file_path)
        if cm1_data:
            cm1_results[file_path] = np.array(cm1_data)

    # Display the results
    if hessian_results:
        print("\nHessian Data found:")
        for file_path, data in hessian_results.items():
            print(f"\nFile: {file_path}")
            print(f"Hessian Data: {data}")
        print(f"\nHessian Data was found in a total of {len(hessian_results)} files.")
    else:
        print("No Hessian target data was found in any of the files.")

    if cm1_results:
        print("\ncm-1 Data found:")
        for file_path, data in cm1_results.items():
            print(f"\nFile: {file_path}")
            print(f"cm-1 Data: {data}")
        print(f"\ncm-1 Data was found in a total of {len(cm1_results)} files.")
    else:
        print("No cm-1 target data was found in any of the files.")

# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
    

Searching for .outmol files...
Found 1 .outmol files, starting processing...


Processing files: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 86.70it/s]


Hessian Data found:

File: D:\Materials studio\MS2020\project\Newthing_Files\Documents\layer\NiO(200) Cu2O(111)\Layer-S DMol3 suc\Layer-S OO SUC-tixi1\Layer-S.outmol
Hessian Data: [-25461.781361]

Hessian Data was found in a total of 1 files.

cm-1 Data found:

File: D:\Materials studio\MS2020\project\Newthing_Files\Documents\layer\NiO(200) Cu2O(111)\Layer-S DMol3 suc\Layer-S OO SUC-tixi1\Layer-S.outmol
cm-1 Data: [ 125.1  132.8  169.9  469.9  787.1 3680.2]

cm-1 Data was found in a total of 1 files.



