In [1]:
from pathlib import Path
import re

# Root of the ORCA working tree and the three sub-folders inspected below.
base_dir = Path("/home/nikolenko/work/orca/orca_files")
error_dir, inp_dir, out_dir = (base_dir / sub for sub in ("error", "inp", "out"))


def _inp_files_in(directory):
    """Return the ``*.inp`` entries found directly inside *directory* as a list."""
    return list(directory.glob("*.inp"))


# Step 1: inputs located in the error folder
error_inp_files = _inp_files_in(error_dir)
error_inp_count = len(error_inp_files)
print(f"Number of error .inp files: {error_inp_count}")

# Step 2: inputs located in the inp folder (reported as "completed")
completed_inp_files = _inp_files_in(inp_dir)
completed_inp_count = len(completed_inp_files)
print(f"Number of completed .inp files: {completed_inp_count}")

# Step 3: pair every input with "<stem>.out" in the out folder and keep
# only the outputs that are actually present on disk.
_expected_outputs = (
    out_dir / f"{inp_path.stem}.out" for inp_path in completed_inp_files
)
completed_out_files = [
    candidate for candidate in _expected_outputs if candidate.exists()
]

# Step 4: Check how many of them terminated normally
def _terminated_normally(path, marker=b"****ORCA TERMINATED NORMALLY****"):
    """Return True if the file at *path* contains the normal-termination banner.

    The file is scanned in binary mode, one line at a time:
    - no text decoding happens, so a stray non-decodable byte in an output
      cannot abort the whole scan with ``UnicodeDecodeError``;
    - the whole file is never held in memory, and the scan stops at the
      first line containing the banner.

    The banner is pure ASCII and contains no newline, so a per-line byte
    search finds exactly the occurrences a whole-text search would.
    """
    with path.open("rb") as handle:
        return any(marker in line for line in handle)


terminated_normally_count = sum(
    1 for out_file in completed_out_files if _terminated_normally(out_file)
)

print(f"Number of completed .out files terminated normally: {terminated_normally_count}")

# Step 5: Count files with charges 1, 0, and -1
# An output stem is expected to end in "_<charge>", the charge being a single
# digit with an optional leading minus sign. Stems that do not follow this
# scheme, and charges other than 1, 0 and -1, are skipped.
# NOTE(review): this walks every entry of completed_out_files, i.e. every
# output that exists on disk, not only those that terminated normally --
# confirm that this is intended.
charges_files = {1: [], 0: [], -1: []}

for out_path in completed_out_files:
    stem = out_path.stem
    suffix_match = re.search(r'_(\-?\d)$', stem)
    if suffix_match is None:
        continue
    bucket = charges_files.get(int(suffix_match.group(1)))
    if bucket is not None:
        # Everything before the last underscore is the charge-less base name.
        bucket.append(stem.rpartition('_')[0])

# One list entry is appended per counted file, so the tallies are the lengths.
charges_count = {charge: len(names) for charge, names in charges_files.items()}

print(f"Number of files with charge 1: {charges_count[1]}")
print(f"Number of files with charge 0: {charges_count[0]}")
print(f"Number of files with charge -1: {charges_count[-1]}")

# Step 6: Split the base names into the seven regions of the three-set
# diagram (charge 1 / charge 0 / charge -1).
charge_files_sets = {charge: set(names) for charge, names in charges_files.items()}
plus_names, zero_names, minus_names = (
    charge_files_sets[charge] for charge in (1, 0, -1)
)

# Present for all three charges
all_three = plus_names & zero_names & minus_names

# Present for exactly two charges
one_and_zero = (plus_names & zero_names) - minus_names
one_and_neg1 = (plus_names & minus_names) - zero_names
zero_and_neg1 = (zero_names & minus_names) - plus_names

# Present for exactly one charge
only_1 = plus_names - (zero_names | minus_names)
only_0 = zero_names - (plus_names | minus_names)
only_neg1 = minus_names - (zero_names | plus_names)

print(f"Number of files with only charge 1: {len(only_1)}")
print(f"Number of files with only charge 0: {len(only_0)}")
print(f"Number of files with only charge -1: {len(only_neg1)}")
print(f"Number of files with charges 1 and 0: {len(one_and_zero)}")
print(f"Number of files with charges 1 and -1: {len(one_and_neg1)}")
print(f"Number of files with charges 0 and -1: {len(zero_and_neg1)}")
print(f"Number of files with all three charges: {len(all_three)}")

Number of error .inp files: 344
Number of completed .inp files: 4405
Number of completed .out files terminated normally: 3405
Number of files with charge 1: 1149
Number of files with charge 0: 1189
Number of files with charge -1: 1186
Number of files with only charge 1: 0
Number of files with only charge 0: 2
Number of files with only charge -1: 13
Number of files with charges 1 and 0: 16
Number of files with charges 1 and -1: 2
Number of files with charges 0 and -1: 40
Number of files with all three charges: 1131


In [3]:
from pathlib import Path
import shutil
from tqdm import tqdm

# Destination folder for the copied ORCA results (relative to the current
# working directory); created on demand.
target_dir = Path("orca2janpa/orca_calc")
target_dir.mkdir(parents=True, exist_ok=True)

# Copy the .out/.gbw pair of each charge state for every base name that has
# all three charges.
#
# The source paths are built explicitly instead of globbing "<name>_*":
# - a glob on "<name>_*" is a prefix match, so base name "abc" would also
#   pick up "abc_def_0.out", which belongs to a different base name;
# - glob metacharacters ("[", "]", "*", "?") inside a base name would be
#   interpreted as pattern syntax rather than literally.
# Only charges 1, 0 and -1 are copied; files that are missing on disk are
# skipped.
charge_labels = (1, 0, -1)
wanted_suffixes = ('.out', '.gbw')

# sorted() makes the copy order reproducible between runs (set order is not).
for file_name in tqdm(sorted(all_three), desc="Copying files"):
    for charge in charge_labels:
        for suffix in wanted_suffixes:
            source = out_dir / f"{file_name}_{charge}{suffix}"
            if source.is_file():
                shutil.copy(source, target_dir / source.name)

print("Files copied successfully.")

Copying files: 100%|██████████| 1131/1131 [00:30<00:00, 36.49it/s]

Files copied successfully.



