In [114]:
import re

pattern = r"output/RuPNP_tBu_(?P<group>.*)_(?P<position>.*)"
regex = re.compile(pattern)

In [115]:
import glob

glob_pattern = "output/RuPNP_tBu_*"

matched_folders = glob.glob(glob_pattern)

targets = [regex.match(folder_name) for folder_name in matched_folders]
targets = [[target.group("group"), target.group("position")] for target in targets]
targets

[['COMe', 'C2down'],
 ['CN', 'C2down'],
 ['I', 'C1on'],
 ['Cl', 'C1on'],
 ['F', 'C2down'],
 ['tBu', 'C2on'],
 ['Me', 'C1on'],
 ['COOH', 'C2down'],
 ['COH', 'C2down'],
 ['CF3', 'C1on'],
 ['NEt', 'C1on'],
 ['COH', 'C1on'],
 ['NMeiPr', 'C2on'],
 ['COOMe', 'C2down'],
 ['NH2', 'C1on'],
 ['I', 'C2on'],
 ['tBu', 'C1on'],
 ['COEt', 'C1on'],
 ['COOH', 'C1on'],
 ['H', 'C1on'],
 ['CF3', 'C2down'],
 ['Me', 'C2down'],
 ['NMeiPr', 'C1on'],
 ['OH', 'C2down'],
 ['CN', 'C1on'],
 ['COEt', 'C2down'],
 ['OMe', 'C1on'],
 ['NO2', 'C2down'],
 ['COOMe', 'C1on'],
 ['iPr', 'C1on'],
 ['Br', 'C1on'],
 ['CHF2', 'C1on'],
 ['CH2F', 'C2down'],
 ['CHF2', 'C2down'],
 ['F', 'C1on'],
 ['NEt2', 'C1on'],
 ['CBr3', 'C2on'],
 ['Ph', 'C2on'],
 ['NMe', 'C2down'],
 ['NMe', 'C1on'],
 ['CCl3', 'C2on'],
 ['OH', 'C1on'],
 ['NMe2', 'C1on'],
 ['Et', 'C1on'],
 ['iPr', 'C2on'],
 ['NMe2', 'C2down'],
 ['Br', 'C2down'],
 ['CH2F', 'C1on'],
 ['NO2', 'C1on'],
 ['NEt', 'C2down'],
 ['NH2', 'C2down'],
 ['NEt2', 'C2on'],
 ['CBr3', 'C1on'],
 ['Ph

In [116]:
import numpy as np

groups = np.array(targets)[:, 0]
groups = np.unique(groups)
groups

array(['Br', 'CBr3', 'CCl3', 'CF3', 'CH2F', 'CHF2', 'CN', 'COEt', 'COH',
       'COMe', 'COOH', 'COOMe', 'Cl', 'Et', 'F', 'H', 'I', 'Me', 'NEt',
       'NEt2', 'NH2', 'NMe', 'NMe2', 'NMeiPr', 'NO2', 'OH', 'OMe', 'Ph',
       'iPr', 'tBu'], dtype='<U6')

In [117]:
import pandas as pd

available_positions = ["C1on", "C2on", "C1down", "C2down"]

df = pd.DataFrame(columns=["R"] + available_positions)

df["R"] = groups

In [118]:
["OH", "C2down"] in targets

True

In [119]:
df[available_positions] = [
    [([R, position] in targets) for position in available_positions] for R in df["R"]
]

In [120]:
df

Unnamed: 0,R,C1on,C2on,C1down,C2down
0,Br,True,False,False,True
1,CBr3,True,True,False,False
2,CCl3,True,True,False,False
3,CF3,True,False,False,True
4,CH2F,True,False,False,True
5,CHF2,True,False,False,True
6,CN,True,False,False,True
7,COEt,True,False,False,True
8,COH,True,False,False,True
9,COMe,True,False,False,True


In [121]:
import functools


@functools.cache
def read_energy_from_output(filepath) -> float | None:
    with open(filepath) as f:
        energy_line = None
        normal_terminate = False
        for line in f.readlines():
            if "Final Gibbs free energy" in line:
                energy_line = line
            if "ORCA TERMINATED NORMALLY" in line:
                normal_terminate = True
    if normal_terminate:
        return float([token for token in energy_line.split(" ") if token != ""][-2])
    else:
        return None


def load_energy_for_item(group, position, item) -> float | None:
    try:
        return read_energy_from_output(
            f"output/RuPNP_tBu_{group}_{position}/{item}.out"
        )
    except:
        return None


load_energy_for_item("Br", "C1on", "A2S")

-96.46405047

In [122]:
@functools.cache
def load_A1_energy(group) -> float | None:
    filepath = f"A1_xtb/RuPNP_tBu_{group}/A1.out"
    return read_energy_from_output(filepath)


load_A1_energy("tBu")

-89.16192354

In [123]:
df["A1_energy"] = [load_A1_energy(group) for group in df["R"]]

In [124]:
available_structures = ["A2R", "A3R", "TSA2R_3R", "A2S", "A3S", "TSA2S_3S"]

[
    f"{position}_{structure}"
    for position in available_positions
    for structure in available_structures
]

['C1on_A2R',
 'C1on_A3R',
 'C1on_TSA2R_3R',
 'C1on_A2S',
 'C1on_A3S',
 'C1on_TSA2S_3S',
 'C2on_A2R',
 'C2on_A3R',
 'C2on_TSA2R_3R',
 'C2on_A2S',
 'C2on_A3S',
 'C2on_TSA2S_3S',
 'C1down_A2R',
 'C1down_A3R',
 'C1down_TSA2R_3R',
 'C1down_A2S',
 'C1down_A3S',
 'C1down_TSA2S_3S',
 'C2down_A2R',
 'C2down_A3R',
 'C2down_TSA2R_3R',
 'C2down_A2S',
 'C2down_A3S',
 'C2down_TSA2S_3S']

In [125]:
df[
    [
        f"{position}_{structure}"
        for position in available_positions
        for structure in available_structures
    ]
] = [
    [
        load_energy_for_item(group, position, item)
        for position in available_positions
        for item in available_structures
    ]
    for group in df["R"]
]

In [126]:
df

Unnamed: 0,R,C1on,C2on,C1down,C2down,A1_energy,C1on_A2R,C1on_A3R,C1on_TSA2R_3R,C1on_A2S,...,C1down_TSA2R_3R,C1down_A2S,C1down_A3S,C1down_TSA2S_3S,C2down_A2R,C2down_A3R,C2down_TSA2R_3R,C2down_A2S,C2down_A3S,C2down_TSA2S_3S
0,Br,True,False,False,True,-71.318708,-96.462445,-96.422877,-96.418622,-96.46405,...,,,,,-96.459452,-96.424561,,-96.459627,-96.429244,-96.4129
1,CBr3,True,True,False,False,-91.82902,-116.974625,-116.925039,-116.919418,-116.976305,...,,,,,,,,,,
2,CCl3,True,True,False,False,-94.662071,-119.804416,-119.760451,-119.757521,-119.807014,...,,,,,,,,,,
3,CF3,True,False,False,True,-95.745405,-120.88566,-120.848551,-120.84389,-120.886206,...,,,,,-120.884689,-120.852378,-120.841052,-120.882553,-120.859384,-120.842841
4,CH2F,True,False,False,True,-78.793822,-103.93906,-103.902115,-103.897582,-103.936049,...,,,,,-103.933265,-103.906466,,-103.933788,-103.899454,-103.893382
5,CHF2,True,False,False,True,-87.266387,-112.407813,-112.37285,-112.368296,-112.410133,...,,,,,-112.405811,-112.370735,-112.363157,-112.406246,-112.373993,-112.36626
6,CN,True,False,False,True,-73.087162,-98.2302,-98.195163,-98.190404,-98.23248,...,,,,,-98.227584,-98.195487,-98.190099,-98.233892,-98.200632,
7,COEt,True,False,False,True,-82.725343,-107.864171,-107.831064,-107.827855,-107.862335,...,,,,,-107.863099,-107.827893,-107.823322,-107.863342,-107.831747,-107.820578
8,COH,True,False,False,True,-76.412879,-101.550203,-101.521754,-101.516833,-101.554667,...,,,,,-101.551321,-101.523358,-101.501109,-101.551567,-101.52129,-101.508018
9,COMe,True,False,False,True,-82.727175,-107.867636,-107.832884,-107.829251,-107.870725,...,,,,,-107.865141,-107.824623,-107.821274,-107.864299,-107.828857,-107.820814


In [127]:
df.to_csv("RuPNP.csv")