Skip to content

Commit

Permalink
Read a mol repr from an Arkane YAML file if given (#663)
Browse files Browse the repository at this point in the history
Save a mol representation in an Arkane YAML file under a `mol` key
and read the mol object properly if given.

A twin PR of
ReactionMechanismGenerator/RMG-Py#2447
  • Loading branch information
kfir4444 committed May 28, 2023
2 parents 02a83e7 + 5089842 commit 0ad087a
Show file tree
Hide file tree
Showing 12 changed files with 2,257 additions and 42 deletions.
22 changes: 4 additions & 18 deletions arc/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1622,20 +1622,6 @@ def safe_copy_file(source: str,
break


def sort_atoms_in_descending_label_order(mol: 'Molecule') -> None:
"""
A helper function, helpful in the context of atom mapping.
This function reassign the .atoms in Molecule with a list of atoms
with the orders based on the labels of the atoms.
for example, [int(atom.label) for atom in mol.atoms] is [1, 4, 32, 7],
then the function will re-assign the new atom with the order [1, 4, 7, 32]
Args:
mol (Molecule): An RMG Molecule object, with labeled atoms
"""
mol.atoms = sorted(mol.atoms, key=lambda x: int(x.label))


def dfs(mol: Molecule,
start: int,
sort_result: bool = True,
Expand Down Expand Up @@ -1667,20 +1653,20 @@ def dfs(mol: Molecule,
return visited


def sort_atoms_in_descending_label_order(mol: 'Molecule')-> None:
def sort_atoms_in_descending_label_order(mol: 'Molecule') -> None:
"""
If all atoms in the molecule object has a label, This function reassign the
If all atoms in the molecule object have a label, this function reassign the
.atoms in Molecule with a list of atoms with the orders based on the labels of the atoms.
for example, [int(atom.label) for atom in mol.atoms] is [1, 4, 32, 7],
then the function will return the new atom with the order [1, 4, 7, 32]
Args:
mol: An rmg Molecule object, with labeld atoms
mol (Molecule): An RMG Molecule object, with labeled atoms
"""
if any(atom.label is None for atom in mol.atoms):
return None
try:
mol.atoms = sorted(mol.atoms, key = lambda x: int(x.label))
mol.atoms = sorted(mol.atoms, key=lambda x: int(x.label))
except ValueError:
logger.warning(f"Some atom(s) in molecule.atoms are not integers.\nGot {[atom.label for atom in mol.atoms]}")
return None
Expand Down
10 changes: 7 additions & 3 deletions arc/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1248,7 +1248,11 @@ def parse_scan_conformers(file_path: str) -> pd.DataFrame:
conformers.append(ics)
# Concatenate ICs of conformers to a single table and remove redundant ICs
scan_conformers = pd.concat([scan_ic_info] + conformers, axis=1)
red_ind = scan_conformers[scan_conformers.redundant == True].index
if not red_ind.empty:
scan_conformers.drop(red_ind, inplace=True)
try:
red_ind = scan_conformers[scan_conformers.redundant == True].index
except TypeError:
pass
else:
if not red_ind.empty:
scan_conformers.drop(red_ind, inplace=True)
return scan_conformers
25 changes: 23 additions & 2 deletions arc/plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,11 @@
get_logger,
is_notebook,
is_str_float,
read_yaml_file,
rmg_mol_to_dict_repr,
save_yaml_file,
sort_two_lists_by_the_first,)
sort_two_lists_by_the_first,
)
from arc.exceptions import InputError, SanitizationError
from arc.level import Level
from arc.parser import parse_trajectory
Expand All @@ -46,7 +49,8 @@
str_to_xyz,
xyz_from_data,
xyz_to_str,
xyz_to_x_y_z)
xyz_to_x_y_z,
)
from arc.species.species import ARCSpecies


Expand Down Expand Up @@ -958,6 +962,23 @@ def save_conformers_file(project_directory: str,
f.write(content)


def augment_arkane_yml_file_with_mol_repr(species: ARCSpecies,
output_directory: str,
) -> None:
"""
Add a Molecule representation to an Arkane YAML file.
Args:
species (ARCSpecies): The species to process.
output_directory (str): The base path to the project's output directory.
"""
yml_path = os.path.join(output_directory, 'Species', species.label, f'{species.label}.yml')
if os.path.isfile(yml_path) and species.mol is not None:
content = read_yaml_file(yml_path)
content['mol'] = rmg_mol_to_dict_repr(species.mol)
save_yaml_file(yml_path, content)


# *** Torsions ***

def plot_torsion_angles(torsion_angles,
Expand Down
22 changes: 20 additions & 2 deletions arc/plotter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import unittest

import arc.plotter as plotter
from arc.common import ARC_PATH
from arc.common import ARC_PATH, read_yaml_file, safe_copy_file
from arc.species.converter import str_to_xyz
from arc.species.species import ARCSpecies

Expand Down Expand Up @@ -66,6 +66,21 @@ def test_save_geo(self):
data = f.read()
self.assertEqual(data, gjf_data)

def test_augment_arkane_yml_file_with_mol_repr(self):
"""Test the augment_arkane_yml_file_with_mol_repr() function"""
project = 'arc_project_for_testing_delete_after_usage'
project_directory = os.path.join(ARC_PATH, 'Projects', project)
n4h6_yml_path = os.path.join(ARC_PATH, 'arc', 'testing', 'yml_testing', 'N4H6.yml')
n4h6_yml_path_copy = os.path.join(project_directory, 'Species', 'N4H6', 'N4H6.yml')
os.makedirs(os.path.join(project_directory, 'Species', 'N4H6'), exist_ok=True)
safe_copy_file(source=n4h6_yml_path, destination=n4h6_yml_path_copy)
content_0 = read_yaml_file(path=n4h6_yml_path_copy)
self.assertNotIn('mol', content_0.keys())
n4h6 = ARCSpecies(label='N4H6', smiles='NNNN')
plotter.augment_arkane_yml_file_with_mol_repr(species=n4h6, output_directory=project_directory)
content_1 = read_yaml_file(path=n4h6_yml_path_copy)
self.assertIn('mol', content_1.keys())

def test_save_conformers_file(self):
"""test the save_conformers_file function"""
project = 'arc_project_for_testing_delete_after_usage'
Expand Down Expand Up @@ -165,7 +180,10 @@ def tearDownClass(cls):
project = 'arc_project_for_testing_delete_after_usage'
project_directory = os.path.join(ARC_PATH, 'Projects', project)
shutil.rmtree(project_directory, ignore_errors=True)
os.remove(os.path.join(ARC_PATH, 'arc', 'testing', 'bde_report_test.txt'))
files_to_remove = [os.path.join(ARC_PATH, 'arc', 'testing', 'bde_report_test.txt')]
for file_path in files_to_remove:
if os.path.isfile(file_path):
os.remove(file_path)


if __name__ == '__main__':
Expand Down
1 change: 1 addition & 0 deletions arc/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ def process_arc_project(thermo_adapter: str,
species_for_thermo_lib.append(species)
elif not species.e0_only and species not in unconverged_species:
unconverged_species.append(species)
plotter.augment_arkane_yml_file_with_mol_repr(species, output_directory)
elif species.compute_thermo and not output_dict[species.label]['convergence'] \
and species not in unconverged_species:
unconverged_species.append(species)
Expand Down
9 changes: 9 additions & 0 deletions arc/reaction_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,15 @@ def test_rxn_family(self):
rxn_1.determine_family(rmg_database=self.rmgdb)
self.assertEqual(rxn_1.family.label, 'H_Abstraction')

# Test identifying the reaction family for with zwitterions read from Arkane YAML files
base_path = os.path.join(ARC_PATH, 'arc', 'testing', 'yml_testing', 'HNNO+NH3O=H2NO+NH2NO')
rxn2 = ARCReaction(r_species=[ARCSpecies(label='HNNO', smiles='[O-][N+]=N', yml_path=os.path.join(base_path, 'HNNO.yml')),
ARCSpecies(label='NH3O', smiles='[O-][NH3+]', yml_path=os.path.join(base_path, 'NH3O.yml'))],
p_species=[ARCSpecies(label='H2NO', smiles='N[O]', yml_path=os.path.join(base_path, 'H2NO.yml')),
ARCSpecies(label='NH2NO', smiles='NN=O', yml_path=os.path.join(base_path, 'NH2NO.yml'))])
rxn2.determine_family(rmg_database=self.rmgdb)
self.assertEqual(rxn2.family.label, 'H_Abstraction')

def test_charge_property(self):
"""Test determining charge"""
self.assertEqual(self.rxn1.charge, 0)
Expand Down
58 changes: 41 additions & 17 deletions arc/species/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,14 +402,26 @@ def __init__(self,
self.rotors_dict = dict()
self.rmg_species = rmg_species
self.tsg_spawned = False
regen_mol = True
if bond_corrections is None:
self.bond_corrections = dict()
else:
self.bond_corrections = bond_corrections

if self.yml_path is not None:
# a YAML path was given
self.from_yml_file(label)
regen_mol = self.from_yml_file(label)
if regen_mol:
if adjlist:
self.mol = Molecule().from_adjacency_list(adjlist=adjlist,
raise_atomtype_exception=False,
raise_charge_exception=False,
)
elif inchi:
self.mol = rmg_mol_from_inchi(inchi)
elif smiles:
self.mol = Molecule(smiles=smiles)
self.set_mol_list()
elif self.rmg_species is not None:
# an RMG Species was given
if not isinstance(self.rmg_species, Species):
Expand Down Expand Up @@ -451,10 +463,11 @@ def __init__(self,
self.multiplicity = self.mol.multiplicity
if self.charge is None:
self.charge = self.mol.get_net_charge()
# Perceive molecule from xyz coordinates. This also populates the .mol attribute of the Species.
# It overrides self.mol generated from adjlist or smiles so xyz and mol will have the same atom order.
if self.final_xyz or self.initial_xyz or self.most_stable_conformer or self.conformers or self.ts_guesses:
self.mol_from_xyz(get_cheap=False)
if regen_mol:
# Perceive molecule from xyz coordinates. This also populates the .mol attribute of the Species.
# It overrides self.mol generated from adjlist or smiles so xyz and mol will have the same atom order.
if self.final_xyz or self.initial_xyz or self.most_stable_conformer or self.conformers or self.ts_guesses:
self.mol_from_xyz(get_cheap=False)
if not self.is_ts:
# We don't care about BACs in TSs
if self.mol is None:
Expand Down Expand Up @@ -884,7 +897,7 @@ def from_dict(self, species_dict):
else:
self.rotors_dict[index][key] = val

def from_yml_file(self, label: str = None):
def from_yml_file(self, label: str = None) -> bool:
"""
Load important species attributes such as label and final_xyz from an Arkane YAML file.
Actual QM data parsing is done later when processing thermo and kinetics.
Expand All @@ -894,25 +907,35 @@ def from_yml_file(self, label: str = None):
Raises:
ValueError: If the adjlist cannot be read.
Returns:
bool: Whether self.mol should be regenerated
"""
regen_mol = True
rmg_spc = Species()
arkane_spc = ArkaneSpecies(species=rmg_spc)
# The data from the YAML file is loaded into the `species` argument of the `load_yaml` method in Arkane
yml_content = read_yaml_file(self.yml_path)
arkane_spc.load_yaml(path=self.yml_path, label=label, pdep=False)
self.label = label if label is not None else arkane_spc.label
self.final_xyz = xyz_from_data(coords=arkane_spc.conformer.coordinates.value,
numbers=arkane_spc.conformer.number.value)
if arkane_spc.adjacency_list is not None:
try:
self.mol = Molecule().from_adjacency_list(adjlist=arkane_spc.adjacency_list,
raise_atomtype_exception=False)
except ValueError:
print(f'Could not read adjlist:\n{arkane_spc.adjacency_list}') # should *not* be logging
raise
elif arkane_spc.inchi is not None:
self.mol = Molecule().from_inchi(inchistr=arkane_spc.inchi, raise_atomtype_exception=False)
elif arkane_spc.smiles is not None:
self.mol = Molecule().from_smiles(arkane_spc.smiles, raise_atomtype_exception=False)
if 'mol' in yml_content:
self.mol = rmg_mol_from_dict_repr(representation=yml_content['mol'], is_ts=yml_content['is_ts'])
if self.mol is not None:
regen_mol = False
if regen_mol:
if arkane_spc.adjacency_list is not None:
try:
self.mol = Molecule().from_adjacency_list(adjlist=arkane_spc.adjacency_list,
raise_atomtype_exception=False)
except ValueError:
print(f'Could not read adjlist:\n{arkane_spc.adjacency_list}') # should *not* be logging
raise
elif arkane_spc.inchi is not None:
self.mol = Molecule().from_inchi(inchistr=arkane_spc.inchi, raise_atomtype_exception=False)
elif arkane_spc.smiles is not None:
self.mol = Molecule().from_smiles(arkane_spc.smiles, raise_atomtype_exception=False)
if self.mol is not None:
self.multiplicity = self.mol.multiplicity
self.charge = self.mol.get_net_charge()
Expand All @@ -932,6 +955,7 @@ def from_yml_file(self, label: str = None):
self.mol_from_xyz()
if self.e0 is None:
self.e0 = arkane_spc.conformer.E0.value_si * 0.001 # convert to kJ/mol
return regen_mol

def set_mol_list(self):
"""
Expand Down
11 changes: 11 additions & 0 deletions arc/species/species_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,17 @@ def test_from_yml_file(self):
c3_2 = ARCSpecies(yml_path=c3_2_yml_path)
self.assertAlmostEqual(c3_2.e0, 72.98479932780415)

nh3o_yml_path = os.path.join(ARC_PATH, 'arc', 'testing', 'yml_testing', 'HNNO+NH3O=H2NO+NH2NO', 'NH3O.yml')
nh3o = ARCSpecies(yml_path=nh3o_yml_path)
self.assertEqual([atom.element.symbol for atom in nh3o.mol.atoms], ['O', 'N', 'H', 'H', 'H'])

s1, s2 = ARCSpecies(label='S1', smiles='[NH3+][O-]'), ARCSpecies(label='S2', smiles='NNNN')
s1.yml_path = nh3o_yml_path
s2.yml_path = n4h6_yml_path
regen_mol_1, regen_mol_2 = s1.from_yml_file(nh3o_yml_path), s2.from_yml_file(n4h6_yml_path)
self.assertFalse(regen_mol_1)
self.assertTrue(regen_mol_2)

def test_str(self):
"""Test the string representation of the object"""
str_representation = str(self.spc9)
Expand Down

0 comments on commit 0ad087a

Please sign in to comment.