Skip to content

Commit

Permalink
Merge pull request #213 from hjkgrp/geometry_changes
Browse files Browse the repository at this point in the history
Classifying geometry based on RMSD
  • Loading branch information
aarongarrison committed May 7, 2024
2 parents da4d0c4 + ae35599 commit ba0ef66
Show file tree
Hide file tree
Showing 8 changed files with 319 additions and 52 deletions.
33 changes: 33 additions & 0 deletions molSimplify/Classes/globalvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import subprocess
from typing import Dict, Tuple
from molSimplify.utils.metaclasses import Singleton
import numpy as np

# Dictionary containing atomic mass, atomic number, covalent radius, num valence electrons
# Data from http://www.webelements.com/ (last accessed May 13th 2015)
Expand Down Expand Up @@ -463,6 +464,28 @@
[[70.5, 70.5, 67.7, 67.7, 120, 120, 135.5, 135.5]]
}

#need each point to be distance 1 from the origin
deg_to_rad = 2*np.pi / 360
all_polyhedra = {
"linear": np.array([(1, 0, 0), (-1, 0, 0)]),
"bent": np.array([(1, 0, 0), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 0)]),
"trigonal planar": np.array([(1, 0, 0), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 0), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), 0)]),
"T shape": np.array([(1, 0, 0), (np.cos(90*deg_to_rad), np.sin(90*deg_to_rad), 0), (np.cos(180*deg_to_rad), np.sin(180*deg_to_rad), 0)]),
"trigonal pyramidal": np.array([(1, -1, -1), (-1, 1, -1), (-1, -1, 1)])/np.sqrt(3),
"tetrahedral": np.array([(1, 1, 1), (1, -1, -1), (-1, 1, -1), (-1, -1, 1)])/np.sqrt(3),
"square planar": np.array([(1, 0, 0), (0, 1, 0), (-1, 0, 0), (0, -1, 0)]),
"seesaw": np.array([(1, 0, 0), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 0), (0, 0, 1), (0, 0, -1)]),
"trigonal bipyramidal": np.array([(0, 0, 1), (0, 0, -1), (1, 0, 0), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 0), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), 0)]),
"square pyramidal": np.array([(1, 0, 0), (0, 1, 0), (-1, 0, 0), (0, -1, 0), (0, 0, 1)]),
"pentagonal planar": np.array([(1, 0, 0), (np.cos(72*deg_to_rad), np.sin(72*deg_to_rad), 0), (np.cos(144*deg_to_rad), np.sin(144*deg_to_rad), 0), (np.cos(216*deg_to_rad), np.sin(216*deg_to_rad), 0), (np.cos(288*deg_to_rad), np.sin(288*deg_to_rad), 0)]),
"octahedral": np.array([(1, 0, 0), (0, 1, 0), (-1, 0, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1)]),
"pentagonal pyramidal": np.array([(1, 0, 0), (np.cos(72*deg_to_rad), np.sin(72*deg_to_rad), 0), (np.cos(144*deg_to_rad), np.sin(144*deg_to_rad), 0), (np.cos(216*deg_to_rad), np.sin(216*deg_to_rad), 0), (np.cos(288*deg_to_rad), np.sin(288*deg_to_rad), 0), (0, 0, 1)]),
"trigonal prismatic": np.array([(1, 0, 1), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 1), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), 1), (1, 0, -1), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), -1), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), -1)])/np.sqrt(2),
"pentagonal bipyramidal": np.array([(1, 0, 0), (np.cos(72*deg_to_rad), np.sin(72*deg_to_rad), 0), (np.cos(144*deg_to_rad), np.sin(144*deg_to_rad), 0), (np.cos(216*deg_to_rad), np.sin(216*deg_to_rad), 0), (np.cos(288*deg_to_rad), np.sin(288*deg_to_rad), 0), (0, 0, 1), (0, 0, -1)]),
"square antiprismatic": np.array([(1, 0, 1), (0, 1, 1), (-1, 0, 1), (0, -1, 1), (np.cos(45*deg_to_rad), np.sin(45*deg_to_rad), -1), (np.cos(135*deg_to_rad), np.sin(135*deg_to_rad), -1), (np.cos(225*deg_to_rad), np.sin(225*deg_to_rad), -1), (np.cos(315*deg_to_rad), np.sin(315*deg_to_rad), -1)])/np.sqrt(2),
"tricapped trigonal prismatic": np.array([(1, 0, 1)/np.sqrt(2), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 1)/np.sqrt(2), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), 1)/np.sqrt(2), (1, 0, -1)/np.sqrt(2), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), -1)/np.sqrt(2), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), -1)/np.sqrt(2), (np.cos(60*deg_to_rad), np.sin(60*deg_to_rad), 0), (np.cos(180*deg_to_rad), np.sin(180*deg_to_rad), 0), (np.cos(300*deg_to_rad), np.sin(300*deg_to_rad), 0)])
}

# Module for running bash commands
# @param cmd String containing command to be run
# @return bash output string
Expand Down Expand Up @@ -720,6 +743,16 @@ def get_all_angle_refs(self):
"""
return all_angle_refs

def get_all_polyhedra(self):
"""Get reference polyhedra dict.
Returns
-------
all_polyhedra : dict
Reference polyhedra for various geometries.
"""
return all_polyhedra

def add_custom_path(self, path):
"""Record custom path in ~/.molSimplify file
Expand Down
176 changes: 172 additions & 4 deletions molSimplify/Classes/mol3D.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from openbabel import openbabel # version 3 style import
except ImportError:
import openbabel # fallback to version 2
from typing import List, Optional, Tuple
from typing import List, Optional, Tuple, Dict, Any
from scipy.spatial import ConvexHull
from molSimplify.utils.decorators import deprecated

Expand All @@ -25,7 +25,8 @@
from molSimplify.Scripts.geometry import (distance, connectivity_match,
vecangle, rotation_params,
rotate_around_axis)
from molSimplify.Scripts.rmsd import rigorous_rmsd
from molSimplify.Scripts.rmsd import rigorous_rmsd, kabsch_rmsd, kabsch_rotate
from itertools import permutations

try:
import PyQt5 # noqa: F401
Expand Down Expand Up @@ -4918,7 +4919,7 @@ def Structure_inspection(self, init_mol=None, catoms_arr=None, num_coord=5, dict
num_coord=num_coord, debug=debug)
return flag_oct, flag_list, dict_oct_info, flag_oct_loose, flag_list_loose

def get_fcs(self, strict_cutoff=False, catom_list=None):
def get_fcs(self, strict_cutoff=False, catom_list=None, max6=True):
"""
Get first coordination shell of a transition metal complex.
Expand All @@ -4928,6 +4929,8 @@ def get_fcs(self, strict_cutoff=False, catom_list=None):
strict bonding cutoff for fullerene and SACs
catom_list : list, optional
List of indices of coordinating atoms.
max6 : bool, optional
If True, will return catoms from oct_comp.
Returns
-------
Expand All @@ -4938,7 +4941,7 @@ def get_fcs(self, strict_cutoff=False, catom_list=None):
self.get_num_coord_metal(debug=False, strict_cutoff=strict_cutoff, catom_list=catom_list)
catoms = self.catoms
# print(catoms, [self.getAtom(x).symbol() for x in catoms])
if len(catoms) > 6:
if max6 and len(catoms) > 6:
_, catoms = self.oct_comp(debug=False)
fcs = [metalind] + catoms
return fcs
Expand Down Expand Up @@ -5683,6 +5686,171 @@ def get_geometry_type(self, dict_check=False, angle_ref=False,
}
return results

def get_geometry_type_distance(
self, max_dev=1e6, close_dev=1e-2,
flag_catoms=False, catoms_arr=None,
skip=False, transition_metals_only=False) -> Dict[str, Any]:
"""
Get the type of the geometry (available options in globalvars all_geometries).
uses hapticity truncated first coordination shell.
Does not require the input of num_coord.
Parameters
----------
max_dev : float, optional
Maximum RMSD allowed between a structure and an ideal geometry before it is classified as unknown. Default is 1e6.
close_dev : float, optional
Maximum difference in RMSD between two classifications allowed before they are compared by maximum single-atom deviation as well.
flag_catoms : bool, optional
Whether or not to return the catoms arr. Default as False.
catoms_arr : Nonetype, optional
Uses the catoms of the mol3D by default. User and overwrite this connection atom array by explicit input.
Default is Nonetype.
skip : list, optional
Geometry checks to skip. Default is False.
transition_metals_only : bool, optional
Flag for considering more than just transition metals as metals. Default is False.
Returns
-------
results : dictionary
Contains the classified geometry and the RMSD from an ideal structure.
Summary contains a list of the RMSD and the maximum single-atom deviation for all considered geometry types.
"""

first_shell, hapt = self.get_first_shell()
num_coord = first_shell.natoms - 1
all_geometries = globalvars().get_all_geometries()
all_polyhedra = globalvars().get_all_polyhedra()
summary = {}

if len(first_shell.graph): # Find num_coord based on metal_cn if graph is assigned
if len(first_shell.findMetal()) > 1:
raise ValueError('Multimetal complexes are not yet handled.')
elif len(first_shell.findMetal(transition_metals_only=transition_metals_only)) == 1:
# Use oct=False to ensure coordination number based on radius cutoffs only
num_coord = len(first_shell.getBondedAtomsSmart(first_shell.findMetal(transition_metals_only=transition_metals_only)[0], oct=False))
else:
raise ValueError('No metal centers exist in this complex.')
if catoms_arr is not None and len(catoms_arr) != num_coord:
raise ValueError("num_coord and the length of catoms_arr do not match.")

if num_coord not in list(all_geometries.keys()):
# should we indicate somehow that these are unknown due to a different coordination number?
results = {
"geometry": "unknown",
"rmsd": np.NAN,
"summary": {},
"hapticity": hapt,
"close_rmsds": False
}
return results

possible_geometries = all_geometries[num_coord]

# for each same-coordinated geometry, get the minimum RMSD and the maximum single-atom deviation in that pairing
for geotype in possible_geometries:
rmsd_calc, max_dist = self.dev_from_ideal_geometry(all_polyhedra[geotype])
summary.update({geotype: [rmsd_calc, max_dist]})

close_rmsds = False
current_rmsd, geometry = max_dev, "unknown"
for geotype in summary:
# if the RMSD for this structure is the lowest seen so far (within a threshold)
if summary[geotype][0] < (current_rmsd + close_dev):
# if the RMSDs are close, flag this in the summary and classify on second criterion
if np.abs(summary[geotype][0] - current_rmsd) < close_dev:
close_rmsds = True
if summary[geotype][1] < summary[geometry][1]:
# classify based on largest singular deviation
current_rmsd = summary[geotype][0]
geometry = geotype
else:
current_rmsd = summary[geotype][0]
geometry = geotype

results = {
"geometry": geometry,
"rmsd": current_rmsd,
"summary": summary,
"hapticity": hapt,
"close_rmsds": close_rmsds
}
return results

def dev_from_ideal_geometry(self, ideal_polyhedron: np.ndarray) -> Tuple[float, float]:
"""
Return the minimum RMSD between a geometry and an ideal polyhedron (with the same average bond distances).
Enumerates all possible indexing of the geometry. As such, only recommended for small systems.
Parameters
----------
ideal_polyhedron: np.array of 3-tuples of coordinates
Reference list of points for an ideal geometry
Returns
-------
rmsd: float
Minimum root mean square distance between the fed geometry and the ideal polyhedron
single_dev: float
Maximum distance between any paired points in the fed geometry and the ideal polyhedron.
"""

metal_idx = self.findMetal()
if len(metal_idx) == 0:
raise ValueError('No metal centers exist in this complex.')
elif len(metal_idx) != 1:
raise ValueError('Multimetal complexes are not yet handled.')
temp_mol = self.get_first_shell()[0]
fcs_indices = temp_mol.get_fcs(max6=False)
# remove metal index from first coordination shell
fcs_indices.remove(temp_mol.findMetal()[0])

if len(fcs_indices) != len(ideal_polyhedron):
raise ValueError('The coordination number differs between the two provided structures.')

# have to redo getting metal_idx with the new mol after running get_first_shell
# want to work with temp_mol since it has the edge and sandwich logic implemented to replace those with centroids
metal_atom = temp_mol.getAtoms()[temp_mol.findMetal()[0]]
fcs_atoms = [temp_mol.getAtoms()[i] for i in fcs_indices]
# construct a np array of the non-metal atoms in the FCS
distances = []
positions = np.zeros([len(fcs_indices), 3])
for idx, atom in enumerate(fcs_atoms):
distance = atom.distance(metal_atom)
distances.append(distance)
# shift so the metal is at (0, 0, 0)
positions[idx, :] = np.array(atom.coords()) - np.array(metal_atom.coords())

current_min = np.inf
orders = permutations(range(len(ideal_polyhedron)))
max_dist = 0

# if desired, make it so the ideal polyhedron has same average bond distance as the mol
# scaled_polyhedron = ideal_polyhedron * np.mean(np.array(distances))

# for all possible assignments, find RMSD between ideal and actual structure
ideal_positions = np.zeros([len(fcs_indices), 3])
for order in orders:
for i in range(len(order)):
# if you wanted to use the same average bond length for all, use the following
# ideal_positions[i, :] = scaled_polyhedron[order[i]]
# if you want to let each ligand scale its length independently, uncomment the following
ideal_positions[i, :] = ideal_polyhedron[order[i]] * distances[i]
rmsd_calc = kabsch_rmsd(ideal_positions, positions)
if rmsd_calc < current_min:
current_min = rmsd_calc
# calculate and store the maximum pairwise distance
rot_ideal = kabsch_rotate(ideal_positions, positions)
diff_matrix = rot_ideal - positions
pairwise_dists = np.sum(diff_matrix**2, axis=1)
max_dist = np.max(pairwise_dists)

# return minimum RMSD, maximum pairwise distance in that structure
return current_min, max_dist

def get_features(self, lac=True, force_generate=False, eq_sym=False,
use_dist=False, NumB=False, Gval=False, size_normalize=False,
alleq=False, strict_cutoff=False, catom_list=None, MRdiag_dict={}, depth=3):
Expand Down
12 changes: 6 additions & 6 deletions molSimplify/Data/tbp.dat
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
0.0 0.0 0.0
-1.7865930597 -0.0003546225 -0.0007425232
1.7865967124 -0.0003546332 -0.0007425118
2.86499999724e-07 1.382576768 -1.1498771629
2.92799999801e-07 0.2985354596 1.7728580026
4.09099999654e-07 -1.6828658296 -0.6318871017
0.00000 0.00000 0.00000
2.00000 0.00000 0.00000
-1.00000 1.73205 0.00000
-1.00000 -1.73205 0.00000
0.00000 0.00000 2.00000
0.00000 0.00000 -2.00000
6 changes: 3 additions & 3 deletions molSimplify/Data/tpl.dat
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
0.00000 0.00000 0.00000
-1.58487 0.39844 0.52472
0.77037 -1.41380 0.59470
0.78682 0.99734 -1.17123
2.00000 0.00000 0.00000
-1.00000 1.73205 0.00000
-1.00000 -1.73205 0.00000
66 changes: 66 additions & 0 deletions tests/test_mol3D.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np
from molSimplify.Classes.mol3D import mol3D
from molSimplify.Classes.atom3D import atom3D
from molSimplify.Classes.globalvars import globalvars


def test_adding_and_deleting_atoms():
Expand Down Expand Up @@ -335,3 +336,68 @@ def test_mol3D_from_smiles_benzene():

np.testing.assert_allclose(mol.graph, ref_graph)
np.testing.assert_allclose(mol.bo_graph, ref_bo_graph)


@pytest.mark.parametrize(
"geo_type, key",
[
('linear', 'linear'),
('trigonal_planar', 'trigonal planar'),
('t_shape', 'T shape'),
('trigonal_pyramidal', 'trigonal pyramidal'),
('tetrahedral', 'tetrahedral'),
('square_planar', 'square planar'),
('seesaw', 'seesaw'),
('trigonal_bipyramidal', 'trigonal bipyramidal'),
('square_pyramidal', 'square pyramidal'),
('pentagonal_planar', 'pentagonal planar'),
('octahedral', 'octahedral'),
('pentagonal_pyramidal', 'pentagonal pyramidal'),
# ('trigonal_prismatic', 'trigonal prismatic'),
# ('pentagonal_bipyramidal', 'pentagonal bipyramidal'),
# ('square_antiprismatic', 'square antiprismatic'),
# ('tricapped_trigonal_prismatic', 'tricapped trigonal prismatic'),
]
)
def test_dev_from_ideal_geometry(resource_path_root, geo_type, key):
mol = mol3D()
mol.readfromxyz(resource_path_root / "inputs" / "geometry_type" / f"{geo_type}.xyz")

globs = globalvars()
polyhedra = globs.get_all_polyhedra()
rmsd, max_dev = mol.dev_from_ideal_geometry(polyhedra[key])

print(polyhedra[key])

assert rmsd < 1e-3
assert max_dev < 1e-3


@pytest.mark.parametrize(
"geo_type, ref",
[
('linear', 'linear'),
('trigonal_planar', 'trigonal planar'),
('t_shape', 'T shape'),
('trigonal_pyramidal', 'trigonal pyramidal'),
('tetrahedral', 'tetrahedral'),
('square_planar', 'square planar'),
('seesaw', 'seesaw'),
('trigonal_bipyramidal', 'trigonal bipyramidal'),
('square_pyramidal', 'square pyramidal'),
('pentagonal_planar', 'pentagonal planar'),
('octahedral', 'octahedral'),
('pentagonal_pyramidal', 'pentagonal pyramidal'),
('trigonal_prismatic', 'trigonal prismatic'),
# ('pentagonal_bipyramidal', 'pentagonal bipyramidal'),
# ('square_antiprismatic', 'square antiprismatic'),
# ('tricapped_trigonal_prismatic', 'tricapped trigonal prismatic'),
]
)
def test_geo_geometry_type_distance(resource_path_root, geo_type, ref):
mol = mol3D()
mol.readfromxyz(resource_path_root / "inputs" / "geometry_type" / f"{geo_type}.xyz")

result = mol.get_geometry_type_distance()
print(result)
assert result['geometry'] == ref
26 changes: 13 additions & 13 deletions tests/testresources/inputs/geometry_type/seesaw.xyz
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
13
07/15/2022 21:19, XYZ structure generated by mol3D Class, molSimplify
04/30/2024 13:23, XYZ structure generated by mol3D Class, molSimplify
Fe 0.000000 0.000000 0.000000
O -2.120000 -0.000421 -0.000881
H -2.730515 0.026086 0.779322
H -2.727124 -0.028395 -0.783794
O 2.120000 -0.000421 -0.000881
H 2.728823 -0.572514 -0.533990
H 2.728652 0.572801 0.531233
O 0.000000 1.629945 -1.355611
H 0.153088 1.607265 -2.334413
H -0.153473 2.588291 -1.155199
O 0.000000 -1.984703 -0.745222
H 0.126817 -2.825828 -0.236778
H -0.126096 -2.282875 -1.681767
O 2.120000 0.000000 0.000000
H 2.726198 0.027327 -0.782547
H 2.726184 -0.027327 0.782547
O -1.060000 1.835974 0.000000
H -1.410402 2.333646 -0.781116
H -1.315789 2.388255 0.781116
O 0.000000 0.000000 2.120000
H -0.293326 -0.726007 2.726198
H 0.293326 0.726007 2.726184
O 0.000000 -0.000000 -2.120000
H 0.222961 0.750609 -2.726198
H -0.222961 -0.750609 -2.726184

0 comments on commit ba0ef66

Please sign in to comment.