In [None]:
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
from ase import Atoms
from pymatgen.core.structure import Molecule
from pymatgen.analysis.graphs import MoleculeGraph
from pymatgen.analysis.local_env import OpenBabelNN
import networkx as nx
import networkx.algorithms.isomorphism as iso
import rmsd

In [None]:
# Transition-state (TS) optimisation methods and, position by position, the
# IRC method that is paired with each of them.
ts_methods = ['dft1', 'nn1', 'nn0', 'nn0dft1']
irc_methods = ['dft1', 'nn1', 'nn1', 'dft1']
noise_levels = ['00']

# Empty results table: one row per (reaction, noise level, TS method, IRC method).
# Columns are added later by the analysis loop.
method_pairs = list(zip(ts_methods, irc_methods))
indices = pd.MultiIndex.from_tuples(
    [
        (rxn, noise, *pair)
        for rxn in range(265)
        for noise in noise_levels
        for pair in method_pairs
    ],
    names=['rxn', 'noise', 'ts_method', 'irc_method'],
)
result = pd.DataFrame(index=indices)

# Reaction indices grouped by how many of their end points match the training set.
rxns = [
    # rxns[0]: no match with training set
    [2, 5, 7, 16, 18, 21, 25, 26, 27, 29, 30, 31, 32, 34, 36, 38, 40, 41, 44, 47, 48, 49, 51, 52, 54, 55, 58, 62, 67, 68, 69, 70, 73, 74, 76, 78, 81, 82, 86, 87, 89, 93, 94, 95, 97, 98, 99, 101, 103, 105, 107, 110, 113, 114, 116, 117, 118, 122, 123, 125, 126, 132, 138, 141, 143, 145, 150, 152, 154, 157, 158, 160, 161, 162, 165, 167, 173, 174, 181, 182, 183, 187, 188, 191, 193, 194, 196, 197, 198, 209, 210, 211, 214, 218, 219, 221, 223, 226, 227, 231, 232, 234, 236, 237, 238, 240, 242, 243, 244, 248, 249, 250, 252, 254, 261, 262],
    # rxns[1]: 1-end match with training set
    [0, 1, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 19, 20, 23, 24, 28, 33, 35, 37, 39, 42, 45, 46, 56, 57, 59, 60, 63, 64, 65, 66, 71, 77, 79, 83, 84, 85, 88, 90, 91, 92, 96, 100, 104, 106, 108, 109, 111, 112, 115, 119, 120, 121, 124, 127, 128, 129, 131, 133, 134, 135, 137, 139, 140, 146, 148, 149, 151, 153, 155, 156, 159, 163, 164, 166, 169, 170, 171, 172, 175, 176, 177, 178, 179, 180, 184, 185, 186, 189, 190, 192, 195, 199, 200, 201, 202, 203, 204, 205, 206, 207, 212, 213, 215, 220, 222, 224, 225, 229, 235, 239, 245, 246, 247, 251, 253, 255, 256, 257, 258, 259, 260, 263, 264],
    # rxns[2]: 2-end match with training set
    [15, 17, 22, 43, 50, 53, 61, 72, 75, 80, 102, 130, 136, 142, 144, 147, 168, 208, 216, 217, 228, 230, 233, 241],
]

for rxn in tqdm(range(265)):
    try:
        molecule = Molecule.from_file(f'Data/molecules_fromscratch_noised_renamed/{rxn:03}noise{noise}_TS_{ts_method}.xyz')
        assert rxn not in rxns[2]
    except:
        continue

    for noise in noise_levels:
        for ts_method, irc_method in zip(ts_methods, irc_methods):
            try:
                output = json.load(open(f'20230706_Quacc/outputs/{rxn:03}noise{noise}_TS_{ts_method}.json', 'r'))
                if output['nsteps'] < 1000:
                    trajectory = [np.array(json.loads(mol['atoms']['atoms_json'])['positions']['__ndarray__'][-1]).reshape(-1, 3) for mol in output['trajectory']]
                    path_len = [rmsd.kabsch_rmsd(trajectory[i], trajectory[i+1], translate=True) for i in range(len(trajectory)-1)]
                    path_disp = rmsd.kabsch_rmsd(trajectory[0], trajectory[-1], translate=True)
                    result.loc[(rxn, noise, ts_method, irc_method), 'path_len'] = sum(path_len)
                    result.loc[(rxn, noise, ts_method, irc_method), 'path_disp'] = path_disp
                    result.loc[(rxn, noise, ts_method, irc_method), 'nsteps'] = output['nsteps']
                    result.loc[(rxn, noise, ts_method, irc_method), 'nsteps_all'] = len(output['trajectory'])
                    result.loc[(rxn, noise, ts_method, irc_method), 'step_size'] = np.mean(path_len)
                result.loc[(rxn, noise, ts_method, irc_method), 'rxn_status'] = 'converged'
            except FileNotFoundError:
                result.loc[(rxn, noise, ts_method, irc_method), 'path_len'] = np.nan
                result.loc[(rxn, noise, ts_method, irc_method), 'path_disp'] = np.nan
                result.loc[(rxn, noise, ts_method, irc_method), 'nsteps'] = np.nan
                result.loc[(rxn, noise, ts_method, irc_method), 'nsteps_all'] = np.nan
                result.loc[(rxn, noise, ts_method, irc_method), 'step_size'] = np.nan
                result.loc[(rxn, noise, ts_method, irc_method), 'rxn_status'] = 'TS errored'
                result.loc[(rxn, noise, ts_method, irc_method), 'endpoint_match'] = 'TS errored'
                continue
            
            try:
                output_1 = json.load(open(f'20230706_Quacc/outputs/{rxn:03}noise{noise}_R_{ts_method}_{irc_method}.json', 'r'))
                json_1 = json.loads(output_1['atoms']['atoms_json'])
                molecule_1 = Atoms(
                    numbers=np.array(json_1['numbers']['__ndarray__'][-1]), 
                    positions=np.array(json_1['positions']['__ndarray__'][-1]).reshape(-1, 3),
                    )
                graph_1 = nx.Graph(MoleculeGraph.with_local_env_strategy(molecule_1, OpenBabelNN()).graph)
                nx.set_node_attributes(graph_1, {idx: idx for idx in graph_1.nodes()}, 'index')
            except FileNotFoundError:
                result.loc[(rxn, noise, ts_method, irc_method), 'rxn_status'] = 'IRC errored'
                result.loc[(rxn, noise, ts_method, irc_method), 'endpoint_match'] = 'IRC errored'
                continue
            try:
                output_2 = json.load(open(f'20230706_Quacc/outputs/{rxn:03}noise{noise}_P_{ts_method}_{irc_method}.json', 'r'))
                json_2 = json.loads(output_2['atoms']['atoms_json'])
                molecule_2 = Atoms(
                    numbers=np.array(json_2['numbers']['__ndarray__'][-1]),
                    positions=np.array(json_2['positions']['__ndarray__'][-1]).reshape(-1, 3),
                    )
                graph_2 = nx.Graph(MoleculeGraph.with_local_env_strategy(molecule_2, OpenBabelNN()).graph)
                nx.set_node_attributes(graph_2, {idx: idx for idx in graph_2.nodes()}, 'index')
            except FileNotFoundError:
                result.loc[(rxn, noise, ts_method, irc_method), 'rxn_status'] = 'IRC errored'
                result.loc[(rxn, noise, ts_method, irc_method), 'endpoint_match'] = 'IRC errored'
            
            if rmsd.kabsch_rmsd(molecule_1.cart_coords, molecule_2.cart_coords, translate=True) < 0.1:
                result.loc[(rxn, noise, ts_method, irc_method), 'rxn_status'] = 'Conformational change'
            elif nx.is_isomorphic(graph_1, graph_2, node_match=iso.numerical_node_match('index', -1)):
                result.loc[(rxn, noise, ts_method, irc_method), 'rxn_status'] = 'Conformational change'
            else:
                result.loc[(rxn, noise, ts_method, irc_method), 'rxn_status'] = 'Chemical reaction'

            graph_3 = nx.Graph(np.array([line.split() for line in open(f'Data/molecules_kinbotprod_renamed/{rxn:03}_R.bond', 'r').readlines()], dtype=np.float64))
            graph_4 = nx.Graph(np.array([line.split() for line in open(f'Data/molecules_kinbotprod_renamed/{rxn:03}_P.bond', 'r').readlines()], dtype=np.float64))
            nx.set_node_attributes(graph_3, {idx: idx for idx in graph_3.nodes()}, 'index')
            nx.set_node_attributes(graph_4, {idx: idx for idx in graph_4.nodes()}, 'index')

            graph_compares = (
                nx.is_isomorphic(graph_1, graph_3, node_match=iso.numerical_node_match('index', -1)), 
                nx.is_isomorphic(graph_2, graph_4, node_match=iso.numerical_node_match('index', -1)), 
                nx.is_isomorphic(graph_1, graph_4, node_match=iso.numerical_node_match('index', -1)), 
                nx.is_isomorphic(graph_2, graph_3, node_match=iso.numerical_node_match('index', -1)),
            )
            match graph_compares:
                case (True, True, False, False): 
                    result.loc[(rxn, noise, ts_method, irc_method), 'endpoint_match'] = '2-end match' 
                case (False, False, True, True): 
                    result.loc[(rxn, noise, ts_method, irc_method), 'endpoint_match'] = '2-end match' 
                case (True, False, False, False) | (False, True, False, False) | (False, False, True, False) | (False, False, False, True): 
                    result.loc[(rxn, noise, ts_method, irc_method), 'endpoint_match'] = '1-end match' 
                case (True, False, False, True) | (False, True, True, False):
                    result.loc[(rxn, noise, ts_method, irc_method), 'endpoint_match'] = '1-end match'
                case (False, False, False, False):
                    result.loc[(rxn, noise, ts_method, irc_method), 'endpoint_match'] = 'No match'    # TS failure / No match
                case _:
                    result.loc[(rxn, noise, ts_method, irc_method), 'endpoint_match'] = 'Unknown'
                    print(rxn, noise, ts_method, graph_compares)
           
result_pretty = result.reset_index()
count = result_pretty.groupby(['ts_method', 'noise'])['endpoint_match'].value_counts()
print(count)

In [None]:
# Figure "ReactionAccuracy.pdf": stacked bar charts of run outcomes per TS
# optimisation method.  Left panel: agreement of the IRC end points with the
# reference; right panel: chemical reaction vs. conformational change.
result_pretty = result.reset_index()
fig, axs = plt.subplots(1, 2, figsize=(4, 3), sharex=True, sharey=True)

noise_levels = ['00']
methods = ['dft1', 'nn1', 'nn0']
colors = ['tab:blue', 'tab:green', 'tab:orange']   # one colour per method
fills = [True, False, False]                        # one fill style per stacked group
method_labels = ['QN Hessian (DFT)', 'QN Hessian (NewtonNet)', 'Full Hessian (NewtonNet)']

# Everything that differs between the two panels.
panels = [
    {
        'ax': axs[0],
        'column': 'endpoint_match',
        'groups': ['2-end match', '1-end match', 'No match'],
        'hatches': [None, '//', None],
        'edges': ['-', '-', '-'],
        'first_word_only': False,
        'method_legend': False,
        'ylabel': 'Reaction count',
    },
    {
        'ax': axs[1],
        'column': 'rxn_status',
        'groups': ['Chemical reaction', 'Conformational change'],
        'hatches': [None, None, None],
        'edges': ['-', '-', '--'],
        'first_word_only': True,
        'method_legend': True,
        'ylabel': None,
    },
]

for panel in panels:
    ax = panel['ax']
    count = result_pretty.groupby(['ts_method', 'noise'])[panel['column']].value_counts()
    styles = list(zip(panel['groups'], panel['hatches'], panel['edges'], fills))
    labelled = set()   # groups that already own a legend entry

    for noise in noise_levels:
        for position, method in enumerate(methods):
            bottom = 0
            for group, hatch, edge, fill in styles:
                try:
                    height = count[(method, noise, group)]
                except KeyError:
                    # This outcome never occurred for the method: nothing to stack.
                    continue
                if group in labelled:
                    label = None
                elif panel['first_word_only']:
                    label = group.split()[0]
                else:
                    label = group
                ax.bar(
                    position,
                    height,
                    bottom=bottom,
                    width=0.7,
                    hatch=hatch,
                    fill=fill,
                    linestyle=edge,
                    color=colors[position],
                    edgecolor=colors[position],
                    label=label,
                )
                bottom += height
                labelled.add(group)

    if panel['method_legend']:
        # Colour key for the methods.  The legend handles are taken from bars
        # that are already drawn, so they are recoloured to one outline per method.
        leg0 = ax.legend(method_labels, loc='lower right', framealpha=1)
        for patch, color in zip(leg0.get_patches(), colors):
            patch.set_color(color)
            patch.set_fill(False)

    # Group key above the panel, in neutral grey because colour encodes the method.
    leg = ax.legend(loc='lower center', bbox_to_anchor=(0.5, 1))
    for patch in leg.get_patches():
        patch.set_color('tab:gray')

    if panel['method_legend']:
        # The second legend() call replaced the first one; add it back.
        ax.add_artist(leg0)

    ax.set_xticks(range(len(methods)), [''] * len(methods))
    if panel['ylabel'] is not None:
        ax.set_ylabel(panel['ylabel'])

fig.text(0.5, 0, 'Optimization method', ha='center')
plt.savefig('ReactionAccuracy.pdf', bbox_inches='tight')

In [None]:
# Figure "ReactionAccuracyReopt.pdf": same two stacked-bar panels as the
# previous figure, restricted to the 'dft1' and 'nn0dft1' TS methods.
result_pretty = result.reset_index()
fig, axs = plt.subplots(1, 2, figsize=(4, 3), sharex=True, sharey=True)

noise_levels = ['00']
methods = ['dft1', 'nn0dft1']
colors = ['tab:blue', 'tab:purple']   # one colour per method
fills = [True, False, False]           # one fill style per stacked group
method_labels = ['DFT from KinBot', 'DFT from NewtonNet']

# Everything that differs between the two panels.
panels = [
    {
        'ax': axs[0],
        'column': 'endpoint_match',
        'groups': ['2-end match', '1-end match', 'No match'],
        'hatches': [None, '//', None],
        'edges': ['-', '-', '-'],
        'first_word_only': False,
        'method_legend': False,
        'ylabel': 'Reaction count',
    },
    {
        'ax': axs[1],
        'column': 'rxn_status',
        'groups': ['Chemical reaction', 'Conformational change'],
        'hatches': [None, None, None],
        'edges': ['-', '-', '--'],
        'first_word_only': True,
        'method_legend': True,
        'ylabel': None,
    },
]

for panel in panels:
    ax = panel['ax']
    count = result_pretty.groupby(['ts_method', 'noise'])[panel['column']].value_counts()
    styles = list(zip(panel['groups'], panel['hatches'], panel['edges'], fills))
    labelled = set()   # groups that already own a legend entry

    for noise in noise_levels:
        for position, method in enumerate(methods):
            bottom = 0
            for group, hatch, edge, fill in styles:
                try:
                    height = count[(method, noise, group)]
                except KeyError:
                    # This outcome never occurred for the method: nothing to stack.
                    continue
                if group in labelled:
                    label = None
                elif panel['first_word_only']:
                    label = group.split()[0]
                else:
                    label = group
                ax.bar(
                    position,
                    height,
                    bottom=bottom,
                    width=0.7,
                    hatch=hatch,
                    fill=fill,
                    linestyle=edge,
                    color=colors[position],
                    edgecolor=colors[position],
                    label=label,
                )
                bottom += height
                labelled.add(group)

    if panel['method_legend']:
        # Colour key for the methods.  The legend handles are taken from bars
        # that are already drawn, so they are recoloured to one outline per method.
        leg0 = ax.legend(method_labels, loc='lower right', framealpha=1)
        for patch, color in zip(leg0.get_patches(), colors):
            patch.set_color(color)
            patch.set_fill(False)

    # Group key above the panel, in neutral grey because colour encodes the method.
    leg = ax.legend(loc='lower center', bbox_to_anchor=(0.5, 1))
    for patch in leg.get_patches():
        patch.set_color('tab:gray')

    if panel['method_legend']:
        # The second legend() call replaced the first one; add it back.
        ax.add_artist(leg0)

    ax.set_xticks(range(len(methods)), [''] * len(methods))
    if panel['ylabel'] is not None:
        ax.set_ylabel(panel['ylabel'])

fig.text(0.5, 0, 'Optimization method', ha='center')
plt.savefig('ReactionAccuracyReopt.pdf', bbox_inches='tight')

In [None]:
# Figure "NoisedAccuracy.pdf": number of '2-end match' runs versus the noise
# level of the starting geometry, one line per TS optimisation method.
#
# NOTE: this cell needs `result` to contain rows for the noise levels listed
# below.  The analysis cell of this notebook sets noise_levels = ['00'], so it
# has to be run with the full list for this figure to show more than one point.
result_pretty = result.reset_index()
count = result_pretty.groupby(['ts_method', 'noise'])['endpoint_match'].value_counts()
noise_levels = ['00', '01', '02', '05', '10']
methods = ['dft1', 'nn1', 'nn0']
labels = ['QN Hessian (DFT)', 'QN Hessian (NewtonNet)', 'Full Hessian (NewtonNet)']
colors = ['tab:blue', 'tab:green', 'tab:orange']

fig, ax = plt.subplots(figsize=(4, 3))
for method, color, label in zip(methods, colors, labels):
    per_method = count[method]   # indexed by (noise, endpoint_match)
    # Plot only the noise levels that actually have a '2-end match' count.
    # The x values are derived from the same selection as the y values, so the
    # two can never differ in length.  (The 'dft1' x values used to be
    # hard-coded to 00/01/10 while y was selected with all five levels.)
    available = [level for level in noise_levels if (level, '2-end match') in per_method.index]
    matches = [per_method[(level, '2-end match')] for level in available]
    ax.plot(np.array(available, dtype=int), matches, 'o-', color=color, label=label)

ax.legend()
# symlog keeps the zero-noise point on an otherwise logarithmic axis.
ax.set_xscale('symlog')
ax.set_xticks(range(11), [0, 1, '', '', '', '', '', '', '', '', 10])
ax.set_xlabel('Noise level (pm)')
ax.set_ylabel('2-end match count')
fig.savefig('NoisedAccuracy.pdf', bbox_inches='tight')