# Model evaluation



## Simple RMSEs

In [1]:
import os
import warnings
from ase.io import read
from aseMolec import extAtoms as ea
from aseMolec import pltProps as pp
import matplotlib.pyplot as plt
import numpy as np

# Suppress every Python warning raised inside this kernel from here on.
# NOTE(review): this is a blanket filter; it also hides deprecation notices
# from the plotting/analysis libraries — consider narrowing it to a category.
warnings.filterwarnings("ignore")

# Create output directory if it doesn't exist
os.makedirs("tests/mace_learncurve/", exist_ok=True)

# Evaluate the model on training set.
# The leading `!` is an IPython shell escape, so this runs as an external
# command; the trailing backslashes join the option lines into one command.
# The file given as --output is the one read back further down in this cell.
!mace_eval_configs \
    --configs="data/solvent_xtb_train_4000.xyz" \
    --model="MACE_models/mace_learncurve_train4000_swa_compiled.model" \
    --output="tests/mace_learncurve/solvent_train.xyz"

# Evaluate the model on test set (same model file, different --configs/--output)
!mace_eval_configs \
    --configs="data/solvent_xtb_test.xyz" \
    --model="MACE_models/mace_learncurve_train4000_swa_compiled.model" \
    --output="tests/mace_learncurve/solvent_test.xyz"

# Plotting function for one property at a time
def plot_and_save(x, y, title, xlabel, ylabel, filename):
    """Draw one XTB-vs-MACE comparison plot and save it to ``filename``.

    Args:
        x: First data series handed to ``pp.plot_prop`` (the XTB values at
            every call site in this notebook).
        y: Second data series handed to ``pp.plot_prop`` (the MACE values).
        title: Plot title forwarded to ``pp.plot_prop``.
        xlabel: x-axis label, applied after ``pp.plot_prop`` has drawn.
        ylabel: y-axis label, applied after ``pp.plot_prop`` has drawn.
        filename: Path of the image file to write (saved at 300 dpi).

    Any exception raised while plotting or saving propagates to the caller;
    the figure is closed in either case.
    """
    fig = plt.figure(figsize=(4, 4), dpi=100)
    try:
        pp.plot_prop(x, y, title=title, labs=['XTB', 'MACE'], rel=False)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.tight_layout()
        plt.savefig(filename, dpi=300)
    finally:
        # Close the figure even when plotting/saving fails, so a failed call
        # does not leave an open figure behind in the kernel.
        plt.close(fig)

# Load the configurations written by the two mace_eval_configs runs above.
train_data = read('tests/mace_learncurve/solvent_train.xyz', ':')

# The test set is prefixed with the first three training frames.
# NOTE(review): presumably these are the isolated-atom reference
# configurations needed to evaluate the 'bind' (atomization energy) property
# — confirm. They are the same Atoms objects as in train_data (list slicing
# does not copy them), and they also end up in the "Test" total-energy and
# force plots below.
test_data = train_data[:3] + read('tests/mace_learncurve/solvent_test.xyz', ':')

# Rename the MACE output tags to the '*_mace' names looked up below.
# The XTB reference tags are already called 'energy_xtb' / 'forces_xtb', which
# are exactly the names used below, so they are left untouched (the former
# calls renamed each of those tags to itself).
for dataset in (train_data, test_data):
    ea.rename_prop_tag(dataset, 'MACE_energy', 'energy_mace')
    ea.rename_prop_tag(dataset, 'MACE_forces', 'forces_mace')



def _plot_split(frames, split):
    """Save the atomization-energy, total-energy and force plots of one split.

    Args:
        frames: List of configurations to take the XTB and MACE values from.
        split: Lower-case split name ('train' or 'test'); used in the output
            file names and, capitalized, in the plot titles.
    """
    label = split.capitalize()
    out_prefix = 'tests/mace_learncurve/rmse_'

    # Atomization energy ('bind' property)
    plot_and_save(
        ea.get_prop(frames, 'bind', '_xtb', True).flatten(),
        ea.get_prop(frames, 'bind', '_mace', True).flatten(),
        title=f'Atomization Energy ({label})',
        xlabel='XTB (eV/atom)',
        ylabel='MACE (eV/atom)',
        filename=f'{out_prefix}atomization_{split}.png',
    )

    # Total energy stored in the per-configuration info
    plot_and_save(
        ea.get_prop(frames, 'info', 'energy_xtb', True).flatten(),
        ea.get_prop(frames, 'info', 'energy_mace', True).flatten(),
        title=f'Total Energy ({label})',
        xlabel='XTB (eV/atom)',
        ylabel='MACE (eV/atom)',
        filename=f'{out_prefix}energy_{split}.png',
    )

    # Force components of all configurations, concatenated into one flat array
    plot_and_save(
        np.concatenate(ea.get_prop(frames, 'arrays', 'forces_xtb')).flatten(),
        np.concatenate(ea.get_prop(frames, 'arrays', 'forces_mace')).flatten(),
        title=f'Forces ({label})',
        xlabel='XTB (eV/Å)',
        ylabel='MACE (eV/Å)',
        filename=f'{out_prefix}forces_{split}.png',
    )


# Train set plots, then test set plots
_plot_split(train_data, 'train')
_plot_split(test_data, 'test')

  _Jd, _W3j_flat, _W3j_indices = torch.load(os.path.join(os.path.dirname(__file__), 'constants.pt'))
  model = torch.load(f=args.model, map_location=args.device)
  _Jd, _W3j_flat, _W3j_indices = torch.load(os.path.join(os.path.dirname(__file__), 'constants.pt'))
  model = torch.load(f=args.model, map_location=args.device)


In [2]:
import pandas as pd

# Save plot data to CSV
def save_csv(x, y, filename, xlabel='XTB', ylabel='MACE'):
    """Write two data series side by side as a two-column CSV file.

    Args:
        x: Values of the first column.
        y: Values of the second column.
        filename: Path of the CSV file to write.
        xlabel: Header of the first column.
        ylabel: Header of the second column.

    The row index is not written.
    """
    columns = {xlabel: x, ylabel: y}
    pd.DataFrame(columns).to_csv(filename, index=False)

def _export_split(frames, split):
    """Write the atomization-energy, total-energy and force data of one split to CSV.

    Args:
        frames: List of configurations to take the XTB and MACE values from.
        split: Split name ('train' or 'test') used in the output file names.
    """
    out_prefix = 'tests/mace_learncurve/data_'

    # Atomization energy ('bind' property); default column headers
    save_csv(
        ea.get_prop(frames, 'bind', '_xtb', True).flatten(),
        ea.get_prop(frames, 'bind', '_mace', True).flatten(),
        f'{out_prefix}atomization_{split}.csv',
    )

    # Total energy; default column headers
    save_csv(
        ea.get_prop(frames, 'info', 'energy_xtb', True).flatten(),
        ea.get_prop(frames, 'info', 'energy_mace', True).flatten(),
        f'{out_prefix}energy_{split}.csv',
    )

    # Forces; column headers carry the unit
    save_csv(
        np.concatenate(ea.get_prop(frames, 'arrays', 'forces_xtb')).flatten(),
        np.concatenate(ea.get_prop(frames, 'arrays', 'forces_mace')).flatten(),
        f'{out_prefix}forces_{split}.csv',
        xlabel='XTB (eV/Å)', ylabel='MACE (eV/Å)',
    )


# Save training data, then test data
_export_split(train_data, 'train')
_export_split(test_data, 'test')


## Intra/Inter decomposition

In [5]:
from ase.io import read
from aseMolec import extAtoms as ea
from aseMolec import anaAtoms as aa
from aseMolec import pltProps as pp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# Reference (XTB) and predicted (MACE) data both come from the MACE-evaluated
# test file; it is read twice so the two lists can be re-tagged independently.
test_file = 'tests/mace_learncurve/solvent_test.xyz'
xtb_data = read(test_file, ':')
mace_data = read(test_file, ':')

# Map each source's energy/forces tags onto the plain 'energy'/'forces' names
# (presumably the names the decomposition below works on — confirm).
for frames, tag_map in (
    (xtb_data, {'energy_xtb': 'energy', 'forces_xtb': 'forces'}),
    (mace_data, {'MACE_energy': 'energy', 'MACE_forces': 'forces'}),
):
    for old_tag, new_tag in tag_map.items():
        ea.rename_prop_tag(frames, old_tag, new_tag)

# Run the intra/inter decomposition on the reference set, then the predicted set
for frames in (xtb_data, mace_data):
    aa.extract_molecs(frames, intra_inter=True)

# For every force component: compare XTB against MACE in a plot and dump the
# plotted values to CSV.
titles = dict(
    trans='Translational Component',
    rot='Rotational Component',
    vib='Vibrational Component',
)
components = ['trans', 'rot', 'vib']

for comp in components:
    array_key = f'forces_{comp}'
    out_stem = 'tests/mace_learncurve/'

    # Gather this component from all configurations into one flat array each
    x = np.concatenate([frame.arrays[array_key] for frame in xtb_data]).flatten()
    y = np.concatenate([frame.arrays[array_key] for frame in mace_data]).flatten()

    # Figure first ...
    plt.figure(figsize=(4, 4), dpi=100)
    pp.plot_prop(x, y, title=titles[comp], labs=['XTB', 'MACE'], rel=True)
    plt.xlabel(f'XTB {comp} forces (eV/Å)')
    plt.ylabel(f'MACE {comp} forces (eV/Å)')
    plt.tight_layout()
    plt.savefig(f'{out_stem}force_component_{comp}.png', dpi=300)
    plt.close()

    # ... then the matching CSV (one column per method, no index column)
    df = pd.DataFrame({f'XTB_{comp}_force': x, f'MACE_{comp}_force': y})
    df.to_csv(f'{out_stem}data_force_component_{comp}.csv', index=False)
