## Download the data

A dataset could be created and downloaded using the new [views feature](https://docs.qcarchive.molssi.org/user_guide/datasets/caching.html).

Alternatively, download live from QCArchive (see [Retrieving results](https://docs.openforcefield.org/projects/qcsubmit/en/stable/examples/retrieving-results.html) for more).

In [1]:
from collections import defaultdict
from openff.units import unit
import numpy as np
import torch
import tqdm

In [2]:
from qcportal import PortalClient

qc_client = PortalClient("https://api.qcarchive.molssi.org:443", cache_dir=".")

In [3]:
from openff.qcsubmit.results import (
    BasicResultCollection,
    OptimizationResultCollection,
    TorsionDriveResultCollection,
)

In [4]:
# Pull down the torsion drive records from a dataset.
torsion_drive_result_collection = TorsionDriveResultCollection.from_server(
    client=qc_client,
    # small example dataset -- downloading and interacting with a dataset
    # can take a while!
    datasets="OpenFF Cresset Additional Coverage TorsionDrives v4.0",
    spec_name="default",
)

## Convert the data into a smee/descent-friendly format

The data needs to be postprocessed into a useful format for smee. Note, a lot of the functions here will benefit from parallelism as they can be very slow, some examples are provided in [Josh's repo](https://github.com/jthorton/SPICE-SMEE/blob/main/fit-v1).

In [5]:
import descent.targets.energy

bohr_to_angstrom = (1 * unit.bohr).m_as(unit.angstrom)
hartree_to_kcal = (1 * unit.hartree * unit.avogadro_constant).m_as(
    unit.kilocalories_per_mole
)
hartree_to_kcal

627.5094740630658

Ok we now want to create an [energy.Entry](https://simonboothroyd.github.io/descent/latest/reference/targets/energy/#descent.targets.energy.Entry) to be processed into a dataset. The final output is a [Huggingface dataset](https://huggingface.co/docs/datasets/en/index).

How to do this will differ for optimizations and torsiondrives slightly due to the structure of the code.
Here we look at torsiondrives, code for optimizations should be very similar but not need the `minimum_optimizations` part.

In [6]:
# create a dict to hold data by SMILES so conformers are mostly mapped together
# note: a more robust solution would use Molecule.are_isomorphic or similar method,
# but here we lazily just compare the smiles strings

data_by_smiles = defaultdict(list)
records_and_molecules = list(torsion_drive_result_collection.to_records())

In [7]:
records_and_molecules

[(TorsiondriveRecord(id=143657703, record_type='torsiondrive', is_service=True, properties=None, extras={}, status=<RecordStatusEnum.complete: 'complete'>, manager_name=None, created_on=datetime.datetime(2025, 2, 14, 17, 28, 27, 605735, tzinfo=datetime.timezone.utc), modified_on=datetime.datetime(2025, 2, 21, 4, 44, 6, 51126, tzinfo=datetime.timezone.utc), owner_user='openffbot', owner_group=None, compute_history_=None, task_=None, service_=None, comments_=None, native_files_=None, specification=TorsiondriveSpecification(program='torsiondrive', optimization_specification=OptimizationSpecification(program='geometric', qc_specification=QCSpecification(program='psi4', driver=<SinglepointDriver.deferred: 'deferred'>, method='b3lyp-d3bj', basis='dzvp', keywords={'maxiter': 200, 'scf_properties': ['dipole', 'quadrupole', 'wiberg_lowdin_indices', 'mayer_indices']}, protocols=AtomicResultProtocols(wavefunction=<WavefunctionProtocolEnum.none: 'none'>, stdout=True, error_correction=ErrorCorrecti

In [8]:
count = 0 
for td_record, molecule in tqdm.tqdm(records_and_molecules):
    # take only the optimized grid points
    for opt in td_record.minimum_optimizations.values():
        last = opt.trajectory[-1] #qc_client.get_records(record_ids=[opt.trajectory_ids_[-1]])
        last_mol = last.molecule
        mapped_smiles = last_mol.identifiers.canonical_isomeric_explicit_hydrogen_mapped_smiles
        coords = last_mol.geometry * bohr_to_angstrom
        energy = last.properties["return_energy"] * hartree_to_kcal
        gradient = np.array(last.properties["scf total gradient"]).reshape((-1, 3))
        forces = ((-gradient) * hartree_to_kcal / bohr_to_angstrom)
        entry = {
            "coords": coords,
            "energy": energy,
            "forces": forces,
        }
        data_by_smiles[mapped_smiles].append(entry)

    count += 1
    if count > 2:
        break

  2%|██                                                                                  | 2/82 [00:50<33:52, 25.41s/it]


In [10]:
# convert to smee's expected format
descent_entries = []
for mapped_smiles, entries in data_by_smiles.items():
    entry = {
        "smiles": mapped_smiles,
        "coords": torch.tensor([x["coords"] for x in entries]),
        "energy": torch.tensor([x["energy"] for x in entries]),
        "forces": torch.tensor([x["forces"] for x in entries]),
    }
    descent_entries.append(entry)

  "coords": torch.tensor([x["coords"] for x in entries]),


In [11]:
# this dataset can get downloaded, processed once, saved and reused
dataset = descent.targets.energy.create_dataset(entries=descent_entries)
dataset.save_to_disk("test-smee-data")

  "coords": torch.tensor(entry["coords"]).flatten().tolist(),
  "energy": torch.tensor(entry["energy"]).flatten().tolist(),
  "forces": torch.tensor(entry["forces"]).flatten().tolist(),


## Assign parameters to molecules in the dataset

In [1]:
from openff.toolkit import Molecule, ForceField
import tqdm
import smee.converters
from pydantic import Field

In [2]:
# uncomment if reloading data
import datasets

dataset = datasets.Dataset.load_from_disk("test-smee-data")

In [3]:
# reformat dataset lists to torch tensors
dataset.set_format('torch', columns=['energy', 'coords','forces'], output_all_columns=True)

In [4]:
# this is what a single entry looks like
dataset[0]

{'coords': tensor([-0.5399,  2.1674, -3.7936,  ...,  0.1233,  3.3214,  2.1324]),
 'energy': tensor([-322860.4375, -322860.5000, -322860.6562, -322860.7812, -322860.8125,
         -322860.7500, -322860.6250, -322860.5000, -322860.6250, -322860.7500,
         -322860.8125, -322860.7812, -322860.6562, -322860.5000, -322860.4375,
         -322860.5000, -322860.6562, -322860.7812, -322860.8125, -322860.5000,
         -322860.4375, -322860.5000, -322860.6250, -322860.7500]),
 'forces': tensor([ 0.0021,  0.0088, -0.0046,  ..., -0.0018,  0.0060, -0.0051]),
 'smiles': '[H:14][C:7]([H:15])([H:16])[C:6]1=[C:8]([C:3](=[N:4][O:5]1)[O:2][C:1]([H:11])([H:12])[H:13])[C:9]([H:17])([H:18])[O:10][H:19]'}

Below we specify a starting force field.
Normally we would initialize parameters using the Modified Seminario method,
[example here](https://github.com/openforcefield/sage-2.2.1/blob/main/03_generate-initial-ff/create-msm-ff.py),
but here we just start from Sage 2.2.1.

[Josh's repo](https://github.com/jthorton/SPICE-SMEE/blob/main/fit-v1/training/001-expand_torsions.py)
has examples on expanding torsions too.

In [5]:
starting_ff = ForceField("two-minima-force-field.offxml", load_plugins = True)

In [6]:
import torch
import smee

@smee.potentials.potential_energy_fn("OOP", "twominima")
def compute_twominima_energy(
    conformer: torch.Tensor,
    parameters: torch.Tensor,
    attributes: torch.Tensor,
) -> torch.Tensor:
    """Evaluates the out-of-plane (OOP) energy using the Two Minima potential.

    Args:
        conformer: The conformer [Å] to evaluate the potential at.
        parameters: A tensor containing (k1, k2, periodicity, phase) for each interaction.
        attributes: A tensor containing the global settings (if any) for this potential.

    Returns:
        The evaluated potential energy [kcal / mol].
    """

    is_batched = conformer.ndim == 3

    if not is_batched:
        conformer = torch.unsqueeze(conformer, 0)

    k1 = parameters[:, 0]
    k2 = parameters[:, 1]
    periodicity = parameters[:, 2]
    phase = parameters[:, 3]

    central_atom = conformer[:, 0, :]
    bonded_atoms = conformer[:, 1:, :]
    normal_vector = torch.cross(bonded_atoms[:, 1] - bonded_atoms[:, 0], 
                                bonded_atoms[:, 2] - bonded_atoms[:, 0])
    normal_vector /= torch.norm(normal_vector, dim=-1, keepdim=True)

    oop_vector = central_atom - bonded_atoms[:, 0]
    theta = torch.acos(torch.sum(oop_vector * normal_vector, dim=-1) / torch.norm(oop_vector, dim=-1))

    energy_1 = k1 * (1 + torch.cos(periodicity * theta - phase))
    energy_2 = k2 * (1 + torch.cos(2 * periodicity * theta + phase))
    
    energy = (energy_1 - energy_2).sum(-1)

    if not is_batched:
        energy = torch.squeeze(energy, 0)

    return energy

In [8]:
from smirnoff_plugins.collections.bonded import TwoMinimaCollection
import smee.converters
import openff.toolkit
import openff.units

KCAL_PER_MOL = openff.units.unit.kilocalories / openff.units.unit.mole
RADIAN = openff.units.unit.radian
UNITLESS = openff.units.unit.dimensionless

def convert_twominima_handlers(
    handlers: list[TwoMinimaCollection],
    topologies: list[openff.toolkit.Topology],
) -> tuple[smee.TensorPotential, list[smee.BondedParameterMap]]:
    """Convert Two Minima improper torsion handlers into a tensor potential and parameter maps."""
    
    potential = smee.converters.openff._openff._handlers_to_potential(
        handlers,
        "TwoMinima",
        ("k"),
        attribute_cols = (),
    )
    potential.fn = "twominima"

    parameter_key_to_idx = {param_key: i for i, param_key in enumerate(potential.parameter_keys)}
    
    parameter_maps = []
    
    for handler, topology in zip(handlers, topologies, strict=True):
        assignment_map = {}
        
        for key, param_key in handler.key_map.items():
            indices = tuple(key.atom_indices)
            assignment_map[indices] = parameter_key_to_idx[param_key]

        assignment_matrix = torch.zeros(
            (len(assignment_map), len(potential.parameters)), dtype=torch.float64
        )

        for torsion_idx, (atom_indices, parameter_idx) in enumerate(assignment_map.items()):
            assignment_matrix[torsion_idx, parameter_idx] = 1.0

        print(assignment_map)
        parameter_map = smee.BondedParameterMap(
            particle_idxs=torch.tensor(list(assignment_map.keys()), dtype=torch.int64),
            assignment_matrix=assignment_matrix.to_sparse()
        )

        parameter_maps.append(parameter_map)

    return potential, parameter_maps


@smee.converters.smirnoff_parameter_converter(
    "TwoMinima",
    {
        "k1": KCAL_PER_MOL,
        "k2": KCAL_PER_MOL,
        "periodicity": UNITLESS,
        "phase": RADIAN,
    },
)
def convert_twominima(
    handlers: list[TwoMinimaCollection],
    topologies: list[openff.toolkit.Topology],
) -> tuple[smee.TensorPotential, list[smee.BondedParameterMap]]:
    return convert_twominima_handlers(handlers, topologies)


In [9]:
smee.potentials.potential_energy_fn("TwoMinima", "twominima")(compute_twominima_energy);


In [10]:
from openff.toolkit import ForceField
import openff.interchange as interchange

twominima_force_field = ForceField("two-minima-force-field.offxml", load_plugins = True)

twominima_handler = twominima_force_field.get_parameter_handler("TwoMinima")

# twominima_handler.add_parameter(
#     {
#         "smirks": "[*:1]-[#7X3:2](-[*:3])-[*:4]",
#         "k1": 0.2 * KCAL_PER_MOL,
#         "k2": 0.1 * KCAL_PER_MOL,
#         "periodicity": 1.0 * UNITLESS,
#         "phase": 0.5 * RADIAN,
#     }
# )

molecule = openff.toolkit.Molecule.from_smiles("c1n(CCO)c(C(F)(F)(F))cc1CNCCl")
molecule.generate_conformers(n_conformers=1)

conformer = torch.tensor(molecule.conformers[0].m_as(openff.units.unit.angstrom))

twominima_interchange = interchange.Interchange.from_smirnoff(
    twominima_force_field, molecule.to_topology()
)

twominima_interchange

  warn(


Interchange with 7 collections, non-periodic topology with 28 atoms.

In [11]:
twominima_tensor_ff, [twominima_topology] = smee.converters.convert_interchange(twominima_interchange)


{(0, 1, 2, 5): 0, (0, 1, 5, 2): 0, (0, 2, 1, 5): 0, (0, 5, 1, 2): 0, (2, 0, 1, 5): 0, (2, 1, 0, 5): 0, (12, 13, 14, 25): 0, (12, 13, 25, 14): 0, (12, 14, 13, 25): 0, (12, 25, 13, 14): 0, (14, 12, 13, 25): 0, (14, 13, 12, 25): 0}


In [12]:
twominima_tensor_ff

TensorForceField(potentials=[TensorPotential(type='Angles', fn='k/2*(theta-angle)**2', parameters=tensor([[158.9566,   2.1253],
        [183.0371,   1.8880],
        [101.8035,   2.1782],
        [133.5126,   1.9188],
        [239.7918,   1.9700],
        [248.1769,   1.9759],
        [107.6482,   1.9068],
        [ 73.5533,   1.8911]], dtype=torch.float64), parameter_keys=[PotentialKey associated with handler 'Angles' with id '[*:1]~[#7X3$(*~[#6X3,#6X2,#7X2+0]):2]~[*:3]', PotentialKey associated with handler 'Angles' with id '[*;r5:1]1@[*;r5:2]@[*;r5:3]@[*;r5]@[*;r5]1', PotentialKey associated with handler 'Angles' with id '[*:1]~;!@[*;X3;r5:2]~;@[*;r5:3]', PotentialKey associated with handler 'Angles' with id '[*:1]~[#6X4:2]-[*:3]', PotentialKey associated with handler 'Angles' with id '[*:1]-[#8:2]-[*:3]', PotentialKey associated with handler 'Angles' with id '[*:1]~[#7X4,#7X3,#7X2-1:2]~[*:3]', PotentialKey associated with handler 'Angles' with id '[#1:1]-[#7X4,#7X3,#7X2-1:2]-[*:3]'

In [13]:
twominima_topology

TensorTopology(atomic_nums=tensor([ 6,  7,  6,  6,  8,  6,  6,  9,  9,  9,  6,  6,  6,  7,  6, 17,  1,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  1,  1]), formal_charges=tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0]), bond_idxs=tensor([[ 0,  1],
        [ 1,  2],
        [ 2,  3],
        [ 3,  4],
        [ 1,  5],
        [ 5,  6],
        [ 6,  7],
        [ 6,  8],
        [ 6,  9],
        [ 5, 10],
        [10, 11],
        [11, 12],
        [12, 13],
        [13, 14],
        [14, 15],
        [11,  0],
        [ 0, 16],
        [ 2, 17],
        [ 2, 18],
        [ 3, 19],
        [ 3, 20],
        [ 4, 21],
        [10, 22],
        [12, 23],
        [12, 24],
        [13, 25],
        [14, 26],
        [14, 27]]), bond_orders=tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1]), parameters={'Angles': ValenceParameterMap(particle_idxs=tensor([[ 0,  1,  2],
        [ 0,  1,  

In [14]:
all_smiles = []
interchanges = []
for entry in tqdm.tqdm(dataset):
    mol = Molecule.from_mapped_smiles(
        entry["smiles"],
        allow_undefined_stereo=True
    )
    all_smiles.append(entry["smiles"])
    interchange = twominima_force_field.create_interchange(mol.to_topology())
    interchanges.append(interchange)
    
smee_force_field, smee_topologies = smee.converters.convert_interchange(interchanges)
topologies = dict(zip(all_smiles, smee_topologies))

100%|█████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:11<00:00,  3.98s/it]

{}
{(1, 2, 3, 10): 0, (1, 2, 10, 3): 0, (1, 3, 2, 10): 0, (1, 10, 2, 3): 0, (3, 1, 2, 10): 0, (3, 2, 1, 10): 0}
{(1, 2, 3, 10): 0, (1, 2, 10, 3): 0, (1, 3, 2, 10): 0, (1, 10, 2, 3): 0, (3, 1, 2, 10): 0, (3, 2, 1, 10): 0}





In [15]:
smee_force_field

TensorForceField(potentials=[TensorPotential(type='Angles', fn='k/2*(theta-angle)**2', parameters=tensor([[239.7918,   1.9700],
        [133.5126,   1.9188],
        [101.8035,   2.1782],
        [183.0371,   1.8880],
        [298.0648,   2.1040],
        [ 73.5533,   1.8911],
        [169.0953,   2.0962],
        [158.9566,   2.1253],
        [ 74.2097,   2.0606],
        [107.6482,   1.9068]], dtype=torch.float64), parameter_keys=[PotentialKey associated with handler 'Angles' with id '[*:1]-[#8:2]-[*:3]', PotentialKey associated with handler 'Angles' with id '[*:1]~[#6X4:2]-[*:3]', PotentialKey associated with handler 'Angles' with id '[*:1]~;!@[*;X3;r5:2]~;@[*;r5:3]', PotentialKey associated with handler 'Angles' with id '[*;r5:1]1@[*;r5:2]@[*;r5:3]@[*;r5]@[*;r5]1', PotentialKey associated with handler 'Angles' with id '[#6X3,#7:1]~;@[#8;r:2]~;@[#6X3,#7:3]', PotentialKey associated with handler 'Angles' with id '[#1:1]-[#6X4:2]-[#1:3]', PotentialKey associated with handler 'Angles' 

In [16]:
smee_topologies

[TensorTopology(atomic_nums=tensor([6, 8, 6, 7, 8, 6, 6, 6, 6, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1]), formal_charges=tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), bond_idxs=tensor([[ 0,  1],
         [ 0, 10],
         [ 0, 11],
         [ 0, 12],
         [ 1,  2],
         [ 2,  3],
         [ 2,  7],
         [ 3,  4],
         [ 4,  5],
         [ 5,  6],
         [ 5,  7],
         [ 6, 13],
         [ 6, 14],
         [ 6, 15],
         [ 7,  8],
         [ 8,  9],
         [ 8, 16],
         [ 8, 17],
         [ 9, 18]]), bond_orders=tensor([1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1]), parameters={'Angles': ValenceParameterMap(particle_idxs=tensor([[ 0,  1,  2],
         [ 1,  0, 10],
         [ 1,  0, 11],
         [ 1,  0, 12],
         [ 1,  2,  3],
         [ 1,  2,  7],
         [ 2,  3,  4],
         [ 2,  7,  5],
         [ 2,  7,  8],
         [ 3,  2,  7],
         [ 3,  4,  5],
         [ 4,  5,  6],
         [ 4,  5,  7],
         [ 5,  6, 

## Fit

Now we can set up and run the fit.

In [17]:
import descent.train
import descent.targets.energy

import math
import pathlib
import tensorboardX
import more_itertools


In [25]:
# specify which parameters to train
# and some details about them
# they're scaled so they're roughly on the same order of magnitude

parameters = {
    "Bonds": descent.train.ParameterConfig(
        cols=["k", "length"],
        scales={"k": 1e-2, "length": 1.0}, # normalize so roughly equal
        limits={"k":[0.0, None], "length": [0.0, None]}
        # the include/exclude types are Interchange PotentialKey.id's -- typically SMIRKS
        # include=[], <-- bonds to train. Not specifying trains all
        # exclude=[], <-- bonds NOT to train
    ),
    "Angles": descent.train.ParameterConfig(
        cols=["k", "angle"],
        scales={"k": 1e-2, "angle": 1.0},
        limits={"k": [0.0, None], "angle": [0.0, math.pi]}
    ),
    "ProperTorsions": descent.train.ParameterConfig(
        # fit ks
        cols=["k"],
        scales={"k": 1.0},
    ),
    "TwoMinima": descent.train.ParameterConfig(
        # fit ks
        cols=["k"],
        scales={"k": 1.0},
    )
}

ValidationError: 1 validation error for ParameterConfig
  Value error, cannot scale non-trainable parameters [type=value_error, input_value={'cols': ['k1', 'k2', 'pe...], 'scales': {'k': 1.0}}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.10/v/value_error

In [24]:
trainable = descent.train.Trainable(
    force_field=smee_force_field,
    parameters=parameters,
    attributes={}
)

AssertionError: unknown columns: ('k1', 'k2', 'periodicity', 'phase')

In [21]:
# optional below if you want cool tensorboard logging
def write_metrics(
        epoch: int,
        loss: torch.Tensor,
        loss_energy: torch.Tensor,
        loss_forces: torch.Tensor,
        writer: tensorboardX.SummaryWriter
):
    print(f"epoch={epoch} loss={loss.detach().item():.6f}", flush=True)

    writer.add_scalar("loss", loss.detach().item(), epoch)
    writer.add_scalar("loss_energy", loss_energy.detach().item(), epoch)
    writer.add_scalar("loss_forces", loss_forces.detach().item(), epoch)

    writer.add_scalar("rmse_energy", math.sqrt(loss_energy.detach().item()), epoch)
    writer.add_scalar("rmse_forces", math.sqrt(loss_forces.detach().item()), epoch)
    writer.flush()

Specify some hyperparameters, n_epochs is intentionally very low to guarantee fast execution.

In [22]:
N_EPOCHS = 10
LEARNING_RATE = 0.01
BATCH_SIZE = 500

In [23]:
# make directory to save files in
directory = pathlib.Path("my-smee-fit")
directory.mkdir(exist_ok=True, parents=True)


Run fit below.

In [24]:
# load tensorboard extension so we can view in notebook
%load_ext tensorboard

In [25]:
trainable_parameters = trainable.to_values()
device = trainable_parameters.device.type

with tensorboardX.SummaryWriter(str(directory)) as writer:
    optimizer = torch.optim.Adam([trainable_parameters], lr=LEARNING_RATE, amsgrad=True)
    dataset_indices = list(range(len(dataset)))

    for i in range(N_EPOCHS):
        ff = trainable.to_force_field(trainable_parameters)
        total_loss = torch.zeros(size=(1,), device=device)
        energy_loss = torch.zeros(size=(1,), device=device)
        force_loss = torch.zeros(size=(1,), device=device)
        grad = None
    
        for batch_ids in tqdm.tqdm(
            more_itertools.batched(dataset_indices, BATCH_SIZE),
            desc='Calculating energies',
            ncols=80, total=math.ceil(len(dataset) / BATCH_SIZE)
        ):
            batch = dataset.select(indices=batch_ids)
            true_batch_size = len(dataset)
            batch_configs = sum([len(d["energy"]) for d in batch])

            e_ref, e_pred, f_ref, f_pred = descent.targets.energy.predict(
                batch, ff, topologies, "mean"
            )   
            # L2 loss
            batch_loss_energy = ((e_pred - e_ref) ** 2).sum() / true_batch_size
            batch_loss_force = ((f_pred - f_ref) ** 2).sum() / true_batch_size

            # Equal sum of L2 loss on energies and forces
            batch_loss = batch_loss_energy + batch_loss_force

            (batch_grad, ) = torch.autograd.grad(batch_loss, trainable_parameters, create_graph=True)
            batch_grad = batch_grad.detach()
            if grad is None:
                grad = batch_grad
            else:
                grad += batch_grad
            
            # keep sum of squares to report MSE at the end
            total_loss += batch_loss.detach()
            energy_loss += batch_loss_energy.detach()
            force_loss += batch_loss_force.detach()
        
        trainable_parameters.grad = grad
        
        write_metrics(
            epoch=i, loss=total_loss, loss_energy=energy_loss,
            loss_forces=force_loss, writer=writer
        )

        optimizer.step()
        optimizer.zero_grad()

        if i % 10 == 0:
            torch.save(
                trainable.to_force_field(trainable_parameters),
                directory / f"force-field-epoch-{i}.pt"
            )

    torch.save(
        trainable.to_force_field(trainable_parameters),
        directory / "final-force-field.pt"
    )
    

Calculating energies: 100%|███████████████████████| 1/1 [00:00<00:00,  4.27it/s]

epoch=0 loss=108.659149



Calculating energies: 100%|███████████████████████| 1/1 [00:00<00:00, 46.51it/s]

epoch=1 loss=89.041008



Calculating energies: 100%|███████████████████████| 1/1 [00:00<00:00, 55.38it/s]

epoch=2 loss=73.131607



Calculating energies: 100%|███████████████████████| 1/1 [00:00<00:00, 40.10it/s]

epoch=3 loss=68.602669



Calculating energies: 100%|███████████████████████| 1/1 [00:00<00:00, 26.01it/s]

epoch=4 loss=65.299484



Calculating energies: 100%|███████████████████████| 1/1 [00:00<00:00, 25.26it/s]

epoch=5 loss=60.438900



Calculating energies: 100%|███████████████████████| 1/1 [00:00<00:00, 37.75it/s]

epoch=6 loss=56.625820



Calculating energies: 100%|███████████████████████| 1/1 [00:00<00:00, 38.46it/s]

epoch=7 loss=55.129471



Calculating energies: 100%|███████████████████████| 1/1 [00:00<00:00, 25.23it/s]

epoch=8 loss=55.173851



Calculating energies: 100%|███████████████████████| 1/1 [00:00<00:00, 25.05it/s]

epoch=9 loss=55.215843





Metrics can be viewed in tensorboard below.

`tensorboard --logdir my-smee-fit` can also be run on command line instead of in the notebook.

In [26]:
%tensorboard --logdir my-smee-fit

## Convert back to OFFXML

In [27]:
for potential in smee_force_field.potentials:
    handler_name = potential.parameter_keys[0].associated_handler

    parameter_attrs = potential.parameter_cols
    parameter_units = potential.parameter_units

    if handler_name in ["Bonds", "Angles"]:
        handler = starting_ff.get_parameter_handler(handler_name)
        for i, opt_parameters in enumerate(potential.parameters):
            smirks = potential.parameter_keys[i].id
            ff_parameter = handler[smirks]
            opt_parameters = opt_parameters.detach().cpu().numpy()
            for j, (p, unit) in enumerate(zip(parameter_attrs, parameter_units)):
                setattr(ff_parameter, p, opt_parameters[j] * unit)

    elif handler_name in ["ProperTorsions"]:
        handler = starting_ff.get_parameter_handler(handler_name)
        k_index = parameter_attrs.index('k')
        p_index = parameter_attrs.index('periodicity')
        # we need to collect the k values into a list across the entries
        collection_data = defaultdict(dict)
        for i, opt_parameters in enumerate(potential.parameters):
            smirks = potential.parameter_keys[i].id
            ff_parameter = handler[smirks]
            opt_parameters = opt_parameters.detach().cpu().numpy()
            # find k and the periodicity
            k = opt_parameters[k_index] * parameter_units[k_index]
            p = int(opt_parameters[p_index])
            collection_data[smirks][p] = k
        # now update the force field
        for smirks, k_s in collection_data.items():
            ff_parameter = handler[smirks]
            k_mapped_to_p = [k_s[p] for p in ff_parameter.periodicity]
            ff_parameter.k = k_mapped_to_p

    elif handler_name in ["ImproperTorsions"]:
        k_index = parameter_attrs.index('k')
        handler = starting_ff.get_parameter_handler(handler_name)
        # we only fit the v2 terms for improper torsions so convert to list and set
        for i, opt_parameters in enumerate(potential.parameters):
            smirks = potential.parameter_keys[i].id
            ff_parameter = handler[smirks]
            opt_parameters = opt_parameters.detach().cpu().numpy()
            ff_parameter.k = [opt_parameters[k_index] * parameter_units[k_index]]

starting_ff.to_file("final-force-field.offxml")