# Create the training data

In [51]:
import json

with open('energies.json') as f:
    edata = json.load(f)

with open('structures.json') as f:
    sdata = json.load(f)
    
coverages = ['0.25', '0.5', '0.75', '1.0']
metals = ['Pt', 'Ag', 'Ir', 'Au', 'Pd', 'Rh', 'Cu']

! rm -fr coverages.db

from ase.atoms import Atoms
from ase.calculators.singlepoint import SinglePointCalculator

from ase.db import connect
db = connect('coverages.db')

for metal in metals:
    for coverage in coverages:
        struc = sdata[metal]['O']['fcc'][coverage]
        ene = edata[metal]['O']['fcc'][coverage]  # this is an adsorption energy per oxygen

        # we convert this to the energy that OCP will predict
        no = 0
        for sym in struc['symbols']:
            if sym == 'O':
                no += 1
        ene *= no
        re3 = -2.58  # O -> 1/2 O2         re3 = -2.58 eV
        re1 = -3.03

        for i in range(no):
            ene -= re1 + re3

        atoms = Atoms(struc['symbols'], positions=struc['pos'], cell=struc['cell'], pbc=True)
        calc = SinglePointCalculator(atoms, energy=ene, forces=[[0, 0, 0]] * len(atoms))
        atoms.set_calculator(calc)
        db.write(atoms, y_relaxed=ene)
                             

Training is done at the command line.

In [33]:
[[0, 0,0]] *3

[[0, 0, 0], [0, 0, 0], [0, 0, 0]]

In [23]:
from pathlib import Path
import ocpmodels as om
Path(om.__file__).parent.parent / "main.py"

PosixPath('/home/mambauser/ocp/main.py')

We are using the checkpoint `escn_l6_m3_lay20_all_md_s2ef`.

In [30]:
import os
Path(om.__file__).parent.parent / 'configs/s2ef/all/escn/eSCN-L6-M2-Lay12-All-MD.yml'

PosixPath('/home/mambauser/ocp/configs/s2ef/all/escn/eSCN-L6-M2-Lay12-All-MD.yml')

In [31]:
import shutil
shutil.copy(Path(om.__file__).parent.parent / 'configs/s2ef/all/escn/eSCN-L6-M2-Lay12-All-MD.yml', 'params.yml')

'params.yml'

In [32]:
%%writefile -a params.yml

task:
  dataset: ase_db
  
dataset:
  train:
    src: coverages.db # The path/address to your ASE DB
    connect_args:
      # Keyword arguments for ase.db.connect()
    select_args:
      # Keyword arguments for ase.db.select()
      # These can be used to query/filter the ASE DB
    a2g_args:
      r_energy: True
      r_forces: False
      # Set these if you want to train on energy/forces
      # Energy/force information must be in the ASE DB!
    keep_in_memory: True # Keeping the dataset in memory reduces random reads and is extremely fast, but this is only feasible for relatively small datasets!
  val:
    src:
    a2g_args:
      r_energy: True
      r_forces: True
  test:
    src:
    a2g_args:
      r_energy: False
      r_forces: False
      # It is not necessary to have energy or forces if you are just making predictions.


Appending to params.yml


In [20]:
! python /home/mambauser/ocp/main.py --mode train --config-yml params.yml

DATASET.md		  TRAIN.md	  env.yml	       scripts
DATASET_PER_ADSORBATE.md  configs	  licenses	       setup.py
INSTALL.md		  docs		  main.py	       tests
LICENSE.md		  env.common.yml  ocp_models.egg-info  tutorials
MODELS.md		  env.cpu.yml	  ocpmodels
README.md		  env.gpu.yml	  pyproject.toml


In [41]:
from yaml import load, dump
from yaml import CLoader as Loader, CDumper as Dumper

with open(Path(om.__file__).parent.parent / 'configs/s2ef/all/escn/eSCN-L6-M2-Lay12-All-MD.yml') as f:
    d = load(f, Loader=Loader)
d['includes'] = []
d['model']['regress_forces'] = True
d['optim'][force_coefficient] = 0.0

d.update(dict(task=dict(dataset='ase_db'),
              dataset=dict(train=dict(src='/home/mambauser/tutorial/ocp-tutorial/coverages.db',
                                      a2g_args=dict(r_energy=True, r_forces=False),
                                      keep_in_memory=True))))
with open('test.yml', 'wb') as f:
    f.write(dump(d))

'dataset:\n  train:\n    a2g_args:\n      r_energy: true\n      r_forces: false\n    keep_in_memory: true\n    src: /home/mambauser/tutorial/ocp-tutorial/coverages.db\nincludes:\n- configs/s2ef/all/base.yml\nmodel:\n  basis_width_scalar: 2.0\n  cutoff: 12.0\n  distance_function: gaussian\n  hidden_channels: 256\n  lmax_list:\n  - 6\n  max_neighbors: 20\n  mmax_list:\n  - 2\n  name: escn\n  num_layers: 12\n  num_sphere_samples: 128\n  otf_graph: true\n  regress_forces: false\n  sphere_channels: 128\n  use_pbc: true\noptim:\n  batch_size: 6\n  clip_grad_norm: 20\n  ema_decay: 0.999\n  energy_coefficient: 4\n  eval_batch_size: 6\n  eval_every: 5000\n  force_coefficient: 100\n  loss_energy: mae\n  loss_force: l2mae\n  lr_gamma: 0.3\n  lr_initial: 0.0008\n  lr_milestones:\n  - 218750\n  - 281250\n  - 343750\n  max_epochs: 24\n  num_workers: 8\n  optimizer: AdamW\n  optimizer_params:\n    amsgrad: true\n  warmup_factor: 0.2\n  warmup_steps: 100\ntask:\n  dataset: ase_db\ntrainer: energy\n'

In [56]:
with open(Path(om.__file__).parent.parent / 'configs/s2ef/all/base.yml') as f:
    yml = load(f, Loader=Loader)

with open(Path(om.__file__).parent.parent / 'configs/s2ef/all/escn/eSCN-L6-M2-Lay12-All-MD.yml') as f:
    yml.update(load(f, Loader=Loader))
    del yml['includes']

yml['model']['regress_forces'] = True
yml['optim']['force_coefficient'] = 0.0

yml.update(dict(task=dict(dataset='ase_db'),
                dataset=dict(train=dict(src='/home/mambauser/tutorial/ocp-tutorial/coverages.db',
                                      a2g_args=dict(r_energy=True, r_forces=True),
                                      keep_in_memory=True))))
with open('test.yml', 'wb') as f:
    f.write(dump(yml).encode('utf-8'))

In [None]:
! python ....main.py --config-yml ....

In [55]:
! cat test.yml

dataset:
  train:
    a2g_args:
      r_energy: true
      r_forces: false
    keep_in_memory: true
    src: /home/mambauser/tutorial/ocp-tutorial/coverages.db
logger: tensorboard
model:
  basis_width_scalar: 2.0
  cutoff: 12.0
  distance_function: gaussian
  hidden_channels: 256
  lmax_list:
  - 6
  max_neighbors: 20
  mmax_list:
  - 2
  name: escn
  num_layers: 12
  num_sphere_samples: 128
  otf_graph: true
  regress_forces: true
  sphere_channels: 128
  use_pbc: true
optim:
  batch_size: 6
  clip_grad_norm: 20
  ema_decay: 0.999
  energy_coefficient: 4
  eval_batch_size: 6
  eval_every: 5000
  force_coefficient: 0.0
  loss_energy: mae
  loss_force: l2mae
  lr_gamma: 0.3
  lr_initial: 0.0008
  lr_milestones:
  - 218750
  - 281250
  - 343750
  max_epochs: 24
  num_workers: 8
  optimizer: AdamW
  optimizer_params:
    amsgrad: true
  warmup_factor: 0.2
  warmup_steps: 100
task:
  dataset: ase_db
trainer: forces
