In [1]:
import os
import requests

checkpoint = 'gnoc_oc22_oc20_all_s2ef.pt'

# Fetch the pretrained checkpoint only when it is not already on disk.
if not os.path.exists(checkpoint):
    url = 'https://dl.fbaipublicfiles.com/opencatalystproject/models/2022_09/oc22/s2ef/gnoc_oc22_oc20_all_s2ef.pt'
    # Download under a temporary name and rename only on success, so that a
    # failed or interrupted download never leaves a truncated file behind that
    # the os.path.exists() guard above would later mistake for a valid checkpoint.
    partial = checkpoint + '.part'
    with requests.get(url, stream=True, timeout=60) as response:
        # Fail loudly instead of saving an HTTP error page as the checkpoint.
        response.raise_for_status()
        with open(partial, 'wb') as f:
            # Stream in 1 MiB chunks rather than buffering the whole body in memory.
            for chunk in response.iter_content(chunk_size=1 << 20):
                f.write(chunk)
    os.replace(partial, checkpoint)

from ocpmodels.common.relaxation.ase_utils import OCPCalculator
# Calculator built from the downloaded checkpoint; its .config is reused later
# in the notebook as the starting point for the fine-tuning configuration.
calc = OCPCalculator(checkpoint=os.path.expanduser(checkpoint), cpu=False)



amp: false
cmd:
  checkpoint_dir: /home/jovyan/shared-scratch/jkitchin/tutorial/ocp-tutorial/checkpoints/2023-07-18-20-28-48
  commit: 999c1ac
  identifier: ''
  logs_dir: /home/jovyan/shared-scratch/jkitchin/tutorial/ocp-tutorial/logs/tensorboard/2023-07-18-20-28-48
  print_every: 100
  results_dir: /home/jovyan/shared-scratch/jkitchin/tutorial/ocp-tutorial/results/2023-07-18-20-28-48
  seed: null
  timestamp_id: 2023-07-18-20-28-48
dataset: null
gpus: 1
logger: tensorboard
model: gemnet_oc
model_attributes:
  activation: silu
  atom_edge_interaction: true
  atom_interaction: true
  cbf:
    name: spherical_harmonics
  cutoff: 12.0
  cutoff_aeaint: 12.0
  cutoff_aint: 12.0
  cutoff_qint: 12.0
  direct_forces: true
  edge_atom_interaction: true
  emb_size_aint_in: 64
  emb_size_aint_out: 64
  emb_size_atom: 256
  emb_size_cbf: 16
  emb_size_edge: 512
  emb_size_quad_in: 32
  emb_size_quad_out: 32
  emb_size_rbf: 16
  emb_size_sbf: 32
  emb_size_trip_in: 64
  emb_size_trip_out: 64
  env

# Create the training data

In [2]:
import json

with open('energies.json') as f:
    edata = json.load(f)

with open('structures.json') as f:
    sdata = json.load(f)
    
coverages = ['0.25', '0.5', '0.75', '1.0']
metals = ['Pt', 'Ag', 'Ir', 'Au', 'Pd', 'Rh', 'Cu']

! rm -fr coverages.db

from ase.atoms import Atoms
from ase.calculators.singlepoint import SinglePointCalculator

from ase.db import connect
db = connect('coverages.db')

for metal in metals:
    for coverage in coverages:
        struc = sdata[metal]['O']['fcc'][coverage]
        ene = edata[metal]['O']['fcc'][coverage]  # this is an adsorption energy per oxygen

        # we convert this to the energy that OCP will predict
        no = 0
        for sym in struc['symbols']:
            if sym == 'O':
                no += 1
        ene *= no
        re3 = -2.58  # O -> 1/2 O2         re3 = -2.58 eV
        re1 = -3.03

        for i in range(no):
            ene -= re1 + re3

        atoms = Atoms(struc['symbols'], positions=struc['pos'], cell=struc['cell'], pbc=True)
        # we use fake forces
        atoms.set_calculator(SinglePointCalculator(atoms, energy=ene, forces=[[0, 0, 0]] * len(atoms)))
        db.write(atoms)
                             

In [3]:
# Shell escape: run the `ase db` command-line tool on coverages.db to list its rows.
! ase db coverages.db

id|age|formula|calculator|energy|natoms| fmax|pbc| volume|charge|    mass
 1| 1s|Pt16O  |unknown   | 1.346|    17|0.000|TTT|847.607| 0.000|3137.343
 2| 1s|Pt16O2 |unknown   | 1.439|    18|0.000|TTT|847.607| 0.000|3153.342
 3| 1s|Pt16O3 |unknown   | 2.321|    19|0.000|TTT|847.607| 0.000|3169.341
 4| 1s|Pt16O4 |unknown   | 3.900|    20|0.000|TTT|847.607| 0.000|3185.340
 5| 1s|Ag16O  |unknown   | 2.149|    17|0.000|TTT|935.354| 0.000|1741.890
 6| 1s|Ag16O2 |unknown   | 3.458|    18|0.000|TTT|935.354| 0.000|1757.889
 7| 1s|Ag16O3 |unknown   | 5.892|    19|0.000|TTT|935.354| 0.000|1773.888
 8| 1s|Ag16O4 |unknown   | 8.814|    20|0.000|TTT|935.354| 0.000|1789.887
 9| 1s|Ir16O  |unknown   | 0.853|    17|0.000|TTT|801.033| 0.000|3091.471
10| 1s|Ir16O2 |unknown   | 0.159|    18|0.000|TTT|801.033| 0.000|3107.470
11| 1s|Ir16O3 |unknown   | 0.120|    19|0.000|TTT|801.033| 0.000|3123.469
12| 1s|Ir16O4 |unknown   | 0.787|    20|0.000|TTT|801.033| 0.000|3139.468
13| 1s|Au16O  |unknown   | 2.565|    1

In [4]:
import numpy as np

# Seeded generator so the shuffled order below is reproducible across runs.
rng = np.random.default_rng(42)

# Integer ids 1..28, permuted in place by the seeded generator.
ids = np.arange(28) + 1
rng.shuffle(ids)
ids

array([ 8, 17, 24, 23, 18,  6, 28, 25, 11, 19, 20, 10,  7, 21,  4,  1, 16,
       22, 13, 12, 15, 27,  3,  5, 26,  2, 14,  9])

In [7]:
train_end = int(len(ids) * 0.8)
test_end = train_end + int(len(ids) * 0.1)

! rm -fr train.db test.db val.db # start clean

train = connect('train.db')
test = connect('test.db')
val = connect('val.db')

for _id in ids[0:train_end]:
    row = db.get(id=int(_id))
    train.write(row.toatoms())
    
for _id in ids[train_end:test_end]:
    row = db.get(id=int(_id))
    test.write(row.toatoms())
    
for _id in ids[test_end:]:
    row = db.get(id=int(_id))
    val.write(row.toatoms())
    
train.count(), test.count(), val.count()

(22, 2, 4)

Training is done at the command line.

In [6]:
from yaml import load, dump
from yaml import CLoader as Loader, CDumper as Dumper

yml = calc.config

del yml['logger']
del yml['dataset']
del yml['optim']['loss_force']
yml['optim']['eval_every'] = 1
yml['optim']['lr_initial'] = 1e-5

yml.update({'task': {'dataset': 'ase_db'},
            'dataset': {'train': {'src': str(Path('train.db').absolute()),
                                    'a2g_args': {'r_energy': True,
                                                 'r_forces': True},
                                    'keep_in_memory' : True},
                       'test': {'src': str(Path('test.db').absolute()),
                                    'a2g_args': {'r_energy': False,
                                                 'r_forces': False},
                                    'keep_in_memory' : True},
                       'val': {'src': str(Path('val.db').absolute()),
                                    'a2g_args': {'r_energy': True,
                                                 'r_forces': True},
                                    'keep_in_memory' : True}}})

runyml = Path('run.yml').absolute()
with open(runyml, 'wb') as f:
    f.write(dump(yml).encode('utf-8'))
    
print(yml)
! cat run.yml

NameError: name 'Path' is not defined

In [None]:
from pathlib import Path
import ocpmodels as om
import os

# Directory that contains the installed `ocpmodels` package
# (presumably the OCP repository checkout, where main.py lives - confirm).
ocp_root = Path(om.__file__).parent.parent

# Absolute paths, because the command changes directory before running main.py.
config_path = Path("run.yml").absolute()
checkpoint_path = Path(checkpoint).absolute()

# Assemble the shell command one line at a time; every line except the last
# ends in a backslash so the shell reads the python call as a single command.
cmd = '\n'.join([
    f'cd {ocp_root} ',
    'python main.py --mode train \\',
    f' --config-yml {config_path} \\',
    f' --checkpoint {checkpoint_path}',
])
print(cmd)

In [None]:
%%bash
# Abort on the first failing command, so that `python main.py` is never run
# from the wrong directory when the `cd` below fails.
set -euo pipefail

# NOTE(review): these absolute paths are specific to one machine and look like a
# pasted copy of the command printed by the previous cell - adjust for your setup.
tutorial_dir=/home/jovyan/shared-scratch/jkitchin/tutorial/ocp-tutorial

cd "${tutorial_dir}/fine-tuning/ocp"
python main.py --mode train \
 --config-yml "${tutorial_dir}/run.yml" \
 --checkpoint "${tutorial_dir}/gnoc_oc22_oc20_all_s2ef.pt"

In [None]:
from ocpmodels.common.relaxation.ase_utils import OCPCalculator

# Checkpoint of one specific training run, addressed by its timestamped
# directory under ocp_root; update the timestamp after a new run.
newckpt = ocp_root.joinpath('./checkpoints/2023-07-18-20-01-04/checkpoint.pt')

# Second calculator, built from that checkpoint with the same cpu=False setting.
newcalc = OCPCalculator(checkpoint=os.path.expanduser(newckpt), cpu=False)

In [None]:
import json
import time

from ase.atoms import Atoms
from ase.optimize import BFGS
from tqdm import tqdm

with open('energies.json') as f:
    edata = json.load(f)

with open('structures.json') as f:
    s = json.load(f)

# Per-oxygen reference offsets in eV. They are defined here explicitly so the
# cell does not depend on variables left over from the data-preparation loop;
# they must match the values used when the training database was built.
re1 = -3.03
re3 = -2.58

t0 = time.time()

# Predicted (data) and reference (refdata) energies per O atom, grouped by
# adsorption site.
data = {'fcc': [],
        'hcp': []}

refdata = {'fcc': [],
           'hcp': []}


for metal in ['Cu', 'Ag', 'Pd', 'Pt', 'Rh', 'Ir']:
    print(metal)
    for site in ['fcc', 'hcp']:
        for adsorbate in ['O']:
            for coverage in tqdm(['0.25']):
                entry = s[metal][adsorbate][site][coverage]

                atoms = Atoms(entry['symbols'],
                              positions=entry['pos'],
                              cell=entry['cell'],
                              pbc=True)

                # `atoms.calc = ...` replaces the deprecated `set_calculator`.
                atoms.calc = newcalc
                opt = BFGS(atoms, logfile=None)  # no logfile to suppress output

                # Relax until the force criterion is met or 100 steps are used.
                opt.run(fmax=0.05, steps=100)

                # Add the reference offset once per O atom, then divide by the
                # number of O atoms to get a per-oxygen value comparable to the
                # entries in energies.json.
                nO = sum(1 for atom in atoms if atom.symbol == 'O')
                re2 = atoms.get_potential_energy() + nO * (re1 + re3)

                data[site].append(re2 / nO)
                refdata[site].append(edata[metal][adsorbate][site][coverage])

f'Elapsed time = {time.time() - t0} seconds'