<a href="https://colab.research.google.com/github/rohskopf/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/Tantalum_MLFF_FitSnap.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Step 1: Install LAMMPS and FitSNAP

In [1]:
# Print the Python version of the runtime (the install cell below requests
# the matching python3.10-venv apt package).
!python --version

Python 3.10.12


In [2]:
pip install jarvis-tools

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting jarvis-tools
  Downloading jarvis_tools-2023.5.26-py2.py3-none-any.whl (974 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m974.6/974.6 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
Collecting spglib>=1.14.1 (from jarvis-tools)
  Downloading spglib-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (515 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m515.3/515.3 kB[0m [31m58.5 MB/s[0m eta [36m0:00:00[0m
Collecting xmltodict>=0.11.0 (from jarvis-tools)
  Downloading xmltodict-0.13.0-py2.py3-none-any.whl (10.0 kB)
Installing collected packages: xmltodict, spglib, jarvis-tools
Successfully installed jarvis-tools-2023.5.26 spglib-2.0.2 xmltodict-0.13.0


If you are running locally and have already installed LAMMPS and FitSNAP, skip the installation cell below.

In [3]:
# Install LAMMPS with Python interface.

!apt-get update
!apt install -y cmake build-essential git ccache openmpi-bin libopenmpi-dev python3.10-venv
!pip install --upgrade pip
!pip install numpy torch scipy virtualenv psutil pandas tabulate mpi4py Cython sklearn
!pip install ase
!pip install fitsnap3
%cd /content
!rm -rf lammps
!git clone https://github.com/lammps/lammps.git lammps
%cd /content/lammps
!rm -rf build
!mkdir build
%cd build
!cmake ../cmake -DLAMMPS_EXCEPTIONS=yes \
               -DBUILD_SHARED_LIBS=yes \
               -DMLIAP_ENABLE_PYTHON=yes \
               -DPKG_PYTHON=yes \
               -DPKG_ML-SNAP=yes \
               -DPKG_ML-IAP=yes \
               -DPKG_ML-PACE=yes \
               -DPKG_SPIN=yes \
               -DPYTHON_EXECUTABLE:FILEPATH=`which python`
!make -j 2
!make install-python

# Install FitSNAP.

%cd /content
!rm -rf FitSNAP
#!git clone https://github.com/FitSNAP/FitSNAP
!git clone -b hackathon https://github.com/rohskopf/FitSNAP

# Set environment variables.

!$PYTHONPATH
%env PYTHONPATH=/env/python:/bin/bash:
%env LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/content/lammps/build

# Move into FitSNAP directory
%cd FitSNAP

0% [Working]            Get:1 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]
Get:2 https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/ InRelease [3,622 B]
Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64  InRelease
Hit:4 http://archive.ubuntu.com/ubuntu focal InRelease
Hit:5 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu focal InRelease
Get:6 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]
Get:7 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1,064 kB]
Hit:8 http://ppa.launchpad.net/cran/libgit2/ubuntu focal InRelease
Get:9 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [2,803 kB]
Hit:10 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu focal InRelease
Get:11 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]
Hit:12 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu focal InRelease
Get:13 http://archive.ubuntu.com/ubuntu focal-updates/re

# Check that the LAMMPS Python interface works

In [4]:
# Smoke test: import the LAMMPS Python module and create one instance.
# Printing the object confirms that construction succeeded.
import lammps
lmp = lammps.lammps()
print(lmp)

<lammps.core.lammps object at 0x7f13ac11b0a0>


# Scrape data with FitSNAP ASE scraper

In [6]:
import numpy as np
from mpi4py import MPI
from fitsnap3lib.fitsnap import FitSnap
from fitsnap3lib.scrapers.ase_funcs import get_apre
from fitsnap3lib.scrapers.ase_funcs import ase_scraper
from fitsnap3lib.tools.group_tools import make_table
from ase.io import read

In [7]:
# Create an input dictionary containing settings.
# Each top-level key names a settings section and maps option names to values;
# the dictionary is handed to FitSnap(...) in the next cell.

settings = {
    "BISPECTRUM": {
        "numTypes": 1,
        "twojmax": 6,
        "rcutfac": 4.67637,
        "rfac0": 0.99363,
        "rmin0": 0.0,
        "wj": 1.0,
        "radelem": 0.5,
        "type": "Ta",
        "wselfallflag": 0,
        "chemflag": 0,
        "bzeroflag": 0,
        "quadraticflag": 0,
    },
    "CALCULATOR": {
        "calculator": "LAMMPSSNAP",
        # presumably 1 enables fitting to that quantity; confirm in FitSNAP docs
        "energy": 1,
        "force": 1,
        "stress": 1,
    },
    "SOLVER": {
        "solver": "SVD",
    },
    "OUTFILE": {
        "metrics": "Ta_metrics.md",
        "potential": "Ta_pot",
    },
    "REFERENCE": {
        "units": "metal",
        "atom_style": "atomic",
        "pair_style": "hybrid/overlay zero 10.0 zbl 4.0 4.8",
        "pair_coeff1": "* * zero",
        "pair_coeff2": "* * zbl 73 73",
    },
}

In [8]:
# Make a fitsnap instance.
# It is configured from the `settings` dictionary above; "--overwrite" is
# passed as a command-line-style argument (presumably it permits replacing
# existing output files; confirm in the FitSNAP documentation).
fs = FitSnap(settings, arglist=["--overwrite"])

In [9]:
# Groups are easiest to handle through the `group_table`.
# Start from a dictionary with one entry per group:
# - "group_sections" lists the column names of the table.
# - every other key is a group name whose list holds that group's value for
#   each column, in the same order as "group_sections".

group_settings = {
    "group_sections": ["training_size", "testing_size", "eweight", "fweight", "vweight"],
    "Displaced_A15": [0.8, 0.2, 100,   1,    1e-5],
    "Displaced_BCC": [0.8, 0.2, 100,   1,    1e-5],
    "Displaced_FCC": [0.8, 0.2, 100,   1,    1e-5],
    "Elastic_BCC":   [0.8, 0.2, 1e-8,  1e-8, 1e-5],
    "Elastic_FCC":   [0.8, 0.2, 1e-9,  1e-9, 1e-5],
    "GSF_110":       [0.8, 0.2, 100,   1,    1e-5],
    "GSF_112":       [0.8, 0.2, 100,   1,    1e-5],
    "Liquid":        [0.8, 0.2, 467.0, 1,    1e-5],
    "Surface":       [0.8, 0.2, 100,   1,    1e-5],
    "Volume_A15":    [0.8, 0.2, 1.0,   1e-9, 1e-5],
    "Volume_BCC":    [0.8, 0.2, 1.0,   1e-9, 1e-5],
    "Volume_FCC":    [0.8, 0.2, 1.0,   1e-9, 1e-5],
}

In [10]:
# Build the group table from `group_settings`. The call prints each group name
# with its settings list; the result is iterated and indexed by group name in
# the cells below.
group_table = make_table(group_settings)

Displaced_A15 [0.8, 0.2, 100, 1, 1e-05]
Displaced_BCC [0.8, 0.2, 100, 1, 1e-05]
Displaced_FCC [0.8, 0.2, 100, 1, 1e-05]
Elastic_BCC [0.8, 0.2, 1e-08, 1e-08, 1e-05]
Elastic_FCC [0.8, 0.2, 1e-09, 1e-09, 1e-05]
GSF_110 [0.8, 0.2, 100, 1, 1e-05]
GSF_112 [0.8, 0.2, 100, 1, 1e-05]
Liquid [0.8, 0.2, 467.0, 1, 1e-05]
Surface [0.8, 0.2, 100, 1, 1e-05]
Volume_A15 [0.8, 0.2, 1.0, 1e-09, 1e-05]
Volume_BCC [0.8, 0.2, 1.0, 1e-09, 1e-05]
Volume_FCC [0.8, 0.2, 1.0, 1e-09, 1e-05]


In [12]:
# Make ASE frames for each group; do this however you want, we simply read from filenames that share
# group names here.
# The path is relative, so the working directory must be the FitSNAP checkout
# (the install cell ends with `%cd FitSNAP`).
for name in group_table:
    # ":" is the ASE index that selects every frame in the file.
    frames = read(f"examples/Ta_XYZ/XYZ/{name}.xyz", ":")
    # Store the frames and their count on the group's table entry.
    group_table[name]["frames"] = frames
    group_table[name]["nconfigs"] = len(frames)

In [18]:
# Inject group data into the fitsnap list of data dictionaries.
# `data` supports len() and indexing; its entries are read by key further
# below (e.g. 'Positions', 'Energy', 'Forces').
data = ase_scraper(group_table)
print(f"Found {len(data)} configurations")

Found 363 configurations


In [14]:
# Calculate descriptors for all configurations.
fs.process_configs(data)

# Perform a fit.
fs.solver.perform_fit()

# Analyze error metrics.
# NOTE(review): the recorded output shows warnings pointing at the rsq / w_rsq
# divisions inside FitSNAP; presumably a zero denominator for some groups.
# Confirm before relying on those R^2 values.
fs.solver.error_analysis()

'process_configs' took 1736.78 ms on rank 0


  rsq = 1 - ssr / np.sum(np.square(g['truths'] - (g['truths'] / nconfig).sum()))
  w_rsq = 1 - w_ssr / np.sum(np.square((g['weights'] * g['truths']) - (g['weights'] * g['truths'] / w_nconfig).sum()))
  rsq = 1 - ssr / np.sum(np.square(g['truths'] - (g['truths'] / nconfig).sum()))
  w_rsq = 1 - w_ssr / np.sum(np.square((g['weights'] * g['truths']) - (g['weights'] * g['truths'] / w_nconfig).sum()))
  rsq = 1 - ssr / np.sum(np.square(g['truths'] - (g['truths'] / nconfig).sum()))
  w_rsq = 1 - w_ssr / np.sum(np.square((g['weights'] * g['truths']) - (g['weights'] * g['truths'] / w_nconfig).sum()))
  rsq = 1 - ssr / np.sum(np.square(g['truths'] - (g['truths'] / nconfig).sum()))
  w_rsq = 1 - w_ssr / np.sum(np.square((g['weights'] * g['truths']) - (g['weights'] * g['truths'] / w_nconfig).sum()))


In [15]:
# Dataframe of detailed errors per group.
# Left as the cell's last expression so the notebook renders it as a rich
# table instead of the plain-text dump that print() produces.
fs.solver.errors

                                          ncount           mae          rmse  \
Group      Weighting  Testing  Subsystem                                       
*ALL       Unweighted Training Energy        298  6.176049e-02  2.080920e-01   
                               Force       10878  1.067931e-01  2.191925e-01   
                               Stress       1788  1.242705e+04  3.110458e+04   
                      Testing  Energy         65  4.909511e-02  1.072730e-01   
                               Force        1794  7.241899e-02  1.447309e-01   
...                                          ...           ...           ...   
Volume_FCC weighted   Training Force         300  7.616755e-24  1.562500e-23   
                               Stress        150  4.301890e-01  7.727505e-01   
                      Testing  Energy          6  2.214104e-01  2.344502e-01   
                               Force          72  4.825520e-24  7.696636e-24   
                               Stress   

# Convert to JARVIS Atoms format

In [16]:
from jarvis.core.atoms import Atoms as JAtoms
def fs_to_jatoms(fs_entry=[]):
    """Build a JARVIS ``JAtoms`` object from a single FitSNAP data entry.

    Args:
        fs_entry: Object indexed with the string keys ``'AtomTypes'``,
            ``'Positions'`` and ``'Lattice'``. The default ``[]`` cannot be
            indexed with those keys, so an entry must always be supplied.

    Returns:
        The ``JAtoms`` instance created from those three values, with the
        positions passed as Cartesian coordinates (``cartesian=True``).
    """
    return JAtoms(
        elements=fs_entry['AtomTypes'],
        coords=fs_entry['Positions'],
        lattice_mat=fs_entry['Lattice'],
        cartesian=True,
    )



In [23]:
# Convert the first scraped configuration as a quick check of the helper.
atms = fs_to_jatoms(data[0])

In [24]:
# Display the converted structure (lattice vectors and Cartesian positions).
atms

Ta64
1.0
10.6000003815 6.490628269082348e-16 6.490628269082348e-16
0.0 10.6000003815 6.490628269082347e-16
0.0 0.0 10.6000003815
Ta
64
Cartesian
10.54497 10.54551 10.5956
6.561260000000001 7.9104 0.04457
9.26485 7.955080000000001 10.58594
5.26103 6.62842 2.63265
5.289550000000001 9.23968 2.5673
8.00221 5.33276 1.34132
7.91864 5.28423 3.97891
5.27037 0.011770000000000325 5.31967
7.975770000000001 2.5884600000000004 7.90284
6.67111 2.7094500000000004 5.31433
9.29025 2.5810500000000003 5.29066
5.2532000000000005 1.2679500000000004 7.91799
5.327030000000001 4.047470000000001 7.87622
7.899100000000001 10.56576 6.68405
8.02116 8.00301 2.56791
8.01539 10.54873 9.32177
2.717800000000001 8.02017 8.01996
1.2875500000000006 7.95382 5.35947
3.981490000000001 8.00712 5.27935
0.047710000000000904 6.701300000000001 8.02541
10.53454 9.27538 7.94586
2.603840000000001 5.37826 6.65237
2.6502600000000007 5.257820000000001 9.27591
5.30975 5.33548 5.25863
7.9815200000000015 7.931470000000001 7.94727
6.58336

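Convert every entry of the FitSNAP `data` list to JARVIS Atoms and collect the results, together with energies, forces and stresses, in a list of dictionaries.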

In [26]:
from jarvis.db.jsonutils import dumpjson
# Collect one plain dictionary per FitSNAP configuration: the structure as a
# JARVIS Atoms dict plus an id, the energy (total and per atom), the forces
# and the stress.
mem = []
for ii, i in enumerate(data):
    atms = fs_to_jatoms(i)
    ta_id = f"Ta_fit_{ii}"
    info = {
        'atoms': atms.to_dict(),
        'id': ta_id,
        'energy': i['Energy'],
        'energy_per_atom': i['Energy'] / atms.num_atoms,
        # .tolist() turns the array-like values into nested Python lists
        'forces': i['Forces'].tolist(),
        'stress': i['Stress'].tolist(),
    }
    mem.append(info)



In [27]:
# Write the collected records to ta_fitsnap.json in the current working directory.
dumpjson(data=mem,filename='ta_fitsnap.json')

In [28]:
# List the working directory to confirm that ta_fitsnap.json was written.
!ls

docs	  fitsnap3lib  pyproject.toml  ta_fitsnap.json	tutorial.ipynb
examples  LICENSE      README.md       tests
fitsnap3  log.lammps   setup.cfg       tools
