# SCC-DFTB repulsive potential generation (Manybody)

Input: DFT_DB.db DFTB_DB.db CCS_DB.db 

Output: PiNN model to be used in conjunction with SCC-DFTB+CCS+PiNN calculations. 

In [None]:
import os

# Root directory of the local PARAUTOMATIK installation; edit before running.
PARAUTOMATIK_PATH = "WRITE-Your-PATH-PARAUTOMATIK-Here"

# The CPU is used for test generation. Production runs should use a GPU,
# which is much faster: put the CUDA device id(s) in the string below.
# An empty string exposes no CUDA device, i.e. the run stays on the CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

In [None]:
import os
import sys
import warnings
from glob import glob
from collections import OrderedDict, defaultdict

import tensorflow as tf
from ase.collections import g2
import ase.db
import pinn
from pinn.io import load_ase, sparse_batch

# Remember the directory the notebook was started from; every later cell
# returns here after temporarily working inside sub-directories.
base_dir=os.getcwd()
os.chdir(base_dir)
print('Base dir:', base_dir)

# The PARAUTOMATIK scripts directory must be on sys.path *before* anything is
# imported from it; appending it after the import below would have no effect
# on that import.
sys.path.append(PARAUTOMATIK_PATH+'/pgm/python_scripts')
from rep_parautomatik import generate_pinndata


# PINN heavily use indexed slices to do sparse summations,
# which causes tensorflow to complain,
# we believe it's safe to ignore this warning.
index_warning = 'Converting sparse IndexedSlices'
warnings.filterwarnings('ignore', index_warning)

# Generate the dataset 
Here we use DFT_DB.db, DFTB_DB.db and CCS_DB.db to create the training set for the machine-learning potential. The output of this step is trainset.xyz. 
trainset.xyz is the training set for the SCC-DFTB+CCS+PiNN model. 

In [None]:
# Generate the data set inside <base_dir>/PiNN/.
os.chdir(base_dir)
pinn_dir = os.path.join(base_dir, "PiNN")
# exist_ok=True replaces the isdir()-then-mkdir() pair, which could fail if
# the directory appeared between the check and the creation.
os.makedirs(pinn_dir, exist_ok=True)

# Open the databases through absolute paths so the connections do not depend
# on the working directory, which is changed below.
db_ccs=ase.db.connect(os.path.join(base_dir, "CCS_DB.db"))     # CCS database
db_dft=ase.db.connect(os.path.join(base_dir, "DFT_DB.db"))     # DFT database, original keys for structure
db_dftb=ase.db.connect(os.path.join(base_dir, "DFTB_DB.db"))   # DFTB database

# generate_pinndata is run from within PiNN/ (presumably it writes its output
# into the current directory -- verify against rep_parautomatik).
os.chdir(pinn_dir)
try:
    generate_pinndata(db_dft,db_dftb,db_ccs)
finally:
    # Always return to base_dir, even if the generation step raises.
    os.chdir(base_dir)

In [2]:
from pinn.io import load_ase, sparse_batch, write_tfrecord

os.chdir(base_dir)

# Absolute path to the training set. The functions below are only evaluated
# later, when they are called, so a relative file name would be resolved
# against whatever the working directory happens to be at that moment rather
# than against PiNN/.
filelist = os.path.join(base_dir, "PiNN", "trainset.xyz")


def dataset():
    """Load the training file and split it 8:2 into 'train' and 'test'."""
    return load_ase(filelist, splits={'train':8, 'test':2})


def train():
    """Input function for training: repeated, shuffled, batches of one."""
    return dataset()['train'].repeat().shuffle(1000).apply(sparse_batch(1))


def test():
    """Input function for evaluation: the 'test' split in batches of one."""
    return dataset()['test'].apply(sparse_batch(1))


os.chdir(base_dir)

# Train PiNN 
Here we use the generated data set, split 80:20 into training and test data, to train the PiNN model. 

In [None]:
# start tensorboard to enable us to display results from the training. Don't forget to update the web page after you started the training.  
os.chdir(base_dir)
%load_ext tensorboard
%tensorboard --logdir base_dir

In [None]:
# Input for the training of the PiNN model.
from pinn import get_model
from pinn.utils import init_params
import ase.db as db

os.chdir(base_dir)

# Collect the distinct atomic numbers found in the database, keeping the
# order in which they first appear.
# NOTE(review): the handle is named DFT_DB but opens DFTB_DB.db -- confirm
# which database is intended.
DFT_DB = db.connect("DFTB_DB.db")
elements = list(dict.fromkeys(
    number
    for entry in DFT_DB.select()
    for number in entry.toatoms().get_atomic_numbers()
))
print(elements)

# The model directory below is relative ('./'), so the model is set up from
# within PiNN/.
os.chdir(base_dir+"/PiNN/")

# Network architecture.
network_spec = {
    'name': 'PiNet',
    'params': {
        'pp_nodes': [8, 8],
        'pi_nodes': [8, 8],
        'ii_nodes': [8, 8],
        'basis_type': 'gaussian',
        'n_basis': 5,
        'depth': 4,
        'rc': 4.0,
        'atom_types': elements,
    },
}

# Potential model settings.
model_spec = {
    'name': 'potential_model',
    'params': {
        'use_force': True,
        'no_force_comp': 20,
        'separate_errors': True,
    },
}

# Optimizer settings.
optimizer_spec = {
    'class_name': 'EKF',
    'config': {
        'learning_rate': 0.001,
        'q_0': 0.0,
        'q_min': 0.0,
    },
}

# Complete input to PiNN.
params = {
    'model_dir': './',
    'network': network_spec,
    'model': model_spec,
    'optimizer': optimizer_spec,
}

# Initialise the parameters from the training split, then build the model.
init_params(params, dataset()['train'])
model = get_model(params)

os.chdir(base_dir)

In [13]:
# Describe the training and evaluation runs handed to tf.estimator.
os.chdir(base_dir)
os.chdir(base_dir + "/PiNN/")

# 250000 steps is a starting point; check the model and increase if necessary.
train_spec = tf.estimator.TrainSpec(max_steps=250000, input_fn=train)
eval_spec = tf.estimator.EvalSpec(input_fn=test)

os.chdir(base_dir)

In [None]:
# Train the model from within PiNN/ using the train/eval specs defined above.
os.chdir(base_dir)
os.chdir(base_dir+"/PiNN/")
try:
    tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
finally:
    # Return to base_dir even when training fails or is interrupted: a later
    # cell re-derives base_dir from os.getcwd(), so being left inside PiNN/
    # would silently point it at the wrong directory.
    os.chdir(base_dir)

# Generate PiNN_DB.db for Model Analysis

Evaluate the trained model on the structures in DFTB_DB.db and store the results in PiNN_DB.db. Run Model_Analysis.ipynb to check the model. 

In [8]:
from ase import Atoms
from tqdm import tqdm 
from ase import io
from ase.io import read, write
import numpy as np
import pinn
from ase.calculators.mixing import LinearCombinationCalculator
import ase.db as db
from ase.db.row import atoms2dict

# NOTE: base_dir is re-read from the current working directory here, so this
# cell has to be run while the working directory is the project root.
base_dir = os.getcwd()
os.chdir(base_dir)

# Calculator loaded once from the model directory and shared by every
# calculator that setup_model() returns.
PiNN_calc = pinn.get_calc(base_dir + '/PiNN/')


def setup_model(**kwargs):
    """Return a calculator that wraps the PiNN calculator with weight 1.

    Any keyword arguments are accepted but not used.
    """
    return LinearCombinationCalculator([PiNN_calc], [1])

In [None]:
# Evaluate the model on every structure in DFTB_DB.db and store the
# structures, with their calculated results, in PiNN_DB.db.
dbname_DFTB='DFTB_DB.db'
dbname_PiNN='PiNN_DB.db'
db_dftb=db.connect(dbname_DFTB)
db_pinn=db.connect(dbname_PiNN)
for row in tqdm(db_dftb.select()):
    structure=row.toatoms()
    try: 
        structure.calc=setup_model()
        structure.get_potential_energy()
        structure.get_forces()
        # Store the forces as float64 (presumably the model returns a lower
        # precision that the database should not receive -- TODO confirm).
        structure.calc.results['forces']=np.array(structure.calc.results['forces'], np.float64)
        db_pinn.write(structure,key=row.key)
    except Exception as err:
        # Best effort: skip the failing structure but say which one failed and
        # why. Catching Exception (not a bare except) lets KeyboardInterrupt
        # and SystemExit stop the loop.
        print('Error', 'row id:', row.id, repr(err))
os.chdir(base_dir) 