# Example: From DFTTK to ESPEI

The following code snippets generate ESPEI datasets from a QHA database, optionally writing the (nicely named) files to disk.<br/>

### Add phase name and sublattice information into metadata

The QHA database requires the following metadata schema:

```python
metadata_dict = {
    'phase_name': 'FCC_A1',
    'tag': 'ed447049-ad67-4090-ba99-378188d3416b',
    'sublattice': {
        'configuration': [['Cr', 'Ni']],
        'occupancies': [[0.03125, 0.96875]]
    },
}
```

The dfttk `PRLStructure` class will automatically analyze the structure and get the sublattice configuration, sublattice occupancies, and sublattice site ratios for the structure. You can also define the sublattice information manually.

If the metadata does not contain the sublattice information and the phase name, you can add this information with the following input and script.<br/>
#### Input (can be edited)
Phase name, metadata tag for targeted result, absolute path for db.json file to connect to the MongoDB, and the collection in the MongoDB database.

In [1]:
# Name of the MongoDB collection to look in.
collection = "qha_phonon"
# Absolute path of the db.json file used to connect to MongoDB.
db_file = "/Users/thea/Desktop/dfttk/config/db.json"
# Metadata tag that identifies the targeted result.
metadata_tag = "e645e352-a307-49d5-9ff8-cfb3bf868297"
# Phase name to record in the metadata of the targeted result.
phase_name = "BCC_B2"

#### Script

In [2]:
from pymatgen.core import Structure
from dfttk import PRLStructure
from atomate.vasp.database import VaspCalcDb
import json
import os

# Connect to MongoDB using the settings in the db.json file and select the
# collection to work on.
vasp_db = VaspCalcDb.from_db_file(db_file, admin=True)
result_db = vasp_db.db[collection]

# Look up the document whose metadata tag matches the requested one.
tag_results = result_db.find_one({"metadata.tag": metadata_tag})
if tag_results is None:
    # find_one returns None when nothing matches; stop here with a clear
    # message instead of failing with a TypeError on the subscript below.
    raise LookupError(
        "No document with metadata.tag {!r} found in collection {!r}".format(
            metadata_tag, collection
        )
    )

# Rebuild the structure stored in the document and read the sublattice
# attributes exposed by PRLStructure.
structure = Structure.from_dict(tag_results['structure'])
ps = PRLStructure.from_structure(structure)
subl_configuration = ps.sublattice_configuration
subl_occ = ps.sublattice_occupancies
subl_ratios = ps.sublattice_site_ratios

  from tqdm.autonotebook import tqdm


In [3]:
# Metadata to store on the targeted document: the phase name, the tag, and
# the sublattice configuration/occupancies obtained for the structure.
metadata_dict = dict(
    phase_name=phase_name,
    tag=metadata_tag,
    sublattice=dict(
        configuration=subl_configuration,
        occupancies=subl_occ,
    ),
)

In [4]:
# Set the `metadata` field of the document matching the tag to metadata_dict.
# `$set` on 'metadata' replaces that whole field; upsert=True asks MongoDB to
# insert a document when no document matches the filter.
result_db.find_one_and_update(
    {'metadata.tag': metadata_tag},
    {'$set': {'metadata': metadata_dict}},
    upsert=True,
)

{'_id': ObjectId('61bf6bc6847c4c530d8c2193'),
 'structure': {'@module': 'pymatgen.core.structure',
  '@class': 'Structure',
  'charge': None,
  'lattice': {'matrix': [[3.115615, 0.0, 0.0],
    [0.0, 3.115615, 0.0],
    [0.0, 0.0, 3.115615]],
   'a': 3.115615,
   'b': 3.115615,
   'c': 3.115615,
   'alpha': 90.0,
   'beta': 90.0,
   'gamma': 90.0,
   'volume': 30.24345185987023},
  'sites': [{'species': [{'element': 'Al', 'occu': 1}],
    'abc': [0.0, 0.0, 0.0],
    'xyz': [0.0, 0.0, 0.0],
    'label': 'Al',
    'properties': {'magmom': -0.0}},
   {'species': [{'element': 'Ni', 'occu': 1}],
    'abc': [0.5, 0.5, 0.5],
    'xyz': [1.5578075, 1.5578075, 1.5578075],
    'label': 'Ni',
    'properties': {'magmom': -0.0}}]},
 'formula_pretty': 'AlNi',
 'elements': ['Al', 'Ni'],
 'metadata': {'phase_name': 'BCC_B2',
  'tag': 'e645e352-a307-49d5-9ff8-cfb3bf868297',
  'sublattice': {'configuration': [['AL', 'NI']],
   'occupancies': [[0.5, 0.5]]}},
 'has_phonon': True,
 'phonon': {'pressure': 0

### ESPEI datasets from a QHA database

Get ESPEI datasets from the MongoDB database with the following input and script.

#### Input (can be edited)
Absolute path for db.json file to connect to the MongoDB, and the collection in the MongoDB database, metadata tag for reference states, phase name, configuration to find, and sublattice site ratios.

In [5]:
# Database connection: absolute path of db.json and the collection to query.
db_file = "/Users/thea/Desktop/dfttk/config/db.json"
collection = "qha_phonon"

# Metadata tag of the reference-state calculation for each element.
refstate_tags = {
    "Al": "3c01629a-b287-41f5-8826-d77912f54177",
    "Ni": "8427ba90-3181-4632-aa49-5823d4aa5082",
}

# Phase and sublattice description of the results to collect. Both sublattice
# values come from the earlier PRLStructure step, or can be entered manually.
phase_name = "BCC_B2"
configuration_to_find = subl_configuration
sublattice_site_ratios = subl_ratios

# Index of the 300 K temperature (close to 298 K), found by hand.
temperature_index = 59

# Truthy value: write the datasets to JSON files.
WRITE_FILES = 1

#### Script

In [6]:
from dfttk.analysis.formation_energies import get_formation_energy, get_thermal_props
from dfttk.espei_compat import make_dataset, dfttk_config_to_espei, dfttk_occupancies_to_espei
from pymatgen.core import Structure
import numpy as np
from dfttk.utils import recursive_flatten
import json
from pymongo import MongoClient

Get the energies for the reference states; energies are in J/mol-atom.

In [7]:
# Connect to MongoDB using the settings in the db.json file and select the
# collection to query.
vasp_db = VaspCalcDb.from_db_file(db_file, admin=True)
coll = vasp_db.db[collection]

# Map each reference element to the thermal properties of its tagged result.
refstate_energies = {}
for el, tag in refstate_tags.items():
    qha_result = coll.find_one({'metadata.tag': tag})
    if qha_result is None:
        # find_one returns None when nothing matches; report which reference
        # state is missing rather than passing None to get_thermal_props.
        raise LookupError(
            "No reference-state document for {!r} with metadata.tag {!r} "
            "found in collection {!r}".format(el, tag, collection)
        )
    refstate_energies[el] = get_thermal_props(qha_result)

Get the energies for the targeted phase

In [9]:
# Accumulators: one entry per QHA result matching the phase and configuration.
configs     = []
occupancies = []
hm_values   = []
sm_values   = []
cpm_values  = []
result_tags = []  # metadata tags of the results that go into the datasets
# Conditions for the datasets; the 'T' placeholders are filled in after the loop.
fixed_conds = {'P': 101325, 'T': 0}
temp_conds = {'P': 101325, 'T': 0}
# Stride used for the CPM data: passed as `thin` and used to slice the
# temperatures, so both stay in step.
cpm_stride = 10

query = {
    'metadata.sublattice.configuration': configuration_to_find,
    'metadata.phase_name': phase_name,
}
for qha_res in coll.find(query):
    metadata = qha_res['metadata']
    configs.append(metadata['sublattice']['configuration'])
    occupancies.append(metadata['sublattice']['occupancies'])
    result_tag = metadata.get('tag')
    if result_tag is not None and str(result_tag) not in result_tags:
        result_tags.append(str(result_tag))

    tprops = get_thermal_props(qha_res)
    struct = Structure.from_dict(qha_res['structure'])
    # HM and SM are evaluated at the single temperature index chosen above.
    hm_form = get_formation_energy(tprops, struct, refstate_energies, 'HM', idx=temperature_index)
    sm_form = get_formation_energy(tprops, struct, refstate_energies, 'SM', idx=temperature_index)
    # CPM is thinned and its last two points are dropped; the temperatures
    # below are sliced the same way so the two line up.
    cpm_form = get_formation_energy(tprops, struct, refstate_energies, 'CPM', thin=cpm_stride)[:-2]
    fixed_temp = tprops['T'][temperature_index]
    cpm_temps = tprops['T'][::cpm_stride][:-2]

    hm_values.append(hm_form)
    sm_values.append(sm_form)
    cpm_values.append(cpm_form)

if not configs:
    # Without any match the temperatures below would be undefined (or stale
    # from an earlier run), so stop with a clear message.
    raise LookupError(
        "No QHA results found for phase {!r} with configuration {!r}".format(
            phase_name, configuration_to_find
        )
    )

# The temperatures of the last processed result are used for the conditions.
fixed_conds['T'] = fixed_temp.tolist()
temp_conds['T'] = cpm_temps.tolist()

# make the HM, SM, CPM values arrays of the proper shape
hm_values = np.array([[hm_values]])
sm_values = np.array([[sm_values]])
cpm_values = np.array(cpm_values).T[np.newaxis, ...]

if WRITE_FILES:
    # Tag the datasets with the tag(s) of the results they were built from,
    # not with whatever `tag` variable is left over from an earlier loop.
    # NOTE(review): several tags are joined with ','; confirm make_dataset
    # accepts such a combined string.
    dataset_tag = ','.join(result_tags)
    # write JSON files
    comps = [c.upper() for c in sorted(recursive_flatten(configuration_to_find))]
    datasets = [
        ('HM_FORM', hm_values, fixed_conds),
        ('SM_FORM', sm_values, fixed_conds),
        ('CPM_FORM', cpm_values, temp_conds),
    ]
    for prop, vals, conds in datasets:
        ds = make_dataset(phase_name, prop, sublattice_site_ratios, configs, conds, vals, occupancies=occupancies, tag=dataset_tag)
        with open('{}-{}-{}-DFTTK.json'.format('-'.join(comps), phase_name, prop), 'w') as fp:
            json.dump(ds, fp)