In [2]:
from ase.io.extxyz import read_xyz
import numpy as np
import json
import os

In [218]:
# Read every frame (slice 0..end) of the extended-XYZ file into an in-memory list.
# NOTE(review): absolute, machine-specific path; the notebook cannot be re-run elsewhere as-is.
file = '/home/joe/mlip_test/gp_iter6_liqamo_lp.xyz'
db = list(read_xyz(file, index=slice(0,None)))
# First four entries of db; a small subset that a later cell passes to write_json_db.
db_min = db[0:4]

In [21]:
def write_json_db(a, d="", force_name='dft_force', virial_name='dft_virial', energy_name='dft_energy',
                  weights_dict=None, weights_default=(100, 1, 1e-8)):
    """Export a list of atoms objects as one JSON file per configuration plus a ``grouplist`` file.

    Files produced (relative to ``d``)::

        <d>/JSON/<group>/<group>_<index>.json   one file per exported configuration
        <d>/grouplist                           one line per non-empty group

    NOTE(review): this JSON + grouplist layout looks like the input of a SNAP fitting
    code; confirm against the consumer before changing any key names.

    Grouping and filtering rules:
      * Configurations are grouped by ``atoms.info['config_type']`` (``'default'`` if absent).
      * A configuration whose cell has a negative determinant (left-handed axes) is
        skipped with a printed warning and is not counted in ``grouplist``.
      * A configuration without ``virial_name`` in ``atoms.info`` is written to the group
        ``<group>_no_stress`` with an all-zero stress; a group created this way gets the
        weights of its source group with the last (virial) weight set to 0.
      * Any group whose name contains ``'no_stress'`` is written with an all-zero stress.

    WARNING: only positions, cell, chemical symbols, forces, energy and stress are stored;
    all other arrays and ``info`` entries are lost.

    Params:
        a: iterable of atoms objects (ASE ``Atoms`` interface: ``info``, ``get_cell``,
            ``get_positions``, ``get_chemical_symbols``, ``get_volume``, ``get_array``).
        d: output base directory ("" means the current working directory).
        force_name: key of the per-atom force array.
        virial_name: ``info`` key of the virial (9 values, any array-like).
        energy_name: ``info`` key of the total energy.
        weights_dict: optional ``{group: [eweight, fweight, vweight]}``; not modified.
        weights_default: weights for groups that are missing from ``weights_dict``.

    Neither the input atoms nor ``weights_dict`` are modified, so repeated calls with
    the same inputs give the same output. Existing output directories are reused;
    stale files from an earlier export into the same directory are NOT removed.
    """
    json_dir = os.path.join(d, "JSON")
    os.makedirs(json_dir, exist_ok=True)

    # Private copies: never mutate the caller's dict (or a shared default) between calls.
    weights = {label: list(w) for label, w in (weights_dict or {}).items()}
    fallback_weights = list(weights_default)

    # Group configurations by label, keeping first-appearance order.
    groups = {}
    for atoms in a:
        groups.setdefault(atoms.info.get('config_type', 'default'), []).append(atoms)

    group_sizes = {}   # label -> number of configurations actually written to that group
    next_index = {}    # label -> file index for the next configuration moved into that group
    for label, members in groups.items():
        group_sizes[label] = len(members)
        next_index[label] = len(members)
        weights.setdefault(label, list(fallback_weights))
        os.makedirs(os.path.join(json_dir, label), exist_ok=True)

    for label, members in groups.items():
        for ct, atoms in enumerate(members):
            lattice = np.asarray(atoms.get_cell())
            det = np.linalg.det(lattice)
            if det < 0:
                print('Warning, left-handed axes detected, skipping this config:')
                print(det, label, 'No.', ct)
                group_sizes[label] -= 1
                continue

            out_label, out_index = label, ct
            if 'no_stress' in label:
                stress = np.zeros((3, 3))
            elif virial_name in atoms.info:
                # Accept any array-like virial, not only ndarray.
                virial = np.reshape(np.asarray(atoms.info[virial_name], dtype=float), (3, 3))
                # Need to check that this is the correct form for the stress:
                # currently as Castep outputs (i.e. non-virial (mechanical) stress, converted to Bar
                # NOTE(review): 1e4 is the GPa -> bar factor; if the virial is in eV the
                # eV/A^3 -> bar factor would be ~1.602e6 instead. Verify the input units.
                stress = -1e4 * virial / atoms.get_volume()
            else:
                # No virial available: route to the matching *_no_stress group.
                out_label = label + '_no_stress'
                if out_label not in group_sizes:
                    group_sizes[out_label] = 0
                    next_index[out_label] = 0
                    weights[out_label] = weights[label][:-1] + [0]  # set stress weight to 0
                    os.makedirs(os.path.join(json_dir, out_label), exist_ok=True)
                out_index = next_index[out_label]
                next_index[out_label] += 1
                group_sizes[label] -= 1
                group_sizes[out_label] += 1
                stress = np.zeros((3, 3))

            data = {"Dataset": {"Data": [{
                    "Stress": stress.tolist(),
                    "Positions": np.asarray(atoms.get_positions(wrap=True)).tolist(),
                    # np.asarray(...).tolist() also handles a plain Python float energy.
                    "Energy": np.asarray(atoms.info[energy_name]).tolist(),
                    "AtomTypes": atoms.get_chemical_symbols(),
                    "Lattice": lattice.tolist(),
                    "NumAtoms": len(atoms),
                    "Forces": np.asarray(atoms.get_array(force_name)).tolist(),
                    }],
                    "PositionsStyle": "angstrom", "AtomTypeStyle": "chemicalsymbol",
                    "Label": out_label, "StressStyle": "bar",
                    "LatticeStyle": "angstrom", "EnergyStyle": "electronvolt",
                    "ForcesStyle": "electronvoltperangstrom"
                    }}

            out_path = os.path.join(json_dir, out_label, '{}_{}.json'.format(out_label, out_index))
            with open(out_path, 'w') as f:
                f.write('# Comment line\n')
                json.dump(data, f)

    # The group list sits next to the JSON directory; groups left empty are omitted.
    with open(os.path.join(d, 'grouplist'), 'w') as f:
        f.write('# name size eweight fweight vweight\n')
        for label, size in group_sizes.items():
            if size > 0:
                f.write('{:40s}     {:<6d}     {:<7.10f}     {:<7.10f}    {:<7.10f}\n'.format(label, size, *weights[label]))

In [176]:
# Export the full db with default key names and weights; with d="" the output goes to
# ./JSON/ and ./grouplist relative to the current working directory.
write_json_db(db)

In [4]:
# Change the kernel's working directory; later write_json_db calls that use a relative
# (or empty) d write beneath this folder.
# NOTE(review): absolute, machine-specific path, and process-wide state that affects every later cell.
os.chdir('/home/joe/Documents/Ge_od/Potentials/SNAP_pots/litDB_full_costbased_paper')


In [89]:
# Export only the four-entry db_min subset, using all defaults.
# NOTE(review): the recorded output below (lists of labels/counts) matches no print
# statement in the current write_json_db; it appears to be stale, so re-run to refresh.
write_json_db(db_min)

['isolated_atom', 'liq']
[1]
[64, 64, 64]


In [5]:
# Rebind `file` to a second database and read every frame (slice 0..end) into a list.
# NOTE(review): absolute, machine-specific path.
file = '/home/joe/Documents/Ge_od/Structure_databases/gp_iter6_sparse9k.xml.xyz'
litDB_full = list(read_xyz(file, index=slice(0,None)))


In [22]:
# Export litDB_full with explicit [eweight, fweight, vweight] triples for four config types;
# every other config type falls back to weights_default. Output goes to the current directory (d="").
write_json_db(litDB_full, weights_dict={'amorph':[2918, 21, 1e-8], 'liq':[2918, 21, 1e-8],
                                        'vacancy':[9308, 26, 1e-8], 'interstitial':[9308, 26, 1e-8]},
              weights_default=[2959, 62, 1e-5])

-328.14806848881676 decohesion No. 11
-325.2042066684052 decohesion No. 12
-336.2834397744042 decohesion No. 13
-354.1002531456985 decohesion No. 14
-358.28414584870234 decohesion No. 15
-353.528383869545 decohesion No. 16
-373.9481675578679 decohesion No. 17
-381.1629262665485 decohesion No. 18
-393.1966652384923 decohesion No. 19
-403.13971540917254 decohesion No. 20
-398.47840456477996 decohesion No. 21


In [227]:
# Export litDB_full into ./sw0_001/ with weights_default [100, 1, 1e-3] (third entry is the virial weight).
write_json_db(litDB_full, d='sw0_001', weights_default=[100, 1, 1e-3])



-328.14806848881676 decohesion No. 11
-325.2042066684052 decohesion No. 12
-336.2834397744042 decohesion No. 13
-354.1002531456985 decohesion No. 14
-358.28414584870234 decohesion No. 15
-353.528383869545 decohesion No. 16
-373.9481675578679 decohesion No. 17
-381.1629262665485 decohesion No. 18
-393.1966652384923 decohesion No. 19
-403.13971540917254 decohesion No. 20
-398.47840456477996 decohesion No. 21


In [229]:
# Export litDB_full into ./sw0_1/ with weights_default [100, 1, 1e-1] (third entry is the virial weight).
write_json_db(litDB_full, d='sw0_1', weights_default=[100, 1, 1e-1])

-328.14806848881676 decohesion No. 11
-325.2042066684052 decohesion No. 12
-336.2834397744042 decohesion No. 13
-354.1002531456985 decohesion No. 14
-358.28414584870234 decohesion No. 15
-353.528383869545 decohesion No. 16
-373.9481675578679 decohesion No. 17
-381.1629262665485 decohesion No. 18
-393.1966652384923 decohesion No. 19
-403.13971540917254 decohesion No. 20
-398.47840456477996 decohesion No. 21
