#### Encode the ground truth latent space of Ag 

In [1]:
from pymatgen.core import Structure, Lattice
from pymatgen.io.ase import AseAtomsAdaptor
from chgnet.model import StructOptimizer

# Instantiate CHGNet's structure optimizer once; its calculator and model
# are reused by later cells for relaxation and crystal-feature extraction.
relaxer = StructOptimizer()

CHGNet initialized with 400,438 parameters
CHGNet will run on cpu




In [2]:
# Reference structures of Ag from Materials Project.
# All CIF files are parsed first; the ASE conversions follow below.
ag_structure = Structure.from_file("/mnt/c/Users/Lenovo/Downloads/cdvae2/structures_GA/ag_cif/Ag_124.cif")  # id=mp-124
ag_structure1 = Structure.from_file("/mnt/c/Users/Lenovo/Downloads/cdvae2/structures_GA/ag_cif/Ag_8566.cif")  # id=mp-8566
ag_structure2 = Structure.from_file("/mnt/c/Users/Lenovo/Downloads/cdvae2/structures_GA/ag_cif/Ag_10597.cif")  # id=mp-10597
ag_structure3 = Structure.from_file("/mnt/c/Users/Lenovo/Downloads/cdvae2/structures_GA/ag_cif/Ag_989737.cif")  # id=mp-989737
ag_structure4 = Structure.from_file("/mnt/c/Users/Lenovo/Downloads/cdvae2/structures_GA/ag_cif/Ag_2646971.cif")  # id=mp-2646971

# ASE Atoms counterpart of each pymatgen structure, in the same order
ag_atoms = AseAtomsAdaptor.get_atoms(ag_structure)
ag_atoms1 = AseAtomsAdaptor.get_atoms(ag_structure1)
ag_atoms2 = AseAtomsAdaptor.get_atoms(ag_structure2)
ag_atoms3 = AseAtomsAdaptor.get_atoms(ag_structure3)
ag_atoms4 = AseAtomsAdaptor.get_atoms(ag_structure4)



In [3]:
# Convert the reference structure (mp-124) into the model's graph input
b_ref = relaxer.calculator.model.graph_converter(ag_structure)
# Run the model on a one-graph batch and keep the "crystal_fea" entry of that
# graph; h_ref is the reference vector every candidate is compared against.
h_ref = relaxer.calculator.model.forward([b_ref], return_crystal_feas=True)["crystal_fea"][0]

In [4]:
from ase.calculators.singlepoint import SinglePointCalculator
from ase.ga import set_raw_score
import io
from ase.constraints import ExpCellFilter
import contextlib
from ase.optimize import FIRE
import ase
from ase.build import niggli_reduce
from torch.nn import functional as F

def atoms_to_structure(atoms):
    """Build a pymatgen ``Structure`` from an ASE ``Atoms`` object.

    The lattice comes from ``atoms.cell``; the sites are created from the
    chemical symbols and the Cartesian positions of ``atoms``.
    """
    return Structure(
        Lattice(atoms.cell),
        atoms.get_chemical_symbols(),
        atoms.get_positions(),
        coords_are_cartesian=True,
    )

def finalize(atoms, energy=None, forces=None, stress=None):
    """Freeze results on ``atoms`` and record its GA raw score.

    The atoms are wrapped, a ``SinglePointCalculator`` holding the given
    energy, forces and stress is attached, and the raw score is set to
    the negative of the stored potential energy.
    """
    atoms.wrap()
    atoms.calc = SinglePointCalculator(
        atoms, energy=energy, forces=forces, stress=stress
    )
    # Higher raw score is better, hence the sign flip on the energy
    set_raw_score(atoms, -atoms.get_potential_energy())

def relax(atoms, cellbounds=None):
    """Relax ``atoms`` with CHGNet and score it against the reference.

    Runs up to 10 rounds of variable-cell FIRE optimisation (100 steps
    each, fmax=1e-3), followed by one fixed-cell FIRE run (100 steps,
    fmax=1e-2). The relaxed structure's crystal feature vector is then
    compared with the module-level ``h_ref`` by cosine similarity, and
    ``finalize`` is called with ``energy=-cos_sim`` so that the resulting
    raw score equals the similarity.

    If ``cellbounds`` is given and the cell is still out of bounds after
    a Niggli reduction, the candidate is finalized with an energy of 1e9
    and the function returns early.
    """
    atoms.calc = relaxer.calculator  # CHGNet supplies energies, forces, stress

    # Anything printed during the variable-cell stage is captured here
    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        for _ in range(10):
            if cellbounds is not None:
                if not cellbounds.is_within_bounds(atoms.get_cell()):
                    niggli_reduce(atoms)
                if not cellbounds.is_within_bounds(atoms.get_cell()):
                    # Still outside the allowed cell shapes even after the
                    # Niggli reduction: give the candidate an absurdly high
                    # energy so that it is discarded.
                    finalize(atoms, 1e9)
                    return

            cell_filter = ExpCellFilter(atoms)
            cell_opt = FIRE(cell_filter, maxstep=0.2, logfile=None,
                            trajectory=None)
            cell_opt.run(fmax=1e-3, steps=100)

            if cell_opt.converged():
                break

    # Final pass on the atomic positions only (cell held fixed)
    pos_opt = FIRE(atoms, maxstep=0.2, logfile=None, trajectory=None)
    pos_opt.run(fmax=1e-2, steps=100)

    atoms.get_potential_energy()  # evaluated with forces/stress; value not stored
    forces = atoms.get_forces()
    stress = atoms.get_stress()

    # Crystal feature vector of the relaxed candidate
    graph = relaxer.calculator.model.graph_converter(atoms_to_structure(atoms))
    crystal_fea = relaxer.calculator.model.forward(
        [graph], return_crystal_feas=True
    )["crystal_fea"][0]
    cos_sim = float(F.cosine_similarity(h_ref, crystal_fea, dim=0, eps=1e-8))

    finalize(atoms, energy=-cos_sim, forces=forces, stress=stress)

In [8]:
from pathlib import Path

# Remove a database left over from a previous run before it is recreated.
# missing_ok=True keeps this cell from raising FileNotFoundError when no
# database exists yet (e.g. on the very first run).
Path('Ag.db').unlink(missing_ok=True)

In [9]:
from ase import Atoms
from ase.data import atomic_numbers
from ase.ga.utilities import closest_distances_generator, CellBounds
from ase.ga.startgenerator import StartGenerator
from ase.ga.data import PrepareDB

# Number of random initial structures to generate
N = 20

# Target cell volume for the initial structures, in angstrom^3
volume = 69

# Number of Ag atoms per unit cell
natoms = 4

# Building blocks for the start generator: one 'Ag' entry per atom
blocks = ['Ag'] * natoms

# Define the composition of the atoms to optimize
Z = atomic_numbers['Ag']
# Minimal allowed interatomic distances, derived from the covalent
# radii with a ratio of 0.5
blmin = closest_distances_generator(atom_numbers=[Z],
                                    ratio_of_covalent_radii=0.5)

# Specify reasonable bounds on the minimal and maximal
# cell vector lengths (in angstrom) and angles (in degrees)
cellbounds = CellBounds(bounds={'phi': [35, 145], 'chi': [35, 145],
                                'psi': [35, 145], 'a': [2, 50],
                                'b': [2, 50], 'c': [2, 50]})

# Choose an (optional) 'cell splitting' scheme which basically
# controls the level of translational symmetry (within the unit
# cell) of the randomly generated structures. Here a 1:1 ratio
# of splitting factors 2 and 1 is used:
splits = {(2,): 1, (1,): 1}
# There will hence be a 50% probability that a candidate
# is constructed by repeating a randomly generated Ag2
# structure (half of the natoms = 4 atoms) along a randomly
# chosen axis. In the other 50% of cases, no cell splitting
# will be applied.

# The 'slab' object in the GA serves as a template
# in the creation of new structures, which inherit
# the slab's atomic positions (if any), cell vectors
# (if specified), and periodic boundary conditions.
# Here only the last property is relevant:
slab = Atoms('', pbc=True)

# Initialize the random structure generator
sg = StartGenerator(slab, blocks, blmin, box_volume=volume,
                    number_of_variable_cell_vectors=3,
                    cellbounds=cellbounds, splits=splits)

# Create the database
da = PrepareDB(db_file_name='Ag.db',
               stoichiometry=[Z] * natoms)

# Generate N random structures
# and add them to the database
for i in range(N):
    a = sg.get_new_candidate()
    da.add_unrelaxed_candidate(a)

In [10]:
from ase.io import write
from ase.ga import get_raw_score
from ase.ga.data import DataConnection
from ase.ga.population import Population
from ase.ga.utilities import closest_distances_generator, CellBounds
from ase.ga.ofp_comparator import OFPComparator
from ase.ga.offspring_creator import OperationSelector
from ase.ga.standardmutations import StrainMutation, MirrorMutation
from ase.ga.soft_mutation import SoftMutation
from ase.ga.cutandsplicepairing import CutAndSplicePairing
import numpy as np

# Connect to the database and retrieve some information
da = DataConnection('Ag.db')
slab = da.get_slab()
atom_numbers_to_optimize = da.get_atom_numbers_to_optimize()
n_top = len(atom_numbers_to_optimize)

# Use Oganov's fingerprint functions to decide whether
# two structures are identical or not
comp = OFPComparator(n_top=n_top, dE=1.0,
                     cos_dist_max=1e-3, rcut=10., binwidth=0.05,
                     pbc=[True, True, True], sigma=0.05, nsigma=4,
                     recalculate=False)

# Define the cell and interatomic distance bounds
# that the candidates must obey
blmin = closest_distances_generator(atom_numbers_to_optimize, 0.5)

cellbounds = CellBounds(bounds={'phi': [35, 145], 'chi': [35, 145],
                                'psi': [35, 145], 'a': [2, 50],
                                'b': [2, 50], 'c': [2, 50]})

# Define a pairing operator with 100% (0%) chance that the first
# (second) parent will be randomly translated, and with each parent
# contributing to at least 15% of the child's scaled coordinates
pairing = CutAndSplicePairing(slab, n_top, blmin, p1=1., p2=0., minfrac=0.15,
                              number_of_variable_cell_vectors=3,
                              cellbounds=cellbounds, use_tags=False)

# Define a strain mutation with a typical standard deviation of 0.7
# for the strain matrix elements (drawn from a normal distribution)
strainmut = StrainMutation(blmin, stddev=0.7, cellbounds=cellbounds,
                           number_of_variable_cell_vectors=3,
                           use_tags=False)

# Define a soft mutation; we need to provide a dictionary with
# (typically rather short) minimal interatomic distances which
# is used to determine when to stop displacing the atoms along
# the chosen mode. The minimal and maximal single-atom displacement
# distances (in Angstrom) for a valid mutation are provided via
# the 'bounds' keyword argument.
blmin_soft = closest_distances_generator(atom_numbers_to_optimize, 0.1)
softmut = SoftMutation(blmin_soft, bounds=[2., 5.], use_tags=False)
# By default, the operator will update a "used_modes.json" file
# after every mutation, listing which modes have been used so far
# for each structure in the database. The mode indices start at 3
# as the three lowest frequency modes are translational modes.
# NOTE(review): softmut is constructed here but is not passed to the
# OperationSelector below, which uses a MirrorMutation in its place.

# Set up the relative probabilities for the different operators
# (weights 4 : 3 : 3 for pairing, mirror mutation, strain mutation)
operators = OperationSelector([4., 3., 3.],
                              [pairing, MirrorMutation(blmin, n_top), strainmut])

# Relax the initial candidates
while da.get_number_of_unrelaxed_candidates() > 0:
    a = da.get_an_unrelaxed_candidate()

    # relax() modifies `a` in place and attaches its raw score
    relax(a, cellbounds=cellbounds)
    da.add_relaxed_step(a)
    print(get_raw_score(a))

    # Candidates whose relaxed cell violates the bounds are stored
    # in the database but then marked as killed
    cell = a.get_cell()
    if not cellbounds.is_within_bounds(cell):
        da.kill_candidate(a.info['confid'])

# Initialize the population; `comp` is used as the comparator
# between candidates
population_size = 20
population = Population(data_connection=da,
                        population_size=population_size,
                        comparator=comp,
                        use_extinct=True)

# Update the scaling volume used in some operators
# based on a number of the best candidates
current_pop = population.get_current_population()
strainmut.update_scaling_volume(current_pop, w_adapt=0.5, n_adapt=4)
pairing.update_scaling_volume(current_pop, w_adapt=0.5, n_adapt=4)

# Test n_to_test new candidates; in this example we need
# only few GA iterations as the global minimum (FCC Ag)
# is very easily found (typically already after relaxation
# of the initial random structures).
n_to_test = 20

for step in range(n_to_test):
    print('Now starting configuration number {0}'.format(step))

    # Create a new candidate; keep drawing parents until the
    # selected operator actually returns an offspring
    a3 = None
    while a3 is None:
        a1, a2 = population.get_two_candidates()
        a3, desc = operators.get_new_individual([a1, a2])

    a3.set_pbc(np.array([True, True, True]))
    # Save the unrelaxed candidate
    da.add_unrelaxed_candidate(a3, description=desc)

    # Relax the new candidate and save it
    relax(a3, cellbounds=cellbounds)
    da.add_relaxed_step(a3)

    # If the relaxation has changed the cell parameters
    # beyond the bounds we disregard it in the population
    cell = a3.get_cell()
    if not cellbounds.is_within_bounds(cell):
        da.kill_candidate(a3.info['confid'])

    # Update the population
    population.update()

    # Report the operator description and the raw scores of both
    # parents and of the offspring
    current_pop = population.get_current_population()
    print('Step %d %s %.3f %.3f %.3f' % (step, desc, get_raw_score(a1), get_raw_score(a2), get_raw_score(a3)))
    print('Step %d highest raw score in pop: %.3f' % (step, get_raw_score(current_pop[0])))

    if step % 10 == 0:
        # Update the scaling volumes of the strain mutation
        # and the pairing operator based on the current
        # best structures contained in the population
        strainmut.update_scaling_volume(current_pop, w_adapt=0.5, n_adapt=4)
        pairing.update_scaling_volume(current_pop, w_adapt=0.5, n_adapt=4)
        write('current_population.traj', current_pop)

print('GA finished after step %d' % step)
# The raw score stored by relax() is the cosine similarity to the reference
# crystal feature vector, i.e. a dimensionless number, so it is reported
# without an energy unit.
hiscore = get_raw_score(current_pop[0])
print('Highest raw score = %8.4f' % hiscore)

# Dump every relaxed candidate stored in the database
all_candidates = da.get_all_relaxed_candidates()
write('all_candidates.traj', all_candidates)

# Dump the final population
current_pop = population.get_current_population()
write('current_population.traj', current_pop)

0.9990436434745789
0.9990450143814087
0.9990445375442505
0.9990602731704712
0.9990612268447876
0.9990618228912354
0.9990614652633667
0.9990608096122742
0.9990444183349609
0.9990606904029846
0.9990614056587219
0.998017430305481
0.9990612864494324
0.9990450143814087
0.9990613460540771
0.9990441203117371
0.9904617071151733
0.9990613460540771
0.9990612268447876
0.9980184435844421
Now starting configuration number 0
Step 0 mutation: mirror 0.999 0.999 0.999
Step 0 highest raw score in pop: 0.999
Now starting configuration number 1
Step 1 mutation: strain 0.999 0.998 0.999
Step 1 highest raw score in pop: 0.999
Now starting configuration number 2
Step 2 pairing: 21 3 0.998 0.999 0.999
Step 2 highest raw score in pop: 0.999
Now starting configuration number 3
Step 3 pairing: 18 21 0.990 0.998 0.998
Step 3 highest raw score in pop: 0.999
Now starting configuration number 4
Step 4 pairing: 48 4 0.998 0.999 0.999
Step 4 highest raw score in pop: 0.999
Now starting configuration number 5
Step 5 p

In [14]:
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pymatgen.analysis.structure_matcher import StructureMatcher
from pymatgen.io.ase import AseAtomsAdaptor
from pymatgen.core import Structure

def is_match(atoms, symprec=0.001):
    """Print whether ``atoms`` matches the reference ``ag_structure``.

    ``atoms`` is converted to a pymatgen structure and brought to its
    conventional standard setting (symmetry tolerance ``symprec``). That
    structure is compared with the module-level ``ag_structure`` using a
    ``StructureMatcher`` with ``scale=False`` and ``primitive_cell=False``.
    The fit result and the RMS distance are printed; nothing is returned.
    """
    candidate = AseAtomsAdaptor.get_structure(atoms)
    conventional = SpacegroupAnalyzer(
        candidate, symprec=symprec
    ).get_conventional_standard_structure()

    matcher = StructureMatcher(scale=False, primitive_cell=False)
    fits = matcher.fit(ag_structure, conventional)
    rms = matcher.get_rms_dist(ag_structure, conventional)
    print(fits, rms)
    
# Compare the first entry of the current population with the reference
is_match(current_pop[0])

True (0.0, 0.0)


In [15]:
# Check if there is a hit in the current population:
# print each member's raw score followed by the structure-match result
for x in current_pop:
    print('raw score : %.5f' % get_raw_score(x))
    is_match(x)

raw score : 0.99906
True (0.0, 0.0)
raw score : 0.99905
False None
raw score : 0.99904
False None
raw score : 0.99810
False None
raw score : 0.99802
False None
raw score : 0.99580
False None
raw score : 0.99046
False None


In [16]:
# Check if there is a hit in all candidates:
# print each relaxed candidate's raw score followed by the
# structure-match result
for x in all_candidates:
    print('raw score : %.5f' % get_raw_score(x))
    is_match(x)

raw score : 0.99906
True (0.0, 0.0)
raw score : 0.99906
False None
raw score : 0.99906
True (0.0, 0.0)
raw score : 0.99906
False None
raw score : 0.99906
True (0.0, 0.0)
raw score : 0.99906
False None
raw score : 0.99906
True (0.00011564875612286381, 0.00011564875612286381)
raw score : 0.99906
True (0.0, 0.0)
raw score : 0.99906
True (0.0, 0.0)
raw score : 0.99906
False None
raw score : 0.99906
True (0.00012548899521450137, 0.0001254889952145866)
raw score : 0.99906
False None
raw score : 0.99906
False None
raw score : 0.99906
True (0.0, 0.0)
raw score : 0.99906
True (0.0, 0.0)
raw score : 0.99906
True (0.0, 0.0)
raw score : 0.99906
True (0.0, 0.0)
raw score : 0.99906
True (0.0001198963650362492, 0.0001198963650363344)
raw score : 0.99906
True (0.0, 0.0)
raw score : 0.99906
False None
raw score : 0.99905
False None
raw score : 0.99905
False None
raw score : 0.99905
False None
raw score : 0.99904
False None
raw score : 0.99904
False None
raw score : 0.99904
False None
raw score : 0.9990

In [17]:
# Inspect a single relaxed candidate in detail
a = all_candidates[7]
s = AseAtomsAdaptor.get_structure(a)
sga = SpacegroupAnalyzer(s, symprec=0.001)
ps = sga.get_conventional_standard_structure()

# Check whether the conventional structure of the found candidate matches
# the ground-truth structure
sm = StructureMatcher(scale=False, primitive_cell=False)
print(sm.fit(ag_structure, ps), sm.get_rms_dist(ag_structure, ps))
print(get_raw_score(a))

# Cosine similarity of the candidate as stored (no standardization)
# with the ground-truth structure
b2 = relaxer.calculator.model.graph_converter(s)
h2 = relaxer.calculator.model.forward([b2], return_crystal_feas=True)["crystal_fea"][0]
print(float(F.cosine_similarity(h_ref, h2, dim=0, eps=1e-8)))

# Check the cosine similarity of the conventional structure with the
# ground-truth structure
b2 = relaxer.calculator.model.graph_converter(ps)
h2 = relaxer.calculator.model.forward([b2], return_crystal_feas=True)["crystal_fea"][0]
print(float(F.cosine_similarity(h_ref, h2, dim=0, eps=1e-8)))

# Check the cosine similarity of the primitive structure with the
# ground-truth structure
b2 = relaxer.calculator.model.graph_converter(sga.get_primitive_standard_structure())
h2 = relaxer.calculator.model.forward([b2], return_crystal_feas=True)["crystal_fea"][0]
print(float(F.cosine_similarity(h_ref, h2, dim=0, eps=1e-8)))

True (0.0, 0.0)
0.9990612864494324
0.9990612864494324
0.9990611672401428
0.9990613460540771


In [18]:
# Try invariance with respect to periodicity:
# build a 1x2x2 supercell of the reference structure and compare its
# crystal feature vector with h_ref
supercell = ag_structure.copy() * [1, 2, 2]

b2 = relaxer.calculator.model.graph_converter(supercell)
h2 = relaxer.calculator.model.forward([b2], return_crystal_feas=True)["crystal_fea"][0]
print(float(F.cosine_similarity(h_ref, h2, dim=0, eps=1e-8)))

1.0


In [19]:
# Try invariance with respect to translation:
# shift every site of a copy of the reference structure by the same vector
# (the variable is still called `supercell`, but no repetition is applied)
supercell = ag_structure.copy()
# NOTE(review): the vector is presumably in fractional coordinates
# (pymatgen's default for translate_sites) - confirm
supercell.translate_sites(list(range(len(supercell))), [0.333333, 0.66666666, 0.000001])

b2 = relaxer.calculator.model.graph_converter(supercell)
h2 = relaxer.calculator.model.forward([b2], return_crystal_feas=True)["crystal_fea"][0]
print(float(F.cosine_similarity(h_ref, h2, dim=0, eps=1e-8)))

1.0


In [20]:
# Try invariance with respect to rotation

from pymatgen.core.operations import SymmOp

# Rotation about the x axis through the origin
# NOTE(review): angle=45 is presumably in degrees (pymatgen's default
# unless angle_in_radians=True is passed) - confirm
op = SymmOp.from_origin_axis_angle(
    origin=[0, 0, 0],
    axis=[1, 0, 0],
    angle=45
)
# Apply the rotation to a copy of the reference structure and compare its
# crystal feature vector with h_ref
supercell = ag_structure.copy()
supercell = supercell.apply_operation(op)
b2 = relaxer.calculator.model.graph_converter(supercell)
h2 = relaxer.calculator.model.forward([b2], return_crystal_feas=True)["crystal_fea"][0]
print(float(F.cosine_similarity(h_ref, h2, dim=0)))

1.0


In [21]:
# Compare the crystal feature vectors of the primitive and the conventional
# standard structures produced by the same analyzer (`sga`, defined in an
# earlier cell)
b1 = relaxer.calculator.model.graph_converter(sga.get_primitive_standard_structure())
b2 = relaxer.calculator.model.graph_converter(sga.get_conventional_standard_structure())
h1 = relaxer.calculator.model.forward([b1], return_crystal_feas=True)["crystal_fea"][0]
h2 = relaxer.calculator.model.forward([b2], return_crystal_feas=True)["crystal_fea"][0]
print(float(F.cosine_similarity(h1, h2, dim=0)))

1.0
