In [1]:
import copy
import os
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from torch.nn import functional as F

import cdvae
import hydra
from hydra import initialize_config_dir
from hydra.experimental import compose

#### Load cdvae model for mp_20

In [2]:
# Point every project-level location variable at the same local checkout.
os.environ.update(
    dict.fromkeys(
        ("PROJECT_ROOT", "HYDRA_JOBS", "WABDB_DIR"),
        "/mnt/c/Users/Lenovo/Downloads/cdvae2",
    )
)

In [3]:
model_path = Path("/mnt/c/Users/Lenovo/Downloads/cdvae2/singlerun/2023-05-19/mp_20")

# Pick the checkpoint with the highest epoch number. The epoch is parsed from
# file names of the form "epoch=<N>-<rest>.ckpt".
ckpts = list(model_path.glob('epoch*.ckpt'))
if not ckpts:
    # Fail here with a clear message instead of a NameError on `ckpt` below.
    raise FileNotFoundError(
        f"No 'epoch*.ckpt' checkpoint found in {model_path}")
ckpt_epochs = np.array(
    [int(ckpt.parts[-1].split('-')[0].split('=')[1]) for ckpt in ckpts])
ckpt = str(ckpts[ckpt_epochs.argsort()[-1]])

checkpoint = torch.load(ckpt, map_location=torch.device('cpu'))

# Repoint the decoder's scale file at the copy shipped with the installed
# cdvae package, then save the patched checkpoint next to the original.
gemnet_path = Path(cdvae.__file__).parent / "pl_modules/gemnet/gemnet-dT.json"
checkpoint["hyper_parameters"]["decoder"]["scale_file"] = str(gemnet_path)
# From here on `ckpt` refers to the patched checkpoint.
ckpt = model_path / "checkpoint_edit.ckpt"
torch.save(checkpoint, ckpt)

In [4]:
# Rebuild the model from the run directory: compose the saved `hparams`
# config, instantiate the module from it, then load the weights from `ckpt`
# (the patched checkpoint path set in the previous cell).
with initialize_config_dir(str(model_path)):
    # load config
    cfg = compose(config_name='hparams')
    
    # load model
    model = hydra.utils.instantiate(
        cfg.model,
        optim=cfg.optim,
        data=cfg.data,
        logging=cfg.logging,
        _recursive_=False,
    )
    
    # Replace the freshly built module with the one restored from the checkpoint.
    model = model.load_from_checkpoint(ckpt)
    # Attach the scalers stored next to the checkpoint in the run directory.
    model.lattice_scaler = torch.load(model_path / 'lattice_scaler.pt')
    model.scaler = torch.load(model_path / 'prop_scaler.pt')



#### Define functions to get a batch from an atom object

First load lattice and property scalers

In [5]:
# Notebook-level aliases for the scalers attached to the loaded model;
# `scaler` is read by get_batch when it transforms the target property.
lattice_scaler, scaler = model.lattice_scaler, model.scaler

Now define a function that takes an ASE `Atoms` object and returns a batch

In [15]:
from cdvae.common.data_utils import build_crystal, add_scaled_lattice_prop
from torch_geometric.data import Data, Batch
from pymatgen.io.ase import AseAtomsAdaptor
from pymatgen.core import Lattice, Structure
from pymatgen.analysis.graphs import StructureGraph
from pymatgen.analysis import local_env

# Shared neighbor-finding strategy; build_crystal_graph uses it when
# graph_method == 'crystalnn'.
CrystalNN = local_env.CrystalNN(
    distance_cutoffs=None,
    x_diff_weight=-1,
    porous_adjustment=False,
)

def atoms_to_structure(atoms):
    """Build a ``Structure`` from an atoms object.

    The lattice comes from ``atoms.cell``; species and sites come from
    ``atoms.get_chemical_symbols()`` and ``atoms.get_positions()``, with the
    positions passed as Cartesian coordinates.
    """
    return Structure(
        Lattice(atoms.cell),
        atoms.get_chemical_symbols(),
        atoms.get_positions(),
        coords_are_cartesian=True,
    )

def build_crystal_graph(crystal, graph_method='crystalnn'):
    """Convert a crystal into the arrays needed to build a graph data object.

    Args:
        crystal: object exposing ``frac_coords``, ``atomic_numbers`` and
            ``lattice.parameters`` (a pymatgen ``Structure`` in this notebook).
        graph_method: ``'crystalnn'`` to derive bonds with the module-level
            ``CrystalNN`` strategy, or ``'none'`` to produce no edges.

    Returns:
        Tuple ``(frac_coords, atom_types, lengths, angles, edge_indices,
        to_jimages, num_atoms)``. ``edge_indices`` is an int64 array of shape
        ``(num_edges, 2)`` and ``to_jimages`` an int64 array of shape
        ``(num_edges, 3)``; both keep that layout when there are no edges.

    Raises:
        NotImplementedError: if ``graph_method`` is neither ``'crystalnn'``
            nor ``'none'``.
    """
    if graph_method not in ('crystalnn', 'none'):
        raise NotImplementedError(
            f"Unsupported graph_method: {graph_method!r}")

    frac_coords = crystal.frac_coords
    lattice_parameters = crystal.lattice.parameters
    atom_types = np.array(crystal.atomic_numbers)
    lengths = np.array(lattice_parameters[:3])
    angles = np.array(lattice_parameters[3:])

    edge_indices, to_jimages = [], []
    if graph_method == 'crystalnn':
        crystal_graph = StructureGraph.with_local_env_strategy(
            crystal, CrystalNN)
        # Store every bond in both directions; the reverse direction uses the
        # negated periodic-image offset.
        for i, j, to_jimage in crystal_graph.graph.edges(data='to_jimage'):
            edge_indices.append([j, i])
            to_jimages.append(to_jimage)
            edge_indices.append([i, j])
            to_jimages.append(tuple(-tj for tj in to_jimage))

    # An explicit dtype and reshape keep the arrays 2-D and integer even when
    # the edge list is empty (np.array([]) alone is float64 of shape (0,)).
    edge_indices = np.array(edge_indices, dtype=np.int64).reshape(-1, 2)
    to_jimages = np.array(to_jimages, dtype=np.int64).reshape(-1, 3)
    num_atoms = atom_types.shape[0]

    return frac_coords, atom_types, lengths, angles, edge_indices, to_jimages, num_atoms

def process_one(atoms, graph_method="crystalnn", formation_energy_per_atom=0, material_id=0):
    """Turn one atoms object into a record dict.

    The record holds the material id (``'mp_id'``), the structure as a CIF
    string (``'cif'``), the tuple returned by ``build_crystal_graph``
    (``'graph_arrays'``) and the given ``formation_energy_per_atom``.
    """
    structure = atoms_to_structure(atoms)
    arrays = build_crystal_graph(structure, graph_method)
    return dict(
        mp_id=material_id,
        cif=structure.to(fmt="cif"),
        graph_arrays=arrays,
        formation_energy_per_atom=formation_energy_per_atom,
    )

def get_batch(atoms, **process_kwargs):
    """Wrap a single atoms object into a one-element ``Batch``.

    ``process_kwargs`` are forwarded to ``process_one``. The target ``y`` is
    the record's ``formation_energy_per_atom`` passed through the
    notebook-level ``scaler`` and reshaped to one row.
    """
    records = [process_one(atoms, **process_kwargs)]
    add_scaled_lattice_prop(records, "scale_length")
    record = records[0]
    (frac_coords, atom_types, lengths, angles,
     edge_indices, to_jimages, num_atoms) = record['graph_arrays']

    target = scaler.transform(record["formation_energy_per_atom"])

    # edge_index is transposed to shape (2, num_edges)
    edge_index = torch.LongTensor(edge_indices.T).contiguous()
    sample = Data(
        frac_coords=torch.Tensor(frac_coords),
        atom_types=torch.LongTensor(atom_types),
        lengths=torch.Tensor(lengths).view(1, -1),
        angles=torch.Tensor(angles).view(1, -1),
        edge_index=edge_index,
        to_jimages=torch.LongTensor(to_jimages),
        num_atoms=num_atoms,
        num_bonds=edge_indices.shape[0],
        # special attribute used for batching in pytorch geometric
        num_nodes=num_atoms,
        y=target.view(1, -1),
    )
    return Batch.from_data_list([sample])

#### Ground truth latent space of Ag 
- Examine whether cosine similarity can distinguish the different Ag structures encoded by CDVAE in latent space

In [7]:
from pymatgen.core import Structure, Lattice
from pymatgen.io.cif import CifFile

In [8]:
# Reference structure of Ag from Materials Project, id=mp-124.
# The CIF is embedded as a string (4 Ag sites, cubic cell), parsed into a
# pymatgen Structure, and then converted to an ASE Atoms object.
ag_cif = "# generated using pymatgen\ndata_Ag\n_symmetry_space_group_name_H-M   'P 1'\n_cell_length_a   4.10435636\n_cell_length_b   4.10435636\n_cell_length_c   4.10435636\n_cell_angle_alpha   90.00000000\n_cell_angle_beta   90.00000000\n_cell_angle_gamma   90.00000000\n_symmetry_Int_Tables_number   1\n_chemical_formula_structural   Ag\n_chemical_formula_sum   Ag4\n_cell_volume   69.14092475\n_cell_formula_units_Z   4\nloop_\n _symmetry_equiv_pos_site_id\n _symmetry_equiv_pos_as_xyz\n  1  'x, y, z'\nloop_\n _atom_site_type_symbol\n _atom_site_label\n _atom_site_symmetry_multiplicity\n _atom_site_fract_x\n _atom_site_fract_y\n _atom_site_fract_z\n _atom_site_occupancy\n  Ag  Ag0  1  0.00000000  0.00000000  0.00000000  1.0\n  Ag  Ag1  1  0.50000000  0.50000000  0.00000000  1.0\n  Ag  Ag2  1  0.50000000  0.00000000  0.50000000  1.0\n  Ag  Ag3  1  0.00000000  0.50000000  0.50000000  1.0\n"
ag_structure = Structure.from_str(ag_cif, fmt="cif")
ag_atoms = AseAtomsAdaptor.get_atoms(ag_structure)

In [9]:
# Reference structure of Ag from Materials Project, id=mp-8566
ag_structure1 = Structure.from_file("/mnt/c/Users/Lenovo/Downloads/cdvae2/structures_GA/ag_cif/Ag_8566.cif")
ag_atoms1 = AseAtomsAdaptor.get_atoms(ag_structure1)
# Reference structure of Ag from Materials Project, id=mp-10597
ag_structure2 = Structure.from_file("/mnt/c/Users/Lenovo/Downloads/cdvae2/structures_GA/ag_cif/Ag_10597.cif")
ag_atoms2 = AseAtomsAdaptor.get_atoms(ag_structure2)
# Reference structure of Ag from Materials Project, id=mp-989737
ag_structure3 = Structure.from_file("/mnt/c/Users/Lenovo/Downloads/cdvae2/structures_GA/ag_cif/Ag_989737.cif")
ag_atoms3 = AseAtomsAdaptor.get_atoms(ag_structure3)
# Reference structure of Ag from Materials Project, id=mp-2646971
ag_structure4 = Structure.from_file("/mnt/c/Users/Lenovo/Downloads/cdvae2/structures_GA/ag_cif/Ag_2646971.cif")
ag_atoms4 = AseAtomsAdaptor.get_atoms(ag_structure4)



In [10]:
# Change the atoms objects passed to get_batch to test other pairs.
# `mu_t` / `log_t` (encoding of `ag_atoms`) are also read later by relax().
batch, batch1 = get_batch(ag_atoms), get_batch(ag_atoms1)
mu_t, log_t, z_t = model.encode(batch)
mu_x, log_x, z_x = model.encode(batch1)

# Compare the concatenated first two encoder outputs of each structure.
input1 = torch.cat((mu_t, log_t), dim=1)
input2 = torch.cat((mu_x, log_x), dim=1)
cos_sim = F.cosine_similarity(input1[0], input2[0], dim=0, eps=1e-8)
print(cos_sim.item())

0.9385689496994019


In [11]:
# Header line: the Materials Project ids, printed space-separated.
print(" ".join(("mp-124", "mp-8566", "mp-10597", "mp-989737", "mp-2646971")))

# Pairwise cosine similarities entered by hand; presumably rows and columns
# follow the id order printed above (verify against the runs that produced them).
cosine_similarity = np.array([
    [1,      0.9386, 0.9625, 0.9240, 0.9648],
    [0.9386, 1,      0.9312, 0.9563, 0.9182],
    [0.9625, 0.9312, 1,      0.916,  0.9733],
    [0.924,  0.9563, 0.916,  1,      0.9281],
    [0.9648, 0.9182, 0.9733, 0.9281, 1],
])
print(cosine_similarity)

mp-124 mp-8566 mp-10597 mp-989737 mp-2646971
[[1.     0.9386 0.9625 0.924  0.9648]
 [0.9386 1.     0.9312 0.9563 0.9182]
 [0.9625 0.9312 1.     0.916  0.9733]
 [0.924  0.9563 0.916  1.     0.9281]
 [0.9648 0.9182 0.9733 0.9281 1.    ]]


- Examine whether relaxing the different Ag structures with CHGNet leaves each relaxed structure the same as its starting structure (i.e., it does not fall into the global minimum), and test the potential energy per atom from CHGNet.

In [16]:
# NOTE: `relax` is defined in a cell that appears further down in this
# notebook ("Relax function"); that cell must be run before this one.
relaxed = relax(ag_atoms1)
# Energy stored on the relaxed atoms by the calculator that relax() attaches.
relaxed.get_potential_energy()

-11.166425704956055

In [17]:
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pymatgen.analysis.structure_matcher import StructureMatcher
from pymatgen.core import Structure

# Convert the relaxed atoms back to a pymatgen Structure and take the
# conventional standard cell reported by the symmetry analyzer.
s = atoms_to_structure(relaxed)
sga = SpacegroupAnalyzer(s, symprec=0.001)
ps = sga.get_conventional_standard_structure()

# Compare the unrelaxed reference with the relaxed cell: first whether the
# matcher considers them a fit, then the value returned by get_rms_dist.
sm = StructureMatcher(scale=False, primitive_cell=False)
print(sm.fit(ag_structure1, ps))
print(sm.get_rms_dist(ag_structure1, ps))

True
(2.8290529173298537e-16, 4.267852349240298e-16)


#### Relax function

In [13]:
from ase.calculators.singlepoint import SinglePointCalculator
from ase.ga import set_raw_score

def finalize(atoms, energy=None, forces=None, stress=None):
    """Wrap ``atoms`` and attach the given results to it, in place.

    Calls ``atoms.wrap()``, attaches a ``SinglePointCalculator`` built from
    ``energy``, ``forces`` and ``stress``, and records the value of
    ``atoms.get_potential_energy()`` as the raw score via ``set_raw_score``.
    """
    atoms.wrap()
    atoms.calc = SinglePointCalculator(
        atoms, energy=energy, forces=forces, stress=stress)
    # NOTE(review): the score stored is the energy as-is (not negated);
    # confirm this is the sign the downstream ranking expects.
    set_raw_score(atoms, atoms.get_potential_energy())

In [12]:
import ase
from chgnet.model import StructOptimizer

# Single optimizer instance reused by every relax() call.
relaxer = StructOptimizer()

def relax(atoms):
    """Relax a candidate with CHGNet and return the finalized result.

    The input is converted to a structure, relaxed with the notebook-level
    ``relaxer``, and converted back to an atoms object. The cosine similarity
    between the relaxed structure's encoding and the reference encoding
    (notebook-level ``mu_t`` / ``log_t``) is stored in
    ``relaxed_atoms.info["cos_sim"]``. The last energy, forces and stress of
    the relaxation trajectory are attached through ``finalize``.

    Args:
        atoms: atoms object to relax; it is not modified.

    Returns:
        A new atoms object holding the relaxed structure.
    """
    result = relaxer.relax(atoms_to_structure(atoms), verbose=False)
    relaxed_atoms = AseAtomsAdaptor.get_atoms(result["final_structure"])
    # Deep-copy the metadata: sharing the dict would let later writes to the
    # relaxed atoms' info (the cos_sim below, the raw score set in finalize)
    # leak back into the input atoms.
    relaxed_atoms.info = copy.deepcopy(atoms.info)

    # Compare cosine similarity of the relaxed structure with ground truth.
    # Inference only, so no autograd graph is needed.
    with torch.no_grad():
        batch = get_batch(relaxed_atoms)
        mu_x, log_x, z_x = model.encode(batch)
        input1 = torch.cat([mu_t, log_t], dim=1)
        input2 = torch.cat((mu_x, log_x), dim=1)
        cos_sim = F.cosine_similarity(input1[0], input2[0], dim=0, eps=1e-8)
    # Keep the result instead of discarding it.
    relaxed_atoms.info["cos_sim"] = float(cos_sim)

    trajectory = result["trajectory"]
    finalize(
        relaxed_atoms,
        energy=trajectory.energies[-1],
        forces=trajectory.forces[-1],
        stress=trajectory.stresses[-1],
    )
    return relaxed_atoms

CHGNet initialized with 400,438 parameters
CHGNet will run on cpu
