In [None]:
from pathlib import Path
import ast
import numpy as np
from pymatgen.core import Structure, PeriodicSite, DummySpecie
from pymatgen.analysis.local_env import MinimumDistanceNN


In [16]:
# Load defective structure
defective_file_path = Path("dataset/low_MoS2/cifs/6141cf1a1d648932fbc34297.cif")
defective_structure = Structure.from_file(defective_file_path)

# Get reference structure: the pristine unit cell expanded to an 8x8x1 supercell.
ref_file_path = Path("dataset/low_MoS2/MoS2.cif")
ref_unit_cell = Structure.from_file(ref_file_path)
# make_supercell() modifies the structure it is called on, and some pymatgen
# releases return None from it. Expanding a copy keeps `ref_unit_cell` a true
# unit cell and guarantees `reference_structure` is always a Structure.
reference_structure = ref_unit_cell.copy()
reference_structure.make_supercell([8, 8, 1])

  struct = parser.parse_structures(primitive=primitive)[0]
  struct = parser.parse_structures(primitive=primitive)[0]


In [18]:
def struct_to_dict(structure):
    """Map every site of ``structure`` to its rounded fractional coordinates.

    Parameters
    ----------
    structure
        Object exposing ``sites`` (an iterable of hashable site objects) and
        ``frac_coords`` (an array with one row of fractional coordinates per
        site, in the same order as ``sites``).

    Returns
    -------
    dict
        ``{site: coords}`` where ``coords`` is the site's row of
        ``frac_coords`` rounded to 3 decimal places.
    """
    # Pair each site with its rounded coordinate row; insertion order follows structure.sites.
    return dict(zip(structure.sites, np.round(structure.frac_coords, 3)))


def _periodic_coords_key(frac_coords):
    """Return a hashable key for fractional coordinates folded into [0, 1)."""
    folded = np.round(np.mod(frac_coords, 1.0), 3)
    # Rounding can push e.g. 0.9996 up to 1.0, which is the same periodic
    # position as 0.0, so fold once more after rounding.
    return tuple(np.mod(folded, 1.0).tolist())


def get_defects_structure(defective_struct, reference_struct):
    """Build a structure containing only the defect sites of ``defective_struct``.

    Every site of ``reference_struct`` is looked up in ``defective_struct`` by
    its fractional coordinates (rounded to 3 decimals by ``struct_to_dict`` and
    compared modulo the periodic cell):

    * coordinates found, different species -> substitution; the site from the
      defective structure is kept.
    * coordinates not found -> vacancy; a ``DummySpecie`` site is created at
      the reference coordinates in the defective structure's lattice.

    Each defect site carries these site properties:
    ``original_an`` (atomic number in the reference), ``new_an`` (atomic number
    in the defective structure, 0 for a vacancy), ``an_change``
    (``new_an - original_an``), ``vacancy_defect`` and ``substitution_defect``
    (1.0 / 0.0 flags).

    Parameters
    ----------
    defective_struct
        Structure containing the defects. It is not modified.
    reference_struct
        Pristine structure with the same cell as ``defective_struct``.

    Returns
    -------
    Structure
        Structure built from the defect sites, ordered by the reference site
        they correspond to.

    Notes
    -----
    NOTE(review): when no defect is found the site list passed to
    ``Structure.from_sites`` is empty; confirm how the installed pymatgen
    version handles that case.
    """
    defective_dict = struct_to_dict(defective_struct)
    reference_dict = struct_to_dict(reference_struct)

    # Vacancy sites are expressed in the lattice of the defective structure.
    structure_lattice = defective_struct.lattice

    # Index defective sites by coordinates so each reference site is matched
    # with one lookup instead of a scan over every defective site.
    defective_by_coords = {}
    for def_site, def_coords in defective_dict.items():
        key = _periodic_coords_key(def_coords)
        defective_by_coords.setdefault(key, []).append(def_site)

    # Defect sites and their properties, kept index-aligned so the properties
    # can be attached by position rather than by site equality.
    defects_list = []
    defects_properties = []

    for ref_site, ref_coords in reference_dict.items():
        matches = defective_by_coords.get(_periodic_coords_key(ref_coords))

        if matches is None:  # Vacancy case
            vacant_site = PeriodicSite(
                species=DummySpecie(),
                coords=ref_coords,
                coords_are_cartesian=False,
                lattice=structure_lattice,
            )
            defects_list.append(vacant_site)
            defects_properties.append({"original_an": ref_site.specie.Z,
                                       "new_an": 0,
                                       "an_change": 0 - ref_site.specie.Z,
                                       "vacancy_defect": 1.0,
                                       "substitution_defect": 0.0})
            continue

        for def_site in matches:
            if ref_site.specie != def_site.specie:  # Substitution case
                defects_list.append(def_site)
                defects_properties.append({"original_an": ref_site.specie.Z,
                                           "new_an": def_site.specie.Z,
                                           "an_change": def_site.specie.Z - ref_site.specie.Z,
                                           "vacancy_defect": 0.0,
                                           "substitution_defect": 1.0})

    # Create the defects structure; from_sites keeps the order of defects_list.
    defects_struct = Structure.from_sites(defects_list)

    # Attach the properties position by position.
    for a_site, site_properties in zip(defects_struct.sites, defects_properties):
        a_site.properties.update(site_properties)

    return defects_struct

In [19]:
# Extract the defect-only structure (substituted and vacant sites) from the
# loaded pair of structures and print its summary.
defects_structure = get_defects_structure(defective_structure, reference_structure)
print(defects_structure)

Full Formula (W1 Se1)
Reduced Formula: WSe
abc   :  25.522526  25.522526  14.879004
angles:  90.000000  90.000000 120.000000
pbc   :       True       True       True
Sites (2)
  #  SP           a         b         c    an_change    new_an    original_an    substitution_defect    vacancy_defect
---  ----  --------  --------  --------  -----------  --------  -------------  ---------------------  ----------------
  0  W     0.041667  0.208333  0.25               32        74             42                      1                 0
  1  Se    0.208333  0.041667  0.144826           18        34             16                      1                 0


In [None]:
def get_nodes_edges(structure):
    """Turn a defects structure into node features, edges and edge features.

    Node features (one list per site, in site order):
        [index of site,
         coordination number reported by MinimumDistanceNN minus 1,
         atomic number of the element before the defect,
         atomic number of the element after the defect,
         change in atomic number,
         vacancy flag (1 for yes, 0 for no),
         substitution flag (1 for yes, 0 for no)]

    Edges are every ordered pair ``[i, j]`` of distinct site indices; the
    matching edge feature is ``[distance between site i and site j]``.

    Parameters
    ----------
    structure
        Structure whose sites carry the properties ``original_an``,
        ``new_an``, ``an_change``, ``vacancy_defect`` and
        ``substitution_defect``.

    Returns
    -------
    tuple
        ``(nodes, edges, edge_features)`` as three lists; ``edges`` and
        ``edge_features`` are index-aligned.
    """
    all_sites = structure.sites
    neighbor_finder = MinimumDistanceNN()

    # Site properties copied into each node, in feature order.
    property_keys = ("original_an", "new_an", "an_change",
                     "vacancy_defect", "substitution_defect")

    # The nodes
    nodes = []
    for idx, site in enumerate(all_sites):
        coordination = neighbor_finder.get_cn(structure, idx)
        feature_row = [idx, coordination - 1]
        feature_row.extend(site.properties[key] for key in property_keys)
        nodes.append(feature_row)

    # The edges: fully connected, both directions, no self-loops.
    edges = []
    edge_features = []
    for src, src_site in enumerate(all_sites):
        for dst, dst_site in enumerate(all_sites):
            if src == dst:
                continue
            edges.append([src, dst])
            edge_features.append([src_site.distance(dst_site)])

    return nodes, edges, edge_features
    

In [49]:
# Convert the defects structure into graph inputs: node features, edge index pairs and edge distances.
the_nodes, the_edges, the_edge_features = get_nodes_edges(defects_structure)