# labelled sites example

In [1]:
from pymatgen.io.vasp import Poscar, Xdatcar
from pymatgen.symmetry.groups import SpaceGroup
from pymatgen.core import Structure, Lattice
import numpy as np

from site_analysis.atom import Atom
from site_analysis.site import Site
from site_analysis.tools import get_vertex_indices
from site_analysis.polyhedral_site import PolyhedralSite
from site_analysis.trajectory import Trajectory

from collections import Counter
import tqdm

Load a `POSCAR` file where every octahedral site is occupied by a Na atom.

In [2]:
# Reference structure with a Na atom on every candidate site; the cells below
# use it to label the sites and to find the vertex atoms of each polyhedron.
all_na_structure = Poscar.from_file('na_sn_all_na_ext.POSCAR.vasp').structure
# Species at the polyhedron vertices and at the polyhedron centres.
vertex_species = 'S'
centre_species = 'Na'
# Last expression: display the composition as the cell output.
all_na_structure.composition

Comp: Na136 Sn16 P8 S96

Create a series of pymatgen Structures using the `Structure.from_spacegroup()` method, each containing only the sites of one Na site type (Na1, Na2, …), using the coordinates from Ramos _et al._ _Chem. Mater._ 2018.

In [3]:
# NOTE(review): `sg` is not referenced again in the visible notebook; the
# following cells pass the symbol string 'I41/acd:2' to
# `Structure.from_spacegroup()` directly. Confirm whether this cell is still needed.
sg = SpaceGroup('I41/acd:2')

In [4]:
# Reuse the lattice of the fully occupied reference structure.
lattice = all_na_structure.lattice

# One representative fractional coordinate per Na site type; the space-group
# symmetry operations generate the remaining equivalent positions.
na_site_coords = {
    'Na1': [0.25, 0.0, 0.125],
    'Na2': [0.00, 0.0, 0.125],
    'Na3': [0.0, 0.25, 0.0],
    'Na4': [0.0, 0.0, 0.0],
    'Na5': [0.75, 0.25, 0.0],
    'Na6': [0.5, 0.75, 0.625],
    'i2': [0.666, 0.1376, 0.05],
}

# Build one single-species Structure per site type, keyed by its label.
na_structures = {
    site_label: Structure.from_spacegroup(sg='I41/acd:2',
                                          lattice=lattice,
                                          species=['Na'],
                                          coords=[fractional_coords])
    for site_label, fractional_coords in na_site_coords.items()
}

# Keep the individual names available; dicts preserve insertion order, so the
# values unpack in the order Na1 ... Na6, i2.
na1, na2, na3, na4, na5, na6, i2 = na_structures.values()

In [5]:
# One (species, fractional coordinate) pair per symmetry-distinct position.
# The five 'Na' rows use the same coordinates as the Na1-Na5 reference
# structures, and the 'K' row uses the Na6 coordinate.
# NOTE(review): 'K', 'Li' and 'Mg' look like stand-in species used to tell these
# positions apart in the exported CIF - confirm. The 'Li' coordinate
# [0.65, 0.375, 0.05] differs from the i2 coordinate [0.666, 0.1376, 0.05]
# used when building `i2`; verify which is intended.
asymmetric_unit_sites = [
    ('Sn', [0.25, 0.0, 0.25]),
    ('S', [0.326, 0.8997, 0.1987]),
    ('S', [0.1258, 0.8972, 0.2999]),
    ('S', [0.4097, 0.8339, 0.332]),
    ('P', [0, 0.75, 0.125]),
    ('Na', [0.25, 0.0, 0.125]),
    ('Na', [0.0, 0.0, 0.125]),
    ('Na', [0.0, 0.25, 0.0]),
    ('Na', [0.0, 0.0, 0.0]),
    ('Na', [0.75, 0.25, 0.0]),
    ('K', [0.5, 0.75, 0.625]),
    ('Li', [0.65, 0.375, 0.05]),
    ('Mg', [0.25000, 0.75000, 0.00000]),
]
# Split the table back into the parallel lists that from_spacegroup() expects.
site_species, site_coords = (list(column) for column in zip(*asymmetric_unit_sites))

s = Structure.from_spacegroup(sg='I41/acd:2',
                              lattice=lattice,
                              species=site_species,
                              coords=site_coords)

# Write the symmetry-expanded structure to a CIF file.
s.to(filename='all_atoms.cif')

Import the `matching_site_indices()` function from `polyhedral_analysis` (TODO: this should probably be added to the `site-analysis` package). This function takes two pymatgen Structures as arguments and returns the indices of the sites in the first structure that are each closest to a site in the second (reference) structure.

In [6]:
from polyhedral_analysis.polyhedra_recipe import matching_site_indices

In [7]:
# Print the function's docstring to show the arguments used in the following cells.
print(matching_site_indices.__doc__)


    Returns a subset of site indices from structure (as a list) where each site is the closest to one 
    site in the reference structure.
    
    Args:
        structure (Structure): The structure being analysed.
        reference_structure (Structure): A Structure object containing a set of reference sites.
        species (:obj:`list[str]`, optional): A list of species labels. If this is set, only matching
            sites will be included in the returned set.
        
    Returns:
        (list[int])

    


In [8]:
# Example: indices of the sites in `all_na_structure` that are each closest to
# one of the Na6 reference sites.
matching_site_indices(all_na_structure, na_structures['Na6'])

[38, 37, 74, 73, 102, 103, 76, 75]

Use this to find the Na sites closest to a Na1, Na2, Na3 etc site, and store the site index and "Na1" etc. label in a dictionary, using the index as keys. Then sort this dictionary and generate an ordered list of "Na1" etc. labels for every Na site.

In [9]:
# Map each matched site index in `all_na_structure` to the label of the
# reference site type it is closest to.
# Loop names are chosen so they do not clash with the `structure` variable that
# is loaded later in the notebook: loop variables persist in the kernel
# namespace after the cell has run.
labels = {}
for site_label, reference_structure in na_structures.items():
    for site_index in matching_site_indices(all_na_structure, reference_structure):
        # A site matched by two different site types would otherwise be
        # silently relabelled by whichever type is processed last.
        if labels.get(site_index, site_label) != site_label:
            raise ValueError(
                f'site {site_index} matches both {labels[site_index]} and {site_label}')
        labels[site_index] = site_label

# One label per matched site, ordered by site index.
sorted_labels = [labels[site_index] for site_index in sorted(labels)]
print(sorted_labels)

['Na3', 'Na3', 'Na3', 'Na4', 'Na3', 'Na4', 'Na3', 'Na3', 'Na3', 'Na2', 'Na3', 'Na5', 'Na2', 'Na2', 'Na2', 'Na1', 'Na1', 'Na2', 'Na2', 'Na5', 'Na4', 'Na4', 'Na5', 'Na2', 'Na2', 'Na4', 'Na5', 'Na4', 'Na5', 'Na2', 'Na4', 'Na4', 'Na3', 'Na3', 'Na3', 'Na3', 'Na2', 'Na6', 'Na6', 'Na3', 'Na3', 'Na3', 'Na3', 'Na1', 'Na1', 'Na1', 'Na1', 'Na2', 'Na5', 'Na5', 'Na2', 'Na1', 'Na1', 'Na2', 'Na2', 'Na5', 'Na2', 'Na1', 'Na2', 'Na1', 'Na2', 'Na5', 'Na5', 'Na1', 'Na2', 'Na2', 'Na2', 'Na5', 'Na2', 'Na5', 'Na4', 'Na4', 'Na4', 'Na6', 'Na6', 'Na6', 'Na6', 'Na2', 'Na2', 'Na1', 'Na1', 'Na5', 'Na2', 'Na2', 'Na4', 'Na2', 'Na2', 'Na4', 'Na1', 'Na4', 'Na5', 'Na2', 'Na2', 'Na5', 'Na2', 'Na1', 'Na1', 'Na5', 'Na4', 'Na2', 'Na2', 'Na4', 'Na6', 'Na6', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2']


We then use the `get_vertex_indices()` function to find the N closest S to each Na (within a cutoff of 4.3 Å).  
This returns a nested list, where each sublist contains the S indices for a single polyhedron.  

In [10]:
# Number of vertex atoms used to define the polyhedron of each site type.
site_vertices = {
    'Na1': 6,
    'Na2': 6,
    'Na3': 6,
    'Na4': 6,
    'Na5': 6,
    'Na6': 8,
    'i2': 6,
}
# One vertex count per site, in the same order as `sorted_labels`.
n_vertices = [site_vertices[site_label] for site_label in sorted_labels]
print(n_vertices)

[6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6]


In [11]:
# Find the indices of the vertex atoms of every polyhedron: for each centre
# atom, the `n_vertices` closest vertex-species atoms within the 4.3 Angstrom cutoff.
# NOTE(review): the printed indices (e.g. 242, 252) exceed the 96 S atoms in the
# structure, so they appear to be structure-wide indices rather than indices
# within the S species - confirm against `get_vertex_indices`.
vertex_indices = get_vertex_indices(all_na_structure, centre_species=centre_species, 
                                    vertex_species=vertex_species, cutoff=4.3, n_vertices=n_vertices)
# Show only the first four polyhedra to keep the output short.
print(vertex_indices[:4])

[[242, 218, 186, 244, 188, 220], [250, 252, 178, 212, 180, 210], [230, 174, 224, 206, 168, 200], [166, 192, 232, 208, 182, 249]]


We can now use these vertex indices to define our `PolyhedralSite` objects.   
We now also pass in the appropriate label to each polyhedron.

In [12]:
# Load the structure that is actually analysed (the partially occupied system).
structure = Poscar.from_file('POSCAR').structure
structure.composition # Note we have 88 Na vs. 136 in the all Na structure above.

Comp: Na88 Sn16 P8 S96

The structures we are working with have Na as the first species. We have calculated our site vertex indices using `all_na_structure`, which contains 136 Na atoms. In our "real" system we only have 88 Na atoms, so every vertex index must be shifted down by the difference in Na count (136 − 88 = 48).

In [13]:
# The vertex indices were found in `all_na_structure`. The structure analysed
# here has fewer centre atoms, and these are listed before the vertex atoms, so
# every vertex index shifts down by the difference in centre-atom count
# (136 - 88 = 48 for the files used here). The counts are derived from the
# structures rather than hard-coded so the cell stays correct for other inputs.
n_centres_reference = sum(1 for site in all_na_structure
                          if site.species_string == centre_species)
n_centres_analysed = sum(1 for site in structure
                         if site.species_string == centre_species)
offset = n_centres_reference - n_centres_analysed
offset_vertex_indices = [[index - offset for index in polyhedron]
                         for polyhedron in vertex_indices]

In [14]:
# zip() silently truncates to the shorter input, so check that there is exactly
# one label per polyhedron before pairing them up.
assert len(offset_vertex_indices) == len(sorted_labels), \
    'expected one label per set of vertex indices'

# Create one PolyhedralSite per polyhedron, tagged with its site-type label.
sites = [PolyhedralSite(vertex_indices=vi, label=label)
         for vi, label in zip(offset_vertex_indices, sorted_labels)]

# Create one Atom per centre-species atom in the analysed structure, keeping
# the atom's index within that structure. `centre_species` is used for both the
# filter and the Atom species so the two cannot drift apart.
atoms = [Atom(index=i, species_string=centre_species)
         for i, site in enumerate(structure)
         if site.species_string == centre_species]

# Collect the sites and atoms in a Trajectory for analysis.
trajectory = Trajectory(sites, atoms)

In [15]:
# Tally of sites by coordination number; the recorded output (128 sites with 6,
# 8 sites with 8) matches the vertex counts assigned in `n_vertices`.
trajectory.site_coordination_numbers()

Counter({6: 128, 8: 8})

Site labels can be accessed directly for an individual site, or as a list for all sites:

In [16]:
# Label of the first site, read directly from the site object.
trajectory.sites[0].label

'Na3'

In [17]:
# Labels of all sites, in site order (same sequence as `sorted_labels`).
print(trajectory.site_labels())

['Na3', 'Na3', 'Na3', 'Na4', 'Na3', 'Na4', 'Na3', 'Na3', 'Na3', 'Na2', 'Na3', 'Na5', 'Na2', 'Na2', 'Na2', 'Na1', 'Na1', 'Na2', 'Na2', 'Na5', 'Na4', 'Na4', 'Na5', 'Na2', 'Na2', 'Na4', 'Na5', 'Na4', 'Na5', 'Na2', 'Na4', 'Na4', 'Na3', 'Na3', 'Na3', 'Na3', 'Na2', 'Na6', 'Na6', 'Na3', 'Na3', 'Na3', 'Na3', 'Na1', 'Na1', 'Na1', 'Na1', 'Na2', 'Na5', 'Na5', 'Na2', 'Na1', 'Na1', 'Na2', 'Na2', 'Na5', 'Na2', 'Na1', 'Na2', 'Na1', 'Na2', 'Na5', 'Na5', 'Na1', 'Na2', 'Na2', 'Na2', 'Na5', 'Na2', 'Na5', 'Na4', 'Na4', 'Na4', 'Na6', 'Na6', 'Na6', 'Na6', 'Na2', 'Na2', 'Na1', 'Na1', 'Na5', 'Na2', 'Na2', 'Na4', 'Na2', 'Na2', 'Na4', 'Na1', 'Na4', 'Na5', 'Na2', 'Na2', 'Na5', 'Na2', 'Na1', 'Na1', 'Na5', 'Na4', 'Na2', 'Na2', 'Na4', 'Na6', 'Na6', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2', 'i2']


The trajectory currently does not have any timesteps associated with it.

In [18]:
# Number of timesteps analysed so far (0 before any structure is appended).
len(trajectory)

0

We can analyse a structure as a timestep using `append_timestep()`.

In [19]:
# Analyse a single structure and record it as a timestep; the trajectory
# length then goes from 0 to 1.
trajectory.append_timestep(structure)
len(trajectory)

1

The occupations of each site are stored as a list of lists, as each site can have zero, one, or multiple atoms occupying it.

In [20]:
# Vertex indices of the first site: the first entry of `vertex_indices`
# shifted down by the Na-count offset (e.g. 242 -> 194).
sites[0].vertex_indices

[194, 170, 138, 196, 140, 172]

In [21]:
# Occupation of every site at the first timestep: one list of atom indices per
# site, empty where the site is unoccupied.
print(trajectory.sites_trajectory[0])

[[50], [61], [64], [34], [32], [30], [46], [38], [56], [0], [], [40], [17], [19], [3], [15], [4], [75], [86], [], [71], [66], [52], [7], [25], [69], [44], [], [], [], [57], [60], [28], [36], [29], [35], [73], [], [], [39], [31], [37], [33], [83], [79], [84], [72], [], [49], [42], [21], [76], [81], [24], [13], [55], [11], [82], [10], [80], [22], [54], [47], [77], [14], [16], [6], [43], [5], [48], [67], [62], [59], [], [], [], [], [], [26], [87], [74], [45], [27], [9], [70], [23], [12], [68], [78], [65], [53], [18], [1], [41], [20], [], [8], [51], [63], [2], [85], [58], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]


Rough example for collecting only occupied sites, and counting their site types:

In [22]:
# For each site label, count the (site, timestep) pairs in which the site holds
# at least one atom. Labels whose sites are never occupied still get a 0 entry.
c = Counter()
for site in trajectory.sites:
    n_occupied_steps = sum(1 for occupants in site.trajectory if len(occupants) > 0)
    c[site.label] += n_occupied_steps
c

Counter({'Na3': 15,
         'Na4': 15,
         'Na2': 29,
         'Na5': 14,
         'Na1': 15,
         'Na6': 0,
         'i2': 0})

vs. all sites:

In [23]:
# Total number of sites of each type, whether occupied or not.
c_sites = Counter(trajectory.site_labels())
c_sites

Counter({'Na3': 16,
         'Na4': 16,
         'Na2': 32,
         'Na5': 16,
         'Na1': 16,
         'Na6': 8,
         'i2': 32})

In [24]:
# Presumably discards the single timestep recorded above so the full
# simulation can be analysed from a clean state - confirm against Trajectory.reset().
trajectory.reset()

# Read the simulation frames from the XDATCAR file.
xdatcar = Xdatcar('XDATCAR_Sn')

# Analyse every frame in turn (300 steps in the recorded output);
# progress='notebook' requests a notebook-style progress bar.
trajectory.trajectory_from_structures( xdatcar.structures, progress='notebook')

  0%|          | 0/300 [00:00<?, ? steps/s]

In [25]:
# Occupation probability per site type: the fraction of (site, timestep) pairs
# in which a site of that type holds at least one atom.
n_timesteps = len(trajectory.timesteps)
c_sites = Counter(trajectory.site_labels())

# Occupied (site, timestep) pairs, accumulated per site label.
c = Counter()
for site in trajectory.sites:
    c[site.label] += sum(1 for occupants in site.trajectory if len(occupants) > 0)

# Normalise by the number of sites of that type, then by the number of timesteps.
p_occ = {site_label: n_occupied / c_sites[site_label] / n_timesteps
         for site_label, n_occupied in c.items()}
p_occ

{'Na3': 0.911875,
 'Na4': 0.945625,
 'Na2': 0.8944791666666667,
 'Na5': 0.915,
 'Na1': 0.9354166666666667,
 'Na6': 0.0,
 'i2': 0.0015625}

In [26]:
# Sanity check: average number of occupied sites per timestep for each site
# type, followed by their total. The total is expected to equal the 88 Na atoms
# (this holds when no two atoms share a site).
mean_occupied_sites = {site_label: p_occ[site_label] * c_sites[site_label]
                       for site_label in c}
for site_label, mean_occupied in mean_occupied_sites.items():
    print( site_label, mean_occupied)
print( sum( mean_occupied_sites.values()))

Na3 14.59
Na4 15.13
Na2 28.623333333333335
Na5 14.64
Na1 14.966666666666667
Na6 0.0
i2 0.05
88.0
