In [1]:
import pickle
import gzip
import pandas as pd
import numpy as np
from pymatgen.util import coord
import nglview



In [2]:
# SECURITY: pickle.load can run arbitrary code while deserialising, so only open
# data.pickle.gz when it comes from a trusted source.
with gzip.open('data.pickle.gz', 'rb') as fh:
    data = pickle.load(fh)

In [3]:
data.shape

(5933, 11)

In [4]:

data.head()

Unnamed: 0_level_0,descriptor_id,energy,energy_per_atom,fermi_level,homo,lumo,initial_structure,defect_representation,formation_energy,formation_energy_per_site,band_gap
_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
6141cf0efbfd4bd9ab2c2f7e,6141cf0efbfd4bd9ab2c2f7c,-1391.3404,-7.284505,-0.199707,-0.6754,0.4698,[[1.27612629e-07 1.84192955e+00 3.71975100e+00...,[[-7.98855051 17.50569919 5.28204642] X0+],2.6457,2.6457,1.1452
6141cf0f51c1cbd9654b8870,6141cf0e51c1cbd9654b886e,-1384.5528,-7.28712,-0.220627,-0.6852,0.3991,[[1.27612629e-07 1.84192955e+00 3.71975100e+00...,"[[14.34365939 6.45412142 2.15745558] X0+, [9...",5.3063,2.65315,1.0843
6141cf0fe689ecc4c43cdd4b,6141cf0fe689ecc4c43cdd49,-1397.1961,-7.277063,-0.183537,-0.6931,1.1102,[[1.27612629e-07 1.84192955e+00 3.71975100e+00...,[[ 4.78547342 17.49833154 2.15486663] Se],0.279,0.279,1.8033
6141cf10b842c2e72e2f2d44,6141cf10b842c2e72e2f2d42,-1396.2576,-7.272175,-0.179802,-0.6916,1.1179,[[1.27612629e-07 1.84192955e+00 3.71975100e+00...,"[[ 9.57094697 20.26122598 2.15486663] Se, [20...",0.5795,0.28975,1.8095
6141cf1051c1cbd9654b8872,6141cf0e51c1cbd9654b886e,-1384.5327,-7.287014,-0.21319,-0.6718,0.4384,[[1.27612629e-07 1.84192955e+00 3.71975100e+00...,"[[ 7.96302799 17.50569919 2.15745558] X0+, [-...",5.3264,2.6632,1.1102


In [5]:
data.energy_per_atom.min()

-7.297261052631578

In [6]:
# Use the row at position 299 as the worked example; s0 is its initial_structure
# and is reused by the lattice, symmetry, plotting and neighbour cells below.
s0 = data.iloc[299].initial_structure

In [7]:
s0.lattice

Lattice
    abc : 25.5225256 25.5225256 14.879004
 angles : 90.0 90.0 119.99999999999999
 volume : 8393.668021812642
      A : 25.5225256 0.0 1.5628039641098191e-15
      B : -12.761262799999994 22.10315553833868 1.5628039641098191e-15
      C : 0.0 0.0 14.879004

In [13]:
# Defect description stored in the same row (position 299) that s0 was taken from.
d0 = data.iloc[299].defect_representation

In [14]:
d0

Structure Summary
Lattice
    abc : 25.5225256 25.5225256 14.879004
 angles : 90.0 90.0 119.99999999999999
 volume : 8393.668021812642
      A : 25.5225256 0.0 1.5628039641098191e-15
      B : -12.761262799999994 22.10315553833868 1.5628039641098191e-15
      C : 0.0 0.0 14.879004
PeriodicSite: X0+ (20.7498, 4.5975, 3.7198) [0.9170, 0.2080, 0.2500]
PeriodicSite: X0+ (22.3194, 3.6912, 2.1575) [0.9580, 0.1670, 0.1450]

In [15]:
d0[0], d0[1]

(PeriodicSite: X0+ (20.7498, 4.5975, 3.7198) [0.9170, 0.2080, 0.2500],
 PeriodicSite: X0+ (22.3194, 3.6912, 2.1575) [0.9580, 0.1670, 0.1450])

In [16]:
d0[0].is_periodic_image(d0[1])

False

In [17]:
coord.pbc_diff(d0[0].frac_coords, d0[1].frac_coords)

array([-0.041,  0.041,  0.105])

In [18]:
d0[0].to_unit_cell()

PeriodicSite: X0+ (20.7498, 4.5975, 3.7198) [0.9170, 0.2080, 0.2500]

In [19]:
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
# Report the space-group symbol that SpacegroupAnalyzer finds for s0.
finder = SpacegroupAnalyzer(s0)
print(f"The spacegroup is {finder.get_space_group_symbol()}")

The spacegroup is Cm


In [20]:
# Disabled: optional export of the example structure s0 to a JSON file.
# j = s0.to_json()
# with open("s0.json", "w") as fh:
#     fh.write(j)

In [27]:
# https://gist.github.com/lan496/3f60b6474750a6fd2b4237e820fbfea4
def plot3d(structure, spacefill=True, show_axes=True):
    """Show ``structure`` in an nglview widget with its unit cell drawn.

    Each site's fractional coordinates are first wrapped into [0, 1). Every
    site is then also copied with a shift of ``1 - tol`` along each
    combination of lattice axes, and a copy is kept only when all of its
    fractional coordinates stay below ``1 + tol``. In effect, sites lying
    (numerically) on a low face, edge or corner of the cell are repeated on
    the opposite one so the drawn cell looks complete.

    Parameters
    ----------
    structure
        Iterable of sites exposing ``species`` and ``frac_coords``, with a
        ``lattice`` attribute (a pymatgen ``Structure`` in this notebook).
    spacefill : bool
        If true, add a spacefill representation and remove the
        ball-and-stick one; otherwise add a ball-and-stick representation.
    show_axes : bool
        If true, add three labelled arrows ("x-axis", "y-axis", "z-axis")
        that all start at (-4, -4, -4).

    Returns
    -------
    The widget created by ``nglview.show_pymatgen``.
    """
    from itertools import product
    from pymatgen.core import Structure
    from pymatgen.core.sites import PeriodicSite

    tol = 1e-8
    # Per-axis choices: leave the site where it is, or push it (almost) one cell over.
    shifts = [np.array(offset) for offset in product([0, 1 - tol], repeat=3)]

    display_sites = []
    for atom in structure:
        occupant = atom.species
        wrapped = np.remainder(atom.frac_coords, 1)
        for shift in shifts:
            shifted = wrapped + shift
            # Discard copies that would land outside the (tolerance-padded) cell.
            if not np.all(shifted < 1 + tol):
                continue
            display_sites.append(
                PeriodicSite(species=occupant, coords=shifted, lattice=structure.lattice)
            )

    view = nglview.show_pymatgen(Structure.from_sites(display_sites))
    view.add_unitcell()

    if not spacefill:
        view.add_ball_and_stick()
    else:
        view.add_spacefill(radius_type='vdw', radius=0.5, color_scheme='element')
        view.remove_ball_and_stick()

    if show_axes:
        # (arrow tip, colour triple, label) for each Cartesian axis.
        axis_arrows = (
            ([0, -4, -4], [1, 0, 0], "x-axis"),
            ([-4, 0, -4], [0, 1, 0], "y-axis"),
            ([-4, -4, 0], [0, 0, 1], "z-axis"),
        )
        for tip, colour, label in axis_arrows:
            view.shape.add_arrow([-4, -4, -4], tip, colour, 0.5, label)

    # view.camera = "perspective"
    return view

In [31]:
plot3d(s0)

NGLWidget()

In [22]:
from pymatgen.analysis import local_env

In [23]:
from pymatgen.core import Structure
def add_defects_to_structure(state):
    """Build a structure from a row's initial sites plus its 'X0+' defect sites.

    ``state`` must expose ``initial_structure`` and ``defect_representation``,
    each with a ``sites`` attribute. The initial site list is copied, so the
    original structure is not modified. Only defect sites whose
    ``species_string`` equals ``'X0+'`` are appended (presumably the dummy
    species that marks a vacancy -- confirm against the dataset description);
    all other defect sites are ignored.

    Returns the result of ``Structure.from_sites`` on the combined list.
    """
    combined = state.initial_structure.sites.copy()
    combined.extend(
        defect
        for defect in state.defect_representation.sites
        if defect.species_string == 'X0+'
    )
    return Structure.from_sites(combined)



In [26]:
nglview.show_pymatgen(add_defects_to_structure(data.iloc[80]))

NGLWidget()

In [35]:
nglview.show_pymatgen(add_defects_to_structure(data.iloc[180]))

NGLWidget()

In [41]:
br = local_env.BrunnerNN_real() 

In [49]:
# Neighbour records for s0 as reported by the BrunnerNN_real instance `br`; the
# second argument (0) is presumably the index of the central site -- confirm
# against the pymatgen documentation.
l = br.get_nn_info(s0, 0)

In [50]:
len(l)

18

In [48]:
s0[1]

PeriodicSite: Mo (-1.5952, 4.6048, 3.7198) [0.0417, 0.2083, 0.2500]

In [51]:
l

[{'site': PeriodicSite: S (-1.5952, 0.9210, 2.1549) [-0.0417, 0.0417, 0.1448],
  'image': (-1, 0, 0),
  'weight': 0.9999998606112003,
  'site_index': 121},
 {'site': PeriodicSite: S (-0.0000, -1.8419, 2.1549) [-0.0417, -0.0833, 0.1448],
  'image': (-1, -1, 0),
  'weight': 0.6038620537980465,
  'site_index': 128},
 {'site': PeriodicSite: S (-1.5952, 0.9210, 5.2846) [-0.0417, 0.0417, 0.3552],
  'image': (-1, 0, 0),
  'weight': 0.9999998606112004,
  'site_index': 185},
 {'site': PeriodicSite: S (-0.0000, -1.8419, 5.2846) [-0.0417, -0.0833, 0.3552],
  'image': (-1, -1, 0),
  'weight': 0.6038620537980466,
  'site_index': 191},
 {'site': PeriodicSite: Mo (-1.5952, -0.9210, 3.7198) [-0.0833, -0.0417, 0.2500],
  'image': (-1, -1, 0),
  'weight': 0.7575843782493873,
  'site_index': 63},
 {'site': PeriodicSite: S (-0.0000, 3.6839, 2.1549) [0.0833, 0.1667, 0.1448],
  'image': (0, 0, 0),
  'weight': 0.9999998606112008,
  'site_index': 67},
 {'site': PeriodicSite: S (-3.1903, 3.6839, 2.1549) [-0.04

In [58]:
# Keep only the neighbour records whose 'image' entry is 0 in each of its
# first three components.
l0 = [nbr for nbr in l if all(nbr['image'][axis] == 0 for axis in range(3))]

In [60]:
len(l0)

9

In [61]:
l0

[{'site': PeriodicSite: S (-0.0000, 3.6839, 2.1549) [0.0833, 0.1667, 0.1448],
  'image': (0, 0, 0),
  'weight': 0.9999998606112008,
  'site_index': 67},
 {'site': PeriodicSite: S (-0.0000, 3.6839, 5.2846) [0.0833, 0.1667, 0.3552],
  'image': (0, 0, 0),
  'weight': 0.9999998606112012,
  'site_index': 130},
 {'site': PeriodicSite: Mo (-1.5952, 4.6048, 3.7198) [0.0417, 0.2083, 0.2500],
  'image': (0, 0, 0),
  'weight': 0.7575843782493874,
  'site_index': 1},
 {'site': PeriodicSite: S (1.5952, 0.9210, 2.1549) [0.0833, 0.0417, 0.1448],
  'image': (0, 0, 0),
  'weight': 0.9999999999999997,
  'site_index': 66},
 {'site': PeriodicSite: S (1.5952, 0.9210, 5.2846) [0.0833, 0.0417, 0.3552],
  'image': (0, 0, 0),
  'weight': 1.0,
  'site_index': 129},
 {'site': PeriodicSite: S (3.1903, 3.6839, 2.1549) [0.2083, 0.1667, 0.1448],
  'image': (0, 0, 0),
  'weight': 0.6038620537980466,
  'site_index': 75},
 {'site': PeriodicSite: S (3.1903, 3.6839, 5.2846) [0.2083, 0.1667, 0.3552],
  'image': (0, 0, 0),