# Obtaining data from the Materials Project with pymatgen

In [1]:
import os

import pymatgen
import pymatgen.analysis.local_env
import pymatgen.ext.matproj
import pymatgen.io.cif

In [None]:
# https://materialsproject.org/dashboard
# The key is read from the environment instead of being typed here, so the
# secret is never saved in (or committed with) the notebook. Set it before
# starting Jupyter, e.g.:  export PMG_MAPI_KEY=<your Materials Project API key>
# If the variable is unset, API_KEY is None.
# NOTE(review): pymatgen's legacy MPRester is documented to fall back to its
# own PMG_MAPI_KEY setting when api_key is None -- confirm for your version.
API_KEY = os.environ.get('PMG_MAPI_KEY')

In [3]:
# Materials Project client built with API_KEY; the queries in Case 1 and
# Case 2 below are both issued through this object.
request1 = pymatgen.ext.matproj.MPRester(api_key=API_KEY)

I use two ways of obtaining data:
- Case 1: download a large amount of data (e.g. every material in a space group)
- Case 2: download one specific material whose ID I already know

## Case 1: obtaining many entries

In [4]:
# Space-group numbers to query, one request per number.
# To download all materials, uncomment the next line instead:
#space_group_list = list(range(1, 230 + 1))
space_group_list = [221]

# Fields requested for every entry.
# properties from: https://github.com/materialsproject/pymatgen/blob/v2022.01.09/pymatgen/ext/matproj.py#L94
properties = (
    'energy',
    'energy_per_atom',
    'volume',
    'formation_energy_per_atom',
    'nsites',
    'unit_cell_formula',
    'pretty_formula',
    'is_hubbard',
    'elements',
    'nelements',
    'e_above_hull',
    'hubbards',
    'is_compatible',
    'spacegroup',
    'task_ids',
    'band_gap',
    'density',
    'icsd_id',
    'icsd_ids',
    'cif',
    'total_magnetization',
    'material_id',
    'oxide_type',
    'tags',
    'elasticity',
)



In [5]:
# Query one space group at a time and pool every returned entry in all_entries.
all_entries = []
for space_group_num in space_group_list:
    print('processing no.', space_group_num)
    criteria = {'spacegroup.number': {'$in': [space_group_num]}}
    entries_for_group = request1.query(criteria=criteria, properties=properties)
    all_entries.extend(entries_for_group)

processing no. 221


  0%|          | 0/3255 [00:00<?, ?it/s]

In [6]:
# Sanity check: how many entries came back, and what the first one looks like.
print(f'size: {len(all_entries)}')
if all_entries:
    print(all_entries[0])
else:
    # An empty result would otherwise raise IndexError on all_entries[0].
    print('no entries returned')

size: 3255


## Case 2: obtaining specified material(s)

In [7]:
# Fetch the entry for one known material id, including its initial structure.
# The result is a list (see the output of the next cell), so the entry itself
# is single_entry[0].
single_entry = request1.get_entries(
    chemsys_formula_id_criteria='mp-1004524',
    inc_structure='initial',
)

In [8]:
single_entry

[mp-1004524 ComputedStructureEntry - H1 Pb1 I3    (HPbI3)
 Energy (Uncorrected)     = -10.3529  eV (-2.0706  eV/atom)
 Correction               = -1.1370   eV (-0.2274  eV/atom)
 Energy (Final)           = -11.4899  eV (-2.2980  eV/atom)
 Energy Adjustments:
   MP2020 anion correction (I): -1.1370   eV (-0.2274  eV/atom)
 Parameters:
   run_type               = GGA
   is_hubbard             = False
   pseudo_potential       = {'functional': 'PBE', 'labels': ['H', 'Pb_d', 'I'], 'pot_type': 'paw'}
   hubbards               = {}
   potcar_symbols         = ['PBE H', 'PBE Pb_d', 'PBE I']
   oxide_type             = None
 Data:
   oxide_type             = None
   oxidation_states       = {'H': 1.0, 'Pb': 2.0, 'I': -1.0}]

In [9]:
single_entry[0].structure

Structure Summary
Lattice
    abc : 6.244213 6.244213 6.244213
 angles : 90.0 90.0 90.0
 volume : 243.46308866936582
      A : 6.244213 0.0 0.0
      B : 0.0 6.244213 0.0
      C : 0.0 0.0 6.244213
PeriodicSite: H (3.1221, 3.1221, 3.1221) [0.5000, 0.5000, 0.5000]
PeriodicSite: Pb (0.0000, 0.0000, 0.0000) [0.0000, 0.0000, 0.0000]
PeriodicSite: I (0.0000, 0.0000, 3.1221) [0.0000, 0.0000, 0.5000]
PeriodicSite: I (0.0000, 3.1221, 0.0000) [0.0000, 0.5000, 0.0000]
PeriodicSite: I (3.1221, 0.0000, 0.0000) [0.5000, 0.0000, 0.0000]

# Filtering octahedra

In [10]:
# Alternative: build the structure from a Case 1 entry by parsing its 'cif' field:
# struct=pymatgen.io.cif.CifParser.from_string(all_entries[0]['cif']).get_structures(primitive=False)[0]
# Here the structure attached to the Case 2 entry is analysed instead.
struct=single_entry[0].structure

In [11]:
# define algorithm: CrystalNN is used below to get per-site coordination numbers
nearest_neighbour_finder = pymatgen.analysis.local_env.CrystalNN(
    weighted_cn=False,
    cation_anion=False,
    distance_cutoffs=[0.5, 1],
    x_diff_weight=3.0,
    porous_adjustment=True,
    search_cutoff=7,
    fingerprint_length=None,
)

In [12]:
struct

Structure Summary
Lattice
    abc : 6.244213 6.244213 6.244213
 angles : 90.0 90.0 90.0
 volume : 243.46308866936582
      A : 6.244213 0.0 0.0
      B : 0.0 6.244213 0.0
      C : 0.0 0.0 6.244213
PeriodicSite: H (3.1221, 3.1221, 3.1221) [0.5000, 0.5000, 0.5000]
PeriodicSite: Pb (0.0000, 0.0000, 0.0000) [0.0000, 0.0000, 0.0000]
PeriodicSite: I (0.0000, 0.0000, 3.1221) [0.0000, 0.0000, 0.5000]
PeriodicSite: I (0.0000, 3.1221, 0.0000) [0.0000, 0.5000, 0.0000]
PeriodicSite: I (3.1221, 0.0000, 0.0000) [0.5000, 0.0000, 0.0000]

## execute for a single site

In [None]:
# Coordination number of site index 1, which is the Pb site in the structure
# summary printed above (sites are indexed from 0).
nearest_neighbour_finder.get_cn(structure=struct, n=1, use_weights=False)

## execute for all the sites

In [14]:
def try_nearest_neighbour_finder_onlycoorenv(nearest_neighbour_finder, mp_struct):
    """Return the coordination number of every site in ``mp_struct``.

    Parameters
    ----------
    nearest_neighbour_finder
        Object exposing ``get_cn(structure=..., n=..., use_weights=...)``;
        in this notebook a ``CrystalNN`` instance.
    mp_struct
        Structure exposing ``num_sites``; it is passed to ``get_cn`` unchanged.

    Returns
    -------
    list
        One item per site, in site order: the value returned by ``get_cn``,
        or ``None`` for a site where ``get_cn`` raised ``ValueError``.
    """
    list_coor_env = []

    for i in range(mp_struct.num_sites):
        try:  # this call often fails
            coordination_number = nearest_neighbour_finder.get_cn(
                structure=mp_struct, n=i, use_weights=False)
        except ValueError as err:
            # Keep going so one bad site does not lose the others, but report
            # which site failed and why so the None in the result is traceable.
            print(f'values error skipping... (site {i}: {err})')
            coordination_number = None
        list_coor_env.append(coordination_number)

    return list_coor_env

In [15]:
list_coor_env = try_nearest_neighbour_finder_onlycoorenv(nearest_neighbour_finder, struct)

In [16]:
list_coor_env

[0, 6, 2, 2, 2]

# Analyse distortions

In [17]:
import os
import sys

# adding path to the polyhedron_distortion
# expanduser('~') resolves the home directory even where HOME is not set
# (e.g. on Windows), and the membership check keeps sys.path free of
# duplicates when this cell is re-run.
programs_dir = os.path.join(os.path.expanduser('~'), 'programs')
if programs_dir not in sys.path:
    sys.path.append(programs_dir)
import polyhedron_distortion.polyhedron_analysis as poly

In [18]:
# Header for the four values printed on the next line
# (presumably in the same order as the returned array -- TODO confirm).
print('#Eg, T2g, T1u, T2u')
# Site index 1 is the Pb site, the only site with coordination number 6
# in list_coor_env above.
print(poly.calc_distortions_from_struct_octahedron(struct, 1))

#Eg, T2g, T1u, T2u
[1.84889275e-32 0.00000000e+00 0.00000000e+00 0.00000000e+00]
