In [19]:
from mp_api.client import MPRester
TARGET_PROPERTIES = ["formation_energy_per_atom", "band_gap"]

def fetch_structures_in_batches(api_key, total_limit=None, target_properties=[], chunk_size=500):
    from itertools import islice
    all_docs = []
    
    filter_kwargs = {
        "is_stable": True
    }

    with MPRester(api_key) as mpr:
        generator = mpr.materials.summary.search(
            **filter_kwargs,
            fields=["material_id", "structure", "formula_pretty", "formula_anonymous"] + TARGET_PROPERTIES + target_properties,
            num_chunks=total_limit,
            chunk_size=chunk_size
        )

        for doc in islice(generator, total_limit):
            all_docs.append(doc)

    print(f"Collected {len(all_docs)} structures.")
    
    return all_docs

In [20]:
MAPI = "Sdt6SkxvPcGn8RF6kCM7cTKbfRmydMII"

mp_docs = fetch_structures_in_batches(
    api_key=MAPI,
    total_limit=30,
    target_properties=[],
    chunk_size=500
)

Retrieving SummaryDoc documents: 100%|██████████| 15000/15000 [00:29<00:00, 503.51it/s]


Collected 30 structures.


In [22]:
struct = mp_docs[10].structure

mp_docs[10]

[4m[1mMPDataDoc<SummaryDoc>[0;0m[0;0m(
[1mformula_pretty[0;0m='Ac2IrRh',
[1mformula_anonymous[0;0m='ABC2',
[1mmaterial_id[0;0m=MPID(mp-1183093),
[1mstructure[0;0m=Structure Summary
Lattice
    abc : 5.234427767456992 5.234427767456992 5.234427767456992
 angles : 59.99999999999999 59.99999999999999 59.99999999999999
 volume : 101.41276773783284
      A : np.float64(0.0) np.float64(3.70129937) np.float64(3.70129937)
      B : np.float64(3.70129937) np.float64(0.0) np.float64(3.70129937)
      C : np.float64(3.70129937) np.float64(3.70129937) np.float64(-0.0)
    pbc : True True True
PeriodicSite: Ac (5.552, 5.552, 5.552) [0.75, 0.75, 0.75]
PeriodicSite: Ac (1.851, 1.851, 1.851) [0.25, 0.25, 0.25]
PeriodicSite: Ir (3.701, 3.701, 3.701) [0.5, 0.5, 0.5]
PeriodicSite: Rh (0.0, 0.0, 0.0) [0.0, -0.0, -0.0],
[1mformation_energy_per_atom[0;0m=-0.534733513749998,
[1mband_gap[0;0m=0.0,
)

In [None]:
from pymatgen.core import Structure

def mp_to_jarvis(mp_doc):
    struct: Structure = mp_doc.structure
    lattice = struct.lattice

    jarvis_dict = {
        'id': str(mp_doc.material_id),
        'formula': struct.composition.reduced_formula,
        'formation_energy_per_atom': mp_doc.formation_energy_per_atom,
        'band_gap': mp_doc.band_gap,
        'atoms': {
            'lattice_mat': [list(lattice.matrix[i]) for i in range(3)],
            'coords': [site.frac_coords.tolist() for site in struct],
            'elements': [str(site.specie) for site in struct],
            'abc': list(lattice.abc),
            'angles': list(lattice.angles),
            'cartesian': False,
            'props': ['' for _ in struct],
        }
    }

    return jarvis_dict


In [24]:
mp_to_jarvis(mp_docs[10])

{'id': 'mp-1183093',
 'formula': 'Ac2IrRh',
 'e_form': -0.534733513749998,
 'gap pbe': 0.0,
 'atoms': {'lattice_mat': [[np.float64(0.0),
    np.float64(3.70129937),
    np.float64(3.70129937)],
   [np.float64(3.70129937), np.float64(0.0), np.float64(3.70129937)],
   [np.float64(3.70129937), np.float64(3.70129937), np.float64(-0.0)]],
  'coords': [[0.75, 0.75, 0.75],
   [0.25, 0.25, 0.25],
   [0.5, 0.5, 0.5],
   [0.0, -0.0, -0.0]],
  'elements': ['Ac', 'Ac', 'Ir', 'Rh'],
  'abc': [5.234427767456992, 5.234427767456992, 5.234427767456992],
  'angles': [59.99999999999999, 59.99999999999999, 59.99999999999999],
  'cartesian': False,
  'props': ['', '', '', '']}}

In [33]:
mp_docs[10].structure.composition.reduced_formula

'Ac2IrRh'

In [None]:
from jarvis.db.figshare import data as load_jarvis_data
jv_data = load_jarvis_data("dft_3d", store_dir='../data')

Obtaining 3D dataset 76k ...
Reference:https://www.nature.com/articles/s41524-020-00440-1
Other versions:https://doi.org/10.6084/m9.figshare.6815699


100%|██████████| 40.8M/40.8M [00:07<00:00, 5.41MiB/s]


Loading the zipfile...
Loading completed.


In [None]:
import numpy as np
import matplotlib
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
df=pd.DataFrame(jv_data)

the_grid = GridSpec(4, 3)
plt.rcParams.update({'font.size': 18})
plt.figure(figsize=(16,14))

plt.subplot(the_grid[0, 0])
val=np.array(df['formation_energy_peratom'].replace('na',np.nan).dropna().values,dtype='float')

plt.hist(val,bins=np.arange(-4,3,.1))
plt.xlabel('Formation enrgies (eV/atom)')
plt.ylabel('Materials distribution')

plt.subplot(the_grid[0, 1])
val=np.array(df['optb88vdw_bandgap'].replace('na',np.nan).dropna().values,dtype='float')
plt.hist(val,bins=np.arange(0,8,.1))
plt.xlabel('OptB88vdW bandgaps (eV)')
#plt.ylabel('Materials distribution')

plt.subplot(the_grid[0, 2])
val=np.array(df['mbj_bandgap'].replace('na',np.nan).dropna().values,dtype='float')
plt.hist(val,bins=np.arange(0,8,.1))
plt.xlabel('TBmBJ bandgaps (eV)')
#plt.ylabel('Materials distribution')

plt.subplot(the_grid[1, 0])
val=np.array(df['bulk_modulus_kv'].replace('na',np.nan).dropna().values,dtype='float')
plt.hist(val,bins=np.arange(0,250,10))
plt.xlabel('Voigt-bulk modulus (GPa)')
#plt.ylabel('Materials distribution')


plt.subplot(the_grid[1, 1])
val=np.array(df['spillage'].replace('na',np.nan).dropna().values,dtype='float')
plt.hist(val,bins=np.arange(0,4,.1))
plt.xlabel('Spin-orbit spillage')
#plt.ylabel('Materials distribution')

plt.subplot(the_grid[1, 2])
val=np.array(df['slme'].replace('na',np.nan).dropna().values,dtype='float')
plt.hist(val,bins=np.arange(0,35,1))
plt.xlabel('Solar-SLME (%)')
#plt.ylabel('Materials distribution')


plt.subplot(the_grid[2, 0])
val=np.array(df['dfpt_piezo_max_dielectric'].replace('na',np.nan).dropna().values,dtype='float')
plt.hist(val,bins=np.arange(0,50,2))
#plt.ylabel('Materials distribution')
plt.xlabel('Max.Piezo.coeff(C/m2)')


plt.subplot(the_grid[2, 1])
val=np.array(df['n-powerfact'].replace('na',np.nan).dropna().values,dtype='float')
plt.hist(val,bins=np.arange(0,4000,50))
#plt.ylabel('Materials distribution')
plt.xlabel('n-Power factor (muW/(mK)2)')

plt.subplot(the_grid[2, 2])
val=np.array(df['n-Seebeck'].replace('na',np.nan).dropna().values,dtype='float')
plt.hist(val,bins=np.arange(-600,0,50))
#plt.ylabel('Materials distribution')
plt.xlabel('n-Seebeck coeff (muV/K)')


plt.subplot(the_grid[3, 0])
val=np.array(df['magmom_oszicar'].replace('na',np.nan).dropna().values,dtype='float')
plt.hist(val,bins=np.arange(0,20,1))
#plt.ylabel('Materials distribution')
plt.xlabel('Mag.mom (muB)')


plt.subplot(the_grid[3, 1])
val=np.array(df['encut'].replace('na',np.nan).dropna().values,dtype='float')
plt.hist(val,bins=np.arange(400,1500,50))
#plt.ylabel('Materials distribution')
plt.xlabel('ENCUT (eV)')



plt.subplot(the_grid[3, 2])
val=np.sqrt(np.array(df['epsx'].replace('na',np.nan).dropna().values,dtype='float'))
plt.hist(val,bins=np.arange(0,25,1))
#plt.ylabel('Materials distribution')
plt.xlabel('OptB88vdW Refractive index-x')

plt.tight_layout()

In [38]:
jv_data[0].keys()

dict_keys(['jid', 'spg_number', 'spg_symbol', 'formula', 'formation_energy_peratom', 'func', 'optb88vdw_bandgap', 'atoms', 'slme', 'magmom_oszicar', 'spillage', 'elastic_tensor', 'effective_masses_300K', 'kpoint_length_unit', 'maxdiff_mesh', 'maxdiff_bz', 'encut', 'optb88vdw_total_energy', 'epsx', 'epsy', 'epsz', 'mepsx', 'mepsy', 'mepsz', 'modes', 'magmom_outcar', 'max_efg', 'avg_elec_mass', 'avg_hole_mass', 'icsd', 'dfpt_piezo_max_eij', 'dfpt_piezo_max_dij', 'dfpt_piezo_max_dielectric', 'dfpt_piezo_max_dielectric_electronic', 'dfpt_piezo_max_dielectric_ionic', 'max_ir_mode', 'min_ir_mode', 'n-Seebeck', 'p-Seebeck', 'n-powerfact', 'p-powerfact', 'ncond', 'pcond', 'nkappa', 'pkappa', 'ehull', 'Tc_supercon', 'dimensionality', 'efg', 'xml_data_link', 'typ', 'exfoliation_energy', 'spg', 'crys', 'density', 'poisson', 'raw_files', 'nat', 'bulk_modulus_kv', 'shear_modulus_gv', 'mbj_bandgap', 'hse_gap', 'reference', 'search'])

In [40]:
key_map = {
    "jid": "id",
    "formation_energy_peratom": "formation_energy_per_atom",
    "optb88vdw_bandgap": "band_gap"
}

jv_data = [
    {key_map.get(k, k): v for k, v in d.items()}
    for d in jv_data
]
jv_data[0].keys()

dict_keys(['id', 'spg_number', 'spg_symbol', 'formula', 'formation_energy_per_atom', 'func', 'band_gap', 'atoms', 'slme', 'magmom_oszicar', 'spillage', 'elastic_tensor', 'effective_masses_300K', 'kpoint_length_unit', 'maxdiff_mesh', 'maxdiff_bz', 'encut', 'optb88vdw_total_energy', 'epsx', 'epsy', 'epsz', 'mepsx', 'mepsy', 'mepsz', 'modes', 'magmom_outcar', 'max_efg', 'avg_elec_mass', 'avg_hole_mass', 'icsd', 'dfpt_piezo_max_eij', 'dfpt_piezo_max_dij', 'dfpt_piezo_max_dielectric', 'dfpt_piezo_max_dielectric_electronic', 'dfpt_piezo_max_dielectric_ionic', 'max_ir_mode', 'min_ir_mode', 'n-Seebeck', 'p-Seebeck', 'n-powerfact', 'p-powerfact', 'ncond', 'pcond', 'nkappa', 'pkappa', 'ehull', 'Tc_supercon', 'dimensionality', 'efg', 'xml_data_link', 'typ', 'exfoliation_energy', 'spg', 'crys', 'density', 'poisson', 'raw_files', 'nat', 'bulk_modulus_kv', 'shear_modulus_gv', 'mbj_bandgap', 'hse_gap', 'reference', 'search'])

In [1]:
from jarvis.db.figshare import data as load_jarvis_data

def get_jid_data(jid):
    d = load_jarvis_data("dft_3d", store_dir='../data/mpjv')
    for i in d:
        if i["jid"] == jid:
            return i

In [2]:
entry = get_jid_data("JVASP-127558")

Obtaining 3D dataset 76k ...
Reference:https://www.nature.com/articles/s41524-020-00440-1
Other versions:https://doi.org/10.6084/m9.figshare.6815699
Loading the zipfile...
Loading completed.


In [3]:
for k, v in entry.items():
    print(f"{k}: {v}")

jid: JVASP-127558
spg_number: 8
spg_symbol: Cm
formula: HgF3
formation_energy_peratom: 4.81123
func: OptB88vdW
optb88vdw_bandgap: 0.0
atoms: {'lattice_mat': [[3.061326360572636, -0.1168835715361886, 0.0], [-1.0997956513103386, 2.859340949659272, 0.0], [0.0, 0.0, 6.4226958085859485]], 'coords': [[0.50144670198092, 0.7010841144674014, 1.7222177637976417], [0.7927976473443289, 1.1084285410373336, 3.9455860729306416], [1.1656271047158158, 1.6296899407052927, 2.4713833785380857], [1.463188545958935, 2.045717403789973, 4.7062127847336335]], 'elements': ['Hg', 'F', 'F', 'F'], 'abc': [3.06356, 3.063558, 6.4227], 'angles': [90.0, 90.0, 113.2249], 'cartesian': True, 'props': ['', '', '', '']}
slme: na
magmom_oszicar: 0.0
spillage: na
elastic_tensor: na
effective_masses_300K: {'p': 'na', 'n': 'na'}
kpoint_length_unit: 25
maxdiff_mesh: na
maxdiff_bz: na
encut: 500
optb88vdw_total_energy: 5.52064
epsx: 7.2429
epsy: 9.4481
epsz: 14.6849
mepsx: na
mepsy: na
mepsz: na
modes: na
magmom_outcar: 0.0
max_

In [4]:
import nglview as nv
from ase import Atoms
import numpy as np

atoms_dict = entry.get("atoms")

positions = np.array(atoms_dict['coords'])
symbols = atoms_dict['elements']
cell = np.array(atoms_dict['lattice_mat'])

ase_atoms = Atoms(symbols=symbols, positions=positions, cell=cell, pbc=True)

nv.show_ase(ase_atoms)




NGLWidget()