In [19]:
from mp_api.client import MPRester
TARGET_PROPERTIES = ["formation_energy_per_atom", "band_gap"]

def fetch_structures_in_batches(api_key, total_limit=None, target_properties=[], chunk_size=500):
    from itertools import islice
    all_docs = []
    
    filter_kwargs = {
        "is_stable": True
    }

    with MPRester(api_key) as mpr:
        generator = mpr.materials.summary.search(
            **filter_kwargs,
            fields=["material_id", "structure", "formula_pretty", "formula_anonymous"] + TARGET_PROPERTIES + target_properties,
            num_chunks=total_limit,
            chunk_size=chunk_size
        )

        for doc in islice(generator, total_limit):
            all_docs.append(doc)

    print(f"Collected {len(all_docs)} structures.")
    
    return all_docs

In [20]:
MAPI = "Sdt6SkxvPcGn8RF6kCM7cTKbfRmydMII"

mp_docs = fetch_structures_in_batches(
    api_key=MAPI,
    total_limit=30,
    target_properties=[],
    chunk_size=500
)

Retrieving SummaryDoc documents: 100%|██████████| 15000/15000 [00:29<00:00, 503.51it/s]


Collected 30 structures.


In [22]:
struct = mp_docs[10].structure

mp_docs[10]

[4m[1mMPDataDoc<SummaryDoc>[0;0m[0;0m(
[1mformula_pretty[0;0m='Ac2IrRh',
[1mformula_anonymous[0;0m='ABC2',
[1mmaterial_id[0;0m=MPID(mp-1183093),
[1mstructure[0;0m=Structure Summary
Lattice
    abc : 5.234427767456992 5.234427767456992 5.234427767456992
 angles : 59.99999999999999 59.99999999999999 59.99999999999999
 volume : 101.41276773783284
      A : np.float64(0.0) np.float64(3.70129937) np.float64(3.70129937)
      B : np.float64(3.70129937) np.float64(0.0) np.float64(3.70129937)
      C : np.float64(3.70129937) np.float64(3.70129937) np.float64(-0.0)
    pbc : True True True
PeriodicSite: Ac (5.552, 5.552, 5.552) [0.75, 0.75, 0.75]
PeriodicSite: Ac (1.851, 1.851, 1.851) [0.25, 0.25, 0.25]
PeriodicSite: Ir (3.701, 3.701, 3.701) [0.5, 0.5, 0.5]
PeriodicSite: Rh (0.0, 0.0, 0.0) [0.0, -0.0, -0.0],
[1mformation_energy_per_atom[0;0m=-0.534733513749998,
[1mband_gap[0;0m=0.0,
)

In [None]:
from pymatgen.core import Structure

def mp_to_jarvis(mp_doc):
    struct: Structure = mp_doc.structure
    lattice = struct.lattice

    jarvis_dict = {
        'id': str(mp_doc.material_id),
        'formula': struct.composition.reduced_formula,
        'formation_energy_per_atom': mp_doc.formation_energy_per_atom,
        'band_gap': mp_doc.band_gap,
        'atoms': {
            'lattice_mat': [list(lattice.matrix[i]) for i in range(3)],
            'coords': [site.frac_coords.tolist() for site in struct],
            'elements': [str(site.specie) for site in struct],
            'abc': list(lattice.abc),
            'angles': list(lattice.angles),
            'cartesian': False,
            'props': ['' for _ in struct],
        }
    }

    return jarvis_dict


In [24]:
mp_to_jarvis(mp_docs[10])

{'id': 'mp-1183093',
 'formula': 'Ac2IrRh',
 'e_form': -0.534733513749998,
 'gap pbe': 0.0,
 'atoms': {'lattice_mat': [[np.float64(0.0),
    np.float64(3.70129937),
    np.float64(3.70129937)],
   [np.float64(3.70129937), np.float64(0.0), np.float64(3.70129937)],
   [np.float64(3.70129937), np.float64(3.70129937), np.float64(-0.0)]],
  'coords': [[0.75, 0.75, 0.75],
   [0.25, 0.25, 0.25],
   [0.5, 0.5, 0.5],
   [0.0, -0.0, -0.0]],
  'elements': ['Ac', 'Ac', 'Ir', 'Rh'],
  'abc': [5.234427767456992, 5.234427767456992, 5.234427767456992],
  'angles': [59.99999999999999, 59.99999999999999, 59.99999999999999],
  'cartesian': False,
  'props': ['', '', '', '']}}

In [33]:
mp_docs[10].structure.composition.reduced_formula

'Ac2IrRh'

In [None]:
from jarvis.db.figshare import data as load_jarvis_data
jv_data = load_jarvis_data("dft_3d", store_dir='../data')

Obtaining 3D dataset 76k ...
Reference:https://www.nature.com/articles/s41524-020-00440-1
Other versions:https://doi.org/10.6084/m9.figshare.6815699


100%|██████████| 40.8M/40.8M [00:07<00:00, 5.41MiB/s]


Loading the zipfile...
Loading completed.


In [38]:
jv_data[0].keys()

dict_keys(['jid', 'spg_number', 'spg_symbol', 'formula', 'formation_energy_peratom', 'func', 'optb88vdw_bandgap', 'atoms', 'slme', 'magmom_oszicar', 'spillage', 'elastic_tensor', 'effective_masses_300K', 'kpoint_length_unit', 'maxdiff_mesh', 'maxdiff_bz', 'encut', 'optb88vdw_total_energy', 'epsx', 'epsy', 'epsz', 'mepsx', 'mepsy', 'mepsz', 'modes', 'magmom_outcar', 'max_efg', 'avg_elec_mass', 'avg_hole_mass', 'icsd', 'dfpt_piezo_max_eij', 'dfpt_piezo_max_dij', 'dfpt_piezo_max_dielectric', 'dfpt_piezo_max_dielectric_electronic', 'dfpt_piezo_max_dielectric_ionic', 'max_ir_mode', 'min_ir_mode', 'n-Seebeck', 'p-Seebeck', 'n-powerfact', 'p-powerfact', 'ncond', 'pcond', 'nkappa', 'pkappa', 'ehull', 'Tc_supercon', 'dimensionality', 'efg', 'xml_data_link', 'typ', 'exfoliation_energy', 'spg', 'crys', 'density', 'poisson', 'raw_files', 'nat', 'bulk_modulus_kv', 'shear_modulus_gv', 'mbj_bandgap', 'hse_gap', 'reference', 'search'])

In [40]:
key_map = {
    "jid": "id",
    "formation_energy_peratom": "formation_energy_per_atom",
    "optb88vdw_bandgap": "band_gap"
}

jv_data = [
    {key_map.get(k, k): v for k, v in d.items()}
    for d in jv_data
]
jv_data[0].keys()

dict_keys(['id', 'spg_number', 'spg_symbol', 'formula', 'formation_energy_per_atom', 'func', 'band_gap', 'atoms', 'slme', 'magmom_oszicar', 'spillage', 'elastic_tensor', 'effective_masses_300K', 'kpoint_length_unit', 'maxdiff_mesh', 'maxdiff_bz', 'encut', 'optb88vdw_total_energy', 'epsx', 'epsy', 'epsz', 'mepsx', 'mepsy', 'mepsz', 'modes', 'magmom_outcar', 'max_efg', 'avg_elec_mass', 'avg_hole_mass', 'icsd', 'dfpt_piezo_max_eij', 'dfpt_piezo_max_dij', 'dfpt_piezo_max_dielectric', 'dfpt_piezo_max_dielectric_electronic', 'dfpt_piezo_max_dielectric_ionic', 'max_ir_mode', 'min_ir_mode', 'n-Seebeck', 'p-Seebeck', 'n-powerfact', 'p-powerfact', 'ncond', 'pcond', 'nkappa', 'pkappa', 'ehull', 'Tc_supercon', 'dimensionality', 'efg', 'xml_data_link', 'typ', 'exfoliation_energy', 'spg', 'crys', 'density', 'poisson', 'raw_files', 'nat', 'bulk_modulus_kv', 'shear_modulus_gv', 'mbj_bandgap', 'hse_gap', 'reference', 'search'])