Skip to content

Commit

Permalink
Use Threading for MP pull parallelism
Browse files Browse the repository at this point in the history
  • Loading branch information
matthewcarbone committed Sep 13, 2022
1 parent 4acd6ab commit 6f20e5b
Showing 1 changed file with 26 additions and 15 deletions.
41 changes: 26 additions & 15 deletions lightshow/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
utilizing existing data the user may have on their hard drive."""

from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
import json
import os
from pathlib import Path
Expand Down Expand Up @@ -55,12 +56,16 @@ def _fetch_from_MP(mpr, mpid, metadata_keys):
Returns
-------
tuple
dict
The structure (:class:`pymatgen.core.structure.Structure`) of interest
and the specified metadata.
and the specified metadata, as well as the mpid for reference.
"""

metadata = mpr.get_doc(mpid)
try:
metadata = mpr.get_doc(mpid)
except MPRestError as error:
warn(f"MPRestError pulling mpid={mpid}, error: {error}")
return {"mpid": mpid, "structure": None, "metadata": None}

if metadata_keys is not None:
metadata = {key: metadata[key] for key in metadata_keys}
Expand All @@ -69,10 +74,12 @@ def _fetch_from_MP(mpr, mpid, metadata_keys):
# The structure is precisely in the data pulled from get_doc:
structure = Structure.from_dict(metadata.pop("structure"))

return structure, metadata
return {"mpid": mpid, "structure": structure, "metadata": metadata}


def _from_mpids_list(mpids, api_key, metadata_keys, verbose=True):
def _from_mpids_list(
mpids, api_key, metadata_keys, verbose=True, concurrent_threads=20
):
"""Makes one large API call to the Materials Project database and pulls the
relevant structural files given a list of Materials Project IDs (mpids).
Expand All @@ -94,17 +101,21 @@ def _from_mpids_list(mpids, api_key, metadata_keys, verbose=True):
metadata (with the same keys).
"""

structures = dict()
metadatas = dict()
# Safely fetch all of the Materials Project structures matching the query
with MPRester(api_key) as mpr:
for mpid in tqdm(mpids, disable=not verbose):
try:
structure, metadata = _fetch_from_MP(mpr, mpid, metadata_keys)
except MPRestError as error:
warn(f"MPRestError pulling mpid={mpid}, error: {error}")
continue
structures[mpid] = structure.get_primitive_structure()
metadatas[mpid] = metadata
with ThreadPoolExecutor(max_workers=concurrent_threads) as executor:
futures = [
executor.submit(_fetch_from_MP, mpr, mpid, metadata_keys)
for mpid in mpids
]
results = [future.result() for future in as_completed(futures)]

structures = {
x["mpid"]: x["structure"] for x in results if x["structure"] is not None
}
metadatas = {
x["mpid"]: x["metadata"] for x in results if x["structure"] is not None
}

return {"structures": structures, "metadata": metadatas}

Expand Down

0 comments on commit 6f20e5b

Please sign in to comment.