In [1]:
from mp_api.client import MPRester
from pymatgen.analysis.pourbaix_diagram import PourbaixDiagram, PourbaixPlotter, PourbaixEntry
from api_key import APIKEY

API_KEY = APIKEY

# Screening criteria passed to the Materials Project summary search: entries
# flagged stable, energy above hull within (0, 0.008), band gap within
# [0.85, 2.15]. Only the material_id field is requested.
search_criteria = {
    "energy_above_hull": (0, 0.008),
    "band_gap": [0.85, 2.15],
    "is_stable": True,
    "fields": ["material_id"],
}

with MPRester(API_KEY) as mpr:
    docs = mpr.materials.summary.search(**search_criteria)

# Flat list of the identifiers of every document returned by the search.
materials = [summary_doc.material_id for summary_doc in docs]

Retrieving SummaryDoc documents:   0%|          | 0/5081 [00:00<?, ?it/s]

In [2]:
def get_material_entries(material_ids, batch_size=6000):
    """Fetch the materials documents for the given ids, one batch at a time.

    Args:
        material_ids: Sequence of material identifiers to look up.
        batch_size: Maximum number of identifiers sent in a single search call.

    Returns:
        A list with the results of every batch, concatenated in request order.
    """
    collected = []
    with MPRester(API_KEY) as client:
        for offset in range(0, len(material_ids), batch_size):
            chunk = material_ids[offset:offset + batch_size]
            collected += client.materials.search(material_ids=chunk)
    return collected

mp_entries = get_material_entries(material_ids=materials)

Retrieving MaterialsDoc documents:   0%|          | 0/5081 [00:00<?, ?it/s]

In [3]:
# Work on a small sample of the fetched documents. Every per-material list
# below is derived from the SAME filtered slice of mp_entries, so position i
# in chemsys_list, composition_list, materials_id and materials_gga always
# refers to the same material. (Filtering each list separately, or slicing the
# ids from `materials` instead of from the documents, can silently misalign
# them; the search results are not shown to come back in request order.)
SAMPLE_SIZE = 100
sample_entries = [
    entry for entry in mp_entries[:SAMPLE_SIZE]
    if hasattr(entry, 'chemsys')
    and hasattr(entry, 'composition')
    and hasattr(entry, 'material_id')
]

chemsys_list = [entry.chemsys for entry in sample_entries]
composition_list = [entry.composition for entry in sample_entries]
materials_id = [entry.material_id for entry in sample_entries]
# f-string formatting works whether the id is a plain str or an id object.
materials_gga = [f"{material}-GGA" for material in materials_id]

In [4]:
from pymatgen.core.composition import Composition

def remove_o_and_h(composition):
    """Return a new Composition without oxygen and hydrogen.

    Args:
        composition: Object exposing ``get_el_amt_dict()`` (element symbol ->
            amount), e.g. a pymatgen ``Composition``.

    Returns:
        A ``Composition`` holding every remaining element with its original
        amount. The result is empty if the input contained only O and/or H.
    """
    # Build from the amount dict directly: this keeps fractional amounts
    # intact (truncating with int() would turn 0.5 into 0 and 2.5 into 2) and
    # avoids a round-trip through a formula string.
    remaining = {
        element: amount
        for element, amount in composition.get_el_amt_dict().items()
        if element not in ('O', 'H')
    }
    return Composition(remaining)

filtered_molecules = [remove_o_and_h(molecule) for molecule in composition_list]

In [5]:
from pymatgen.core.composition import Composition

def calculate_percent_composition(molecule):
    """Return the atomic fraction of each element in ``molecule``.

    Args:
        molecule: Mapping from element objects (each with a ``symbol``
            attribute) to their amounts, e.g. a pymatgen ``Composition``.

    Returns:
        Dict mapping element symbol -> amount / total amount. An empty dict is
        returned when the total amount is zero (for instance a composition
        left empty after filtering), instead of raising ZeroDivisionError.
    """
    total_atoms = sum(molecule.values())
    if not total_atoms:
        # Nothing to normalise; dividing would abort the whole cell.
        return {}
    return {element.symbol: count / total_atoms for element, count in molecule.items()}

# Atomic fractions for every O/H-stripped composition, in the same order.
percent_compositions = list(map(calculate_percent_composition, filtered_molecules))

In [6]:
import time
import concurrent.futures


# Settings for the batched Pourbaix-entry download loop in this cell.
BATCH_SIZE = 5  # Number of chemical systems to process in each batch
SLEEP_BETWEEN_BATCHES = 30  # Seconds to wait between each batch
TIMEOUT = 10  # Seconds to wait for each API call's result before reporting a timeout

def fetch_pourbaix_entries(chemsys):
    """Download the Pourbaix entries for one chemical system.

    Opens a fresh MPRester session for the call and closes it before returning.

    Args:
        chemsys: Chemical system passed through to ``get_pourbaix_entries``.

    Returns:
        Whatever ``MPRester.get_pourbaix_entries`` returns for ``chemsys``.
    """
    with MPRester(API_KEY) as client:
        entries = client.get_pourbaix_entries(chemsys)
    return entries

# pbx_data gets exactly one item per (chemsys, composition) pair: either a
# PourbaixDiagram or the placeholder string 'timeout' / 'error'.
# pbx_entries only collects the entry lists of downloads that succeeded.
pbx_data = []
pbx_entries = []

total_batches = (len(chemsys_list) + BATCH_SIZE - 1) // BATCH_SIZE

for batch_start in range(0, len(chemsys_list), BATCH_SIZE):
    batch_end = min(batch_start + BATCH_SIZE, len(chemsys_list))
    current_batch = zip(chemsys_list[batch_start:batch_end], percent_compositions[batch_start:batch_end])

    for chemsys, percent_comp in current_batch:
        # The executor is managed by hand instead of with a `with` block:
        # leaving a `with ThreadPoolExecutor()` block waits for the worker to
        # finish, so a hung request would still stall the loop after the
        # timeout had been reported.
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        future = executor.submit(fetch_pourbaix_entries, chemsys)
        try:
            pourbaix_entries = future.result(timeout=TIMEOUT)
            pbx_entries.append(pourbaix_entries)
            # Diagram construction runs in this thread and is not covered by TIMEOUT.
            pbx_diagram = PourbaixDiagram(entries=pourbaix_entries, comp_dict=percent_comp)
            pbx_data.append(pbx_diagram)
        except concurrent.futures.TimeoutError:
            print(f"Timeout occurred for {chemsys}")
            pbx_data.append('timeout')
        except Exception as e:
            print(f"An error occurred for {chemsys}: {e}")
            pbx_data.append('error')
        finally:
            # Do not block on a request that is still running; the abandoned
            # worker thread finishes (or fails) in the background.
            executor.shutdown(wait=False)

    print(f"Processed batch {batch_start // BATCH_SIZE + 1}/{total_batches}")
    if batch_end < len(chemsys_list):
        time.sleep(SLEEP_BETWEEN_BATCHES)  # Pause between batches, but not after the last one


In [None]:
# For every sampled composition, pick the downloaded Pourbaix entry with
# exactly the same formula. matches[i] lines up with composition_list[i]; the
# string "None" marks compositions for which nothing was found.
matches = []

# Index the entries by exact formula once, instead of re-stringifying every
# entry for every composition. Exact comparison also avoids the false
# positives of a substring test on str(entry) ("Fe2 O3" is contained in
# "Li1 Fe2 O3" and in "Fe2 O30").
entries_by_formula = {}
for sublist in pbx_entries:
    if not isinstance(sublist, list):
        continue
    for entry in sublist:
        # Entries tagged as ions are skipped so that an elemental solid is
        # never paired with a dissolved ion of the same element.
        # NOTE(review): relies on the entry exposing `phase_type` and
        # `composition` as pymatgen's PourbaixEntry does; confirm for the
        # installed version.
        if getattr(entry, "phase_type", None) == "Ion":
            continue
        entry_composition = getattr(entry, "composition", None)
        entry_formula = getattr(entry_composition, "formula", None)
        if entry_formula:
            # setdefault keeps the first entry seen for a formula, i.e. the
            # same precedence as a front-to-back search.
            entries_by_formula.setdefault(entry_formula, entry)

for index, comp in enumerate(composition_list):
    comp_formula = comp.formula
    match = entries_by_formula.get(comp_formula) if comp_formula else None

    if match is None:
        print(f"No match found for: {comp_formula}")
        matches.append("None")
    else:
        matches.append(match)


In [None]:
# Conditions at which the decomposition energy is evaluated.
EVALUATION_PH = 7
EVALUATION_POTENTIAL = 0  # NOTE(review): presumably volts, per pymatgen's (entry, pH, V) signature; confirm

# stabilities[i] is the decomposition energy of matches[i] in pbx_data[i],
# formatted to 3 significant figures, or the string "error" when it could not
# be computed.
stabilities = []

for index, (diagram, entry) in enumerate(zip(pbx_data, matches)):
    # Placeholders left by earlier cells ('timeout' / 'error' diagrams and
    # "None" matches) are known failures; record them without a noisy message.
    if isinstance(diagram, str) or isinstance(entry, str):
        stabilities.append("error")
        continue

    try:
        stability = diagram.get_decomposition_energy(entry, EVALUATION_PH, EVALUATION_POTENTIAL)
        stabilities.append(f"{stability:.3g}")
    except Exception as exc:
        # Keep going, but say what went wrong so genuine bugs are not hidden
        # behind the same "error" marker as the known placeholders.
        print(f"Could not compute stability at index {index}: {exc}")
        stabilities.append("error")

print(stabilities)


In [None]:
# Pair every sampled material id with its stability string.
combined_list = [*zip(materials_id, stabilities)]
# Drop the pairs whose stability could not be computed.
data_without_error = [pair for pair in combined_list if pair[1] != 'error']
# Keep the pairs whose numeric stability is at most 0.5.
filtered_data = [pair for pair in data_without_error if float(pair[1]) <= 0.5]
# Identifiers of the materials that passed the threshold.
mp_identifiers = [mp_id for mp_id, _stability in filtered_data]

In [None]:
# Report how many materials survived each stage of the screening.
print(f"total materials: {len(materials_id)}")
print(f"materials without error: {len(data_without_error)}")
print(f"stable materials: {len(filtered_data)}")
