In [None]:
# !!! If you only want to reproduce our results, please skip this step.

# Download Materials Project (MP) data for elastic properties.
# !!! Important: The official Materials Project (MP) API currently does not support downloading data for a specified database version.
# (e.g., our version: 2023.11.01, link: https://materialsproject-build.s3.amazonaws.com/index.html#collections/2023-11-01/). 
# As a result, running this code will download data from the latest available version (2025).
# Although the number of entries remains the same, the specific content of the data may differ.
# To ensure reproducibility, please use the dataset provided in the `Data` folder:
# `Data/dft_dataset/download_data/mp_elastic_stable.json` and `Data/dft_dataset/download_data/mp_elastic_unstable.json`.
# These file names are the same as those of the files produced by running this code.

# The latest version of mp_api is not compatible with pymatgen==2023.12.18, so if you want to download the data, please use the latest version of pymatgen.
# However, our description generation part is based on pymatgen==2023.12.18, so if you want to run the description generation part, please switch back to pymatgen==2023.12.18.
import os

from monty.serialization import dumpfn
from mp_api.client import MPRester

# Prefer the MP_API_KEY environment variable so the key does not have to be
# saved inside the notebook; otherwise replace the placeholder with your key.
api_key = os.environ.get("MP_API_KEY", "Your api key here")


def _read_mp_ids(csv_path):
    """Return the material IDs listed in ``csv_path``.

    The first line is treated as a header and dropped. Surrounding whitespace
    is stripped and blank lines are skipped, so a trailing empty line in the
    CSV does not end up as an empty material ID in the API query.
    """
    with open(csv_path, "r") as f:
        rows = f.read().splitlines()[1:]
    return [row.strip() for row in rows if row.strip()]


# load stable mp_ids list
stable_mp_ids = _read_mp_ids("../../Data/dft_dataset/download_data/stable_data_mp_ids.csv")
# load unstable mp_ids list
unstable_mp_ids = _read_mp_ids("../../Data/dft_dataset/download_data/unstable_data_mp_ids.csv")
print("Stable mp_ids: ", len(stable_mp_ids))
print("Unstable mp_ids: ", len(unstable_mp_ids))

# Query the elasticity endpoint once per ID list; the client is closed when
# the `with` block exits.
with MPRester(api_key) as mpr:
    stable_materials = mpr.materials.elasticity.search(material_ids=stable_mp_ids)
    unstable_materials = mpr.materials.elasticity.search(material_ids=unstable_mp_ids)

# Serialize the query results to JSON files in the current working directory.
dumpfn(stable_materials, "mp_elastic_stable.json")
dumpfn(unstable_materials, "mp_elastic_unstable.json")

In [2]:
# This cell assumes the download step has already been done: the two JSON
# files are expected in the working directory, copied over from
# `Data/dft_dataset/download_data`.
import json


def _read_json(path):
    """Open ``path`` and return its parsed JSON content."""
    with open(path) as handle:
        return json.load(handle)


stable_data = _read_json("mp_elastic_stable.json")
unstable_data = _read_json("mp_elastic_unstable.json")

In [None]:
from robocrys import StructureCondenser, StructureDescriber
from pymatgen.io.vasp import Poscar
from pymatgen.core.structure import Structure
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from tqdm import tqdm

def describe_structures(entry):
    """Generate a robocrys text description for a single entry.

    Args:
        entry: Mapping whose ``"structure"`` value is a dict accepted by
            ``Structure.from_dict``.

    Returns:
        The value returned by ``StructureDescriber.describe`` for the
        condensed form of the entry's structure.
    """
    crystal = Structure.from_dict(entry["structure"])
    # Oxidation states are guessed and attached to the structure before it
    # is handed to the condenser.
    crystal.add_oxidation_state_by_guess()
    condenser, describer = StructureCondenser(), StructureDescriber()
    return describer.describe(condenser.condense_structure(crystal))

# Process the stable set first, then the unstable set. For each one, attach a
# 'description' to every entry in place and then write the annotated list to
# its own JSON file.
for dataset, out_path in (
    (stable_data, "mp_elastic_stable_with_desc.json"),
    (unstable_data, "mp_elastic_unstable_with_desc.json"),
):
    for entry in tqdm(dataset):
        entry['description'] = describe_structures(entry)

    with open(out_path, "w") as out_file:
        json.dump(dataset, out_file, indent=2)

In [1]:
# Write the stable entries followed by the unstable entries to a single file.
# The entries are the same dict objects held in `stable_data`/`unstable_data`,
# so they carry whatever keys those dicts have when this cell is run.
combined_entries = stable_data + unstable_data
with open("mp_elastic_combined.json", "w") as combined_file:
    json.dump(combined_entries, combined_file, indent=2)