In [7]:
from optimade.client import OptimadeClient
import jmespath
import os
import time
from pathlib import Path
from pymatgen.core import Composition

import oss2
from dotenv import load_dotenv
from oss2.credentials import EnvironmentVariableCredentialsProvider

# === LOAD ENV ===
load_dotenv()

# === UTILS ===
def hill_formula_filter(formula: str) -> str:
    hill_formula = Composition(formula).hill_formula.replace(' ', '')
    return f'chemical_formula_reduced="{hill_formula}"'


In [8]:
filter_str = hill_formula_filter('SiO2')
client = OptimadeClient(include_providers={"mp"}, max_results_per_provider=2)
results = client.get(filter=filter_str)
structure_data_list = jmespath.search("structures.*.*.data", results)[0][0]
print(structure_data_list)

elements = ['Mg', 'Al', 'O']
element_filter = 'elements HAS ALL ' + ', '.join(f'"{e}"' for e in elements)
client = OptimadeClient(include_providers={"mp"}, max_results_per_provider=1)
results = client.get(filter=element_filter)
structure_data_list = jmespath.search("structures.*.*.data", results)[0][0]
print(structure_data_list)


[{'id': 'mp-683953', 'type': 'structures', 'attributes': {'immutable_id': '645d2b75bcd30f748b474984', 'last_modified': '2021-02-15T22:29:02.995000', 'elements': ['O', 'Si'], 'nelements': 2, 'elements_ratios': [0.3333333333333333, 0.6666666666666666], 'chemical_formula_descriptive': 'O96Si48', 'chemical_formula_reduced': 'O2Si', 'chemical_formula_hill': 'O96Si48', 'chemical_formula_anonymous': 'A2B', 'dimension_types': [1, 1, 1], 'nperiodic_dimensions': 3, 'lattice_vectors': [[-8.324448, 8.324448, 8.324448], [8.324448, -8.324448, 8.324448], [8.324448, 8.324448, -8.324448]], 'cartesian_site_positions': [[2.52226612176, 6.473581907232, 3.990815291232], [6.684498446208, 0.17141703321599994, 2.3113495827839996], [4.3336327087679996, 2.522282770656001, 10.175314092768001], [-6.013090092768, 6.684490121760001, 8.153039291232], [10.175314092768, 4.3336327087679996, 2.522282770656001], [-2.3113579072320003, 6.684506770656, 8.495856708768], [8.153030966784002, 6.013098417216001, 1.63994955379200

[{'id': 'mp-2224718', 'type': 'structures', 'attributes': {'immutable_id': '645d2b97bcd30f748b47549a', 'last_modified': '2022-05-20T04:20:57.200000', 'elements': ['Al', 'Cr', 'Mg', 'O'], 'nelements': 4, 'elements_ratios': [0.058823529411764705, 0.23529411764705882, 0.11764705882352941, 0.5882352941176471], 'chemical_formula_descriptive': 'Al4Cr2MgO10', 'chemical_formula_reduced': 'Al4Cr2MgO10', 'chemical_formula_hill': 'Al4Cr2MgO10', 'chemical_formula_anonymous': 'A10B4C2D', 'dimension_types': [1, 1, 1], 'nperiodic_dimensions': 3, 'lattice_vectors': [[3.702908, -9e-05, -6e-06], [-1.851575, 4.868172, -6.4e-05], [-1.6e-05, -0.000132, 9.611507]], 'cartesian_site_positions': [[1.852781766814, 0.05141701395000001, 2.4029499311699998], [0.0016911212060000504, 1.1671749668159999, 5.481051525667], [0.0016983298730000609, 1.16712264741, 8.936167227552], [1.853020487653, 3.461191280346, 0.498334832725], [1.853017851243, 3.4610902896300004, 4.307211661869999], [0.001773226025000013, 2.01798571699

In [11]:
client = OptimadeClient(include_providers={"oqmd"}, max_results_per_provider=3)
results = client.get(filter='chemical_formula_reduced="SiO2"')
structure_data_list = jmespath.search("structures.*.*.data", results)[0][0]
print(structure_data_list)

elements = ['Mg', 'Al', 'O']
element_filter = 'elements HAS ALL ' + ', '.join(f'"{e}"' for e in elements)
client = OptimadeClient(include_providers={"oqmd"}, max_results_per_provider=1)
results = client.get(filter=element_filter)
structure_data_list = jmespath.search("structures.*.*.data", results)[0][0]
print(structure_data_list)

[{'id': 4065228, 'type': 'structures', 'attributes': {'last_modified': None, 'chemical_formula_reduced': 'O2Si', 'chemical_formula_anonymous': 'A2B', 'nelements': 2, 'elements': ['O', 'Si'], 'nsites': 3, 'lattice_vectors': [[2.268732, -2.268732, 0.0], [-2.268732, 0.0, -2.268732], [2.268732, 2.268732, 0.0]], 'species_at_sites': ['O', 'O', 'Si'], 'nperiodic_dimensions': 3, 'structure_features': [], 'chemical_formula_descriptive': 'O2Si', 'cartesian_site_positions': [[1.13437, -1.13437, -1.13437], [1.13437, 1.13437, -1.13437], [0.0, 0.0, 0.0]], '_oqmd_entry_id': 8352, '_oqmd_calculation_id': 8668, '_oqmd_icsd_id': 44271, '_oqmd_band_gap': 2.245, '_oqmd_delta_e': -1.813, '_oqmd_volume': 23.355, '_oqmd_stability': 1.261, '_oqmd_prototype': 'CaF2', '_oqmd_spacegroup': 'Fm-3m'}}, {'id': 4073934, 'type': 'structures', 'attributes': {'last_modified': None, 'chemical_formula_reduced': 'O2Si', 'chemical_formula_anonymous': 'A2B', 'nelements': 2, 'elements': ['O', 'Si'], 'nsites': 6, 'lattice_vect

[{'id': 5359688, 'type': 'structures', 'attributes': {'last_modified': None, 'chemical_formula_reduced': 'AlAsMgO', 'chemical_formula_anonymous': 'ABCD', 'nelements': 4, 'elements': ['Al', 'As', 'Mg', 'O'], 'nsites': 8, 'lattice_vectors': [[3.988855, 0.0, 0.0], [0.0, 3.988855, 0.0], [0.0, 0.0, 9.267893]], 'species_at_sites': ['Al', 'Al', 'As', 'As', 'Mg', 'Mg', 'O', 'O'], 'nperiodic_dimensions': 3, 'structure_features': [], 'chemical_formula_descriptive': 'AlAsMgO', 'cartesian_site_positions': [[2.99165, 0.99722, 4.63394], [0.99722, 2.99165, 4.63394], [2.99165, 2.99165, 3.07118], [0.99722, 0.99722, 6.19671], [2.99165, 2.99165, 0.01523], [0.99722, 0.99722, 9.25267], [2.99165, 0.99722, 0.0], [0.99722, 2.99165, 0.0]], '_oqmd_entry_id': 1025790, '_oqmd_calculation_id': 2055327, '_oqmd_icsd_id': None, '_oqmd_band_gap': 0.679, '_oqmd_delta_e': -1.4072, '_oqmd_volume': 147.461, '_oqmd_stability': 0.327, '_oqmd_prototype': None, '_oqmd_spacegroup': 'P4/nmm'}}, {'id': 5364962, 'type': 'structur

In [14]:
filter_str = hill_formula_filter('SiO2')
client = OptimadeClient(include_providers={"omdb"}, max_results_per_provider=3)
results = client.get(filter=filter_str)
structure_data_list = jmespath.search("structures.*.*.data", results)[0][0]
print(structure_data_list)

elements = ['Mg', 'Al', 'O']
element_filter = 'elements HAS ALL ' + ', '.join(f'"{e}"' for e in elements)
client = OptimadeClient(include_providers={"omdb"}, max_results_per_provider=1)
results = client.get(filter=element_filter)
structure_data_list = jmespath.search("structures.*.*.data", results)[0][0]
print(structure_data_list)

[{'attributes': {'cartesian_site_positions': [[0.44392, 1.1456, 2.4344], [2.685, 2.685, 2.685], [3.13608, 6.0144, 6.0144], [0.895, 4.475, 6.265], [4.02392, 2.4344, 4.7256], [6.265, 0.895, 4.475], [6.71608, 4.7256, 1.1456], [4.475, 6.265, 0.895], [2.4344, 0.44392, 1.1456], [1.1456, 6.71608, 4.7256], [6.0144, 3.13608, 6.0144], [4.7256, 4.02392, 2.4344], [1.1456, 2.4344, 0.44392], [4.7256, 1.1456, 6.71608], [6.0144, 6.0144, 3.13608], [2.4344, 4.7256, 4.02392], [1.7542, 1.7542, 1.7542], [3.63728, 3.63728, 3.63728], [1.8258, 5.4058, 5.3342], [7.10272, 3.52272, 0.05728], [5.3342, 1.8258, 5.4058], [0.05728, 7.10272, 3.52272], [5.4058, 5.3342, 1.8258], [3.52272, 0.05728, 7.10272]], 'chemical_formula_anonymous': 'AB2', 'chemical_formula_descriptive': 'O2Si', 'chemical_formula_reduced': 'O2Si', 'dimension_types': [1, 1, 1], 'elements': ['O', 'Si'], 'lattice_vectors': [[7.16, 0.0, 0.0], [0.0, 7.16, 0.0], [0.0, 0.0, 7.16]], 'nelements': 2, 'nperiodic_dimensions': 3, 'nsites': 24, 'species_at_sites

[{'attributes': {'cartesian_site_positions': [[0.8996484614707111, 3.65701266084, 0.7069643831173484], [3.1560340586481366, 4.258427584488, 0.006574243653384626], [3.0355481489797795, 2.184489207912, 1.643836987196], [8.02997588802929, 3.65701266084, 1.8142579898826516], [5.773590290851863, 4.258427584488, 2.5146481293466154], [5.89407620052022, 2.184489207912, 0.877385385804], [7.260603581529289, 5.18698733916, 4.3354803628826515], [5.004217984351864, 4.585572415512, 5.035870502346615], [5.12470389402022, 6.659510792088, 3.398607758804], [0.13027615497071107, 5.18698733916, 3.2281867561173483], [2.386661752148137, 4.585572415512, 2.5277966166533847], [2.2661758424797798, 6.659510792088, 4.165059360196], [5.749146789470711, 8.07901266084, 0.7069643831173484], [8.005532386648136, 8.680427584488, 0.006574243653384626], [7.88504647697978, 6.606489207912, 1.643836987196], [3.180477560029289, 8.07901266084, 1.8142579898826516], [0.9240919628518633, 8.680427584488, 2.5146481293466154], [1.04

In [15]:
filter_str = hill_formula_filter('SiO2')
client = OptimadeClient(include_providers={"mcloud"}, max_results_per_provider=3)
results = client.get(filter=filter_str)
structure_data_list = jmespath.search("structures.*.*.data", results)[0][0]
print(structure_data_list)

elements = ['Mg', 'Al', 'O']
element_filter = 'elements HAS ALL ' + ', '.join(f'"{e}"' for e in elements)
client = OptimadeClient(include_providers={"mcloud"}, max_results_per_provider=1)
results = client.get(filter=element_filter)
structure_data_list = jmespath.search("structures.*.*.data", results)[0][0]
print(structure_data_list)

[{'id': 'ec1f048e-973d-4f69-b371-2f2130be6e7a', 'type': 'structures', 'links': None, 'attributes': {'immutable_id': 'ec1f048e-973d-4f69-b371-2f2130be6e7a', 'last_modified': '2022-02-14T16:46:36.543140+01:00', 'elements': ['O', 'Si'], 'nelements': 2, 'elements_ratios': [0.6666666666666666, 0.3333333333333333], 'chemical_formula_descriptive': 'O8Si4', 'chemical_formula_reduced': 'O2Si', 'chemical_formula_hill': None, 'chemical_formula_anonymous': 'A2B', 'dimension_types': [1, 1, 1], 'nperiodic_dimensions': 3, 'lattice_vectors': [[4.5290550060668, 0.0, 0.0], [0.0, 4.1162160882957, 0.0], [0.0, 0.0, 5.0835955089915]], 'space_group_symmetry_operations_xyz': None, 'space_group_symbol_hall': None, 'space_group_symbol_hermann_mauguin': None, 'space_group_symbol_hermann_mauguin_extended': None, 'space_group_it_number': None, 'cartesian_site_positions': [[2.2645275030334, 0.0, 1.5796461277345], [2.2645275030334, 2.0581080441479, 4.1214438821301], [0.0, 0.0, 0.033377272964866], [0.0, 2.05810804414

[{'id': '7a100f3f-97d6-4d26-bbad-24b5bd128317', 'type': 'structures', 'links': None, 'attributes': {'immutable_id': '7a100f3f-97d6-4d26-bbad-24b5bd128317', 'last_modified': '2022-02-14T16:49:43.075017+01:00', 'elements': ['Al', 'Mg', 'O'], 'nelements': 3, 'elements_ratios': [0.2857142857142857, 0.14285714285714285, 0.5714285714285714], 'chemical_formula_descriptive': 'Al4Mg2O8', 'chemical_formula_reduced': 'Al2MgO4', 'chemical_formula_hill': None, 'chemical_formula_anonymous': 'A4B2C', 'dimension_types': [1, 1, 1], 'nperiodic_dimensions': 3, 'lattice_vectors': [[1.4032915197, -4.6369118521, 0.0], [1.4032915197, 4.6369118521, 0.0], [0.0, 0.0, 9.4682833499]], 'space_group_symmetry_operations_xyz': None, 'space_group_symbol_hall': None, 'space_group_symbol_hermann_mauguin': None, 'space_group_symbol_hermann_mauguin_extended': None, 'space_group_it_number': None, 'cartesian_site_positions': [[-3.3103223857269e-17, 3.6096431237, 2.3670708375], [1.4032915197, 1.0272687284, 7.1012125124], [-1

In [16]:
filter_str = hill_formula_filter('SiO2')
client = OptimadeClient(max_results_per_provider=3)
results = client.get(filter=filter_str)
structure_data_list = jmespath.search("structures.*.*.data", results)[0][0]
print(structure_data_list)

[]



                chemical_formula_reduced="O2Si"
•	aflow                  ❌
•	alexandria              
•	cod                     ❌
•	cmr
•	mcloud
•	mcloudarchive         1/3❌
•	mp
•	mpdd
•	mpds
•	mpod                    ❌
•	nmd
•	odbx
•	omdb
•	oqmd
•	jarvis
•	tcod                    ❌
•	twodmatpedia            ?


•	aiida ❌
•	ccdc ❌
•	ccpnc ❌
•	httk ❌
•	optimake ❌
•	optimade ❌
•	pcod ❌
•	psdi ❌


| Database        | Connect     | `chemical_formula_reduced="O2Si"` | `elements HAS ALL +` |
|----------------|-------------|------------------------------------|------------------------|
| alexandria     | ✅           | ✅                                  |                        |
| cmr            | ✅           | ✅                                  |                        |
| mcloud         | ✅           | ✅                                  |                        |
| mp             | ✅           | ✅                                  |                        |
| mpdd           | ✅           | ✅                                  |                        |
| mpds           | ✅           | ✅                                  |                        |
| nmd            | ✅           | ✅                                  |                        |
| odbx           | ✅           | ✅                                  |                        |
| omdb           | ✅           | ✅                                  |                        |
| oqmd           | ✅           | ✅                                  |                        |
| jarvis         | ✅           | ✅                                  |                        |
| mcloudarchive  | ✅           | 1/3 ❌                              |                        |
| cod            | ✅           | ❌                                  |                        |
| mpod           | ✅           | ❌                                  |                        |
| tcod           | ✅           | ❌                                  |                        |
| twodmatpedia   | ✅           | ?                                  |                        |
| aflow          | ✅           |    ❌                                  |                        |
| aiida          | ❌ (no URL)  | -                                  |                        |
| ccdc           | ❌ (no URL)  | -                                  |                        |
| ccpnc          | ❌ (no URL)  | -                                  |                        |
| httk           | ❌ (no URL)  | -                                  |                        |
| optimake       | ❌ (no URL)  | -                                  |                        |
| optimade       | ❌ (no URL)  | -                                  |                        |
| pcod           | ❌ (no URL)  | -                                  |                        |
| psdi           | ❌ (no URL)  | -                                  |                        |

In [17]:
elements = ['Mg', 'Al', 'O']
element_filter = 'elements HAS ALL ' + ', '.join(f'"{e}"' for e in elements)

client = OptimadeClient(max_results_per_provider=3)
results = client.get(filter=element_filter)
structure_data_list = jmespath.search("structures.*.*.data", results)[0][0]
print(structure_data_list)

[]


In [None]:
providers = {'alexandria', 'cmr', 'mp', 'mpds', 'nmd', 'odbx', 'omdb', 'oqmd', 'jarvis'}

formula_filter = hill_formula_filter('TiO2')

elements = ['Mg', 'Al', 'O']
element_filter = 'elements HAS ALL ' + ', '.join(f'"{e}"' for e in elements)


client = OptimadeClient(include_providers=providers, max_results_per_provider=2)
results = client.get(filter=formula_filter)
structure_data_list = jmespath.search("structures.*.*.data", results)[0][0]
print(structure_data_list)

[{'id': 'agm003201360', 'type': 'structures', 'attributes': {'immutable_id': 'agm003201360', 'last_modified': '2023-11-16T06:57:59Z', 'elements': ['O', 'Ti'], 'nelements': 2, 'elements_ratios': [0.6666666666666666, 0.3333333333333333], 'chemical_formula_descriptive': 'Ti2 O4', 'chemical_formula_reduced': 'O2Ti', 'chemical_formula_hill': None, 'chemical_formula_anonymous': 'A2B', 'dimension_types': [1, 1, 1], 'nperiodic_dimensions': 3, 'lattice_vectors': [[3.77800496, 0.00075939, -0.01956182], [-0.00010751, 3.77800513, -0.01956182], [-1.86310253, -1.86353008, 4.79995599]], 'cartesian_site_positions': [[0.01696794839271974, -0.92556779191114, 3.1858417184112997], [1.8980301308526144, 2.8409134785216668, 1.5749995213036867], [1.9089265279398844, 0.9669169816608555, 3.9749775163036865], [0.0059795920479302316, 0.9485206431109833, 0.7858637234112998], [0.007040304155737934, 2.8395048612169336, 1.1804155911150256], [1.9079518211878936, -0.9240731706126899, 3.580393586115026]], 'nsites': 6, '

In [46]:
from urllib.parse import urlparse

structures_by_filter = results.get("structures", {})

for filter_key, provider_dict in structures_by_filter.items():
    for provider_url, provider_structures in provider_dict.items():
        print(provider_url, provider_structures)

len(provider_structures.get("data", []))

https://alexandria.icams.rub.de/pbesol {'data': [{'id': 'agm003201360', 'type': 'structures', 'attributes': {'immutable_id': 'agm003201360', 'last_modified': '2023-11-16T06:57:59Z', 'elements': ['O', 'Ti'], 'nelements': 2, 'elements_ratios': [0.6666666666666666, 0.3333333333333333], 'chemical_formula_descriptive': 'Ti2 O4', 'chemical_formula_reduced': 'O2Ti', 'chemical_formula_hill': None, 'chemical_formula_anonymous': 'A2B', 'dimension_types': [1, 1, 1], 'nperiodic_dimensions': 3, 'lattice_vectors': [[3.77800496, 0.00075939, -0.01956182], [-0.00010751, 3.77800513, -0.01956182], [-1.86310253, -1.86353008, 4.79995599]], 'cartesian_site_positions': [[0.01696794839271974, -0.92556779191114, 3.1858417184112997], [1.8980301308526144, 2.8409134785216668, 1.5749995213036867], [1.9089265279398844, 0.9669169816608555, 3.9749775163036865], [0.0059795920479302316, 0.9485206431109833, 0.7858637234112998], [0.007040304155737934, 2.8395048612169336, 1.1804155911150256], [1.9079518211878936, -0.92407

20

In [None]:
import logging
import json
from datetime import datetime
from optimade.adapters.structures import Structure

max_results = 2
as_cif = False
BASE_OUTPUT_DIR = Path("materials_data")
BASE_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
output_folder = BASE_OUTPUT_DIR / f"{formula_filter}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
output_folder.mkdir(parents=True, exist_ok=True)


structures_by_filter = results.get("structures", {})

for filter_key, provider_dict in structures_by_filter.items():
    for provider_url, content in provider_dict.items():
        # Clean provider name for use in filename
        parsed = urlparse(provider_url)
        netloc = parsed.netloc.replace('.', '_')
        path = parsed.path.strip('/').replace('/', '_')
        provider_name = f"{netloc}_{path}" if path else netloc

        data_list = content.get("data", [])
        for i, structure_data in enumerate(data_list[:max_results]):
            try:
                suffix = "cif" if as_cif else "json"
                filename = f"{provider_name}_{i}.{suffix}"
                file_path = output_folder / filename
                with open(file_path, "w") as f:
                    if as_cif:
                        f.write(Structure(structure_data).convert('cif'))
                    else:
                        json.dump(structure_data, f, indent=2)
            except Exception as e:
                logging.warning(f"Failed to save structure from {provider_name} #{i}: {str(e)}")

In [48]:

client = OptimadeClient(include_providers=providers, max_results_per_provider=2)
results = client.get(filter=element_filter)
structure_data_list = jmespath.search("structures.*.*.data", results)[0][0]
print(structure_data_list)

[{'id': 'agm003239870', 'type': 'structures', 'attributes': {'immutable_id': 'agm003239870', 'last_modified': '2023-11-16T06:57:59Z', 'elements': ['Al', 'Li', 'Mg', 'O'], 'nelements': 4, 'elements_ratios': [0.03125, 0.03125, 0.4375, 0.5], 'chemical_formula_descriptive': 'Li1 Mg14 Al1 O16', 'chemical_formula_reduced': 'AlLiMg14O16', 'chemical_formula_hill': None, 'chemical_formula_anonymous': 'A16B14CD', 'dimension_types': [1, 1, 1], 'nperiodic_dimensions': 3, 'lattice_vectors': [[4.18024568, 0.0, 0.0], [0.0, 8.36905899, 0.0], [0.0, 0.0, 8.40712616]], 'cartesian_site_positions': [[0.0, 0.0, 4.20356308], [0.0, 4.184529495, 0.0], [0.0, 4.184529495, 4.20356308], [2.09012284, 0.0, 2.1625939826507903], [2.09012284, 0.0, 6.2445321773492095], [2.09012284, 4.184529495, 2.1209835002207016], [2.09012284, 4.184529495, 6.286142659779299], [2.09012284, 2.090220939604052, 0.0], [2.09012284, 2.045854214561545, 4.20356308], [2.09012284, 6.278838050395947, 0.0], [2.09012284, 6.323204775438455, 4.2035630

In [49]:
from urllib.parse import urlparse

structures_by_filter = results.get("structures", {})

for filter_key, provider_dict in structures_by_filter.items():
    for provider_url, provider_structures in provider_dict.items():
        print(provider_url, provider_structures)

len(provider_structures.get("data", []))

https://alexandria.icams.rub.de/pbesol {'data': [{'id': 'agm003239870', 'type': 'structures', 'attributes': {'immutable_id': 'agm003239870', 'last_modified': '2023-11-16T06:57:59Z', 'elements': ['Al', 'Li', 'Mg', 'O'], 'nelements': 4, 'elements_ratios': [0.03125, 0.03125, 0.4375, 0.5], 'chemical_formula_descriptive': 'Li1 Mg14 Al1 O16', 'chemical_formula_reduced': 'AlLiMg14O16', 'chemical_formula_hill': None, 'chemical_formula_anonymous': 'A16B14CD', 'dimension_types': [1, 1, 1], 'nperiodic_dimensions': 3, 'lattice_vectors': [[4.18024568, 0.0, 0.0], [0.0, 8.36905899, 0.0], [0.0, 0.0, 8.40712616]], 'cartesian_site_positions': [[0.0, 0.0, 4.20356308], [0.0, 4.184529495, 0.0], [0.0, 4.184529495, 4.20356308], [2.09012284, 0.0, 2.1625939826507903], [2.09012284, 0.0, 6.2445321773492095], [2.09012284, 4.184529495, 2.1209835002207016], [2.09012284, 4.184529495, 6.286142659779299], [2.09012284, 2.090220939604052, 0.0], [2.09012284, 2.045854214561545, 4.20356308], [2.09012284, 6.278838050395947,

10

In [54]:
import logging
import json
from pathlib import Path
from urllib.parse import urlparse
from datetime import datetime
from optimade.adapters.structures import Structure

# === CONFIGURATION ===
max_results = 2
as_cif = True
BASE_OUTPUT_DIR = Path("materials_data")
BASE_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Assume element_filter is already defined elsewhere (e.g., 'elements HAS ALL "Mg", "Al", "O"')
output_folder = BASE_OUTPUT_DIR / f"{element_filter}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
output_folder.mkdir(parents=True, exist_ok=True)

# Assume results is already obtained from OptimadeClient.get(filter=...)
structures_by_filter = results.get("structures", {})

# === STRUCTURE PROCESSING ===
for filter_key, provider_dict in structures_by_filter.items():
    for provider_url, content in provider_dict.items():
        # Clean provider name for use in filename
        parsed = urlparse(provider_url)
        netloc = parsed.netloc.replace('.', '_')
        path = parsed.path.strip('/').replace('/', '_')
        provider_name = f"{netloc}_{path}" if path else netloc

        data_list = content.get("data", [])
        for i, structure_data in enumerate(data_list[:max_results]):
            suffix = "cif" if as_cif else "json"
            filename = f"{provider_name}_{i}.{suffix}"
            file_path = output_folder / filename

            try:
                if as_cif:
                    # Convert structure to CIF string
                    cif_content = Structure(structure_data).convert('cif')
                    if not cif_content.strip():
                        raise ValueError("CIF content is empty")
                    with open(file_path, "w") as f:
                        f.write(cif_content)
                else:
                    with open(file_path, "w") as f:
                        json.dump(structure_data, f, indent=2)

            except Exception as e:
                logging.warning(f"Failed to save structure from {provider_name} #{i}: {e}")

id
  Input should be a valid string [type=string_type, input_value=6698, input_type=int]
    For further information visit https://errors.pydantic.dev/2.11/v/string_type
attributes.last_modified
  Input should be a valid datetime or date, invalid character in year [type=datetime_from_date_parsing, input_value='%Y-%m-%dT%H:%M:%SZ', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/datetime_from_date_parsing
attributes.elements
  Value error, elements must be sorted alphabetically, but is: ['Mg', 'Al', 'O'] [type=value_error, input_value=['Mg', 'Al', 'O'], input_type=list]
    For further information visit https://errors.pydantic.dev/2.11/v/value_error
attributes.chemical_formula_reduced
  String should match pattern '(^$)|^([A-Z][a-z]?([2-9]|[1-9]\d+)?)+$' [type=string_pattern_mismatch, input_value='Na0.58Mg2.12Ag0.18Al6.98...0[BO3]3O18.53[OH]3F0.47', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/string_pattern_mi