In [179]:
import os
import pickle

from emmet.core.xas import Edge, Type
from mp_api.client import MPRester
from pymatgen.analysis.bond_valence import BVAnalyzer as BVA
from pymatgen.core import Element
from tqdm import tqdm

## Extract Data from MP-API


In [180]:
def get_XAS(edge_type, element, api_key=None):
	"""Search the Materials Project XAS endpoint for XANES spectra.

	Parameters
	----------
	edge_type
		Absorption edge forwarded as ``edge=`` to the search (callers in this
		notebook pass an ``emmet.core.xas.Edge`` member).
	element
		Absorbing element forwarded as ``absorbing_element=``.
	api_key : str, optional
		Materials Project API key. When omitted, the ``MP_API_KEY``
		environment variable is used. If that is unset too, ``None`` is
		handed to ``MPRester`` -- presumably it then falls back to its own
		configuration lookup or raises; confirm against the mp-api docs.

	Returns
	-------
	The result of ``mpr.materials.xas.search(...)`` restricted to the fields
	``material_id``, ``xas_id``, ``spectrum`` and ``structure``.
	"""
	# Credentials must never live in the notebook source: take the key from the
	# caller or from the environment instead of a string literal.
	# NOTE(review): a key was previously committed on this line; it should be
	# revoked and regenerated in the Materials Project dashboard.
	resolved_key = api_key or os.environ.get("MP_API_KEY")
	with MPRester(api_key=resolved_key, use_document_model=False) as mpr:
		docs = mpr.materials.xas.search(
			edge=edge_type,
			spectrum_type=Type.XANES,
			absorbing_element=element,
			fields=["material_id", "xas_id", "spectrum", "structure"],
		)
	return docs

In [181]:
def get_L2_3(element):
	"""Fetch the L2,3-edge XAS documents for ``element``.

	Thin convenience wrapper: forwards to ``get_XAS`` with ``Edge.L2_3`` as
	the edge and returns its result unchanged.
	"""
	return get_XAS(Edge.L2_3, element)

In [182]:
def process_docs(docs):
	"""Collapse XAS search results into one record per material id.

	Each element of ``docs`` is indexed with ``'spectrum'`` and
	``'material_id'``; the spectrum object must expose ``x``, ``y`` and
	``structure`` attributes.

	Returns a dict mapping material id to
	``{'energy': spectrum.x, 'intensity': spectrum.y, 'structure': spectrum.structure}``.
	When several documents share a material id, only the first one
	encountered is kept.
	"""
	collected = {}
	for record in tqdm(docs, desc="Processing Docs"):
		xas = record['spectrum']
		entry = {
			'energy': xas.x,
			'intensity': xas.y,
			'structure': xas.structure,
		}
		# setdefault leaves an existing entry untouched, so the first
		# spectrum seen for a material wins.
		collected.setdefault(record['material_id'], entry)

	return collected

In [183]:
def extract_data_element_list(element_list):
	"""Download and process L2,3-edge spectra for every element in a list.

	For each element (in the given order) this prints a progress line,
	fetches documents with ``get_L2_3`` and condenses them with
	``process_docs``.

	Returns a dict mapping each element to the dict produced by
	``process_docs`` for that element. An empty ``element_list`` yields an
	empty dict.
	"""
	# Build the mapping as results arrive. The previous
	# dict(zip(keys, [{}] * n)) pre-fill bound one shared dict object to every
	# key, which is an aliasing trap and was redundant since each key is
	# overwritten below anyway.
	data_dict_total = {}
	for element in element_list:
		print("Processing Element: ", element)
		docs = get_L2_3(element)
		data_dict_total[element] = process_docs(docs)

	return data_dict_total

In [184]:
# Some tools for easy access
def get_keys(data_dict):
	"""Return the keys of ``data_dict`` as a new list, in iteration order."""
	return [*data_dict.keys()]

In [185]:
def get_oxidation_state(structure, element):
	"""List the oxidation states assigned to ``element`` in ``structure``.

	Runs ``BVA().get_oxi_state_decorated_structure(structure)`` and walks the
	``species`` of the result, keeping ``oxi_state`` for every species whose
	``element`` equals ``Element(element)``.

	Returns a list with one entry per matching species (empty when the
	element does not occur).

	NOTE(review): the batch helper in this notebook catches ``ValueError``
	around this call, so the analyzer presumably raises that when it cannot
	assign valences -- confirm against the pymatgen docs.
	"""
	decorated = BVA().get_oxi_state_decorated_structure(structure)
	matching_states = []
	for species in decorated.species:
		if species.element == Element(element):
			matching_states.append(species.oxi_state)
	return matching_states


In [186]:
def batch_process_oxidation_states(element, structures):
	"""Compute oxidation states of ``element`` for a sequence of structures.

	Calls ``get_oxidation_state`` on each structure. A ``ValueError`` from
	that call is not propagated: the structure's slot is filled with ``None``
	and its position is recorded.

	Returns ``(oxi_states, problem)`` where ``oxi_states`` has one entry per
	input structure and ``problem`` lists the indices that raised.
	"""
	results, failed_indices = [], []
	for position, struct in enumerate(tqdm(structures)):
		try:
			states = get_oxidation_state(struct, element)
		except ValueError:
			# Keep list positions aligned with the input by storing a placeholder.
			states = None
			failed_indices.append(position)
		results.append(states)

	return results, failed_indices

In [187]:
def assign_oxidation_states(data_dict_total):
	"""Attach an ``'oxidation_state'`` entry to every material record.

	``data_dict_total`` maps element -> {material id -> record}, where each
	record has a ``'structure'`` key. For every element the structures are
	passed to ``batch_process_oxidation_states`` and the result is written
	to ``record['oxidation_state']``; records whose index is reported as
	problematic get ``None``. A one-line failure summary is printed per
	element.

	The input mapping is modified in place and also returned.
	"""
	for element, data_dict in data_dict_total.items():
		mpids = get_keys(data_dict)
		structures = [data_dict[mpid]['structure'] for mpid in mpids]
		oxi_states, problem = batch_process_oxidation_states(element, structures)

		# An element with no entries would otherwise divide by zero here.
		failed_pct = len(problem) / len(mpids) * 100 if mpids else 0.0
		print(f"Problematic structures for {element}: {len(problem)} which is {failed_pct:.2f}%")

		# Set membership keeps the assignment loop linear; the old list lookup
		# was O(len(problem)) on every iteration.
		failed = set(problem)
		for idx, mpid in enumerate(mpids):
			data_dict[mpid]['oxidation_state'] = None if idx in failed else oxi_states[idx]

	return data_dict_total


In [188]:
# Elements whose L2,3-edge spectra are downloaded; the same list is later used
# to build the output filename.
element_list = ['Mn', 'Cu', 'Cr', 'V', 'Fe', 'Ni', 'Co']
# One network query per element; result maps element -> {material id -> record}.
data_dict_total = extract_data_element_list(element_list)

Processing Element:  Mn


Retrieving XASDoc documents: 100%|██████████| 1030/1030 [00:00<00:00, 2017.52it/s]
Processing Docs: 100%|██████████| 1030/1030 [00:00<00:00, 1299288.16it/s]


Processing Element:  Cu


Retrieving XASDoc documents: 100%|██████████| 1533/1533 [00:02<00:00, 667.23it/s]
Processing Docs: 100%|██████████| 1533/1533 [00:00<00:00, 739235.23it/s]


Processing Element:  Cr


Retrieving XASDoc documents: 100%|██████████| 580/580 [00:00<00:00, 26158024.95it/s]
Processing Docs: 100%|██████████| 580/580 [00:00<00:00, 1371305.70it/s]


Processing Element:  V


Retrieving XASDoc documents: 100%|██████████| 863/863 [00:00<00:00, 28727653.59it/s]
Processing Docs: 100%|██████████| 863/863 [00:00<00:00, 1265181.53it/s]


Processing Element:  Fe


Retrieving XASDoc documents: 100%|██████████| 1272/1272 [00:02<00:00, 529.04it/s]
Processing Docs: 100%|██████████| 1272/1272 [00:00<00:00, 1163864.46it/s]


Processing Element:  Ni


Retrieving XASDoc documents: 100%|██████████| 998/998 [00:00<00:00, 38053776.29it/s]
Processing Docs: 100%|██████████| 998/998 [00:00<00:00, 1037658.75it/s]


Processing Element:  Co


Retrieving XASDoc documents: 100%|██████████| 1065/1065 [00:01<00:00, 709.55it/s]
Processing Docs: 100%|██████████| 1065/1065 [00:00<00:00, 452760.36it/s]


In [189]:
# Adds an 'oxidation_state' entry to every record (None where the analysis
# raised ValueError). The mapping is updated in place; rebinding the name is
# only for readability. This cell took roughly 1-2 minutes per element in the
# recorded run below.
data_dict_total = assign_oxidation_states(data_dict_total)

100%|██████████| 1030/1030 [01:17<00:00, 13.35it/s]


Problematic structures for Mn: 176 which is 17.09%


100%|██████████| 1533/1533 [01:53<00:00, 13.47it/s]


Problematic structures for Cu: 220 which is 14.35%


100%|██████████| 580/580 [00:55<00:00, 10.41it/s]


Problematic structures for Cr: 108 which is 18.62%


100%|██████████| 863/863 [00:50<00:00, 17.11it/s] 


Problematic structures for V: 96 which is 11.12%


 72%|███████▏  | 911/1272 [01:11<00:06, 52.72it/s]spglib: ssm_get_exact_positions failed.
spglib: get_bravais_exact_positions_and_lattice failed.
spglib: ssm_get_exact_positions failed.
spglib: get_bravais_exact_positions_and_lattice failed.
100%|██████████| 1272/1272 [01:21<00:00, 15.59it/s]


Problematic structures for Fe: 263 which is 20.68%


100%|██████████| 998/998 [00:50<00:00, 19.66it/s]


Problematic structures for Ni: 340 which is 34.07%


100%|██████████| 1065/1065 [01:43<00:00, 10.24it/s]

Problematic structures for Co: 290 which is 27.23%





In [190]:
def save_data(data_dict_total, filename):
	"""Pickle ``data_dict_total`` to ``filename``.

	The file is opened in binary write mode, so an existing file at that
	path is overwritten. Returns ``None``.
	"""
	with open(filename, mode='wb') as out_file:
		pickle.dump(data_dict_total, out_file)

In [191]:
# Writes to the current working directory; the filename joins the element
# symbols with '-' (e.g. data_Mn-Cu-...-Co.pickle).
save_data(data_dict_total, f'data_{"-".join(element_list)}.pickle')