In [2]:
from mp_api.client import MPRester
from emmet.core.xas import Edge, Type
from tqdm import tqdm
from pymatgen.analysis.bond_valence import BVAnalyzer as BVA
from pymatgen.core import Element
import pickle

## Extract Data from MP-API


In [None]:
def get_XAS(edge_type, element, api_key=None):
	"""Query the Materials Project for XANES spectra of one absorbing element.

	Args:
		edge_type: Absorption edge to search for (an ``emmet.core.xas.Edge`` member).
		element: Symbol of the absorbing element, e.g. ``'Mn'``.
		api_key: Optional Materials Project API key. When left as ``None`` no key
			is supplied here and ``MPRester`` resolves it from its own
			configuration (e.g. the ``MP_API_KEY`` environment variable).

	Returns:
		Whatever ``mpr.materials.xas.search`` returns for the query, limited to
		the fields ``material_id``, ``xas_id``, ``spectrum`` and ``structure``.
	"""
	# Credentials must not live in the notebook source: the key is either passed
	# in by the caller or picked up by MPRester from the environment/config.
	with MPRester(api_key=api_key, use_document_model=False) as mpr:
		docs = mpr.materials.xas.search(
			edge=edge_type,
			spectrum_type=Type.XANES,
			absorbing_element=element,
			fields=["material_id", "xas_id", "spectrum", "structure"],
		)
	return docs

In [None]:
def get_L2_3(element):
	"""Return the L2,3-edge XANES documents for ``element``.

	Thin convenience wrapper: forwards to ``get_XAS`` with ``Edge.L2_3``.
	"""
	return get_XAS(Edge.L2_3, element)

In [None]:
def process_docs(docs):
	"""Index spectra by material id.

	Each item of ``docs`` must support ``doc['spectrum']`` and
	``doc['material_id']``; the spectrum object is read through its ``x``,
	``y`` and ``structure`` attributes.

	Returns:
		dict mapping material id -> ``{'energy', 'intensity', 'structure'}``.
		If several documents share a material id, only the first one is kept.
	"""
	collected = {}
	for entry in tqdm(docs, desc="Processing Docs"):
		xas = entry['spectrum']
		record = {
			'energy': xas.x,
			'intensity': xas.y,
			'structure': xas.structure,
		}
		material_id = entry['material_id']
		# First document wins; later ones for the same material are ignored.
		if material_id in collected:
			continue
		collected[material_id] = record

	return collected

In [None]:
def extract_data_element_list(element_list):
	"""Download and index L2,3-edge spectra for every element in ``element_list``.

	Args:
		element_list: Iterable of element symbols. A symbol that appears more
			than once is fetched only once.

	Returns:
		dict mapping each element symbol (in first-seen order) to the dict
		produced by ``process_docs`` for that element.
	"""
	# Start from an empty dict: pre-filling with ``[{}] * n`` would make every
	# key point at the same shared dict object.
	data_dict_total = {}
	# dict.fromkeys de-duplicates while keeping order, so a repeated symbol does
	# not trigger a second, identical API download.
	for element in dict.fromkeys(element_list):
		print("Processing Element: ", element)
		docs = get_L2_3(element)
		data_dict_total[element] = process_docs(docs)

	return data_dict_total

In [None]:
# Some tools for easy access
def get_keys(data_dict):
	"""Return the keys of ``data_dict`` as a new list, in iteration order."""
	return [key for key in data_dict.keys()]

In [None]:
def get_oxidation_state(structure, element):
	"""List the oxidation states assigned to ``element`` in ``structure``.

	The structure is decorated with oxidation states by the bond-valence
	analyzer (``BVA``); every species whose ``element`` equals
	``Element(element)`` contributes its ``oxi_state`` to the result.

	Returns:
		list of oxidation states, one per matching species (possibly empty).
		Errors raised by the analyzer are not handled here.
	"""
	decorated = BVA().get_oxi_state_decorated_structure(structure)
	matching_states = []
	for specie in decorated.species:
		if specie.element == Element(element):
			matching_states.append(specie.oxi_state)
	return matching_states


In [None]:
def batch_process_oxidation_states(element, structures):
	"""Run ``get_oxidation_state`` over ``structures``, tolerating failures.

	Returns:
		A tuple ``(oxi_states, problem)``. ``oxi_states`` has one entry per
		structure: the list returned by ``get_oxidation_state`` or ``None``
		when it raised ``ValueError``. ``problem`` holds the positions of the
		structures that failed. Other exception types propagate.
	"""
	states_per_structure = []
	failed_positions = []
	for position, structure in enumerate(tqdm(structures)):
		try:
			result = get_oxidation_state(structure, element)
		except ValueError:
			# Keep a placeholder so positions stay aligned with ``structures``.
			states_per_structure.append(None)
			failed_positions.append(position)
		else:
			states_per_structure.append(result)

	return states_per_structure, failed_positions

In [None]:
def assign_oxidation_states(data_dict_total):
	"""Attach an ``'oxidation_state'`` entry to every material record.

	Args:
		data_dict_total: dict mapping element symbol -> dict of material id ->
			record; each record must contain a ``'structure'`` entry.

	Returns:
		The same ``data_dict_total`` object. Records are modified in place:
		``'oxidation_state'`` is the list of oxidation states found for the
		element, or ``None`` when the analysis failed for that structure.
	"""
	for element, data_dict in data_dict_total.items():
		mpids = get_keys(data_dict)
		if not mpids:
			# Nothing was downloaded for this element; skip it so the
			# percentage below does not divide by zero.
			print(f"No structures to process for {element}")
			continue

		structures = [data_dict[mpid]['structure'] for mpid in mpids]
		oxi_states, problem = batch_process_oxidation_states(element, structures)
		print(f"Problematic structures for {element}: {len(problem)} which is {len(problem)/len(mpids)*100:.2f}%")

		# Set lookup keeps the assignment loop linear in the number of records.
		failed = set(problem)
		for idx, mpid in enumerate(mpids):
			data_dict[mpid]['oxidation_state'] = None if idx in failed else oxi_states[idx]

	return data_dict_total


In [None]:
# Absorbing elements to query; one Materials Project search is issued per symbol.
element_list = ['Mn', 'Cu', 'Cr', 'V', 'Fe', 'Ni', 'Co']
# Result layout: {element: {material_id: {'energy', 'intensity', 'structure'}}}.
data_dict_total = extract_data_element_list(element_list)

In [None]:
# Adds an 'oxidation_state' entry to every material record. The function mutates
# the nested dicts in place and returns the same top-level dict it was given.
data_dict_total = assign_oxidation_states(data_dict_total)

In [190]:
def save_data(data_dict_total, filename):
	"""Pickle ``data_dict_total`` to ``filename``, overwriting any existing file."""
	with open(filename, 'wb') as handle:
		pickle.Pickler(handle).dump(data_dict_total)

In [191]:
# File name is derived from the element symbols joined by '-', i.e.
# data_Mn-Cu-Cr-V-Fe-Ni-Co.pickle for the list above; the path is relative,
# so the file is written to the current working directory.
save_data(data_dict_total, f'data_{"-".join(element_list)}.pickle')