Notebook adapted from: https://github.com/WMD-group/SMACT/blob/master/docs/tutorials/filtering_icsd_oxidation_states.ipynb

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/WMD-group/SMACT/blob/master/docs/tutorials/filtering_icsd_oxidation_states.ipynb)

In [1]:
import json
import math
import re
from collections import defaultdict
from itertools import combinations_with_replacement, product
from pathlib import Path

import numpy as np
import pandas as pd
import smact
from pymatgen.core import Composition, Element, Species
from smact.utils.oxidation import ICSD24OxStatesFilter

from lemat_genbench.utils.oxidation_state import (
    build_oxi_dict,
    build_oxi_dict_probs,
    build_oxi_state_map,
    build_sorted_oxi_dict,
)

Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
W0925 15:17:51.679000 11156 torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.
--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\samue\lemat-genbench\.venv\Lib\site-packages\material_hasher\similarity\__init__.py", line 13, in <module>
    from .eqv2 import EquiformerV2Similarity
  File "C:\Users\samue\lemat-genbench\.venv\Lib\site-packages\material_hasher\similarity\eqv2.py", line 12, in <module>
    from fairchem.core import OCPCalculator
ImportError: cannot import name 'OCPCalculator' from 'fairchem.core' (C:\Users\samue\lemat-genbench\.venv\Lib\site-packages\fairchem\core\__init__.py)

During handling of the above exception, another exception occurred:

Traceback (most recent cal

In [2]:
from smact.screening import smact_filter, smact_validity

In [3]:
# Initialise the ICSD24 oxidation-state filter imported from smact.utils.oxidation;
# it is queried in the next cell for the species-occurrence table.
ox_filter = ICSD24OxStatesFilter()

In [4]:
# Fetch the species-occurrence dataframe, asking the filter not to sort by occurrences.
# Later cells read its "species" and "species_proportion (%)" columns.
# NOTE(review): the original note says only non-zero results are returned — confirm against the SMACT docs.
test = ox_filter.get_species_occurrences_df(sort_by_occurrences=False)

In [5]:
# Store the percentage column rescaled by 1/100 (i.e. as a fraction) in a new column.
test["species_proportion_fraction"] = test["species_proportion (%)"].div(100)

In [6]:
# Keep only the species label and its fractional proportion.
test_subcols = test.loc[:, ["species", "species_proportion_fraction"]]

In [7]:
# Build a plain dict: species label -> fractional proportion.
fraction_by_species = test_subcols.set_index("species")["species_proportion_fraction"]
icsd_dict = fraction_by_species.to_dict()

In [8]:
# Persist the species -> fraction lookup as JSON indented by four spaces.
icsd_probs_json = json.dumps(icsd_dict, indent=4)
with open("icsd_oxi_dict_probs.json", "w") as out_file:
    out_file.write(icsd_probs_json)

In [9]:
# Species labels are split into three groups: the element symbol (letters),
# an optional run of digits giving the charge magnitude, and a mandatory
# trailing sign, e.g. "Tc7+" or "Au-".
pattern = re.compile(r"([A-Za-z]+)(\d*)([+-])")

In [10]:
# Element symbol -> list of signed integer charges; populated by the species-parsing loop.
oxi_state_mapping = defaultdict(list)

In [11]:
# Parse every ICSD species label into (element, signed charge) and collect the
# charges per element in oxi_state_mapping.
for species in icsd_dict:
    match = pattern.fullmatch(species)
    if match is None:
        # Name the offending label instead of failing with an opaque
        # AttributeError on None.
        raise ValueError(f"Unrecognised species label: {species!r}")
    element, number, sign = match.groups()
    # An empty digit group means an implicit magnitude of 1 (e.g. "Au-").
    magnitude = int(number) if number else 1
    charge = magnitude if sign == "+" else -magnitude
    # Skip charges already recorded so that re-running this cell on its own
    # does not append duplicate entries.
    if charge not in oxi_state_mapping[element]:
        oxi_state_mapping[element].append(charge)

In [12]:
# Persist the element -> charges mapping as JSON indented by four spaces.
icsd_mapping_json = json.dumps(oxi_state_mapping, indent=4)
with open("icsd_oxi_state_mapping.json", "w") as out_file:
    out_file.write(icsd_mapping_json)

In [13]:
# Load the mapping stored in oxi_state_mapping.json
# (presumably LeMat's element -> oxidation-state lists; verify against the file).
lemat_oxi_state_mapping = json.loads(Path("oxi_state_mapping.json").read_bytes())

In [14]:
# Load the lookup stored in oxi_dict_probs.json
# (presumably LeMat's species -> probability values; verify against the file).
lemat_oxi_dict_probs = json.loads(Path("oxi_dict_probs.json").read_bytes())

In [15]:
# Extend the LeMat lookups with elements that only the ICSD data provides:
# each element missing from lemat_oxi_state_mapping receives its ICSD charge
# list, and every one of its species receives the ICSD proportion.
for key, charges in oxi_state_mapping.items():
    if key in lemat_oxi_state_mapping:
        # Elements already present are left untouched.
        continue
    lemat_oxi_state_mapping[key] = charges
    for charge in charges:
        # Rebuild the species label: a magnitude of 1 is implicit ("Au+", "Au-").
        magnitude = "" if abs(charge) == 1 else str(abs(charge))
        if charge > 0:
            new_key = f"{key}{magnitude}+"
        elif charge < 0:
            new_key = f"{key}{magnitude}-"
        else:
            # A zero charge gets neither digits nor a sign.
            new_key = str(key)
        print(new_key)
        lemat_oxi_dict_probs[new_key] = icsd_dict[new_key]

Kr2+
Tc+
Tc2+
Tc3+
Tc4+
Tc5+
Tc6+
Tc7+
Xe2+
Xe4+
Xe6+
Xe8+
Pm3+
Os+
Os2+
Os3+
Os4+
Os5+
Os6+
Os7+
Os8+
Pt2-
Pt+
Pt2+
Pt3+
Pt4+
Pt5+
Pt6+
Au-
Au+
Au2+
Au3+
Au5+
Po4+
Ra2+
Ac3+
Pa3+
Pa4+
Pa5+
Np2+
Np3+
Np4+
Np5+
Np6+
Np7+
Pu2+
Pu3+
Pu4+
Pu5+
Pu6+
Pu7+
Am2+
Am3+
Am4+
Am5+
Am6+
Cm3+
Cm4+
Bk3+
Bk4+
Cf3+
Es3+


In [16]:
# with open("lemat_icsd_oxi_state_mapping.json", "w") as f:
#     json.dump(lemat_oxi_state_mapping, f, indent=4)

In [None]:
# with open("lemat_icsd_oxi_state_mapping.json", "w") as f:
#     json.dump(lemat_oxi_state_mapping, f, indent=4)

In [24]:
def test_matching(oxi_state_mapping, lemat_oxi_dict_probs):
    
    with open("lemat_icsd_oxi_state_mapping.json", "rb") as f:
        reference_oxi_state_mapping = json.load(f) 
    with open("lemat_icsd_oxi_dict_probs.json", "rb") as f:
        reference_oxi_dict_probs = json.load(f) 

    if reference_oxi_state_mapping == oxi_state_mapping: 
        pass
    else: 
        raise ValueError 

    if reference_oxi_dict_probs == lemat_oxi_dict_probs:
        pass
    else:
        raise ValueError 

    return "oxidation state dictionaries and probabilities match!"

In [25]:
# Compare the merged in-memory dictionaries with the reference JSON files;
# test_matching raises ValueError if either one differs.
test_matching(lemat_oxi_state_mapping, lemat_oxi_dict_probs)

'oxidation state dictionaries and probabilities match!'