In [1]:
from approx.approximate import * 

In [2]:
# Example formula reused by the following output-format cells
formula = "NaCu0.5Mn0.5O2"

# ApprOXimate instance (constructed with verbose=False) shared by the next cells
approx = ApprOXimate(verbose=False)

# String output (original format): one semicolon-separated summary string
result_string = approx.charge_balance(formula, return_format='string')
print("String result:", result_string)

String result: Na:1:1.0;O:-2:2.0;Cu:2:0.5;Mn:4:0.5;FinalChargeBalance:0.0


In [3]:
# Dictionary output: result_dict['elements'] maps each element to its record
result_dict = approx.charge_balance(formula, return_format='dict')
print("\nDict result:")
for element, data in result_dict['elements'].items():
    if 'states' not in data:
        # Flat record: oxidation state, quantity and SRP sit directly on the entry
        print(f"{element}: oxidation_state={data['oxidation_state']}, quantity={data['quantity']}, SRP={data['srp']}")
        continue
    # Nested record: one sub-entry per oxidation state under the 'states' key
    print(f"{element}:")
    for state in data['states']:
        print(f"  Oxidation state {state['oxidation_state']}: quantity={state['quantity']}, SRP={state['srp']}")


Dict result:
Na: oxidation_state=1, quantity=1.0, SRP=None
O: oxidation_state=-2, quantity=2.0, SRP=None
Cu: oxidation_state=2, quantity=0.5, SRP=0.339
Mn:
  Oxidation state 4: quantity=0.5, SRP=0.98


In [4]:
# Object output - now with proper SRP values
# Uses the `approx` instance and `formula` string defined in an earlier cell.
# NOTE(review): SRP is presumably the standard reduction potential - confirm.
result_obj = approx.charge_balance(formula, return_format='object')
print("\nObject result:")
# Each entry of result_obj.elements is read through its element,
# oxidation_state, quantity and srp attributes.
for element_state in result_obj.elements:
    print(f"{element_state.element}: {element_state.oxidation_state}, quantity={element_state.quantity}, SRP={element_state.srp}")


Object result:
Na: 1, quantity=1.0, SRP=None
O: -2, quantity=2.0, SRP=None
Cu: 2, quantity=0.5, SRP=0.339
Mn: 4, quantity=0.5, SRP=0.98


In [5]:
# DataFrame output - now with proper SRP values
# Same charge_balance call as the earlier cells, requesting return_format='dataframe'.
result_df = approx.charge_balance(formula, return_format='dataframe')

# Bare last expression so the notebook renders the result.
result_df

Unnamed: 0,Element,Oxidation_State,Quantity,Is_Fixed,SRP
0,Na,1,1.0,True,
1,O,-2,2.0,True,
2,Cu,2,0.5,False,0.339
3,Mn,4,0.5,False,0.98


In [6]:
from approx.approximate import ApprOXimate
from approx.feature_engineering import MaterialFeatureExtractor
from mendeleev.fetch import fetch_table

# Element table fetched through mendeleev's fetch_table
ptable = fetch_table("elements")
# Fresh ApprOXimate instance with default constructor arguments
approx = ApprOXimate()

# The extractor is built from the charge-balance engine and the element table
extractor = MaterialFeatureExtractor(approx, ptable)

# Bare last expression: the notebook displays the feature record for Li2MnO3
extractor.get_features("Li2MnO3")

{'formula': 'Li2MnO3',
 'all_valence_s_sum': np.float64(6.0),
 'all_valence_s_avg': np.float64(1.0),
 'all_valence_s_dev': np.float64(1.0),
 'all_valence_s_min': np.float64(0.0),
 'all_valence_s_max': np.float64(2.0),
 'all_valence_s_range': np.float64(2.0),
 'all_valence_s_mode': np.float64(0.0),
 'all_unfilled_valence_s_sum': np.float64(6.0),
 'all_unfilled_valence_s_avg': np.float64(1.0),
 'all_unfilled_valence_s_dev': np.float64(1.0),
 'all_unfilled_valence_s_min': np.float64(0.0),
 'all_unfilled_valence_s_max': np.float64(2.0),
 'all_unfilled_valence_s_range': np.float64(2.0),
 'all_unfilled_valence_s_mode': np.float64(2.0),
 'all_valence_p_sum': np.float64(12.0),
 'all_valence_p_avg': np.float64(2.0),
 'all_valence_p_dev': np.float64(2.0),
 'all_valence_p_min': np.float64(0.0),
 'all_valence_p_max': np.float64(4.0),
 'all_valence_p_range': np.float64(4.0),
 'all_valence_p_mode': np.float64(0.0),
 'all_unfilled_valence_p_sum': np.float64(24.0),
 'all_unfilled_valence_p_avg': np.fl

In [7]:
from mendeleev import element

formula = "NaFe0.5Co0.5O2"
approx = ApprOXimate()

# parse_formula's result is consumed below as a mapping: element symbol -> amount
formula_dict = approx.parse_formula(formula)

# Accumulate the formula mass: each element's mass times its amount
mass = 0
for atom, amount in formula_dict.items():
    weighted_mass = element(atom).mass * amount
    mass += weighted_mass

print(mass)

112.37686628000002


In [8]:
# NOTE(review): Faraday constant rounded to an integer, presumably in C/mol - confirm
faradays_constant = 96485
# NOTE(review): presumably the number of electrons transferred per formula unit - confirm
n = 0.5
# q = n*F / (3.6*mass), with `mass` taken from the formula-mass cell above.
# NOTE(review): looks like a theoretical capacity in mAh/g (3.6 C per mAh) - confirm
q = n * faradays_constant / (3.6 * mass)

print(q)

119.24780328946936


In [9]:
# NOTE(review): Faraday constant rounded to an integer, presumably in C/mol - confirm
faradays_constant = 96485
# Same calculation as the previous cell, now with n = 1
n = 1
# q = n*F / (3.6*mass), with `mass` taken from the formula-mass cell above.
q = n * faradays_constant / (3.6 * mass)

print(q)

238.49560657893872


Set `n` to the smallest value for which the charge balance is 0; the resulting `q` is then the theoretical capacity (per unit mass) implied by charge balance.

In [10]:
import pandas as pd
from approx.approximate import * 

# Load the cleaned ICSD table; the path is relative to the notebook's working directory.
icsd_df = pd.read_csv("ICSD_CrystStruc_DATA_cleaned.csv")

# Bare last expression so the notebook renders the frame.
icsd_df

Unnamed: 0,HMS,StructuredFormula,Temperature,Pressure,crystal_system
0,P 4/m m m,Nd1Ba1Mn1Fe1O5.45,293.0,0.101325,tetragonal
1,P 4/m m m,Nd1Ba1Mn1Fe1O5.17,293.0,0.101325,tetragonal
2,P 4/m m m,Nd1Ba1Mn1Fe1O5.09,293.0,0.101325,tetragonal
3,I 4/m m m,Sr2Mn2.275Cr0.725As2O2,300.0,0.101325,tetragonal
4,P 42 m c,Ca1Mn1Ti1.8V0.2O6,293.0,0.101325,tetragonal
...,...,...,...,...,...
2296,F d -3 Z,Mn21.5Rb49Al92Si100O384,294.0,0.101325,cubic
2297,F 4 3 2,Ca6.3Mn3Ga4.4Al1.3O18,293.0,0.101325,cubic
2298,F -4 3 m,Li1In1Cr3.8Mn0.2O8,293.0,0.101325,cubic
2299,F -4 3 m,Li1In1Cr3.6Mn0.4O8,293.0,0.101325,cubic


In [11]:
from mendeleev.fetch import fetch_table

approx = ApprOXimate()

# Print every ICSD formula for which charge_balance raises.
for formula in icsd_df['StructuredFormula'].tolist():
    try:
        # Only success or failure matters here, so the returned string is not kept.
        approx.charge_balance(formula, return_format='string')
    except Exception:
        # `except Exception` instead of a bare `except:` so that
        # KeyboardInterrupt / SystemExit can still stop this long loop.
        print(formula)

Ca8Mn10.64Si12.32O56H18
Mn2H46O101Si2W24
La6H113O132Mn2V25
Ce6H109O130Mn2V25
Pr6H113O132Mn2V25
Mn6H88O110W19Zn3
H76Mn5.5O102W18.5Sb2
Na10Mn5H148O156W24
Na12Mn1Nb12O88H100
K2.16Mn16Si26.9O75.8H8
K3.68Mn15.904Si25.472O80.4H23.424
Na12Mn1Nb12O90H104
Mn16Si12As3O57H17
Mn28Cs36Al92Si100O384
Mn28.5Si135Al57O411.2H54.4
Mn21.5Rb49Al92Si100O384


In [12]:
def charge_balance_ok(formula):
    """Report whether ``approx.charge_balance`` accepts ``formula``.

    Uses the notebook-level ``approx`` instance. Returns True when the call
    completes and False when it raises any ``Exception``.
    """
    accepted = True
    try:
        approx.charge_balance(formula, return_format='string')
    except Exception:
        accepted = False
    return accepted

# Per-row boolean: True where charge_balance_ok accepted the formula
mask = icsd_df['StructuredFormula'].apply(charge_balance_ok)
# Keep only the accepted rows and renumber the index from 0
icsd_df = icsd_df.loc[mask].reset_index(drop=True)

In [13]:
from tqdm import tqdm

def featurize_df(df, extractor, formula_col="Compound"):
    """Join extractor features onto the rows of ``df`` that featurize successfully.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame containing ``formula_col``. Rows are selected by index
        label and joined on the index, so the index is assumed to be unique.
    extractor : object
        Anything exposing ``get_features(formula)`` that returns one record
        per formula (a dict in this notebook).
    formula_col : str, default "Compound"
        Name of the column holding the formula strings.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` whose formula featurized without raising, with the
        feature columns appended. Failed rows are dropped; the original index
        labels are kept.
    """
    feature_rows = []
    valid_indices = []

    formulas = df[formula_col]
    # Passing total lets tqdm show a percentage and ETA instead of a bare counter.
    for idx, f in tqdm(formulas.items(), total=len(formulas), desc="Feat compounds"):
        try:
            feats = extractor.get_features(f)
        except Exception as e:
            # Best-effort: skip the formula, but report why it failed.
            print(f"Formula Fail: {f} ({type(e).__name__}: {e})")
            continue
        feature_rows.append(feats)
        valid_indices.append(idx)

    feat_df = pd.DataFrame(feature_rows, index=valid_indices)

    # Keep only rows that featurized successfully
    df_valid = df.loc[valid_indices]

    return df_valid.join(feat_df)

# Build the inputs of the extractor: element table and charge-balance engine
ptable = fetch_table("elements")
approx = ApprOXimate()
extractor = MaterialFeatureExtractor(approx, ptable, mode="all")

# Featurize every formula in the StructuredFormula column, then preview the result
df_feat = featurize_df(icsd_df, extractor, formula_col="StructuredFormula")
df_feat.head()

Feat compounds: 920it [13:29,  1.12it/s]

Formula Fail: Mn1Si1F6D12O6


Feat compounds: 1584it [23:30,  1.54it/s]

Formula Fail: Na0.6Mn2O5.5D2H1


Feat compounds: 2285it [33:27,  1.14it/s]


Unnamed: 0,HMS,StructuredFormula,Temperature,Pressure,crystal_system,formula,all_valence_s_sum,all_valence_s_avg,all_valence_s_dev,all_valence_s_min,...,all_gordy_en_max,all_gordy_en_range,all_gordy_en_mode,all_mb_en_sum,all_mb_en_avg,all_mb_en_dev,all_mb_en_min,all_mb_en_max,all_mb_en_range,all_mb_en_mode
0,P 4/m m m,Nd1Ba1Mn1Fe1O5.45,293.0,0.101325,tetragonal,Nd1Ba1Mn1Fe1O5.45,10.9,1.153439,0.988158,0.0,...,0.465308,0.424263,0.325615,0.374233,0.039601,0.021076,0.0,0.063694,0.063694,0.0
1,P 4/m m m,Nd1Ba1Mn1Fe1O5.17,293.0,0.101325,tetragonal,Nd1Ba1Mn1Fe1O5.17,10.34,1.12759,0.991827,0.0,...,0.465308,0.424263,0.325615,0.36123,0.039393,0.021368,0.0,0.063694,0.063694,0.0
2,P 4/m m m,Nd1Ba1Mn1Fe1O5.09,293.0,0.101325,tetragonal,Nd1Ba1Mn1Fe1O5.09,10.18,1.119912,0.992785,0.0,...,0.465308,0.424263,0.325615,0.357515,0.039331,0.021454,0.0,0.063694,0.063694,0.0
3,I 4/m m m,Sr2Mn2.275Cr0.725As2O2,300.0,0.101325,tetragonal,Sr2Mn2.275Cr0.725As2O2,8.0,0.380952,0.785353,0.0,...,0.378548,0.337504,0.241179,1.087329,0.051778,0.039568,0.0,0.166667,0.166667,0.0
4,P 42 m c,Ca1Mn1Ti1.8V0.2O6,293.0,0.101325,tetragonal,Ca1Mn1Ti1.8V0.2O6,12.0,1.2,0.979796,0.0,...,29.850746,29.809702,0.136209,3.014886,0.301489,1.055243,0.0,4.477612,4.477612,0.0


In [14]:
# Write the featurized table to CSV in the working directory; index=False omits the index column.
df_feat.to_csv('ICSD_featurised_data.csv', index=False)