In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('complete_periodic_table.csv')
df.head()

Unnamed: 0,atomic_number,symbol,name,atomic_mass,oxidation_states
0,1,H,Hydrogen,1.008,"[-1, +1]"
1,2,He,Helium,4.0026,[0]
2,3,Li,Lithium,6.94,[+1]
3,4,Be,Beryllium,9.0122,[+2]
4,5,B,Boron,10.81,[+3]


In [4]:
elem_list = []

while True:
  user_quest = input('enter the element symbol you want (to stop, enter "q") : ')
  if user_quest == 'q':
    break
  else:
    elem_list.append(user_quest)


elems = pd.DataFrame(elem_list)

print(elems,"this is your list of elements")

elements = df[df['symbol'].isin(elem_list)]

# Reset the index and set the 'symbol' column as the index for oxidation states
elements = elements.reset_index(drop=True)
os_of_each_elem = elements.set_index('symbol')['oxidation_states']


print(len(elements))

print(os_of_each_elem)

enter the element symbol you want (to stop, enter "q") : Mn
enter the element symbol you want (to stop, enter "q") : Mg
enter the element symbol you want (to stop, enter "q") : Fe
enter the element symbol you want (to stop, enter "q") : Cu
enter the element symbol you want (to stop, enter "q") : S
enter the element symbol you want (to stop, enter "q") : P
enter the element symbol you want (to stop, enter "q") : Co
enter the element symbol you want (to stop, enter "q") : Cr
enter the element symbol you want (to stop, enter "q") : N
enter the element symbol you want (to stop, enter "q") : Ni
enter the element symbol you want (to stop, enter "q") : q
    0
0  Mn
1  Mg
2  Fe
3  Cu
4   S
5   P
6  Co
7  Cr
8   N
9  Ni this is your list of elements
10
symbol
N     [-3, +1, +2, +3, +4, +5]
Mg                        [+2]
P                 [-3, +3, +5]
S             [-2, +2, +4, +6]
Cr                [+2, +3, +6]
Mn        [+2, +3, +4, +6, +7]
Fe                    [+2, +3]
Co                   

In [5]:
import itertools, math


In [6]:
import itertools, math, ast
from math import gcd
from functools import reduce

# --- Hill system formula builder ---
def hill_formula(formula_dict):
    parts = []
    if "C" in formula_dict:  # Carbon first
        n = formula_dict["C"]
        parts.append(f"C{n if n > 1 else ''}")
    if "H" in formula_dict:  # Hydrogen second
        n = formula_dict["H"]
        parts.append(f"H{n if n > 1 else ''}")
    for el in sorted(formula_dict):  # Rest alphabetically
        if el not in ["C", "H"]:
            n = formula_dict[el]
            parts.append(f"{el}{n if n > 1 else ''}")
    return "".join(parts)

# --- Reduce subscripts to simplest whole-number ratio ---
def simplify_counts(counts):
    g = reduce(gcd, counts)
    return [c // g for c in counts]

# --- Clean oxidation state field (handles strings like "[-2, +4]") ---
def clean_oxidation_states(oxidation_states):
    cleaned = {}
    for el, states in oxidation_states.items():
        if isinstance(states, str):
            try:
                parsed = ast.literal_eval(states)   # safely parse list
                cleaned[el] = [int(str(s).replace("+","")) for s in parsed]
            except:
                cleaned[el] = []
        else:
            cleaned[el] = [int(str(s).replace("+","")) for s in states]
    return cleaned

# --- Main compound generator ---
def generate_multi_element_compounds(user_elements, oxidation_states):
    compounds = set()
    os_lists = [oxidation_states[el] for el in user_elements]

    for charges in itertools.product(*os_lists):
        parsed_charges = []
        for c in charges:
            try:
                parsed_charges.append(int(str(c).strip()))  # keep minus sign
            except:
                parsed_charges.append(None)

        if None in parsed_charges:
            continue

        # Require at least one positive and one negative charge
        if not (any(c > 0 for c in parsed_charges) and any(c < 0 for c in parsed_charges)):
            continue

        try:
            non_zero_charges = [abs(c) for c in parsed_charges if c != 0]
            if not non_zero_charges:
                continue
            lcm_val = abs(math.lcm(*non_zero_charges))
        except ValueError:
            continue

        counts = [lcm_val // abs(c) if c != 0 else 0 for c in parsed_charges]
        if not all(counts):
            continue

        counts = simplify_counts(counts)
        formula_dict = {el: n for el, n in zip(user_elements, counts)}
        compounds.add(hill_formula(formula_dict))

    return sorted(compounds)

# --- Example Run ---
if __name__ == "__main__":
    # Example oxidation states dataset
    oxidation_states = elements.set_index('symbol')['oxidation_states']

    cleaned_states = clean_oxidation_states(oxidation_states)

    result = generate_multi_element_compounds(elem_list, cleaned_states)
    print("Compounds:", result)
    print("Total:", len(result))


Compounds: ['Co105Cr105Cu105Fe105Mg105Mn30N105Ni70P42S105', 'Co105Cr105Cu105Fe105Mg105Mn30N210Ni70P42S105', 'Co105Cr105Cu105Fe105Mg105Mn30N42Ni105P70S105', 'Co105Cr105Cu105Fe105Mg105Mn30N42Ni105P70S35', 'Co105Cr105Cu105Fe105Mg105Mn30N42Ni70P42S105', 'Co105Cr105Cu105Fe105Mg105Mn30N42Ni70P70S105', 'Co105Cr105Cu105Fe105Mg105Mn30N42Ni70P70S35', 'Co105Cr105Cu105Fe105Mg105Mn30N70Ni105P42S105', 'Co105Cr105Cu105Fe105Mg105Mn30N70Ni105P42S35', 'Co105Cr105Cu105Fe105Mg105Mn30N70Ni70P42S105', 'Co105Cr105Cu105Fe105Mg105Mn30N70Ni70P42S35', 'Co105Cr105Cu105Fe70Mg105Mn30N105Ni105P42S105', 'Co105Cr105Cu105Fe70Mg105Mn30N105Ni70P42S105', 'Co105Cr105Cu105Fe70Mg105Mn30N210Ni105P42S105', 'Co105Cr105Cu105Fe70Mg105Mn30N210Ni70P42S105', 'Co105Cr105Cu105Fe70Mg105Mn30N42Ni105P42S105', 'Co105Cr105Cu105Fe70Mg105Mn30N42Ni105P70S105', 'Co105Cr105Cu105Fe70Mg105Mn30N42Ni105P70S35', 'Co105Cr105Cu105Fe70Mg105Mn30N42Ni70P42S105', 'Co105Cr105Cu105Fe70Mg105Mn30N42Ni70P70S105', 'Co105Cr105Cu105Fe70Mg105Mn30N42Ni70P70S35', 'C