In [256]:
import pandas as pd
import re
import sympy as sm

# Import data about elements.
# `interests` names the columns kept from the periodic-table CSV; `elements`
# is the resulting DataFrame restricted to those columns, in that order.
# NOTE(review): assumes 'periodic_table.csv' is in the current working
# directory and contains all four column headers - confirm.
interests = ['Symbol', 'Element', 'AtomicNumber', 'AtomicMass']
elements = pd.read_csv('periodic_table.csv')[interests]

def gcd(a, b):
    """Return the greatest common divisor of a and b (Euclid's algorithm).

    Repeatedly replaces the pair (a, b) with (b, a % b) until b is zero;
    the remaining a is the result.
    """
    while b != 0:
        a, b = b, a % b
    return a


def lcm(a, b):
    """Return the least common multiple of a and b.

    Computed as the product of the two values floor-divided by their gcd.
    """
    product = a * b
    return product // gcd(a, b)


def listlcm(L):
    """Return the least common multiple of every value in the sequence L.

    Starts from the first entry and folds each remaining entry in with lcm.
    Indexing L[0] means an empty sequence raises IndexError.
    """
    running = L[0]
    remaining = L[1:]
    for value in remaining:
        running = lcm(value, running)
    return running


def hyperliteralize(compound):
    """Placeholder: currently ignores `compound` and returns None."""
    return None

def literalize(eqn, hyper=False):
    """Expand parenthesised groups that carry a multiplier into element form.

    For example, ``(CH2)6`` becomes ``C6H12``. Only groups followed by an
    explicit multiplier are expanded; everything else is returned unchanged.

    Args:
        eqn: Equation or compound string, e.g. ``"Fe(OH)3+HCl=FeCl3+H2O"``.
        hyper: When False (default), groups tagged with ``~`` such as
            ``(~OH)2`` (polyatomic ions that must remain intact throughout
            the reaction) are left untouched. When True, tagged groups are
            expanded as well.

    Returns:
        The string with every matching group replaced by its expanded form.
    """
    # Tagged groups are only eligible for expansion in hyper mode.
    if hyper:
        group_pattern = r"\(~*([A-Za-z0-9]+)\)([0-9]+)"
    else:
        group_pattern = r"\(([A-Za-z0-9]+)\)([0-9]+)"

    def expand(match):
        # Multiply each element's count inside the group by the group's
        # multiplier; a missing count means 1.
        multiplier = int(match.group(2))
        pieces = []
        for symb, digits in re.findall(r"([A-Z][a-z]?)([0-9]*)", match.group(1)):
            count = int(digits) if digits else 1
            pieces.append(f"{symb}{count * multiplier}")
        return "".join(pieces)

    # The substitution happens once per whole group, after the complete
    # replacement has been built, so multi-element groups expand fully.
    # TODO: in hyper mode, also multiply element counts by leading compound
    # coefficients (e.g. 4H2O -> H8O4); this is not implemented yet.
    return re.sub(group_pattern, expand, eqn)

def _parse_side(side, elems):
    """Parse one side of an equation into compounds of [symbol, count] pairs.

    Symbols not seen before are appended to `elems` in order of appearance.
    """
    compounds = []
    for compound in side.split("+"):
        parts = []
        # A token is an element symbol or a ~-tagged group, followed by an
        # optional count. Capturing the trailing digits separately keeps
        # digits inside a tagged group (e.g. the 4 in "(~SO4)2") from being
        # mistaken for the count.
        for symb, digits in re.findall(r"([A-Z][a-z]?|\(~\w+\))([0-9]*)", compound):
            if symb not in elems:
                elems.append(symb)
            parts.append([symb, int(digits) if digits else 1])
        compounds.append(parts)
    return compounds


def _format_side(compounds, coeffs, star, ones):
    """Render one side of the equation with its coefficients, joined by '+'."""
    terms = []
    for coeff, compound in zip(coeffs, compounds):
        term = ""
        if coeff != 1 or ones:
            term += str(coeff)
            if star:
                term += "*"
        for symb, count in compound:
            term += symb
            if count > 1:
                term += str(count)
        terms.append(term)
    return "+".join(terms)


def balance(eqn, lit=True, star=False, unk=False, el=False, ones=False):
    """Balance a chemical equation given as ``"A+B=C+D"``.

    Builds one linear equation per element (or ~-tagged group), with
    reactant counts positive and product counts negative, row-reduces the
    matrix with sympy and scales the solution to whole numbers.

    Coefficients already present in the input (e.g. the 3 in ``3NaOH``) are
    not read by the parser, so the equation is balanced from scratch.

    Args:
        eqn: Equation string with exactly one ``=`` and compounds separated
            by ``+``.
        lit: If true, run `literalize` on the equation first so that
            parenthesised groups with a multiplier are expanded.
        star: If true, put ``*`` between each coefficient and its compound.
        unk: If true, print the list of solved coefficients.
        el: If true, print the list of distinct symbols found.
        ones: If true, write coefficients equal to 1 explicitly.

    Returns:
        The balanced equation as a string.

    Raises:
        ValueError: If `eqn` does not contain exactly one ``=``.
    """
    if lit:
        eqn = literalize(eqn)
    lhs, rhs = eqn.split("=")
    elems = []
    lhl = _parse_side(lhs, elems)
    rhl = _parse_side(rhs, elems)
    if el:
        print(elems)

    # One row per symbol, one column per compound; product columns are
    # negated so a balanced equation satisfies matrix * coefficients = 0.
    matrix = []
    for current in elems:
        row = [sum(count for symb, count in compound if symb == current)
               for compound in lhl]
        row += [-sum(count for symb, count in compound if symb == current)
                for compound in rhl]
        matrix.append(row)

    # The last compound's coefficient is treated as the free variable; the
    # other coefficients are read from the last column of the reduced matrix.
    # NOTE(review): this presumes the equation has a single independent
    # solution (one free variable) - confirm for unusual inputs.
    reduced = (sm.Matrix(matrix)).rref()
    endcol = reduced[0].col(-1)
    denoms = [endcol[i].as_numer_denom()[1] for i in range(len(endcol))]
    # Scale by the lcm of the denominators to clear fractions.
    factor = listlcm(denoms)
    unknowns = [-factor * endcol[i] for i in range(len(lhl) + len(rhl) - 1)]
    unknowns.append(factor)
    if unk:
        print(unknowns)

    left = _format_side(lhl, unknowns[:len(lhl)], star, ones)
    right = _format_side(rhl, unknowns[len(lhl):], star, ones)
    return left + "=" + right

def details(eqn):  # Input a balanced equation, and it will be hyperliteralized
    """Print a header for the reaction; the detailed breakdown is unfinished.

    Args:
        eqn: Balanced equation string containing exactly one ``=``.

    Returns:
        None.

    Raises:
        ValueError: If the equation does not contain exactly one ``=``.
    """
    print("Details on reaction: " + eqn)
    eqn = literalize(eqn, True)  # Hyperliteralize the equation to obtain base forms
    # Splitting on "=" keeps the ValueError for malformed input.
    lhs, rhs = re.split("=", eqn)
    # TODO: look at each compound on both sides and tally the total count of
    # every element / ~-tagged group across the equation, then report it.
    return
    
# Balance photosynthesis first; its result is reused by details() below.
photosynthesis = balance("H2O+CO2=C6H12O6+O2")
print(photosynthesis)

# Further sample reactions, balanced and printed in this order.
for equation in (
    "FeCl3+3NaOH=FeO3H3+NaCl",
    "CH4+O2=CO2+H2O",
    "P4O10+H2O=H3PO4",
    "SiCl4+H2O=H4SiO4+HCl",
    "Na2CO3+HCl=NaCl+H2O+CO2",
    "Ca3P2O8+SiO2=P4O10+CaSiO3",
):
    print(balance(equation))

details(photosynthesis)

#print(balance("C2952H4664N812O832S8Fe4+Na2C4H3O4SAu+Fe(SCN)2+Fe(NH4)2(SO4)2(H2O)6+C4H8Cl2S+C8H12MgN2O8=C55H77MgN4+Na399Fe100(CN)600+Au987S1000C6000H11000O5000+HClO4+H2S",True,True,True,True))

6H2O+6CO2=C6H12O6+6O2
FeCl3+3NaOH=FeO3H3+3NaCl
CH4+2O2=CO2+2H2O
P4O10+6H2O=4H3PO4
SiCl4+4H2O=H4SiO4+4HCl
Na2CO3+2HCl=2NaCl+H2O+CO2
2Ca3P2O8+6SiO2=P4O10+6CaSiO3
Details on reaction: 6H2O+6CO2=C6H12O6+6O2
