In [12]:
def molecular_mass(formula):
    """Return the sum of ``element_dict[symbol] * count`` over a flat formula.

    The formula is scanned left to right as a sequence of tokens. Each token
    is one character, optionally followed by a single lowercase letter (the
    element symbol), optionally followed by a run of digits (the atom count,
    defaulting to 1 when no digits are present).

    ``element_dict`` is a module-level mapping defined elsewhere in the
    notebook -- presumably symbol -> atomic mass; confirm against its
    definition.

    Raises:
        KeyError: if a scanned symbol is not a key of ``element_dict``.
    """
    mass = 0
    pos = 0
    length = len(formula)

    while pos < length:
        # Symbol: the current character plus at most one lowercase letter.
        symbol_end = pos + 1
        if symbol_end < length and formula[symbol_end].islower():
            symbol_end += 1
        element = formula[pos:symbol_end]
        pos = symbol_end

        # Count: every consecutive digit that follows the symbol.
        digits_start = pos
        while pos < length and formula[pos].isdigit():
            pos += 1
        digits = formula[digits_start:pos]
        multiplier = int(digits) if digits else 1

        mass += element_dict[element] * multiplier

    return mass


In [68]:
import re
import numpy as np
from fractions import Fraction
from math import gcd
from functools import reduce
def formula_in_dict(formula:str)-> dict:
    """Count the atoms of each element in a flat chemical formula.

    Every match of "uppercase letter, then any lowercase letters, then
    optional digits" contributes one element token; a missing number counts
    as 1. Counts of an element that occurs several times are added together.
    Characters that are not part of such a match are skipped.

    Returns:
        A dict mapping element symbol -> total count, with keys in order of
        first appearance in the formula.
    """
    token = re.compile(
        r"([A-Z][a-z]*)"  # element symbol
        r"(\d*)"  # optional atom count
    )
    tally = {}
    for match in token.finditer(formula):
        symbol, digits = match.groups()
        amount = int(digits) if digits else 1
        tally[symbol] = tally.get(symbol, 0) + amount
    return tally


def balance_reaction(reactants: list, products: list) -> tuple:
    """Balance a chemical reaction and return integer stoichiometric coefficients.

    An element-by-species matrix ``A`` is built from ``formula_in_dict`` (rows
    are the sorted element symbols, columns are reactants followed by
    products, product columns negated). A vector ``x`` with ``A @ x = 0`` is
    taken from the last right-singular vector of ``A``, rescaled to the
    smallest integers, and then checked exactly against the integer matrix.

    Args:
        reactants: formulas on the left-hand side, e.g. ``["H2", "O2"]``.
        products: formulas on the right-hand side, e.g. ``["H2O"]``.

    Returns:
        ``(reactant_coeffs, product_coeffs)``: two lists of ints, each in the
        order of the matching input list. A coefficient of 0 means the
        species does not take part in the balanced reaction.

    Raises:
        ValueError: if either side is empty, if no element can be read from a
            formula, or if the candidate coefficients do not conserve every
            element with non-negative values (the reaction cannot be balanced
            as written, or its solution is not unique up to scaling).
    """
    if not reactants or not products:
        raise ValueError("balance_reaction needs at least one reactant and one product")

    all_species = reactants + products
    n_reactants = len(reactants)
    composition_dicts = [formula_in_dict(f) for f in all_species]
    for formula, composition in zip(all_species, composition_dicts):
        if not composition:
            # An empty composition would add an all-zero column, which would
            # trivially "balance" with every other coefficient equal to 0.
            raise ValueError(f"could not read any element from formula {formula!r}")

    elements = sorted({elem for composition in composition_dicts for elem in composition})

    # Integer conservation matrix; product columns carry a minus sign so that
    # "atoms in == atoms out" becomes A @ x == 0.
    matrix = [
        [
            composition.get(elem, 0) * (1 if col < n_reactants else -1)
            for col, composition in enumerate(composition_dicts)
        ]
        for elem in elements
    ]

    _, _, vh = np.linalg.svd(np.array(matrix, dtype=float))
    vec = vh[-1]

    # vh[-1] has unit length, so an absolute tolerance is meaningful. Entries
    # that are only round-off noise must be zeroed: otherwise a value such as
    # 1e-17 would become the divisor below and wreck every coefficient.
    tol = 1e-10
    vec = np.where(np.abs(vec) > tol, vec, 0.0)
    min_val = np.min(np.abs(vec[vec != 0]))
    vec_scaled = vec / min_val

    # Snap the float ratios to nearby simple fractions, then clear denominators.
    coeffs_frac = [Fraction(float(c)).limit_denominator() for c in vec_scaled]

    def lcm(a, b):
        return abs(a * b) // gcd(a, b)

    lcm_total = reduce(lcm, (f.denominator for f in coeffs_frac), 1)
    coeffs_int = [int(f * lcm_total) for f in coeffs_frac]

    # gcd ignores zeros and signs, so the whole list can be reduced directly.
    common = reduce(gcd, coeffs_int)
    coeffs_final = [x // common for x in coeffs_int]

    # A singular vector is only defined up to sign; pick the positive one.
    if all(x <= 0 for x in coeffs_final):
        coeffs_final = [-x for x in coeffs_final]

    # Exact integer check: the SVD always returns *some* vector, even when the
    # matrix has no null space or more than one independent solution.
    conserved = all(
        sum(a * x for a, x in zip(row, coeffs_final)) == 0 for row in matrix
    )
    if not conserved or any(x < 0 for x in coeffs_final):
        raise ValueError(
            "could not balance "
            + " + ".join(reactants)
            + " -> "
            + " + ".join(products)
            + ": no unique set of non-negative integer coefficients conserves every element"
        )

    return coeffs_final[:n_reactants], coeffs_final[n_reactants:]

def _show_balanced(title, reactants, products, rule=True):
    """Balance one reaction, print it in this cell's report layout, and return
    the ``(reactant_coeffs, product_coeffs)`` pair from ``balance_reaction``.

    The reaction is balanced before anything is printed. ``rule`` controls
    whether the 30-dash separator line is printed after the report.
    """
    reac_coeffs, prod_coeffs = balance_reaction(reactants, products)

    print(title)
    for heading, species, coeffs in (
        ("Reactants:", reactants, reac_coeffs),
        ("Products:", products, prod_coeffs),
    ):
        print(heading)
        for mol, coef in zip(species, coeffs):
            print(f"{coef} × {mol}")
    if rule:
        print("-"*30)

    return reac_coeffs, prod_coeffs


# Test 1
reactants1 = ["H2", "O2"]
products1 = ["H2O"]
coeffs_reac1, coeffs_prod1 = _show_balanced(
    "Test 1: H2 + O2 → H2O", reactants1, products1
)

# Test 2
reactants2 = ["CH4", "O2"]
products2 = ["CO2", "H2O"]
coeffs_reac2, coeffs_prod2 = _show_balanced(
    "Test 2: CH4 + O2 → CO2 + H2O", reactants2, products2
)

# Test 3 (last report: no separator line after it)
reactants3 = ["CaO2H2", "H2SO4"]
products3 = ["CaSO4", "H2O"]
coeffs_reac3, coeffs_prod3 = _show_balanced(
    "Test 3: CaO2H2 + H2SO4 → CaSO4 + H2O", reactants3, products3, rule=False
)



#print(balance_reaction(["CaO2H2", "H2SO4"], ["CaSO4", "H2O"]))




Test 1: H2 + O2 → H2O
Reactants:
2 × H2
1 × O2
Products:
2 × H2O
------------------------------
Test 2: CH4 + O2 → CO2 + H2O
Reactants:
1 × CH4
2 × O2
Products:
1 × CO2
2 × H2O
------------------------------
Test 3: CaO2H2 + H2SO4 → CaSO4 + H2O
Reactants:
1 × CaO2H2
1 × H2SO4
Products:
1 × CaSO4
2 × H2O


## Chemical Reaction Balancing – Function Explanations

### `formula_in_dict` Function

**Purpose:**  
Convert a simple chemical formula into a dictionary `{symbol: count}`.  

**Limitations:**  
- Cannot handle formulas with parentheses (e.g., `Ca(OH)2`) or hydration dots (e.g., `CuSO4·5H2O`).  

**How it works:**  
1. `element_pattern` captures the chemical symbol: one uppercase letter followed by zero or more lowercase letters (e.g., `H`, `He`, `Ca`).  
2. `number_pattern` captures the number of atoms following the element.  
3. If no number is present, it defaults to `1`.  
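4. If an element appears more than once in the formula, its counts are added together (e.g., `CH3COOH` gives `{"C": 2, "H": 4, "O": 2}`).  
5. The function returns a dictionary where keys are element symbols and values are their total counts.  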

**Example:**  
```python
formula_in_dict("H2O")  # returns {"H": 2, "O": 1}
```


### `balance_reaction` Function

**Purpose:**  
Build the atom-conservation matrix of a reaction and compute integer stoichiometric coefficients that balance it.

**How it works:**  
1. Receives two lists: one for reactants and one for products.  
2. Creates a list of all unique elements present in the reaction.  
3. Builds a matrix where:  
   - Each **row** corresponds to an element.  
   - Each **column** corresponds to a molecule.  
   - **Columns corresponding to products are negated** to reflect atom conservation.

**Example:**  
For the reaction `H2 + O2 → H2O` (columns in the order `H2`, `O2`, `H2O`; rows in alphabetical order of the element symbols), the matrix `A` looks like:

H : [ 2, 0, -2] 

O : [ 0, 2, -1]

#### Mathematical Solution in `balance_reaction`

**Goal:**  
Find a non-zero vector `x` such that `A · x = 0` (stoichiometric coefficients).

**Method:**  
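- Use **Singular Value Decomposition (SVD)** with `np.linalg.svd`; the rows of `vh` are the right-singular vectors of `A`, ordered by decreasing singular value.  
- The last row (`vh[-1]`) belongs to the **smallest singular value**. When the reaction can be balanced, this vector lies in the null space of `A` and is therefore a solution, up to scale and sign.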

**Normalization:**  
1. Divide the vector by its smallest non-zero absolute entry, then convert each entry to a `Fraction` (with `limit_denominator()`) to absorb floating-point errors.  
2. Multiply by the **least common multiple (LCM)** of denominators to get integer coefficients.  
3. Divide by the **greatest common divisor (GCD)** to simplify the coefficients.

**Return values:**  
- A pair `(reactant_coeffs, product_coeffs)`: the coefficient list is split at `len(reactants)`, so each part follows the order of the corresponding input list.


In [61]:

def check_mass_conservation(reactants, products, coeffs_reac, coeffs_prod):
    """Report whether both sides of a balanced reaction have the same total mass.

    Each side's total is the sum of ``coefficient * molecular_mass(formula)``
    over its species. The sides count as equal when the totals differ by less
    than ``1e-10`` in absolute value, so float round-off does not cause a
    false mismatch.

    Prints a one-line verdict and returns ``True`` if the masses match,
    ``False`` otherwise.
    """

    def side_mass(species, coeffs):
        # Coefficients are looked up by position, one per formula on that side.
        return sum(
            coeffs[k] * molecular_mass(name) for k, name in enumerate(species)
        )

    mass_in = side_mass(reactants, coeffs_reac)
    mass_out = side_mass(products, coeffs_prod)

    balanced = abs(mass_in - mass_out) < 1e-10
    print("✅ Mass balanced" if balanced else "❌ Mass not balanced")
    return True if balanced else False


# Demo: balance one reaction, then confirm the result conserves mass.
reactants, products = ["CaO2H2", "H2SO4"], ["CaSO4", "H2O"]
coeffs_reac, coeffs_prod = balance_reaction(reactants, products)

# Left as a bare expression so the notebook also displays the returned bool.
check_mass_conservation(reactants, products, coeffs_reac, coeffs_prod)



✅ Mass balanced


True

#### `check_mass_conservation` Function

**Purpose:**  
Verify whether the total mass of reactants equals the total mass of products after balancing a chemical reaction.  

**How it works:**  
1. For each reactant, the function multiplies its stoichiometric coefficient (`coeffs_reac[i]`) by its molecular mass (`molecular_mass`).
2. The same procedure is performed for the products with their coefficients (`coeffs_prod[i]`).
3. The total mass of the reactants and products is calculated by adding these values.
4. The two totals are compared:
   - Instead of checking for strict equality (which can fail due to floating-point rounding errors), the function checks whether the absolute difference is less than a small tolerance (`1e-10`).
   - This ensures that tiny numerical errors do not result in a false imbalance.
5. The function prints `✅ Mass balanced` and returns `True` when the totals agree; otherwise it prints `❌ Mass not balanced` and returns `False`.

**Example:**  
```python
reactants = ["CaO2H2", "H2SO4"]
products = ["CaSO4", "H2O"]

coeffs_reac, coeffs_prod = balance_reaction(reactants, products)
check_mass_conservation(reactants, products, coeffs_reac, coeffs_prod)
# Output: ✅ Mass balanced
