# Decoding a chemical formula

This script takes a string containing a chemical formula
and converts it into the molecular weight of the molecule.

In [52]:
# Chemical formula to decode
formula = 'H2SO4'

In [53]:
import re       # Load Regular Expression library

# Atomic masses keyed by element symbol.
# Only the elements listed here can be looked up.
atomic_mass = {
    'H': 1.01, 'He': 4.0026,
    'Li': 6.941, 'Be': 9.01218, 'B': 10.81, 'C': 12.011,
    'N': 14.0067, 'O': 15.9994, 'F': 18.998403, 'Ne': 20.179,
    'Na': 22.98977, 'Mg': 24.305, 'Al': 26.98154, 'Si': 28.0855,
    'P': 30.97376, 'S': 32.06, 'Cl': 35.453,
    'K': 39.0983, 'Ar': 39.948, 'Ca': 40.08, 'Fe': 55.847,
}

In [54]:
# Regular expression that matches the symbol of an element:
# one capital letter, optionally followed by one lower-case letter
element = re.compile('[A-Z]{1}[a-z]{0,1}')

# Extract the element symbols from the formula, in order of appearance
# (the pattern has no groups, so findall returns the matched text itself)
elements = element.findall(formula)

# Find the atomic mass for each element in the formula
# (raises KeyError for a symbol that is not in atomic_mass)
mass = [atomic_mass[e] for e in elements]

# Split the formula using the element symbols as separators.
# The first piece is whatever precedes the first symbol; each later piece is
# the text that follows one symbol, i.e. its number of atoms ('' if omitted).
nAtom = element.split(formula)

# Text in front of the first symbol (e.g. the "2" in "2H2O") cannot be
# attributed to any element, so reject it instead of guessing.
if nAtom[0]:
    raise ValueError(
        'Formula must start with an element symbol: {!r}'.format(formula))

# Drop the leading piece by position.  Removing "the first empty string"
# instead would delete a genuine blank count further along whenever the
# leading piece is not empty, shifting counts onto the wrong elements.
# A blank count means the element occurs once.
nAtom = [n if n else '1' for n in nAtom[1:]]

In [55]:
# Molecular mass of the molecule

# Contribution of each element: its atomic mass times its number of atoms
mol_comp = [mass[idx] * float(nAtom[idx]) for idx, _ in enumerate(elements)]

# The total molecular mass is the sum of the contributions
mol_mass = sum(mol_comp)

# Report the result
print(
    '{formula} has a molecular mass of {mass:7.2f} g'.format(
        formula=formula,
        mass=mol_mass,
    )
)

H2SO4 has a molecular mass of   98.08 g
