# In [1]:
import xml.etree.ElementTree as ET
import yaml

def save_yaml(filename, unimod):
    """Write ``unimod`` to ``filename`` as YAML.

    The file is opened in text write mode, so an existing file of the same
    name is truncated.

    Args:
        filename: Path of the output file.
        unimod: Object passed to ``yaml.dump`` for serialization.
    """
    with open(filename, "w") as out:
        yaml.dump(unimod, stream=out)

# Parse the Unimod XML file (the path is relative to the working directory)
# and keep its root element; the extraction loop further down searches it.
xml = ET.parse('unimod.xml')
root = xml.getroot()


def get_composition(node):
    """Return the element composition found under ``node`` as one string.

    Every direct ``element`` child of ``node`` (its tag is qualified with the
    module-level ``xmlns`` prefix) contributes ``symbol(number)``, taken from
    its ``symbol`` and ``number`` attributes. The pieces are concatenated in
    document order without a separator.

    Args:
        node: An ElementTree element whose ``element`` children are read.

    Returns:
        The concatenated composition, or ``""`` when ``node`` has no
        ``element`` children.

    Raises:
        KeyError: If a child lacks the ``symbol`` or ``number`` attribute.
    """
    return "".join(
        child.attrib['symbol'] + '(' + child.attrib['number'] + ')'
        for child in node.findall(f'{xmlns}element')
    )

# Namespace prefix, in ElementTree's "{uri}" form, that qualifies every tag
# looked up in the Unimod XML. get_composition() reads this module-level name
# as well, so it has to be defined before the loop below runs.
xmlns = '{http://www.unimod.org/xmlns/schema/unimod_2}'

# Maps "<title>@<site>" to a dict with the keys mono_mass, avge_mass,
# composition, modloss, modloss_composition, classification and unimod_id.
unimod = {}
for modifications in root.findall(f'{xmlns}modifications'):
    for mod in modifications.findall(f'{xmlns}mod'):
        modname = mod.attrib['title']
        # Deliberately not called `id`: a module-level `id` would shadow the
        # builtin for everything executed after this script.
        unimod_id = int(mod.attrib['record_id'])

        # Only the first <delta> child of a modification is used.
        delta = mod.find(f'{xmlns}delta')
        if delta is None:
            # Without this guard the masses and composition left over from
            # the previous modification would silently be reused (or a
            # NameError raised if this is the very first modification).
            print(f'Skipping {modname}: no delta element')
            continue
        mono_mass = float(delta.attrib['mono_mass'])
        avge_mass = float(delta.attrib['avge_mass'])
        composition = get_composition(delta)

        for specificity in mod.findall(f'{xmlns}specificity'):
            pos = specificity.attrib['position']
            site = specificity.attrib['site']
            _class = specificity.attrib['classification']
            if site in ('N-term', 'C-term'):
                # A bare terminal site is named by its position instead.
                site = pos
            elif pos.startswith(('Any ', 'Protein ')):
                # A residue restricted to a terminus: "<residue>^<position>".
                site = site + '^' + pos

            # Take the first neutral loss whose mono_mass is not the literal
            # string '0'; fall back to 0 and an empty composition.
            ptm_nl = 0
            ptm_nl_composition = ""
            for nl in specificity.findall(f'{xmlns}NeutralLoss'):
                if nl.attrib['mono_mass'] == '0':
                    continue
                ptm_nl = nl.attrib['mono_mass']
                ptm_nl_composition = get_composition(nl)
                break

            entry = {
                'mono_mass': mono_mass,
                'avge_mass': avge_mass,
                'composition': composition,
                'modloss': float(ptm_nl),
                'modloss_composition': ptm_nl_composition,
                'classification': _class,
                'unimod_id': unimod_id,
            }
            mod_site = f'{modname}@{site}'
            unimod[mod_site] = entry

            # NOTE(review): nothing above inserts '~' into `site`, so this
            # branch only fires when the XML attributes themselves contain
            # one - confirm whether it is still needed.
            if '~' in site:
                print(mod_site)
                mod_site = f'{modname}@{pos}'
                # Store a copy, not the same object: PyYAML would otherwise
                # emit an anchor/alias pair instead of two plain mappings.
                unimod[mod_site] = dict(entry)

save_yaml('unimod.yaml', unimod)