# Write XML from list of path and values

[see program source](https://stackoverflow.com/questions/38984272/write-xml-from-list-of-path-values)

This program converts a list of entries (path/value pairs, shown first below) into an XML structure (shown right after them).

In [14]:
# Input rows: each entry is a dict pairing a "Path" string with the "Value" text for that path.
entries = [
    {"Path": path, "Value": value}
    for path, value in (
        ("Item/Info/Name", "Body HD"),
        ("Item/Info/Synopsis", "A great movie"),
        ('Item/Locales/Locale[@Country="US"][@Language="ES"]/Name', "El Grecco"),
        ("Item/Genres/Genre", "Action"),
        ("Item/Genres/Genre", "Drama"),
        ('Item/Purchases/Purchase[@Country="US"]/HDPrice', "10.99"),
        ('Item/Purchases/Purchase[@Country="US"]/SDPrice', "9.99"),
        ('Item/Purchases/Purchase[@Country="CA"]/SDPrice', "4.99"),
    )
]

    <Item>
        <Info>
            <Name>Body HD</Name>
            <Synopsis>A great movie</Synopsis>
        </Info>
        <Locales>
            <Locale Country="US" Language="ES">
                <Name>El Grecco</Name>
            </Locale>
        </Locales>
        <Genres>
            <Genre>Action</Genre>
            <Genre>Drama</Genre>
        </Genres>
        <Purchases>
            <Purchase Country="US">
                <HDPrice>10.99</HDPrice>
                <SDPrice>9.99</SDPrice>
            </Purchase>
            <Purchase Country="CA">
                <SDPrice>4.99</SDPrice>
            </Purchase>
        </Purchases>
    </Item>

In [15]:
import re
from lxml import etree

To parse each XPath step, I use the following regular expressions:

In [16]:
# Regular expressions used to parse a single step of a path.
TAG_REGEX = r"(?P<tag>\w+)"                         # named group "tag": the element name
CONDITION_REGEX = r"(?P<condition>(?:\[.*?\])*)"    # named group "condition": zero or more [...] predicates
STEP_REGEX = TAG_REGEX + CONDITION_REGEX
ATTR_REGEX = r"@(?P<key>\w+)=\"(?P<value>.*?)\""    # one @key="value" pair inside a predicate

# Bound methods of the compiled patterns, so callers can use them like plain functions.
search_step = re.compile(STEP_REGEX, flags=re.DOTALL).search
findall_attr = re.compile(ATTR_REGEX, flags=re.DOTALL).findall


def parse_step(step):
    """Split one path step into its tag name and its attribute dictionary.

    The step is searched (not fully matched) for a tag name, so a leading
    "/" as in "/Item" is skipped. Every @key="value" pair found in the
    bracketed predicates that directly follow the tag becomes one entry
    of the returned dictionary.

    Args:
        step: a single path step, e.g. 'Purchase[@Country="US"]'.

    Returns:
        A (tag, attrs) tuple where tag is the element name and attrs is a
        dict mapping attribute names to attribute values (empty when the
        step has no predicates).

    Raises:
        ValueError: if no tag name can be found in step (e.g. an empty
            string). The offending step is the exception argument.
    """
    mo = search_step(step)
    if mo is None:
        # Fail with the step that could not be parsed instead of falling
        # through to an unbound local variable.
        raise ValueError(step)
    return mo.group("tag"), dict(findall_attr(mo.group("condition")))

The **parse_step** function returns a tag name and an attributes dictionary. Then, I process each path step by step in the same way to build the XML tree:

In [17]:
from IPython.core.debugger import set_trace

In [18]:
# the root node does not exist yet, so it starts out as None
root = None

for entry in entries:
    # path is the "Path" string of the entry
    # build xpath_list, the list of steps of the path (the separator is "/"); the first step gets a leading "/"
    path = entry["Path"]
    parts = path.split("/")
    xpath_list = ["/" + parts[0]] + parts[1:]
    
    # start each entry with the current node set to the root node
    curr = root
    
    # xpath is a single step of the path
    for xpath in xpath_list:
        
        # tag_name is the element name and attrs is the dictionary of the element's attributes
        tag_name, attrs = parse_step(xpath)

        # if there is no current node yet, this element becomes the root
        if curr is None:                 
            root = curr = etree.Element(tag_name, **attrs)               # create the root node
        else:
            nodes = curr.xpath(xpath)   # otherwise evaluate the step as an XPath from the current node
            if nodes:                   # the step matched at least one existing element
                curr = nodes[0]                     # move to the first matching element
            else:                       # the step matched no existing element
                                        # so create it as a child of the current node
                curr = etree.SubElement(curr, tag_name, **attrs) # create the element as a child of the current node
    
    # at the end of the path, store the value in the last element;
    # if that element already has text, add a sibling with the same tag and attributes to hold the value instead
    if curr.text:                                        
        curr = etree.SubElement(curr.getparent(), curr.tag, **curr.attrib)
    curr.text = entry["Value"]

# print the resulting tree as pretty-printed XML
print(etree.tostring(root, pretty_print=True).decode())

<Item>
  <Info>
    <Name>Body HD</Name>
    <Synopsis>A great movie</Synopsis>
  </Info>
  <Locales>
    <Locale Country="US" Language="ES">
      <Name>El Grecco</Name>
    </Locale>
  </Locales>
  <Genres>
    <Genre>Action</Genre>
    <Genre>Drama</Genre>
  </Genres>
  <Purchases>
    <Purchase Country="US">
      <HDPrice>10.99</HDPrice>
      <SDPrice>9.99</SDPrice>
    </Purchase>
    <Purchase Country="CA">
      <SDPrice>4.99</SDPrice>
    </Purchase>
  </Purchases>
</Item>

