In [1]:
#export
"""All tools related to the xml file format. Expected to be used behind the "kxml"
module name, like this::

    from k1lib.imports import *
    cat("abc.xml") | kxml.node() | kxml.display()
"""
from k1lib import cli; from typing import Iterator
import xml.etree.ElementTree as ET; import copy, xml, k1lib
from typing import List
from xml.sax.saxutils import escape
__all__ = ["node", "maxDepth", "tags", "pretty", "display"]

In [2]:
# Sample document used by the test cells of this notebook: a root element
# holding two EXPERIMENT_PACKAGE elements, the first of which also contains
# a nested EXPERIMENT_PACKAGE.
s = """
<EXPERIMENT_PACKAGE_SET>
  <EXPERIMENT_PACKAGE>
    <EXPERIMENT_PACKAGE/>
    <Pool/>
    <RUN_SET/>
  </EXPERIMENT_PACKAGE>
  <EXPERIMENT_PACKAGE>
    <Pool/>
    <RUN_SET/>
  </EXPERIMENT_PACKAGE>
</EXPERIMENT_PACKAGE_SET>"""

In [32]:
#export
class node(cli.BaseCli):
    """Turns a string, or an iterable of lines, into a single root
node. Example::

    s = \"\"\"
    <html>
        <head>
            <style></style>
        </head>
        <body>
            <div></div>
        </body>
    </html>\"\"\"
    # returns root node
    s | kxml.node()
    # same thing as above, demonstrating you can pipe in list of strings
    s.split(\"\\n\") | kxml.node()

Lines are put back together with a newline between them before parsing,
so a tag or a piece of text that spans several lines is not fused into
a single token.
"""
    def __ror__(self, it:Iterator[str]) -> ET.Element:
        # A plain string already is the whole document. Joining it would
        # only walk it character by character, so parse it directly.
        if isinstance(it, str): return ET.fromstring(it)
        # Lines are presumably delivered without their terminator (as from
        # str.split("\n")). Joining with "" would glue the end of one line
        # to the start of the next, e.g. `<a` + `href="x">` -> `<ahref="x">`.
        return ET.fromstring("\n".join(it))

In [4]:
# piping the sample document into node() must give back an ElementTree element
assert isinstance(s | node(), ET.Element)

In [5]:
#export
def _maxDepth(node, maxDepth:int, depth:int=0):
    if depth >= maxDepth:
        while len(node) > 0: del node[0]
    for n in node: _maxDepth(n, maxDepth, depth+1)
    return node
class maxDepth(cli.BaseCli):
    def __init__(self, depth:int=None, copy:bool=True):
        """Filters out too deep nodes.
Example::

    # returns root node, but prunes children deeper than the specified depth
    s | kxml.node() | kxml.maxDepth(2)

:param depth: max depth to keep, with the root element at depth 0. Elements
    sitting at this depth lose all of their children. If None, nothing is pruned
:param copy: if True, prune a deep copy and leave the incoming element
    untouched. If False, prune the incoming element itself"""
        # None means "no limit": no depth ever reaches infinity
        self.depth = depth if depth is not None else float("inf")
        self.copy = copy
    def __ror__(self, node:ET.Element) -> ET.Element:
        # `copy` below is the stdlib module; the constructor flag lives on self
        if self.copy: node = copy.deepcopy(node)
        return _maxDepth(node, self.depth)

In [6]:
#export
def _tags(node, tag:str, nested):
    if node.tag == tag: yield node
    if node.tag != tag or nested:
        for n in node: yield from _tags(n, tag, nested)
class tags(cli.BaseCli):
    def __init__(self, *tags:str, nested=False):
        """Finds all tags that have a particular name. Example::

    s = \"\"\"
    <EXPERIMENT_PACKAGE_SET>
      <EXPERIMENT_PACKAGE>
        <EXPERIMENT_PACKAGE/>
        <Pool/>
        <RUN_SET/>
      </EXPERIMENT_PACKAGE>
      <EXPERIMENT_PACKAGE>
        <Pool/>
        <RUN_SET/>
      </EXPERIMENT_PACKAGE>
    </EXPERIMENT_PACKAGE_SET>\"\"\"

    # returns a list of "Pool" tags (with 2 elements) that are 2 levels deep
    s | kxml.node() | kxml.tags("Pool") | toList()
    # returns list with 2 tags
    s | kxml.node() | kxml.tags("EXPERIMENT_PACKAGE")
    # returns list with 3 tags
    s | kxml.node() | kxml.tags("EXPERIMENT_PACKAGE", nested=True)

:param tags: one or more tag names to search for. The tree is searched once
    per name, and the per-name results are combined by ``cli.joinStreams``
:param nested: whether to search for "div" tag inside of another "div" tag"""
        self.tags = tags; self.nested = nested
    def __ror__(self, node:ET.Element) -> Iterator[ET.Element]:
        # one lazy search per requested tag name, combined into a single stream
        return [_tags(node, tag, self.nested) for tag in self.tags] | cli.joinStreams()

In [7]:
# Expected match counts for the sample document `s`: it holds two "Pool"
# elements and two top-level "EXPERIMENT_PACKAGE" elements...
assert len(s | node() | tags("Pool") | cli.toList()) == 2
assert len(s | node() | tags("EXPERIMENT_PACKAGE") | cli.toList()) == 2
# ...and with nested=True the EXPERIMENT_PACKAGE inside the first one counts too
assert len(s | node() | tags("EXPERIMENT_PACKAGE", nested=True) | cli.toList()) == 3

In [8]:
#export
def _pretty(node, depth:int=0, indents=[]):
    attr = "".join([f" {k}=\"{v}\"" for k, v in node.attrib.items()])
    text = (node.text or "").strip("\t \n\r")
    if len(node) == 0:
        if text == "": yield indents[depth] + f"<{node.tag}{attr}/>"
        else: yield indents[depth] + f"<{node.tag}{attr}>{text}</{node.tag}>"
    else:
        yield indents[depth] + f"<{node.tag}{attr}>"
        for n in node: yield from _pretty(n, depth+1, indents)
        yield indents[depth] + f"</{node.tag}>"
class pretty(cli.BaseCli):
    def __init__(self, indent:str=None):
        """Renders an element as a stream of indented xml lines.
Example::

    # prints out the element
    s | kxml.node() | kxml.pretty() | stdout()

:param indent: string repeated once per nesting level. It is passed through
    ``cli.init.patchDefaultIndent``, which presumably supplies the library
    default when this is None"""
        self.indent = cli.init.patchDefaultIndent(indent)
    def __ror__(self, it:ET.Element) -> Iterator[str]:
        # one ready-made prefix per nesting level, for levels 0 through 99
        indentTable = [self.indent * level for level in range(100)]
        rendered = _pretty(it, depth=0, indents=indentTable)
        # discard lines that are empty once whitespace is stripped
        dropBlank = cli.filt(cli.op().strip() != "")
        return rendered | dropBlank

In [9]:
# Smoke test: pretty-printing the sample document must produce output
# (cli.shape(0) is presumably the number of rendered lines).
assert s | node() | pretty() | cli.deref() | cli.shape(0) > 0

In [10]:
#export
class display(cli.BaseCli):
    def __init__(self, depth:int=3, lines:int=20):
        """Shortcut that prunes an element, pretty-prints it and writes the first few lines to stdout.
Example::

    # prints out the element
    s | kxml.node() | kxml.display()

:param depth: prune tags deeper than the specified depth. Put "None" to not prune at all
:param lines: max number of lines to print out. Put "None" if you want to display everything"""
        self.depth = depth
        self.lines = lines
    def __ror__(self, it:ET.Element, lines=10):
        # NOTE: the `lines` argument is not read here; the limit comes from self.lines
        shown = it if self.depth is None else (it | maxDepth(self.depth))
        # nothing is returned: the rendered lines go straight to cli.stdout()
        shown | pretty() | cli.head(self.lines) | cli.stdout()

In [11]:
# display() writes to stdout and returns nothing, so capture what it prints
with k1lib.captureStdout() as out: s | node() | display()
# with default settings the sample document is expected to give 12 captured lines
assert out() | cli.shape(0) == 12

In [33]:
!../../export.py cli/kxml

Current dir: /home/kelvin/repos/labs/k1lib, ../../export.py
rm: cannot remove '__pycache__': No such file or directory
Found existing installation: k1lib 0.17
Uninstalling k1lib-0.17:
  Successfully uninstalled k1lib-0.17
running install
running bdist_egg
running egg_info
creating k1lib.egg-info
writing k1lib.egg-info/PKG-INFO
writing dependency_links to k1lib.egg-info/dependency_links.txt
writing requirements to k1lib.egg-info/requires.txt
writing top-level names to k1lib.egg-info/top_level.txt
writing manifest file 'k1lib.egg-info/SOURCES.txt'
reading manifest file 'k1lib.egg-info/SOURCES.txt'
adding license file 'LICENSE'
writing manifest file 'k1lib.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running install_lib
running build_py
creating build
creating build/lib
creating build/lib/k1lib
copying k1lib/_learner.py -> build/lib/k1lib
copying k1lib/fmt.py -> build/lib/k1lib
copying k1lib/_context.py -> build/lib/k1lib
copying k1lib/selector.py -> build