In [None]:
#| default_exp xml

# XML

> Concise generation of XML.

In [None]:
#| export
from fastcore.utils import *

import types,json

from dataclasses import dataclass, asdict
from typing import Mapping
from functools import partial
from html import escape

In [None]:
from IPython.display import Markdown
from pprint import pprint

from fastcore.test import test_eq

## FT functions

In [None]:
#|export
def _fix_k(k): return k if k=='_' else k.lstrip('_').replace('_', '-')

In [None]:
#| export
_specials = set('@.-!~:[](){}$%^&*+=|/?<>,`')
# Python-friendly spellings of the reserved words `class` and `for`. The all-lowercase
# forms are listed too because `_preproc` lowercases keyword names before mapping them,
# so `htmlClass=`/`htmlFor=` would otherwise never match.
_attr_aliases = dict(htmlClass='class', htmlclass='class', cls='class', _class='class', klass='class',
                     _for='for', fr='for', htmlFor='for', htmlfor='for')

def attrmap(o):
    "Map keyword name `o` to an attr name: names with special chars pass through, `class`/`for` aliases are resolved, anything else goes through `_fix_k`"
    # Names such as `@click` or `hx-get` are already valid attr names; leave them untouched
    if _specials & set(o): return o
    # Alias targets (`class`, `for`) contain no underscores, so `_fix_k` would not change them
    if o in _attr_aliases: return _attr_aliases[o]
    return _fix_k(o)

In [None]:
#| export
def valmap(o):
    "Convert attr value `o` for output: `is_listy` values are space-joined, dicts become `k:v; k:v`, empty ones give `None`, anything else is returned as-is"
    if is_listy(o):
        if not o: return None
        return ' '.join(str(x) for x in o)
    if isinstance(o, dict):
        if not o: return None
        pairs = (f"{k}:{v}" for k,v in o.items())
        return '; '.join(pairs)
    return o

In [None]:
#| export
def _flatten_tuple(tup):
    if not any(isinstance(item, tuple) for item in tup): return tup
    result = []
    for item in tup:
        if isinstance(item, tuple): result.extend(item)
        else: result.append(item)
    return tuple(result)

In [None]:
#|export
def _preproc(c, kw, attrmap=attrmap, valmap=valmap):
    "Normalise children `c` and keyword attrs `kw`; returns `(children, attrs)`"
    # A single generator/map/filter argument is expanded into the children themselves
    lazy_types = (types.GeneratorType, map, filter)
    if len(c)==1 and isinstance(c[0], lazy_types): c = tuple(c[0])
    attrs = {}
    for k,v in kw.items():
        if v is None: continue  # `None` means "attr not given"
        key = attrmap(k.lower())
        attrs[key] = valmap(v)
    return _flatten_tuple(c),attrs

In [None]:
#|export
class FT:
    "A 'Fast Tag' structure, containing `tag`,`children`,and `attrs`"
    def __init__(self, tag:str, cs:tuple, attrs:dict=None, void_=False, **kwargs):
        "Store `tag`, children `cs` (must be a tuple), `attrs` and `void_`; extra `kwargs` are accepted and ignored"
        assert isinstance(cs, tuple)
        # Use a fresh dict when `attrs` is omitted so `get`, attr assignment and `__call__` work
        if attrs is None: attrs = {}
        self.tag,self.children,self.attrs,self.void_ = tag,cs,attrs,void_
        self.listeners_ = []

    def on(self, f):
        "Register `f`, which is called with this element each time `changed` runs"
        self.listeners_.append(f)

    def changed(self):
        "Call every registered listener with `self`, then return `self`"
        for f in self.listeners_: f(self)
        return self

    def __setattr__(self, k, v):
        "Set internal fields (dunder names, names ending in `_`, and the core fields) on the instance; any other name is stored in `attrs` and listeners are notified"
        if len(k)>1 and k.startswith('__') or k[-1]=='_' or k in ('tag','children','attrs','void_'): return super().__setattr__(k,v)
        self.attrs[_fix_k(k)] = v
        self.changed()

    def __getattr__(self, k):
        "Look up names not found on the instance in `attrs` (`None` when missing)"
        # The core fields only reach this method when they are missing from the instance
        # (e.g. `__init__` never ran). Raising here avoids unbounded recursion through `self.attrs`.
        if k.startswith('__') or k in ('tag','children','attrs','void_','listeners_'): raise AttributeError(k)
        return self.get(k)

    @property
    def list(self):
        "`[tag, children, attrs]`"
        return [self.tag,self.children,self.attrs]

    def get(self, k, default=None):
        "Value of attr `k` (after `_fix_k` name conversion), or `default` if it is not set"
        return self.attrs.get(_fix_k(k), default)

    def __repr__(self): return f'{self.tag}({self.children},{self.attrs})'
    def __iter__(self): return iter(self.children)
    def __getitem__(self, idx): return self.children[idx]

    def __setitem__(self, i, o):
        "Replace child `i` with `o` and notify listeners"
        n = len(self.children)
        # Normalise in-range negative indices: with `i == -1`, `children[i+1:]` is `children[0:]`,
        # which would append a copy of every child instead of replacing the last one.
        if -n <= i < 0: i += n
        self.children = self.children[:i] + (o,) + self.children[i+1:]
        self.changed()

    def __call__(self, *c, **kw):
        "Append children `c` and merge attrs `kw` into the existing ones (chainable)"
        c,kw = _preproc(c,kw)
        if c: self.children = self.children+c
        if kw: self.attrs = {**self.attrs, **kw}
        return self.changed()

    def set(self, *c, **kw):
        "Set children and/or attributes (chainable)"
        c,kw = _preproc(c,kw)
        if c: self.children = c
        if kw:
            # Replacing attrs keeps only the existing `id` and `name`, then applies `kw`
            self.attrs = {k:v for k,v in self.attrs.items() if k in ('id','name')}
            self.attrs = {**self.attrs, **kw}
        return self.changed()

In [None]:
#| export
def ft(tag:str, *c, void_:bool=False, attrmap:callable=attrmap, valmap:callable=valmap, ft_cls=FT, **kw):
    "Create an `FT` structure for `to_xml()`"
    name = tag.lower()
    # `_preproc` returns the normalised `(children, attrs)` pair
    children,attrs = _preproc(c, kw, attrmap=attrmap, valmap=valmap)
    return ft_cls(name, children, attrs, void_=void_)

In [None]:
#| export
# Tags created with `void_=True` (rendered without a closing tag by `_to_xml`)
voids = set('area base br col command embed hr img input keygen link meta param source track wbr !doctype'.split())
_g = globals()
# Names of the tag constructors defined below; each tag is listed exactly once
_all_ = ['Head', 'Title', 'Meta', 'Link', 'Style', 'Body', 'Pre', 'Code',
    'Div', 'Span', 'P', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'Strong', 'Em', 'B',
    'I', 'U', 'S', 'Strike', 'Sub', 'Sup', 'Hr', 'Br', 'Img', 'A', 'Nav',
    'Ul', 'Ol', 'Li', 'Dl', 'Dt', 'Dd', 'Table', 'Thead', 'Tbody', 'Tfoot', 'Tr',
    'Th', 'Td', 'Caption', 'Col', 'Colgroup', 'Form', 'Input', 'Textarea',
    'Button', 'Select', 'Option', 'Label', 'Fieldset', 'Legend', 'Details',
    'Summary', 'Main', 'Header', 'Footer', 'Section', 'Article', 'Aside', 'Figure',
    'Figcaption', 'Mark', 'Small', 'Iframe', 'Object', 'Embed', 'Param', 'Video',
    'Audio', 'Source', 'Canvas', 'Svg', 'Math', 'Script', 'Noscript', 'Template', 'Slot']

# Define each name as a partial of `ft` bound to the lowercase tag, void when the tag is in `voids`
for o in _all_: _g[o] = partial(ft, o.lower(), void_=o.lower() in voids)

The main HTML tags are exported as `ft` partials.

Attributes are passed as keywords. Use 'klass' and 'fr' instead of 'class' and 'for', to avoid Python reserved word clashes.

In [None]:
#| export
def Html(*c, doctype=True, **kwargs)->FT:
    "An HTML tag, optionally preceded by `!DOCTYPE HTML`"
    html = ft('html', *c, **kwargs)
    # With `doctype` the result is a `(doctype, html)` tuple rather than a single `FT`
    return (ft('!DOCTYPE', html=True, void_=True), html) if doctype else html

In [None]:
# Sample page used by the examples below. The tuple of children passed to `Div` is flattened,
# the `cls` list is joined with spaces, and the `style` dict becomes a `k:v; k:v` string.
samp = Html(
    Head(Title('Some page')),
    Body(Div('Some text\nanother line', (Input(name="jph's"), Img(src="filename", data=1)),
             cls=['myclass', 'another'],
             style={'padding':1, 'margin':2}))
)
pprint(samp)

(!doctype((),{'html': True}),
 html((head((title(('Some page',),{}),),{}), body((div(('Some text\nanother line', input((),{'name': "jph's"}), img((),{'src': 'filename', 'data': 1})),{'class': 'myclass another', 'style': 'padding:1; margin:2'}),),{})),{}))


In [None]:
# The tag name, children and attrs of an `FT` are available as plain attributes
elem = P('Some text', id="myid")
print(elem.tag)
print(elem.children)
print(elem.attrs)

p
('Some text',)
{'id': 'myid'}


You can get and set attrs directly:

In [None]:
# Assigning to a non-internal name stores it in `attrs`; reading goes through `get`,
# which also accepts a default for attrs that are not set
elem.id = 'newid'
print(elem.id, elem.get('id'), elem.get('foo', 'missing'))
elem

newid newid missing


p(('Some text',),{'id': 'newid'})

In [None]:
#| export
class Safe(str):
    "A `str` that provides `__html__`, so `_escape` returns it without escaping"
    def __html__(self): return self

## Conversion to XML/HTML

In [None]:
#| export
def _escape(s): return '' if s is None else s.__html__() if hasattr(s, '__html__') else escape(s) if isinstance(s, str) else s
def _noescape(s): return '' if s is None else s.__html__() if hasattr(s, '__html__') else s

In [None]:
#| export
def _to_attr(k,v):
    if isinstance(v,bool):
        if v==True : return str(k)
        if v==False: return ''
    if isinstance(v,str): v = escape(v, quote=False)
    elif isinstance(v, Mapping): v = json.dumps(v)
    else: v = str(v)
    qt = '"'
    if qt in v:
        qt = "'"
        if "'" in v: v = v.replace("'", "&#39;")
    return f'{k}={qt}{v}{qt}'

In [None]:
#| export
# Tags that `_to_xml` puts on their own indented line (followed by a newline) when indenting
_block_tags = {'div', 'p', 'ul', 'ol', 'li', 'table', 'thead', 'tbody', 'tfoot',
               'html', 'head', 'body', 'meta', 'title', '!doctype', 'input', 'script', 'link', 'style',
               'tr', 'th', 'td', 'section', 'article', 'nav', 'aside', 'header',
               'footer', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote'}
# NOTE(review): not referenced by any code visible in this notebook; 'input' is also in `_block_tags`
_inline_tags = {'a', 'span', 'b', 'i', 'u', 'em', 'strong', 'img', 'br', 'small',
                'big', 'sub', 'sup', 'label', 'input', 'select', 'option'}

def _is_whitespace_significant(elm):
    return elm.tag in {'pre', 'code', 'textarea', 'script'} or elm.get('contenteditable') == 'true'

In [None]:
#| export
def _keep_attr(v):
    "Whether an attr with value `v` is rendered: `False`, `None` and `''` are omitted, plain numbers (including 0) are kept"
    # `0 == False` in Python, so a bare membership test would silently drop `tabindex=0` or `value=0`
    if isinstance(v, (int, float)) and not isinstance(v, bool): return True
    return v not in (False, None, '')

def _to_xml(elm, lvl=0, indent=True, do_escape=True):
    """Convert `FT` element tree into an XML string

    `elm` may be `None` (gives `''`), an object with `__ft__`, a tuple of elements, `bytes`
    (decoded as UTF-8 and emitted without escaping), an `FT`, or any other value (emitted as text).
    `lvl` is the current indentation in spaces, `indent` enables newlines/indentation for block
    tags, and `do_escape` selects `_escape` or `_noescape` for text content."""
    esc_fn = _escape if do_escape else _noescape
    if elm is None: return ''
    if hasattr(elm, '__ft__'): elm = elm.__ft__()
    if isinstance(elm, tuple):
        return ''.join(_to_xml(o, lvl=lvl, indent=indent, do_escape=do_escape) for o in elm)
    if isinstance(elm, bytes): return elm.decode('utf-8')
    if not isinstance(elm, FT): return f'{esc_fn(elm)}'

    tag, cs, attrs = elm.list
    is_void = getattr(elm, 'void_', False)
    is_block = tag in _block_tags
    # Inside whitespace-significant elements nothing may be added, here or in any descendant
    if _is_whitespace_significant(elm): indent = False

    sp,nl = (' ' * lvl,'\n') if indent and is_block else ('','')

    stag = tag
    if attrs:
        # Attr names ending in `_` (other than a lone `_`) are internal and are not rendered
        sattrs = ' '.join(_to_attr(k, v) for k, v in attrs.items() if _keep_attr(v) and (k=='_' or k[-1]!='_'))
        if sattrs: stag += f' {sattrs}'

    cltag = '' if is_void else f'</{tag}>'
    stag_ = f'<{stag}>' if stag else ''

    # No children: open and close tag on one line (`cltag` is empty for void tags)
    if not cs: return f'{sp}{stag_}{cltag}{nl}'
    # A single plain child is kept on the same line as its tag
    if len(cs) == 1 and not isinstance(cs[0], (list, tuple, FT)) and not hasattr(cs[0], '__ft__'):
        c = cs[0]
        # Decode `bytes` the same way as everywhere else instead of rendering its `b'...'` repr
        content = c.decode('utf-8') if isinstance(c, bytes) else esc_fn(c)
        return f'{sp}{stag_}{content}{cltag}{nl}'

    res = f'{sp}{stag_}{nl}'
    for c in cs:
        res += _to_xml(c, lvl=lvl+2 if indent else 0, indent=indent, do_escape=do_escape)
    if not is_void: res += f'{sp}{cltag}{nl}'
    return Safe(res)

In [None]:
#| export
def to_xml(elm, lvl=0, indent=True, do_escape=True):
    "Convert `ft` element tree into an XML string"
    xml = _to_xml(elm, lvl=lvl, indent=indent, do_escape=do_escape)
    # Wrap in `Safe` so the result is not escaped again when passed through `__html__`-aware code
    return Safe(xml)

FT.__html__ = to_xml

In [None]:
#| hide
# Basic rendering: tags in `_block_tags` end with a newline, other tags do not
test_eq(to_xml(Div("Hello")), '<div>Hello</div>\n')
test_eq(to_xml(P("Text", Class="test")), '<p class="test">Text</p>\n')
test_eq(to_xml(Div(P("Nested"))), '<div>\n  <p>Nested</p>\n</div>\n')
# Whitespace-significant elements get no added newlines or indentation
test_eq(to_xml(Pre("  Whitespace\n  Significant  ")), '<pre>  Whitespace\n  Significant  </pre>')
test_eq(to_xml(Img(src="image.jpg")), '<img src="image.jpg">')
test_eq(to_xml(Div("Text", contenteditable="true")), '<div contenteditable="true">Text</div>')
# Non-`FT` inputs: `None`, tuples and bytes
test_eq(to_xml(None), '')
test_eq(to_xml(("Text", P("Paragraph"))), 'Text<p>Paragraph</p>\n')
test_eq(to_xml(b"Bytes"), 'Bytes')
test_eq(to_xml(Div(P("Text"), B("Bold")), indent=False), '<div><p>Text</p><b>Bold</b></div>')
# Text children are escaped unless `do_escape=False`
test_eq(to_xml(Div("<script>alert('XSS')</script>"), do_escape=True),
        '<div>&lt;script&gt;alert(&#x27;XSS&#x27;)&lt;/script&gt;</div>\n')
test_eq(to_xml(Div("<script>alert('XSS')</script>"), do_escape=False),
        "<div><script>alert('XSS')</script></div>\n")
# Attrs set to `False` are omitted
test_eq(to_xml(Div(foo=False), indent=False), '<div></div>')

In [None]:
#| hide
# Tags outside `_block_tags` (here `b`) render without a trailing newline
test_eq(to_xml(B('Bold Text')), '<b>Bold Text</b>')
test_eq(to_xml(Div(P('Paragraph Text'))), '<div>\n  <p>Paragraph Text</p>\n</div>\n')
test_eq(to_xml(Pre('   Preformatted\n   Text')), '<pre>   Preformatted\n   Text</pre>')
editable_div = Div('Editable Content', contenteditable='true')
test_eq(to_xml(editable_div), '<div contenteditable="true">Editable Content</div>')
# A non-block child (`span`) gets no indentation or newline of its own, even inside a block parent
test_eq(to_xml(Div(Span('Inline Text'), P('Paragraph'))),
        '<div>\n<span>Inline Text</span>  <p>Paragraph</p>\n</div>\n')
# Void and empty elements
test_eq(to_xml(Br()), '<br>')
test_eq(to_xml(P(None)), '<p></p>\n')
test_eq(to_xml(Div()), '<div></div>\n')
# `True` attrs render as the bare attr name
test_eq(to_xml(Input(type='text', disabled=True)), '<input type="text" disabled>\n')
# Quote selection: a value containing `"` is wrapped in single quotes
special_attr_tag = Div(id='main"div', data_info="Some 'info'")
expected_special_attr = "<div id='main\"div' data-info=\"Some 'info'\"></div>\n"
test_eq(to_xml(special_attr_tag), expected_special_attr)

In [None]:
# Render the sample page with indentation; `do_escape=False` leaves text children unescaped
h = to_xml(samp, do_escape=False)
print(h)

<!doctype html>
<html>
  <head>
    <title>Some page</title>
  </head>
  <body>
    <div class="myclass another" style="padding:1; margin:2">
Some text
another line      <input name="jph's">
<img src="filename" data="1">    </div>
  </body>
</html>



In [None]:
# Any object with an `__ft__` method can be used as a child; `to_xml` calls it to get the element
class PageTitle:
    "Example component that renders as an `H1`"
    def __ft__(self): return H1("Hello")

class HomePage:
    "Example component that nests another `__ft__` component"
    def __ft__(self): return Div(PageTitle(), Div('hello'))

h = to_xml(Div(HomePage()))
expected_output = """<div>
  <div>
    <h1>Hello</h1>
    <div>hello</div>
  </div>
</div>
"""
assert h == expected_output

In [None]:
# Show the rendered component tree
print(h)

<div>
  <div>
    <h1>Hello</h1>
    <div>hello</div>
  </div>
</div>



In [None]:
# `indent=False` renders without any added newlines or indentation
h = to_xml(samp, indent=False)
print(h)

<!doctype html><html><head><title>Some page</title></head><body><div class="myclass another" style="padding:1; margin:2">Some text
another line<input name="jph's"><img src="filename" data="1"></div></body></html>


Interoperability in both directions with Django and Jinja, using the [`__html__()` protocol](https://jinja.palletsprojects.com/en/3.1.x/templates/#jinja-filters.escape):

In [None]:
# Stand-in for a template engine's escape function: objects providing `__html__` are trusted
# and used as-is, everything else is HTML-escaped
def _esc(s): return s.__html__() if hasattr(s, '__html__') else Safe(escape(s))

# A string marked safe by another library is not escaped by `to_xml`...
r = Safe('<b>Hello from Django</b>')
print(to_xml(Div(r)))
# ...and an `FT` is rendered through its `__html__` (set to `to_xml` above)
print(_esc(Div(P('Hello from fastcore <3'))))

<div><b>Hello from Django</b></div>

<div>
  <p>Hello from fastcore &lt;3</p>
</div>



## Display

In [None]:
#| export
def highlight(s, lang='html'):
    "Markdown to syntax-highlight `s` in language `lang`"
    body = to_xml(s)
    return f'```{lang}\n{body}\n```'

In [None]:
#| export
def showtags(s):
    "HTML that displays the escaped XML source of `s` inside `<code><pre>`"
    # Closing tags mirror the opening order so the elements are properly nested
    return f"""<code><pre>
{escape(to_xml(s))}
</pre></code>"""

FT._repr_markdown_ = highlight

You can also reorder the children to come *after* the attrs, if you use this alternative syntax for `FT` where the children are in a second pair of `()` (behind the scenes this is because `FT` implements `__call__` to add children).

In [None]:
# Attrs first, then children in a second call (`FT.__call__` appends them)
Body(klass='myclass')(
    Div(style='padding:3px')(
        'Some text 1<2',
        I(spurious=True)('in italics'),
        Input(name='me'),
        Img(src="filename", data=1)
    )
)

```html
<body class="myclass">
  <div style="padding:3px">
Some text 1&lt;2<i spurious>in italics</i>    <input name="me">
<img src="filename" data="1">  </div>
</body>

```

In [None]:
#| export
def __getattr__(tag):
    "Module-level fallback: return a tag constructor for any capitalised name that is not otherwise defined"
    # Private and lowercase names are never tags; include the name so the error is informative
    if tag.startswith('_') or tag[0].islower(): raise AttributeError(tag)
    tag = _fix_k(tag)
    # Match the predefined tag partials: tags listed in `voids` are void unless the caller says otherwise
    is_void = tag.lower() in voids
    def _f(*c, target_id=None, **kwargs):
        kwargs.setdefault('void_', is_void)
        return ft(tag, *c, target_id=target_id, **kwargs)
    return _f

# Export -

In [None]:
#|hide
# Run nbdev's export step (presumably writes the `#| export` cells to the module named by `default_exp`)
import nbdev; nbdev.nbdev_export()