# XML - Extensible Markup Language

XML is a useful way to store data. It is a markup language and file format for storing, transmitting, and reconstructing arbitrary data, and it defines a set of rules for encoding documents in a format that is both human-readable and machine-readable.

In [1]:
import xml.etree.ElementTree as et

In [2]:
# List every name defined in the ElementTree module namespace.
dir(et)

['C14NWriterTarget',
 'Comment',
 'Element',
 'ElementPath',
 'ElementTree',
 'HTML_EMPTY',
 'PI',
 'ParseError',
 'ProcessingInstruction',
 'QName',
 'SubElement',
 'TreeBuilder',
 'VERSION',
 'XML',
 'XMLID',
 'XMLParser',
 'XMLPullParser',
 '_Element_Py',
 '_ListDataStream',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_escape_attrib',
 '_escape_attrib_c14n',
 '_escape_attrib_html',
 '_escape_cdata',
 '_escape_cdata_c14n',
 '_get_writer',
 '_looks_like_prefix_name',
 '_namespace_map',
 '_namespaces',
 '_raise_serialization_error',
 '_serialize',
 '_serialize_html',
 '_serialize_text',
 '_serialize_xml',
 '_set_factories',
 'canonicalize',
 'collections',
 'contextlib',
 'dump',
 'fromstring',
 'fromstringlist',
 'indent',
 'io',
 'iselement',
 'iterparse',
 'parse',
 're',
 'register_namespace',
 'sys',
 'tostring',
 'tostringlist',

## Use Python's `inspect` module to see what is in the ElementTree module

In [3]:
import xml.etree.ElementTree as et
from inspect import getmembers, isclass, isfunction

Display Classes in ElementTree module

In [4]:
# Print the name of every public class in ElementTree
# (names starting with an underscore are treated as private and skipped).
public_class_names = (
    label
    for label, _obj in getmembers(et, isclass)
    if not label.startswith("_")
)
for label in public_class_names:
    print(label)

C14NWriterTarget
Element
ElementTree
ParseError
QName
TreeBuilder
XMLParser
XMLPullParser


An XML document is a tree of elements.
Each node in the tree is an element, for example `<user></user>`.

Display functions in ElementTree Module

In [5]:
# Print the name of every public function in ElementTree
# (names starting with an underscore are treated as private and skipped).
public_function_names = (
    label
    for label, _obj in getmembers(et, isfunction)
    if not label.startswith("_")
)
for label in public_function_names:
    print(label)

Comment
PI
ProcessingInstruction
XML
XMLID
canonicalize
dump
fromstring
fromstringlist
indent
iselement
iterparse
parse
register_namespace
tostring
tostringlist


In [6]:
import xml.etree.ElementTree as et

In [9]:
# Parse the XML file into an ElementTree and keep a handle on its root element.
tree = et.parse('hodlers.xml')
root = tree.getroot()

# tostring() serialises the element to a byte string, hence the b'...' output.
serialized_root = et.tostring(root)
print(serialized_root)

b'<crypto coin="MONEY!!!">\n    <investor>Warren Buffett</investor>\n    <investor>Peter Thiel</investor>\n    <investor>Ray Dalio</investor>\n    <investor>Mark Cuban</investor>\n    <investor>Chris Sacca</investor>\n    <investor>Tim Ferriss</investor>\n    <investor>Ben Horowitz</investor>\n    <investor>Naval Ravikant</investor>\n    <investor>Chamath Palihapitiya</investor>\n    <investor>Mary Meeker</investor>\n    <investor>Marc Andreessen</investor>\n    <investor>Reid Hoffman</investor>\n    <investor>Bill Gurley</investor>\n    <investor>Paul Graham</investor>\n    <investor>Michael Moritz</investor>\n</crypto>'


In [11]:
# Element.get() returns the value of the named attribute on the root element.
coin = root.get('coin')
message = "Crypto name = {val}".format(val=coin)
print(message)

Crypto name = MONEY!!!


In [12]:
# Attach a 'launched' attribute to the root element, then show all of its attributes.
launch_date = '20230226'
root.set('launched', launch_date)
print(root.attrib)

{'coin': 'MONEY!!!', 'launched': '20230226'}


In [13]:
# Save updated XML (this overwrites 'hodlers.xml' with the current state of the tree)
tree.write('hodlers.xml')

In [14]:
# Add an 'id' attribute to each investor, numbering them from 1.
# enumerate() supplies the counter, so no manually incremented variable is needed
# and the builtin id() is not shadowed for the rest of the session.
for investor_id, investor in enumerate(tree.findall('investor'), start=1):
    # Attribute values must be strings, so convert the counter.
    investor.set('id', str(investor_id))

# Save XML
tree.write('hodlers.xml')

In [15]:
# If you have regrets, you can delete attributes.
# Element.attrib is a dict, so pop() with a default removes the key when present
# and does nothing otherwise; re-running this cell therefore cannot raise KeyError.
for investor in tree.findall('investor'):
    investor.attrib.pop('id', None)

# Save XML
tree.write('hodlers.xml')

In [16]:
# Add new investors to XML

# Investor 1: build the element by parsing an XML snippet,
# then attach it as the last child of the root.
investor1 = et.fromstring("<investor>Allen Duffy</investor>")
root.append(investor1)

# Persist the change to disk.
tree.write('hodlers.xml')

In [17]:
# Investor 2: build the element with the Element constructor instead of parsing text.
investor2 = et.Element("investor")
investor2.text = "Karl Amber"  # text content between the opening and closing tags

# Attach it as the last child of the root and persist the change to disk.
root.append(investor2)
tree.write('hodlers.xml')

In [18]:
# Add ids once more, this time to every investor including the newly appended ones.
# enumerate() starts at 0 by default, so the ids written here run from '0' upwards.
# The loop variable is named investor_id so the builtin id() is not shadowed.
for investor_id, investor in enumerate(root.findall('investor')):
    investor.set('id', str(investor_id))

# Save XML
tree.write('hodlers.xml')

In [19]:
# Select the investor whose 'id' attribute equals '4', using an XPath attribute predicate.
xpath_query = ".//investor[@id='4']"
investor = root.find(xpath_query)
print(investor.text)

Chris Sacca
