In [1]:
from urllib.request import urlopen
from xml.etree.ElementTree import parse 

# Download the RSS feed and parse it into an ElementTree.
# The `with` block closes the HTTP response as soon as parsing is finished;
# `u` stays bound afterwards, so plain attributes such as `u.url` stay readable.
with urlopen('http://planet.python.org/rss20.xml') as u:
    doc = parse(u)

In [15]:
# URL the response was actually served from; it differs from the requested
# address when the server redirected the request.
u.url

'https://planetpython.org/rss20.xml'

In [16]:
# Show title, publication date and link for the first five feed items.
res = 0
for item in doc.iterfind('channel/item'):
    # Pull the three text fields in one pass, in display order.
    title, date, link = (
        item.findtext(field) for field in ('title', 'pubDate', 'link')
    )

    # One field per line, followed by a blank separator line.
    print(title, date, link, sep='\n', end='\n\n')

    res += 1
    if res >= 5:
        break
    

Python for Beginners: How to Use Python Split Function
Mon, 29 Aug 2022 13:00:00 +0000
https://www.pythonforbeginners.com/strings/how-to-use-python-split-function

IslandT: The upgrade version of my music player
Mon, 29 Aug 2022 09:17:24 +0000
https://islandtropicaman.com/wp/2022/08/29/the-upgrade-version-of-my-music-player/

Podcast.__init__: Ship With Confidence By Automating Quality Assurance
Sun, 28 Aug 2022 20:23:35 +0000
https://www.pythonpodcast.com/keysight-quality-assurance-automation-episode-375/

Marc Richter: Heroku’s attempt to scare away users and its impact on my Heroku-Fanboy position
Sun, 28 Aug 2022 19:23:30 +0000
https://www.marc-richter.info/herokus-attempt-to-scare-away-users-and-its-impact-on-my-heroku-fanboy-position/

Matthew Wright: Don’t append rows to a pandas DataFrame
Sun, 28 Aug 2022 19:00:25 +0000
https://www.wrighters.io/dont-append-rows-to-a-pandas-dataframe/



In [19]:
# Look up the <title> element nested under <channel>.
e = doc.find('channel/title')

In [23]:
# Inspect the element's tag name, text content and attribute mapping.
e.tag, e.text, e.attrib

('title', 'Planet Python', {})

##### Parsing Huge XML

In [24]:
from xml.etree.ElementTree import iterparse

# Recipe for incrementally reading the elements found at a given path
# (path format: tag/tag, relative to the root element)
def parse_and_remove(filename, path):
    """Incrementally parse an XML source, yielding elements that match ``path``.

    Each yielded element is detached from its parent once the consumer asks
    for the next one, so elements that have already been processed do not
    pile up in the partially built tree.

    Args:
        filename: File name or file object, passed straight to ``iterparse``.
        path: Slash-separated tag names locating the wanted elements,
            relative to (and not including) the root element, e.g.
            ``'channel/item'``. A single tag selects direct children of
            the root.

    Yields:
        Every element whose chain of ancestor tags (below the root) equals
        ``path``, at the moment its closing tag has been read.
    """
    path_parts = path.split('/')
    doc = iterparse(filename, ('start', 'end'))

    # Consume the root's 'start' event. The root is not pushed on the stacks
    # (paths are relative to it) but is kept so that matches sitting directly
    # beneath it still have a parent to be removed from.
    _, root = next(doc)

    tag_stack = []
    element_stack = []

    for event, elem in doc:
        if event == 'start':
            tag_stack.append(elem.tag)
            element_stack.append(elem)
        elif event == 'end':
            if tag_stack == path_parts:
                yield elem
                # The parent is the next element down the stack, or the root
                # itself when the match is a direct child of the root.
                parent = element_stack[-2] if len(element_stack) > 1 else root
                parent.remove(elem)
            # The root's own 'end' event arrives with empty stacks because its
            # 'start' event was never pushed; there is nothing to pop then.
            if tag_stack:
                tag_stack.pop()
                element_stack.pop()
                
    

##### Turning a dict into XML

In [1]:
from xml.etree.ElementTree import Element

def dict_to_xml(tag, d):
    """Build an XML element named ``tag`` with one child per entry of ``d``.

    Args:
        tag: Tag name of the element to create.
        d: Mapping of child tag name -> value; each value is converted
            with ``str()`` and stored as the child's text.

    Returns:
        The new element, with children in the mapping's iteration order.
    """
    def make_child(name, value):
        # A leaf element whose text is the stringified value.
        node = Element(name)
        node.text = str(value)
        return node

    root = Element(tag)
    root.extend([make_child(name, value) for name, value in d.items()])
    return root

In [2]:
from xml.etree.ElementTree import tostring 
from collections import OrderedDict
# Build the mapping as an OrderedDict from explicit (key, value) pairs so the
# order of the child elements is stated outright. Wrapping an already-built
# dict in OrderedDict can only copy whatever order that dict happens to have.
s = OrderedDict([('name', "GOOG"), ("shares", 100), ("price", 490.1)])
e = dict_to_xml('stock', s)
tostring(e)

b'<stock><name>GOOG</name><shares>100</shares><price>490.1</price></stock>'

In [36]:
# Attach an `_id` attribute to the element, then serialize it again to see
# the attribute in the output.
e.set('_id', '1234')
tostring(e)

b'<stock _id="1234"><name>GOOG</name><shares>100</shares><price>490.1</price></stock>'

In [50]:
from xml.sax.saxutils import escape, unescape
# escape() turns the markup characters of '<tab>' into entities ...
print(escape('<tab>'))
# ... and unescape() converts them back to the original text.
print(unescape(escape('<tab>')))

&lt;tab&gt;
<tab>


In [12]:
# Sample XML text: a <stop> record containing one <sri> block and two <pre> blocks.
example_xml = '''<?xml version="1.0"?>
<stop>
    <id>14791</id>
    <nm>Clark &amp; Balmoral</nm>
    <sri>
        <rt>22</rt>
        <d>North Bound</d>
        <dd>North Bound</dd>
    </sri>
    <cr>22</cr>
    <pre>
        <pt>5 MIN</pt>
        <fd>Howard</fd>
        <v>1378</v>
        <rn>22</rn>
    </pre>
    <pre>
        <pt>15 MIN</pt>
        <fd>Howard</fd>
        <v>1867</v>
        <rn>22</rn>
    </pre>
</stop>'''

In [66]:
from xml.etree.ElementTree import parse, Element 
from io import StringIO
# Wrap the XML text in a file-like object so parse() can read from it.
wrapper = StringIO(example_xml)
# (Debug) print(wrapper.read()) — keep disabled: reading here would consume the buffer before parse().
doc = parse(wrapper)
root = doc.getroot()
# Remove the <sri> child, then the first <pre> child that find() returns.
root.remove(doc.find('sri'))
root.remove(doc.find('pre'))

In [81]:
# Build a new <spam> element with some text and insert it at child index 1 of the root.
e = Element('spam')
e.text = 'This is a text'
root.insert(1, e)

In [77]:
# Position of the <id> element among the root's direct children.
list(root).index(doc.find('id'))

0

In [80]:
# Write the modified tree to newpred.xml, with an XML declaration, encoded as UTF-8.
doc.write('newpred.xml', xml_declaration=True, encoding='utf-8')

In [2]:
from io import StringIO
from xml.etree.ElementTree import parse


# Sample document whose <html> subtree declares the XHTML namespace as its default.
xml = """<?xml version="1.0" encoding="utf-8"?>
<top>
<author>David Beazley</author>
<content>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Hello World</title>
</head>
<body>
<h1>Hello World!</h1>
</body>
</html>
</content>
</top>"""
# Parse the text through a file-like wrapper.
file = StringIO(xml)
doc = parse(file)
# <author> sits outside the namespaced subtree, so its plain tag name is enough.
doc.findtext('author')


'David Beazley'

In [8]:
# NOTE(review): the result of this first lookup is discarded — only the last
# expression of a cell is displayed.
doc.find('content')
# Tags inside the namespaced subtree are addressed as {namespace-uri}tag.
# NOTE(review): this path ends with a bare '/'; presumably that selects a child
# of <html> rather than <html> itself — confirm the intended target against the
# recorded output.
doc.find('content/{http://www.w3.org/1999/xhtml}html/')

<Element '{http://www.w3.org/1999/xhtml}title' at 0x7f6fc8275f80>

In [9]:
# Each step inside the namespaced subtree repeats the full {namespace-uri} prefix.
doc.find('content/{http://www.w3.org/1999/xhtml}html/{http://www.w3.org/1999/xhtml}body')

<Element '{http://www.w3.org/1999/xhtml}body' at 0x7f6fc8276020>

In [10]:
# Handling XML namespaces
class XMLNamespaces:
    """Expand short namespace aliases inside ElementTree path strings.

    Aliases are given as keyword arguments (``alias=uri``) or added later
    with ``register``. Calling the instance with a path replaces every
    ``{alias}`` placeholder by the corresponding ``{uri}`` prefix.
    """

    def __init__(self, **kwargs):
        # Maps alias -> '{uri}' prefix text.
        self.namespaces = {}
        for alias in kwargs:
            self.register(alias, kwargs[alias])

    def register(self, name, uri):
        """Store ``uri`` under the alias ``name``, wrapped in curly braces."""
        self.namespaces[name] = ''.join(('{', uri, '}'))

    def __call__(self, path):
        """Return ``path`` with each ``{alias}`` placeholder substituted.

        Raises:
            KeyError: If ``path`` uses an alias that was never registered.
        """
        prefixes = self.namespaces
        return path.format_map(prefixes)

In [11]:
# Register the alias `html` for the XHTML namespace URI.
ns = XMLNamespaces(html='http://www.w3.org/1999/xhtml')

In [18]:
# Same <body> lookup, with `{html}` expanded to the full namespace prefix by `ns`.
doc.find(ns('content/{html}html/{html}body'))

<Element '{http://www.w3.org/1999/xhtml}body' at 0x7f6fc8276020>

In [39]:
import sqlite3
# Private in-memory database, visible only through this connection.
db = sqlite3.connect(':memory:')
# Named in-memory database with a shared cache. The "file:...?mode=memory" form
# is a URI, so uri=True is required; without it the whole string is treated as
# an ordinary filename.
conn = sqlite3.connect('file:cachedb?mode=memory&cache=shared', uri=True)

In [22]:
# Cursor for executing SQL statements against `db`.
c = db.cursor()

In [24]:
# Create the portfolio table: symbol (text), shares (integer), price (real).
c.execute('create table portfolio (symbol text, shares integer, price real)')

<sqlite3.Cursor at 0x7f6fd3870140>

In [26]:
# Insert one row with the values written directly into the SQL text.
c.execute("insert into portfolio values ('GOOG', 100, 490.1)")

<sqlite3.Cursor at 0x7f6fd3870140>

In [33]:
# Insert a row through `?` placeholders: the values travel separately from the
# SQL text instead of being pasted into it.
data_type = ('a', 100, 490.1)
c.execute('insert into portfolio values (?, ?, ?)', data_type)

<sqlite3.Cursor at 0x7f6fd3870140>

In [34]:
# Iterate over the query result and print each row of the table as a tuple.
for row in c.execute('select * from portfolio'):
    print(row)

('GOOG', 100, 490.1)
('a', 100, 490.1)


In [40]:
# Commit pending work on `db`, then close both open connections so that
# neither database handle is leaked.
db.commit()
db.close()
conn.close()