# 6.1. Reading and Writing CSV Data

In [4]:
# getting data in and out of a program 

import csv

# read the data as a sequence of tuples
with open('stocks.csv') as f:
    f_csv = csv.reader(f)
    headers = next(f_csv)
    for row in f_csv: # row will be a tuple 
        # Processing row 
        print(row[0], row[4])

from collections import namedtuple
with open('stocks.csv') as f:
    f_csv = csv.reader(f)
    headings = next(f_csv)
    Row = namedtuple('Row', headings)
    for r in f_csv:
        row = Row(*r)

# read the data as a sequence of dictionaries
import csv 
with open('stocks.csv') as f:
    f_csv = csv.DictReader(f)
    for row in f_csv:
        print(row)

# write CSV data 
headers = ['Symbol', 'Price', 'Date', 'Time', 'Change', 'Volume']
rows = [('AA', 39.48, '6/11/2007', '9:36am', -0.18, 181800),
        ('AIG', 71.38, '6/11/2007', '9:36am', -0.15, 195500),
        ('AXP', 62.58, '6/11/2007', '9:36am', -0.46, 935000),
       ]
with open('stocks.csv', 'w') as f:
    f_csv = csv.writer(f)
    f_csv.writerow(headers)
    f_csv.writerows(rows)

# If you have the data as a sequence of dictionaries 
headers = ['Symbol', 'Price', 'Date', 'Time', 'Change', 'Volume']
rows = [{'Symbol':'AA', 'Price':39.48, 'Date':'6/11/2007',
'Time':'9:36am', 'Change':-0.18, 'Volume':181800},
{'Symbol':'AIG', 'Price': 71.38, 'Date':'6/11/2007',
'Time':'9:36am', 'Change':-0.15, 'Volume': 195500},
{'Symbol':'AXP', 'Price': 62.58, 'Date':'6/11/2007',
'Time':'9:36am', 'Change':-0.46, 'Volume': 935000},
]

with open('stocks.csv','w') as f:
    f_csv = csv.DictWriter(f, headers)
    f_csv.writeheader()
    f_csv.writerows(rows)

# Example of reading tab-separated values
with open('stocks.csv') as f:
    f_tsv = csv.reader(f, delimiter='\t')
    for row in f_tsv:
        print(row)

# extra type conversions on CSV
col_types = [str, float, str, str, float, int]
with open('stocks.csv') as f:
    f_csv = csv.reader(f)
    headers = next(f_csv)
    for row in f_csv:
        # Apply conversions to the row items 
        row = tuple(convert(value) for convert, value in zip(col_types, row))
        print(row)

print('Reading as dicts with type conversion')
field_types = [('Price', float),
              ('Change', float),
              ('Volume', int)]

with open ('stocks.csv') as f:
    for row in csv.DictReader(f):
        row.update((key, conversion(row[key])) for key, conversion in field_types)
        print(row)
        
# Pandas package. Pandas includes a convenient pandas.read_csv() function that will load CSV data into a DataFrame object

AA -0.18
AIG -0.15
AXP -0.46
{'Price': '39.48', 'Time': '9:36am', 'Volume': '181800', 'Symbol': 'AA', 'Date': '6/11/2007', 'Change': '-0.18'}
{'Price': '71.38', 'Time': '9:36am', 'Volume': '195500', 'Symbol': 'AIG', 'Date': '6/11/2007', 'Change': '-0.15'}
{'Price': '62.58', 'Time': '9:36am', 'Volume': '935000', 'Symbol': 'AXP', 'Date': '6/11/2007', 'Change': '-0.46'}
['Symbol,Price,Date,Time,Change,Volume']
['AA,39.48,6/11/2007,9:36am,-0.18,181800']
['AIG,71.38,6/11/2007,9:36am,-0.15,195500']
['AXP,62.58,6/11/2007,9:36am,-0.46,935000']
('AA', 39.48, '6/11/2007', '9:36am', -0.18, 181800)
('AIG', 71.38, '6/11/2007', '9:36am', -0.15, 195500)
('AXP', 62.58, '6/11/2007', '9:36am', -0.46, 935000)
Reading as dicts with type conversion
{'Price': 39.48, 'Time': '9:36am', 'Volume': 181800, 'Symbol': 'AA', 'Date': '6/11/2007', 'Change': -0.18}
{'Price': 71.38, 'Time': '9:36am', 'Volume': 195500, 'Symbol': 'AIG', 'Date': '6/11/2007', 'Change': -0.15}
{'Price': 62.58, 'Time': '9:36am', 'Volume': 93

# 6.2. Reading and Writing JSON Data

In [23]:
# JSON (JavaScript Object Notation)
import json 

data = {
    'name': 'ACME',
    'shares': 100,
    'price': 542.23
}

json_str = json.dumps(data)
print(json_str)
# JSON-encoded string back into a Python data structure 
data = json.loads(json_str)
print(data)

# With files  Writing JSON data 
with open('data.json', 'w') as f:
    json.dump(data, f)

# Reading data back 
with open('data.json', 'r') as f:
    data = json.load(f)
    print(data)
    
# JSON encoding supports the basic types of None, bool, int, float, str. lists, tuples, dictionaries 
# For instance, True is mapped to true, False is mapped to false, and None is mapped to null

print(json.dumps(False))
print(json.dumps(True))
print(json.dumps(None))

d = {'a': True, 'b': 'Hello', 'c': None}
json.dumps(d)

# pprint() function in the pprint module. 
from urllib.request import urlopen
import json 
u = urlopen('http://samples.openweathermap.org/data/2.5/weather?q=London,uk&appid=b6907d289e10d714a6e88b30761fae22')
resp = json.loads(u.read().decode('utf-8'))
from pprint import pprint
pprint(resp)

# decode JSON data 
s = '{"name": "ACME", "shares": 50, "price": 490.1}'
from collections import OrderedDict 
data = json.loads(s, object_pairs_hook=OrderedDict)

# turn JSON dictionary into a Python object 
class JSONObject:
    def __init__(self, d):
        self.__dict__=d
        
data = json.loads(s, object_hook=JSONObject)

class Point:
    def __init__(self, x, y):
        self.x = x 
        self.y = y 

def serialize_instance(obj):
    d = {'__classname__': type(obj).__name__}
    d.update(vars(obj))
    return d 
        
p = Point(2,3)
# serialize instances 
json.dumps(serialize_instance(p))

# Dictionary mapping names to known classes 
classes = {
    'Point': Point 
}

def unserialize_object(d):
    clsname = d.pop('__classname__', None)
    if clsname:
        cls = classes[clsname]
        obj = cls.__new__(cls) # Make instance without calling __init__
        for key, value in d.items():
            setattr(obj, key, value)
            return obj 
    else:
        return d 
p = Point(2,3)
s = json.dumps(p, default=serialize_instance)
print(s)
a = json.loads(s, object_hook=unserialize_object)
print(a)
a.x 


{"price": 542.23, "name": "ACME", "shares": 100}
{'price': 542.23, 'name': 'ACME', 'shares': 100}
{'price': 542.23, 'name': 'ACME', 'shares': 100}
false
true
null
{'base': 'stations',
 'clouds': {'all': 90},
 'cod': 200,
 'coord': {'lat': 51.51, 'lon': -0.13},
 'dt': 1485789600,
 'id': 2643743,
 'main': {'humidity': 81,
          'pressure': 1012,
          'temp': 280.32,
          'temp_max': 281.15,
          'temp_min': 279.15},
 'name': 'London',
 'sys': {'country': 'GB',
         'id': 5091,
         'message': 0.0103,
         'sunrise': 1485762037,
         'sunset': 1485794875,
         'type': 1},
 'visibility': 10000,
 'weather': [{'description': 'light intensity drizzle',
              'icon': '09d',
              'id': 300,
              'main': 'Drizzle'}],
 'wind': {'deg': 80, 'speed': 4.1}}
{"x": 2, "__classname__": "Point", "y": 3}
<__main__.Point object at 0x7f1260a726a0>


2

# 6.3. Parsing Simple XML Data

In [27]:
# xml.etree.ElementTree module can be used to extract data from simple XML documents
from urllib.request import urlopen
from xml.etree.ElementTree import parse

# Download the RSS feed and parse it 
u = urlopen('http://planet.python.org/rss20.xml')
doc= parse(u) # parse the entire XML document into a document object 

# Extract and output tags of interest 
for item in doc.iterfind('channel/item'): # find(), iterfind(),and findtext() to search for sepcific XML elements
    title = item.findtext('title')
    date = item.findtext('pubDate')
    link = item.findtext('link')
    
    print(title)
    print(date)
    print(link)
    print()
# text, and get() method can be used to extract attributes
e = doc.find('channel/title')
print(e.get('some_attribute'))

Talk Python to Me: #148 Python Book Authors'  Panel Discussion
Sun, 28 Jan 2018 08:00:00 +0000
https://talkpython.fm/episodes/show/148/python-book-authors-panel-discussion

Evennia: Kicking into gear from a distance
Sat, 27 Jan 2018 23:27:46 +0000
http://evennia.blogspot.com/2018/01/kicking-into-gear-from-distance.html

Weekly Python StackOverflow Report: (cx) stackoverflow python report
Sat, 27 Jan 2018 21:56:00 +0000
http://python-weekly.blogspot.com/2018/01/cx-stackoverflow-python-report.html

Anarcat: A summary of my 2017 work
Sat, 27 Jan 2018 16:54:02 +0000
https://anarc.at/blog/2018-01-27-summary-2017-work/

Sandipan Dey: Image Colorization Using Optimization in Python
Sat, 27 Jan 2018 01:09:09 +0000


Matthew Rocklin: Write Dumb Code
Sat, 27 Jan 2018 00:00:00 +0000
https://matthewrocklin.com/blog//work/2018/01/27/write-dumb-code

Sumana Harihareswara - Cogito, Ergo Sumana: Preserving Threading In Google Group or Mailman Mailing List Replies with Thunderbird
Fri, 26 Jan 2018 22:2

# 6.4. Parsing Huge XML Files Incrementally

In [29]:
from xml.etree.ElementTree import iterparse 

def parse_and_remove(filename, path):
    path_parts = path.split('/')
    doc = iterparse(filename, ('start', 'end'))
    # Skip the root element 
    next(doc)
    
    tag_stack = []
    elem_stack = []
    for event, elem in doc:
        if event == 'start':
            tag_stack.append(elem.tag)
            elem_stack.append(elem)
        elif event == 'end':
            if tag_stack == path_parts:
                yield elem 
                elem_stack[-2].remove(elem)
            try:
                tag_stack.pop()
                elem_stack.pop()
            except IndexError:
                pass 

# ranks ZIP codes by the number of pothole reports 
from xml.etree.ElementTree import parse
from collections import Counter 

potholes_by_zip = Counter()

doc = parse('potholes.xml')
for pothole in doc.iterfind('row/row'):
    potholes_by_zip[pothole.findtext('zip')] += 1

for zipcode, num in potholes_by_zip.most_common():
    print(zipcode, num)

# This version of code runs with a memory footprint of only 7MB
from collections import Counter
potholes_by_zip = Counter()

data = parse_and_remove('potholes.xml', 'row/row')
for pothole in data:
    potholes_by_zip[pothole.findtext('zip')] += 1

for zipcode, num in potholes_by_zip.most_common():
    print(zipcode, num)

60617 13
60626 8
60651 7
60623 6
60647 6
60625 4
60636 4
60609 4
60613 4
60628 4
60641 3
60622 3
60657 3
60619 3
60629 3
60654 2
60656 2
60644 2
60618 2
60649 2
60638 2
60660 1
60614 1
60631 1
60634 1
60707 1
60630 1
60643 1
60652 1
60637 1
60616 1
60612 1
60639 1
60632 1
60617 13
60626 8
60651 7
60623 6
60647 6
60625 4
60636 4
60609 4
60613 4
60628 4
60641 3
60622 3
60657 3
60619 3
60629 3
60654 2
60656 2
60644 2
60618 2
60649 2
60638 2
60660 1
60614 1
60631 1
60634 1
60707 1
60630 1
60643 1
60652 1
60637 1
60616 1
60612 1
60639 1
60632 1


# 6.5. Turning a Dictionary into XML

In [4]:
# xml.etree.ElementTree library is commonly used for 
from xml.etree.ElementTree import Element 
from xml.etree.ElementTree import tostring 

def dict_to_xml(tag, d):
    '''
    Turn a simple dict of key/value pairs into XML
    '''
    elem = Element(tag)
    for key, val in d.items():
        child = Element(key)
        child.text = str(val)
        elem.append(child)
    return elem 

s = {'name': 'GOOG', 'shares': 100, 'price': 490.1}
e = dict_to_xml('stock', s)
tostring(e)

e.set('_id', '1234')
tostring(e)

def dict_to_xml_str(tag, d):
    '''
    Turn a simple dict of key/value pairs into XML
    '''
    parts = ['<{}>'.format(tag)]
    for key, val in d.items():
        parts.append('<{0}>{1}</{0}>'.format(key, val))
    parts.append('</{}>'.format(tag))
    return ''.join(parts)

# escape() and unescape() functions in XML.sax.saxutils
from xml.sax.saxutils import escape, unescape
escape('<spam>')

'&lt;spam&gt;'

# 6.6. Parsing, Modifying, and Rewriting XML

In [6]:
from xml.etree.ElementTree import parse, Element 
doc = parse('potholes.xml')
root = doc.getroot()

# Remove a few elements 
root.remove(root.find('sri'))
root.remove(root.find('cr'))

# Insert a new element after <nm>...</nm>
root.getchildren().index(root.find('nm'))

TypeError: remove() argument must be xml.etree.ElementTree.Element, not None

# 6.7. Parsing XML Documents with Namespaces

In [8]:
# Some queries that work 
doc.findtext('author')
doc.find('content')

# 6.8. Interacting with a Relational Database 

In [13]:
# select, insert, delete rows in a relational database
stocks = [
    ('GOOG', 100, 490.1),
    ('AAPL', 50, 545.75),
    ('FB', 150, 7.45),
    ('HPQ', 75, 33.2),
]
import sqlite3
db = sqlite3.connect('database.db')

# Once you have a cursor, you can start executing SQL queries
c = db.cursor()
c.execute('create table portfolio (symbol text, shares integer, price real)')
db.commit() 

# insert a sequence of rows into the data 
c.executemany('insert into portfolic values (?,?,?)', stocks)
db.commit()

# insert a sequence of rows into the data 
c.executemany('insert into portfolio values(?,?,?)', stocks)
db.commit

# perform a query 
for row in db.execute('select * from portfolio'):
    print(row)

min_price = 100 
for row in db.execute('select * from portfolio where price >= ?', (min_price,)):
    print(row)

OperationalError: table portfolio already exists

# 6.9. Decoding and Encoding Hexadecimal Digits

In [1]:
# Initial byte string 
s = b'hello'

# Encode as hex 
import binascii
h = binascii.b2a_hex(s)

# Decode back to bytes
binascii.a2b_hex(h)

# base64.b16decode() and base64.b16encode() functions only operate with uppercase 

import base64
h = base64.b16encode(s)
base64.b16decode(h)

h = base64.b16encode(s)
print(h)
print(h.decode('ascii'))

b'68656C6C6F'
68656C6C6F


# 6.10. Decoding and Encoding Base64

In [3]:
# decode or encode binary data using Base64 encoding 

# Some byte data 
s = b'hello'
import base64

# Encode as Base64
a = base64.b64encode(s)
print(a)

# Decode from Base64 
base64.b64decode(a) # decoding Base64 both byte strings and Unicode text strings can be supplied 

b'aGVsbG8='


b'hello'

# 6.11. Reading and Writing Binary Arrays of Structures

In [6]:
from struct import Struct

def write_records(records, format, f):
    '''
    Write a sequence of tuples to a binary file of structures.
    '''
    record_struct = Struct(format)
    for r in records:
        f.write(record_struct.pack(*r))



def read_records(format, f):
    record_struct = Struct(format)
    chunks = iter(lambda: f.read(record_struct.size), b'')
    return (record_struct.unpack(chunk) for chunk in chunks)

# Example
if __name__ == '__main__':
    records = [(1, 2.3, 4.5),
              (6, 7.8, 9.0),
              (12, 13.4, 56.7)]
    with open('data.b', 'wb') as f:
        write_records(records, '<idd', f)
    
    with open('data.b','rb') as f:
              for rec in read_records('<idd', f):
                  print(rec)



(1, 2.3, 4.5)
(6, 7.8, 9.0)
(12, 13.4, 56.7)


# 6.12. Reading Nested and Variable-SIzed Binary Structures