In [9]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import re

# Module-level handle on the OSM extract; audit() iterates over it.
# NOTE(review): the file is opened as soon as this cell runs and no close()
# is visible in this cell.
osm_file = open("chicago_abbrev.osm", "r")

# Matches the final run of non-whitespace characters at the end of a string,
# i.e. the last "word" of a street name (a trailing period is included).
street_type_re = re.compile(r'\S+\.?$', re.IGNORECASE)
# Occurrence count per trailing token; unseen keys start at 0 via defaultdict(int).
street_types = defaultdict(int)

def audit_street_type(street_types, stree_name):
    """Tally the trailing token of a street name.

    Args:
        street_types: Mapping from trailing token to occurrence count; it is
            updated in place with ``+= 1`` (so it must supply a default for
            unseen keys, as ``defaultdict(int)`` does).
        stree_name: The street name to inspect.

    Returns:
        None. Nothing is recorded when the module-level ``street_type_re``
        pattern finds no match in the name.
    """
    found = street_type_re.search(stree_name)
    if found is None:
        return
    street_types[found.group()] += 1
        
def print_sorted_dict(d):
    """Print every ``key: value`` pair of ``d``, one per line.

    Lines are ordered by the lower-cased key, so the ordering ignores case.
    Values are formatted with ``%d``.
    """
    ordered_names = sorted(d.keys(), key=lambda name: name.lower())
    for name in ordered_names:
        print("%s: %d" % (name, d[name]))
        
        
def is_street_name(elem):
    """Return True when ``elem`` is a ``<tag>`` element keyed ``addr:street``.

    Args:
        elem: An ElementTree element (anything exposing ``tag`` and ``attrib``).

    Returns:
        bool: True only if the element's tag is ``"tag"`` and its ``k``
        attribute equals ``"addr:street"``.
    """
    # .get() instead of ['k']: a <tag> element without a 'k' attribute is
    # simply "not a street name" rather than a KeyError that aborts the audit.
    return elem.tag == "tag" and elem.attrib.get('k') == "addr:street"

    
def audit():
    """Tally and print the trailing token of every street name in the OSM file.

    Streams the module-level ``osm_file`` through ``ET.iterparse``, records
    the ``v`` attribute of each element accepted by ``is_street_name`` into
    the module-level ``street_types`` counter, then prints the counter with
    ``print_sorted_dict``.
    """
    for _event, elem in ET.iterparse(osm_file):
        if is_street_name(elem):
            audit_street_type(street_types, elem.attrib['v'])
        # iterparse keeps every parsed element attached to the tree it is
        # building; clearing each element once its "end" event has been
        # handled drops its attributes and children so memory does not grow
        # with the full content of the file.
        elem.clear()
    print_sorted_dict(street_types)
    
# Run the street-type audit only when this code executes as the main module
# (not when it is imported).
if __name__ == '__main__':
    audit()

FileNotFoundError: [Errno 2] No such file or directory: 'chicago_abbrev.osm'

### Uniformity

In [15]:
import csv
import pprint

# Name of the CSV column whose values are audited by the script below.
# NOTE(review): presumably a WGS84 latitude in degrees, given the column name
# and the +/-90 bounds — confirm against the dataset.
fieldname = "wgs84_pos#lat"
# Bounds used by audit_float_field; both comparisons there are strict, so a
# value equal to either bound is reported as out of range.
minval = -90
maxval = 90

def skip_lines(input_file, skip):
    """Advance ``input_file`` past its next ``skip`` items.

    Args:
        input_file: Any iterator; it is consumed in place via ``next``.
        skip: Number of items to discard.

    Raises:
        StopIteration: If the iterator runs out before ``skip`` items
            have been discarded.
    """
    for _ in range(skip):
        next(input_file)
    
def is_number(s):
    """Return True if ``float(s)`` succeeds, False otherwise.

    Args:
        s: The value to test, typically a string read from a CSV cell.

    Returns:
        bool: True when ``s`` can be converted with ``float``; False when the
        conversion is rejected.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: a string that is not a numeric literal.
        # TypeError: a value float() cannot accept at all (None, list, ...);
        # that is "not a number" too, not a reason to crash the caller.
        return False
    return True
    
def is_array(s):
    """Return True when ``s`` is a ``list`` instance, False otherwise."""
    return isinstance(s, list)
        
    
def audit_float_field(v, counts):
    """Classify one raw field value and report anything that is not a valid float.

    Args:
        v: The raw field value: a string, or a list.
        counts: Dict with integer entries ``'nulls'``, ``'empties'`` and
            ``'arrays'``; the matching entry is incremented in place.

    Behaviour, in order:
        * a list increments ``counts['arrays']``;
        * the string ``"NULL"`` (after stripping whitespace) increments
          ``counts['nulls']``;
        * an empty string (after stripping) increments ``counts['empties']``;
        * a string ``is_number`` rejects is printed as a non-number;
        * a number not strictly between the module-level ``minval`` and
          ``maxval`` is printed as out of range.
    """
    # The list check has to come before .strip(): a list has no strip()
    # method, so testing it afterwards made the 'arrays' branch unreachable
    # and turned list values into an AttributeError.
    if is_array(v):
        counts['arrays'] += 1
        return

    v = v.strip()
    if v == "NULL":
        counts['nulls'] += 1
    elif v == "":
        counts['empties'] += 1
    elif not is_number(v):
        print("Found non number:", v)
    else:
        number = float(v)
        if not (minval < number < maxval):
            print("Found out of range value:", number)
            
# Audit the `fieldname` column of cities.csv and print summary counts.
if __name__ == '__main__':
    counts = {"nulls": 0, "empties": 0, "arrays": 0}
    nrows = 0

    # `with` guarantees the file is closed even if the audit raises;
    # newline="" is what the csv module asks for so that it can handle
    # line endings inside the file itself.
    with open("cities.csv", newline="") as csv_handle:
        input_file = csv.DictReader(csv_handle)
        # Discard the first three rows that follow the header row.
        skip_lines(input_file, 3)
        for row in input_file:
            audit_float_field(row[fieldname], counts)
            nrows += 1

    print("num cities:", nrows)
    print("nulls:", counts['nulls'])
    print("empties:", counts['empties'])
    print("arrays:", counts['arrays'])
    
    

num cities: 39
nulls: 0
empties: 0
arrays: 0
