In [126]:
import xml.etree.cElementTree as ET

# Output file for a 10% sample (every 10th top-level element).
sample_file_k10 = "sample10.osm"

# Output file for a 20% sample (every 5th top-level element).
sample_file_k5 = "sample5.osm"

# Output file for a one-third sample (every 3rd top-level element).
sample_file_k3 = "sample3.osm"

# The complete, unsampled OSM extract.
osm_file = "philadelphia_pennsylvania.osm"

# Sampling parameter: every k-th top-level element is kept.
# Uncomment exactly one of the alternatives below.
# k = 10
k = 5
# k=3

# LXML (http://lxml.de) is definitely faster to read and summarize the OSM XML tree, though it wasn't clear how to 
# update the tree with new tags. 

def get_element(osm_file, tags=('node', 'way', 'relation')):
    """Lazily yield every fully parsed element whose tag is listed in ``tags``.

    The first parser event is consumed to get hold of the root element, which
    is cleared after each yielded element so already-processed children do not
    accumulate in memory.
    """
    events = iter(ET.iterparse(osm_file, events=('start', 'end')))
    root = next(events)[1]
    for kind, node in events:
        # Skip 'start' events and any tag the caller did not ask for.
        if kind != 'end' or node.tag not in tags:
            continue
        yield node
        root.clear()

# Pick the output file that belongs to the current sampling rate. Previously the
# path was hard-coded to sample_file_k5, so changing k wrote (for example) a 10%
# sample into "sample5.osm". Unknown rates fall back to "sample<k>.osm".
sample_path = {
    10: sample_file_k10,
    5: sample_file_k5,
    3: sample_file_k3,
}.get(k, "sample{}.osm".format(k))

with open(sample_path, 'wb') as output:
    output.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    output.write('<osm>\n  ')

    # Keep every k-th top-level element: those whose running index is a multiple of k.
    for i, elem in enumerate(get_element(osm_file)):
        if i % k == 0:
            output.write(ET.tostring(elem, encoding='utf-8'))

    output.write('</osm>')

In [171]:
from collections import Counter

def count_level_I_tags(filename):
    """Return a Counter of element tag names over the whole tree, root included."""
    tree_root = ET.parse(filename).getroot()
    return Counter(element.tag for element in tree_root.iter())

In [172]:
# count_level_I_tags('sample10.osm')
count_level_I_tags('sample5.osm')
# count_level_I_tags('sample3.osm')
# count_level_I_tags('philadelphia_pennsylvania.osm')

Counter({'member': 12875,
         'nd': 676685,
         'node': 562370,
         'osm': 1,
         'relation': 796,
         'tag': 344722,
         'way': 52300})

In [178]:
import pprint

def count_tags(filename):
    """Count the ``k`` attribute of every <tag> directly under a node, way or relation.

    Returns a plain dict mapping each key name to its number of occurrences.
    """
    tree_root = ET.parse(filename).getroot()
    tally = Counter()
    # Same traversal order as before: node tags, then way tags, then relation tags.
    for path in ('node/tag', 'way/tag', 'relation/tag'):
        tally.update(child.get('k') for child in tree_root.findall(path))
    return dict(tally)


In [179]:
# count_tags('sample10.osm')
count_tags('sample5.osm')

{'Comment': 8,
 'FIXME': 55,
 'FIXME2': 1,
 'FIXME3': 1,
 'FIXME4': 1,
 'FIXME:hgv': 2,
 'FIXME:railway': 2,
 'GPS_Date': 10,
 'GPS_Time': 10,
 'HFCS': 626,
 'Horz_Prec': 10,
 'ISO3166-1': 1,
 'ISO3166-1:alpha2': 1,
 'ISO3166-1:alpha3': 1,
 'ISO3166-1:numeric': 1,
 'Keyword': 1,
 'MATERIAL': 21,
 'Max_PDOP': 10,
 'NAME': 10,
 'NHD:ComID': 4084,
 'NHD:Elevation': 881,
 'NHD:FCode': 4089,
 'NHD:FDate': 881,
 'NHD:FTYPE': 881,
 'NHD:FType': 3206,
 'NHD:Feeder_Pat': 1,
 'NHD:GNIS_ID': 28,
 'NHD:GNIS_Name': 28,
 'NHD:Grade': 1,
 'NHD:Name': 1,
 'NHD:Permanent_': 881,
 'NHD:Phone': 1,
 'NHD:RESOLUTION': 3206,
 'NHD:ReachCode': 4062,
 'NHD:Resolution': 881,
 'NHD:descriptio': 1,
 'NHD:way_id': 3203,
 'NHS': 580,
 'NJDOT_SRI': 627,
 'NOTE': 32,
 'NRHP': 1,
 'PA:ANALGROUP': 1,
 'PA:DIVISION': 3,
 'PARK_NAME': 21,
 'Phone': 1,
 'SHAPE_LENG': 21,
 'Std_Dev': 7,
 'TRL_LENGTH': 21,
 'Vert_Prec': 10,
 'WDPA_ID:ref': 1,
 'WIDTH': 21,
 'Website': 1,
 'abandoned': 1,
 'abandoned:railway': 11,
 'abbr': 

In [51]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

# Full extract and two of the sampled extracts used for faster test runs.
OSMFILE = "./philadelphia_pennsylvania.osm"
OSMFILE_TEST = "./sample3.osm"
OSMFILE_TEST2 = "./sample5.osm"

# Matches the trailing whitespace-free chunk of a string, starting at the first
# word boundary inside that chunk and optionally ending with a period: e.g.
# 'Ave.' in 'Bonny Brook Ave.' or '80' in 'N Lewis RD Unit #80'.
# The pattern contains no letters, so re.IGNORECASE has no effect on it.
street_type_re = re.compile(r'\b\S+\.?$', re.IGNORECASE) 

# Street-name endings that are considered already well-formed.
# audit_street_type() groups every street name whose ending (as matched by
# street_type_re) is NOT in this list under that ending in 'street_types'.

expected = ["Street", "Streets", "Avenue", "Boulevard", "Drive", "Court", "Place", "Square", "Lane", "Road", 
            "Trail", "Parkway", "Commons", "Pike", "Alley", "Circle", "East", "North", "South", "West", 
            "Extension", "Highway", "Plaza", "Terrace", "Walk", "Way", "Run", "Broadway"]

def audit_street_type(street_types, street_name):
    """Record ``street_name`` under its ending when that ending is unexpected.

    ``street_types`` maps an ending (as matched by ``street_type_re``) to the
    set of street names that use it; it is updated in place.
    """
    match = street_type_re.search(street_name)
    if not match:
        return
    ending = match.group()
    if ending in expected:
        return
    street_types[ending].add(street_name)

def is_street_name(elem):
    """Return True when the element's ``k`` attribute is exactly ``addr:street``."""
    key = elem.attrib['k']
    return key == "addr:street"

# Filter 
def audit(osmfile):
    """Collect street names whose ending is not in ``expected``.

    Parses ``osmfile`` incrementally and, for every node and way, passes each
    ``addr:street`` tag value to ``audit_street_type``.

    Returns a ``defaultdict(set)`` mapping each unexpected ending to the set of
    street names that use it.
    """
    street_types = defaultdict(set)
    # 'with' guarantees the handle is closed even if parsing raises.
    with open(osmfile, "rb") as osm_file:
        # Children are only guaranteed to be attached once the parent's "end"
        # event fires; on "start" they may or may not have been parsed yet, so
        # auditing on "start" could silently miss tags.
        for event, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag == "node" or elem.tag == "way":
                for tag in elem.iter("tag"):
                    if is_street_name(tag):
                        audit_street_type(street_types, tag.attrib['v'])
                # The element is fully processed; drop its children and attributes.
                elem.clear()
    return street_types

In [None]:
# create expected list from scraped name strings at http://www.geographic.org/streetview/usa/pa/philadelphia.html





In [None]:
# Similarly, match against a list of communities in Philadelphia County?
# See https://en.wikipedia.org/wiki/Philadelphia_County,_Pennsylvania#Communities

In [6]:
audit(OSMFILE_TEST2)

defaultdict(set,
            {'206': {'US 70 & US 206'},
             '33': {'Route 33'},
             '37th': {'N 37th'},
             '43rd': {'N 43rd'},
             '446-1234': {'1 Brookline BlvdHavertown, PA 19083(610) 446-1234'},
             '5': {'West Girard Avenue, 5'},
             '70': {'NJ 70', 'US 70'},
             '73': {'New Jersey 73'},
             '80': {'N Lewis RD Unit #80'},
             'Ave': {'Aramingo Ave',
              'Cottman Ave',
              'Devon St & Mt. Pleasant Ave',
              'E. Mt Airy Ave',
              'Fairmount Ave',
              'Fort Washington Ave',
              'Frankford Ave',
              'Germantown Ave',
              'Grays Ave',
              'Hirst Ave',
              'Montgomery Ave',
              'Park Ave',
              'Parkway Ave',
              'S Clinton Ave',
              'Stenton Ave',
              'West Girard Ave'},
             'Ave.': {'Bonny Brook Ave.',
              'East Butler Ave.',
             

In [98]:
audit(OSMFILE)

defaultdict(set,
            {'1': {'Route 1'},
             '111': {'South Clinton Avenue Ste. 111'},
             '13': {'W Main St #13'},
             '168': {'Marlton Pike East Ste. 168'},
             '19047': {'200 Manor Ave. Langhorne, PA 19047',
              '2245 E. Lincoln Hwy, Langhorne, PA 19047',
              '2275 E Lincoln Hwy, Langhorne, PA 19047',
              '2300  East Lincoln Highway, Pennsylvania 19047'},
             '19067': {'East Trenton Avenue Morrisville, PA 19067'},
             '206': {'US 206', 'US 70 & US 206'},
             '33': {'Route 33'},
             '37th': {'N 37th'},
             '38': {'New Jersey 38', 'New Jersey Route 38', 'Route 38'},
             '39th': {'N 39th'},
             '40': {'1140 US Highway 40', 'Rt 40'},
             '4080': {'4080'},
             '41st': {'S. 41st'},
             '43rd': {'N 43rd'},
             '446-1234': {'1 Brookline BlvdHavertown, PA 19083(610) 446-1234'},
             '452': {'Market Street; Pennsylv

In [141]:
import sys
import time
from bs4 import BeautifulSoup
from __future__ import division 

# Lookup table from a misspelled or abbreviated word to its canonical spelling.
# clean_streets() applies it word by word with mapping.get(word, word), so keys
# are matched exactly (case-sensitive) against whitespace-separated words.
# TODO: consider storing this inverted (canonical name -> list of variants) and
# deriving this lookup from it, so each canonical name is written only once.
mapping = { "St": "Street",
            "St.": "Street",
           "ST": "Street",
           "st": "Street",
           "Sreet": "Street",
           "Sstreet": "Street",
            "Atreet": "Street",
           "Steet": "Street",
           "street": "Street",
           "Sts.": "Streets",
           "AVE": "Avenue",
           "Ave": "Avenue",
           "Ave.": "Avenue",
           "ave": "Avenue",
           "avenue": "Avenue",
           "E": "East",
           "E.": "East",
           "e": "East",
           "N": "North",
           "N.": "North",
           "s": "South",
           "S": "South",
           "S.": "South",
           "south": "South",
           "W": "West",
           "Blvd": "Boulevard", 
           "Blvd.": "Boulevard",
           "Cir": "Circle",
           "Ct": "Court",
           "Dr": "Drive",
           "Ln": "Lane",
           "lane": "Lane",
           "Hwy":"Highway",
           "PIke": "Pike",
           "Rd": "Road",
           "Rd.": "Road",
           "rd": "Road",
           "road": "Road",
           "ROAD": "Road",
           "RD": "Road",
           "ext": "Extension",
           "way": "Way",
           "&": "and"
            }

# Patterns used to pull phone numbers and full-address fragments out of
# addr:street values so they can be stored under their own tags.

# One to four digits at the very start of the string. Note that this also
# matches the leading digits of longer tokens such as '37th' or '19047'.
house_number = re.compile(r'^\d{1,4}') # Between 1 and 4 digits at the beginning of the string

# TODO: unit_number should match two digits at the end of a line, unless they
# follow a state name or 'US' (e.g. 'Unit #80').
#Unit #80 = 

# TODO: if the last two digits are preceded by a state name or 'US', insert
# 'Highway' in between (e.g. 'US Highway 1', 'New Jersey Highway 33').
#state_US_hwy = 

# Any run of five consecutive digits, anywhere in the string (including the
# first five digits of a longer run).
zipcode = re.compile(r'\d{5,5}') # Five digits

# Optional '(', 3 digits, optional ')', optional separator ('-', '.' or
# whitespace), 3 digits, optional separator, 4 digits.
phone = re.compile(r'\(?\d{3}\)?[-\.\s]??\d{3}[-\.\s]??\d{4}') 

# City names; not referenced by any other code shown in this notebook.
cities = ['Philadelphia', 'Langhorne', 'Morrisville']

# Spellings that identify the state when they appear as a word in a street value.
state = ['PA', 'Pennsylvania']

# Two capitalised words run together without a space, e.g. 'BlvdHavertown':
# group 1 is the first word, group 2 the second.
abutted = re.compile(r'([A-Z]{1}\w+)([A-Z]\w+)')

# Intended for normalising ALL-CAPS words; not used by clean_streets' active code.
all_cap = re.compile(r'[A-Z]{3,}') # At least 3 consecutive capital letters

# Intended for normalising all-lowercase words; not used by clean_streets' active code.
all_low = re.compile(r'\b[a-z]{4,}\b') # At least 4 lowercase letters between word boundaries

# Two words joined by 'and' or '&' (an intersection such as 'Market and Vine').
# The '&' alternative must not use \b around the ampersand: '&' and the
# surrounding whitespace are all non-word characters, so no word boundary
# exists there and the previous r'\w+\s\b&\b\s\w+' could never match.
intersection = re.compile(r'(\w+\s\band\b\s\w+)|(\w+\s&\s\w+)')

def _add_tag_once(soup, anchor, key, value):
    """Insert ``<tag k=key v=value/>`` right after ``anchor`` unless its parent already has ``key``."""
    parent = anchor.parent
    if parent is not None and parent.find("tag", attrs={"k": key}, recursive=False) is not None:
        return
    anchor.insert_after(soup.new_tag("tag", k=key, v='{}'.format(value)))


def clean_streets(osmfile):
    """Clean every ``addr:street`` value in ``osmfile`` and rewrite the file in place.

    For each ``addr:street`` tag this:

    * copies a leading house number, a phone number and a five-digit postcode
      found in the value into sibling ``addr:housenumber`` / ``phone`` /
      ``addr:postcode`` tags (only when the owning element does not already
      carry a tag with that key);
    * adds a ``state`` tag when one of the words is listed in ``state``;
    * removes words that look like house numbers, postcodes or phone numbers,
      splits words that were run together, and replaces every remaining word
      through ``mapping``.

    WARNING: the input file is overwritten with the prettified result, so
    running this twice is not equivalent to running it once on the original.

    Fixes relative to the previous version:

    * ``if tag not is_street`` was a syntax error; it is now ``if not is_street``.
    * ``state_tags`` was undefined and a local ``state`` shadowed the
      module-level list of state spellings.
    * Words were deleted from ``words`` while enumerating it, which skipped the
      following word and could delete unrelated words.
    * Duplicate detection compared the new tag against every tag in the whole
      document, so an element lost its house number / phone / postcode whenever
      any other element already had an identical tag.
    * The input file handle was never closed.
    """
    start_time = time.time()
    # Python 2 only: force UTF-8 as the implicit str/unicode codec so the str()
    # calls and the final write below do not fail on non-ASCII street names.
    # NOTE(review): this changes a process-wide setting.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    with open(osmfile, "rb") as source:
        soup = BeautifulSoup(source, "xml")

    # Same insertion order as before: house number, phone, postcode.
    extracted = (
        ("addr:housenumber", house_number),
        ("phone", phone),
        ("addr:postcode", zipcode),
    )

    for tag in soup.find_all("tag", attrs={"k": "addr:street"}):
        raw = tag['v']

        for key, pattern in extracted:
            found = pattern.search(raw)
            if found:
                _add_tag_once(soup, tag, key, found.group())

        words = raw.split()

        if raw == 'Cecil B. Moore':
            words.append('Avenue')

        # NOTE(review): street_type_re matches the last chunk of almost any
        # non-empty name, so is_street is nearly always truthy and the three
        # branches below rarely fire. Confirm whether the intent was to test
        # the ending against `expected` instead.
        is_street = street_type_re.search(raw)
        if intersection.search(raw) and not is_street:
            words.append("Streets")

        # One-word names, and two-word names starting with a direction (or
        # "Spring"), get an explicit "Street" suffix.
        if not is_street and len(words) == 1:
            words.append('Street')
        if not is_street and len(words) == 2 and words[0] in ["North", "South", "West", "East", "Spring"]:
            words.append('Street')

        # Index-based walk so words can be removed or inserted safely.
        idx = 0
        while idx < len(words):
            word = words[idx]

            if word in state:
                _add_tag_once(soup, tag, "state", word)

            if house_number.search(word) or zipcode.search(word) or phone.search(word):
                # The next word slides into position idx, so do not advance.
                del words[idx]
                continue

            if abutted.search(word):
                # 'BlvdHavertown,' -> 'Blvd', 'Havertown'; the second half is
                # examined on the next iteration, as it was before.
                words[idx] = abutted.sub(r'\1', word).strip(',')
                words.insert(idx + 1, abutted.sub(r'\2', word).strip(','))

            idx += 1

        # TODO: case normalisation with all_cap / all_low is not implemented.
        clean_name = ' '.join(str(mapping.get(word, word)) for word in words)
        tag['v'] = str(clean_name)

    with open(osmfile, "w") as f:
        f.write(soup.prettify())
    print("--- {}min ---".format((time.time() - start_time)/60))


            

In [125]:
# Sanity-check the phone regex against an address string from the audit output.
n = "1 Brookline Boulevard Havertown PA 19083(610) 446-1234"
y = phone.findall(n)  # every phone-like substring found in n
# Previously this inspected `z`, which is only assigned in a commented-out
# line and therefore raised NameError on a fresh kernel.
type(y)

# NOTE(review): `l` is not used by any code shown in this cell.
l = []

# Minimal OSM document whose addr:street value also contains a postcode and a
# phone number; used to try out tag insertion before running on real data.
test_osm_xml = """
<?xml version="1.0" encoding="UTF-8"?>
<osm>
 <node changeset="34353963" id="1483624883" lat="39.9787384" lon="-75.3038692" timestamp="2015-09-30T19:10:51Z" uid="3276050" user="JCarden" version="2">
  <tag k="name" v="Kettle"/>
  <tag k="amenity" v="restaurant"/>
  <tag k="cuisine" v="Diner"/>
  <tag k="addr:street" v="1 Brookline Boulevard Havertown PA 19083(610) 446-1234"/>
  <tag k="addr:postcode" v="19083"/>
  <tag k="addr:housenumber" v="1"/>
 </node>
</osm>
"""

test_soup = BeautifulSoup(test_osm_xml, "xml")
    
street_tgs = test_soup.find_all("tag", attrs={"k": "addr:street"})

# Every phone tag created below, kept for inspection.
n_tags = []

# For each street tag that contains a phone number, create a <tag k="phone">
# holding the matched text and insert it directly after the street tag.
for idx, tag in enumerate(street_tgs):
    call_me = phone.search(tag['v'])
    if call_me:
        v_val = call_me.group()
        new_tag = test_soup.new_tag("tag", k="phone", v='{}'.format(v_val))
        n_tags.append(new_tag)
        tag.insert_after(new_tag)
        
# Display the modified document as the cell output.
test_soup
# n_tags
# type(n_tags[0])

#     new_tag = soup.new_tag("tag", k="phone", v='{}'.format(v_val))
#     print new_tag
#             if new_tag not in phones:
#                 soup.tag.insert_after(new_tag)


<?xml version="1.0" encoding="unicode-escape"?>\n<?xml version="1.0" encoding="UTF-8"?><osm>\n<node changeset="34353963" id="1483624883" lat="39.9787384" lon="-75.3038692" timestamp="2015-09-30T19:10:51Z" uid="3276050" user="JCarden" version="2">\n<tag k="name" v="Kettle"/>\n<tag k="amenity" v="restaurant"/>\n<tag k="cuisine" v="Diner"/>\n<tag k="addr:street" v="1 Brookline Boulevard Havertown PA 19083(610) 446-1234"/><tag k="phone" v="(610) 446-1234"/>\n<tag k="addr:postcode" v="19083"/>\n<tag k="addr:housenumber" v="1"/>\n</node>\n</osm>

In [142]:
clean_streets(OSMFILE_TEST2)

In [144]:
audit(OSMFILE_TEST2)

defaultdict(set,
            {'80': {'North Lewis Road Unit #80'},
             'Avenue,': {'West Girard Avenue,'},
             'Bigler': {'Bigler'},
             'Center': {'Town Center'},
             'Jersey': {'New Jersey'},
             'Mallon': {'Mallon'},
             'NJ': {'NJ'},
             'NJ-73': {'NJ-73'},
             'PA': {'Brookline Boulevard Havertown PA',
              'East Lincoln Highway Langhorne PA'},
             'Route': {'Route'},
             'Spruce': {'Spruce'},
             'US': {'US', 'US and US'},
             'Vine': {'and Vine'},
             'Warren': {'Warren'}})

In [None]:
# Key made up only of lowercase letters and underscores.
lower = re.compile(r'^([a-z]|_)*$')
# Two such parts joined by exactly one colon.
lower_colon = re.compile(r'^([a-z]|_)*:([a-z]|_)*$')
# Any character considered problematic inside a key.
problemchars = re.compile(r'[=\+/&<>;\'"\?%#$@\,\. \t\r\n]')


def key_type(element, keys):
    """Classify the ``k`` attribute of a <tag> element and bump the matching counters.

    ``keys`` is a dict with the counters "lower", "lower_colon", "problemchars"
    and "other". It is updated in place and also returned. Elements that are
    not <tag> leave it untouched.
    """
    if element.tag != "tag":
        return keys

    name = element.get('k')
    categories = (
        ("lower", lower),
        ("lower_colon", lower_colon),
        ("problemchars", problemchars),
    )
    matched_any = False
    for bucket, pattern in categories:
        if pattern.search(name):
            keys[bucket] += 1
            matched_any = True
    if not matched_any:
        keys["other"] += 1
    return keys

def process_map(filename):
    """Tally how many tag keys in ``filename`` fall into each ``key_type`` category."""
    counters = dict.fromkeys(("lower", "lower_colon", "problemchars", "other"), 0)
    for _event, element in ET.iterparse(filename):
        counters = key_type(element, counters)
    return counters

In [None]:
def get_user(element):
    """Return the element's ``user`` attribute, or None when it has none."""
    contributor = element.get('user')
    return contributor

def process_map(filename):
    """Return the set of distinct ``user`` attribute values found in ``filename``."""
    contributors = set(get_user(element) for _event, element in ET.iterparse(filename))
    # Elements without a user attribute contribute None; drop it.
    contributors.discard(None)
    return contributors


In [63]:
import csv
import codecs
import re
import xml.etree.cElementTree as ET

import schema

# OSM extract that is converted to CSV.
OSM_PATH = "example.osm"

# Output CSV files, one per table.
NODES_PATH = "nodes.csv"
NODE_TAGS_PATH = "nodes_tags.csv"
WAYS_PATH = "ways.csv"
WAY_NODES_PATH = "ways_nodes.csv"
WAY_TAGS_PATH = "ways_tags.csv"

# Key that starts with 'lowercase_part:lowercase_part'. Unlike a fully anchored
# pattern there is no '$', so further text (including more colons) may follow.
LOWER_COLON = re.compile(r'^([a-z]|_)+:([a-z]|_)+')
# Any character considered problematic inside a tag key.
PROBLEMCHARS = re.compile(r'[=\+/&<>;\'"\?%#$@\,\. \t\r\n]')

# Validation schema taken from the local `schema` module.
SCHEMA = schema.schema

# Make sure the fields order in the csvs matches the column order in the sql table schema
NODE_FIELDS = ['id', 'lat', 'lon', 'user', 'uid', 'version', 'changeset', 'timestamp']
NODE_TAGS_FIELDS = ['id', 'key', 'value', 'type']
WAY_FIELDS = ['id', 'user', 'uid', 'version', 'changeset', 'timestamp']
WAY_TAGS_FIELDS = ['id', 'key', 'value', 'type']
WAY_NODES_FIELDS = ['id', 'node_id', 'position']


def shape_element(element, node_attr_fields=NODE_FIELDS, way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS, default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict

    Args:
        element: the parsed XML element (expected to be a node or a way).
        node_attr_fields: attribute names copied from a node element.
        way_attr_fields: attribute names copied from a way element.
        problem_chars: compiled regex; tags whose key matches it are dropped.
        default_tag_type: type recorded for tags whose key has no
            'lower:lower' prefix.

    Returns:
        For a node: ``{'node': attribs, 'node_tags': tags}`` (or None when no
        attribute fields were requested).
        For a way: ``{'way': attribs, 'way_nodes': nodes, 'way_tags': tags}``.
        For any other element: None.

    Fixes relative to the previous version:
        * The element id is read once up front. It used to be assigned only
          inside the tag loop, so a way without tags stored the builtin ``id``
          function in every ``way_nodes`` entry.
        * The four keyword parameters are honoured instead of being ignored in
          favour of the module-level constants.
        * Leftover debug ``print`` statements after the returns were removed.
    """
    element_id = element.get('id')

    # Secondary tags are handled the same way for both node and way elements.
    tags = []
    for child in element.iter("tag"):
        key = child.get('k')
        if problem_chars.search(key):
            continue
        if LOWER_COLON.search(key):
            # 'addr:street:name' -> type 'addr', key 'street:name'
            tag_type, tag_key = key.split(':', 1)
        else:
            tag_type, tag_key = default_tag_type, key
        tags.append({"id": element_id, "key": tag_key, "value": child.get('v'), "type": tag_type})

    if element.tag == 'node':
        node_attribs = {field: element.get(field) for field in node_attr_fields}
        if not node_attribs:
            return None
        return {'node': node_attribs, 'node_tags': tags}

    if element.tag == 'way':
        way_attribs = {field: element.get(field) for field in way_attr_fields}
        # position is the zero-based index of the <nd> reference within the way.
        way_nodes = [
            {"id": element_id, "node_id": nd.get('ref'), "position": position}
            for position, nd in enumerate(element.iter('nd'))
        ]
        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}

    return None

# ================================================== #
#               Helper Functions                     #
# ================================================== #
def get_element(osm_file, tags=('node', 'way', 'relation')):
    """Yield each completely parsed element whose tag appears in ``tags``.

    The root element is taken from the first parser event and cleared after
    every yielded element so processed children are released.
    """
    parser_events = ET.iterparse(osm_file, events=('start', 'end'))
    root = next(parser_events)[1]
    for event, elem in parser_events:
        is_wanted = event == 'end' and elem.tag in tags
        if is_wanted:
            yield elem
            root.clear()
            
def validate_element(element, validator, schema=SCHEMA):
    """Raise ValidationError if element does not match schema

    Only the first entry of ``validator.errors`` is reported; each of its
    sub-errors is rendered as ``name: detail`` on its own line.
    """
    if validator.validate(element, schema) is True:
        return

    field, errors = next(validator.errors.iteritems())
    # NOTE(review): `cerberus` is not imported in the cells shown here;
    # confirm it is imported elsewhere before this path can run.
    error_cls = cerberus.ValidationError

    report_lines = []
    for name, detail in errors.iteritems():
        if not isinstance(detail, str):
            detail = ", ".join(detail)
        report_lines.append("{0}: {1}".format(name, detail))

    message_string = "\nElement of type '{0}' has the following errors:\n{1}"
    raise error_cls(message_string.format(field, "\n".join(report_lines)))


class UnicodeDictWriter(csv.DictWriter, object):
    """csv.DictWriter variant that UTF-8 encodes unicode values before writing."""

    def writerow(self, row):
        """Write one row, encoding every unicode value as UTF-8 bytes first."""
        encoded = {}
        for field, value in row.iteritems():
            if isinstance(value, unicode):
                value = value.encode('utf-8')
            encoded[field] = value
        super(UnicodeDictWriter, self).writerow(encoded)

    def writerows(self, rows):
        """Write each row through ``writerow`` so the same encoding applies."""
        for record in rows:
            self.writerow(record)

In [64]:
for element in get_element(OSM_PATH, tags=('node', 'way')):
    x = shape_element(element)
    print x

{'node': {'changeset': '11129782', 'version': '7', 'uid': '451048', 'lat': '41.9730791', 'timestamp': '2012-03-28T18:31:23Z', 'lon': '-87.6866303', 'id': '261114295', 'user': 'bbmiller'}, 'node_tags': []}
{'node': {'changeset': '8448766', 'version': '6', 'uid': '451048', 'lat': '41.9730416', 'timestamp': '2011-06-15T17:04:54Z', 'lon': '-87.6878512', 'id': '261114296', 'user': 'bbmiller'}, 'node_tags': []}
{'node': {'changeset': '8581395', 'version': '5', 'uid': '451048', 'lat': '41.9729565', 'timestamp': '2011-06-29T14:14:14Z', 'lon': '-87.6939548', 'id': '261114299', 'user': 'bbmiller'}, 'node_tags': []}
{'node': {'changeset': '8581395', 'version': '5', 'uid': '451048', 'lat': '41.9707380', 'timestamp': '2011-06-29T14:14:14Z', 'lon': '-87.6976025', 'id': '261146436', 'user': 'bbmiller'}, 'node_tags': []}
{'node': {'changeset': '8581395', 'version': '7', 'uid': '451048', 'lat': '41.9740068', 'timestamp': '2011-06-29T14:14:15Z', 'lon': '-87.6988576', 'id': '261147304', 'user': 'bbmiller

In [None]:
# ================================================== #
#               Main Function                        #
# ================================================== #
def process_map(file_in, validate):
    """Iteratively process each XML element and write to csv(s)

    Every node and way in ``file_in`` is shaped with ``shape_element`` and
    written to the node / way CSV files; when ``validate`` is True each shaped
    element is first checked with ``validate_element``.
    """
    with codecs.open(NODES_PATH, 'w') as nodes_out, \
         codecs.open(NODE_TAGS_PATH, 'w') as node_tags_out, \
         codecs.open(WAYS_PATH, 'w') as ways_out, \
         codecs.open(WAY_NODES_PATH, 'w') as way_nodes_out, \
         codecs.open(WAY_TAGS_PATH, 'w') as way_tags_out:

        nodes_writer = UnicodeDictWriter(nodes_out, NODE_FIELDS)
        node_tags_writer = UnicodeDictWriter(node_tags_out, NODE_TAGS_FIELDS)
        ways_writer = UnicodeDictWriter(ways_out, WAY_FIELDS)
        way_nodes_writer = UnicodeDictWriter(way_nodes_out, WAY_NODES_FIELDS)
        way_tags_writer = UnicodeDictWriter(way_tags_out, WAY_TAGS_FIELDS)

        # Every CSV starts with its header row.
        all_writers = (nodes_writer, node_tags_writer, ways_writer,
                       way_nodes_writer, way_tags_writer)
        for writer in all_writers:
            writer.writeheader()

        # NOTE(review): `cerberus` is not imported in the cells shown here;
        # confirm it is imported elsewhere.
        validator = cerberus.Validator()

        for element in get_element(file_in, tags=('node', 'way')):
            shaped = shape_element(element)
            if not shaped:
                continue

            if validate is True:
                validate_element(shaped, validator)

            kind = element.tag
            if kind == 'node':
                nodes_writer.writerow(shaped['node'])
                node_tags_writer.writerows(shaped['node_tags'])
            elif kind == 'way':
                ways_writer.writerow(shaped['way'])
                way_nodes_writer.writerows(shaped['way_nodes'])
                way_tags_writer.writerows(shaped['way_tags'])


if __name__ == '__main__':
    # Note: Validation is ~ 10X slower. For the project consider using a small
    # sample of the map when validating.
    process_map(OSM_PATH, validate=True)
