Parse an OSM file and export the result to CSV files

In [99]:
import xml.etree.ElementTree
import csv
import re

In [100]:
# Path of the OSM XML file to convert; it is passed to parseOSMfile() at the
# end of this notebook.
dataFileName = 'daensen.osm'

In [145]:
# Output CSV file name for each record type written by the exporter.
files = {
    'node': 'nodes.csv',
    'way': 'ways.csv',
    'node_tags': 'node_tags.csv',
    'way_tags': 'way_tags.csv',
    'way_node': 'way_node.csv',
}

# Column names, in output order, for each record type (same keys as `files`).
# The lists double as the CSV header row and as the attributes read from
# each record before it is written.
fields = {
    'node': [
        'id', 'lat', 'lon', 'user', 'uid', 'version', 'changeset', 'timestamp',
    ],
    'way': [
        'id', 'user', 'uid', 'version', 'changeset', 'timestamp',
    ],
    'node_tags': ['id', 'key', 'value', 'type'],
    'way_tags': ['id', 'key', 'value', 'type'],
    'way_node': ['id', 'node_id', 'position'],
}

def initCSVfiles():
    """Create every output CSV file and write its header row.

    One file is opened for each entry of the module-level ``files`` mapping,
    and the matching column list from ``fields`` is written as its first row.
    Rows use ``;`` as delimiter and ``'`` as quote character.

    Returns:
        dict: maps each record type to ``{'fileId': <open file object>,
        'csvWriter': <csv writer bound to that file>}``. The caller must
        close the files when done (see ``closeCSVfiles``).

    Raises:
        Exception: any error raised while opening a file or writing a header
            is re-raised after the files opened so far have been closed.
    """
    fileDescriptor = {}
    try:
        for elemType, fileName in files.items():
            # newline='' lets the csv module control line endings itself
            # (without it rows end in "\r\r\n" on Windows); the explicit
            # UTF-8 encoding keeps non-ASCII text independent of the locale.
            fileId = open(fileName, 'w', newline='', encoding='utf-8')
            # Register the handle immediately so it is closed on failure.
            fileDescriptor[elemType] = {'fileId': fileId}
            csvWriter = csv.writer(fileId, delimiter=';',
                                   quotechar='\'', quoting=csv.QUOTE_MINIMAL)
            csvWriter.writerow(fields[elemType])
            fileDescriptor[elemType]['csvWriter'] = csvWriter
    except Exception:
        # Do not leak the handles that were already opened.
        for entry in fileDescriptor.values():
            entry['fileId'].close()
        raise

    return fileDescriptor

def closeCSVfiles(fileDesc):
    """Close every file held in *fileDesc*.

    Args:
        fileDesc: mapping of record type to a dict whose ``'fileId'`` entry
            is an open file object (the structure built by ``initCSVfiles``).
    """
    for entry in fileDesc.values():
        entry['fileId'].close()

def writeElem(elemType, elem, fileDesc):
    """Append one record to the CSV file associated with *elemType*.

    Args:
        elemType: record type; a key of both ``fields`` and *fileDesc*.
        elem: mapping that holds a value for every column listed in
            ``fields[elemType]``.
        fileDesc: mapping of record type to a dict with a ``'csvWriter'``
            entry used to write the row.

    Raises:
        KeyError: if *elemType* is unknown or *elem* lacks one of the columns.
    """
    columns = fields[elemType]
    # Emit the values in the same order as the header row.
    record = [elem[column] for column in columns]

    writer = fileDesc[elemType]['csvWriter']
    writer.writerow(record)

def parseTag(id, attrib):
    """Build the CSV record for one ``tag`` child element.

    Args:
        id: identifier of the element that owns the tag.
        attrib: attribute mapping of the tag; must contain ``'k'`` and ``'v'``.

    Returns:
        dict: record with the keys ``id``, ``key``, ``value`` and ``type``.
        ``type`` currently carries the same text as ``key`` (see the TODO).

    Raises:
        KeyError: if ``'k'`` or ``'v'`` is missing from *attrib*.
    """
    # TODO Parse k --> value, key
    tagKey = attrib['k']
    tagValue = attrib['v']
    return dict(id=id, key=tagKey, value=tagValue, type=tagKey)
    
def parseElement(element, fileDesc):
    """Export one OSM element and its children to the CSV files.

    Only ``node`` and ``way`` elements are handled; any other element is
    ignored. The element itself is written to its own CSV file, each ``tag``
    child to the matching ``<element>_tags`` file, and each ``nd`` child to
    the ``way_node`` file together with its 0-based position among the
    ``nd`` children.

    Args:
        element: XML element to export (exposes ``tag``, ``attrib`` and
            iteration over its children).
        fileDesc: CSV writers as built by ``initCSVfiles``.

    Raises:
        KeyError: if an ``nd`` child has no ``ref`` attribute or a ``tag``
            child lacks ``k``/``v``.
    """
    targetElement = ('node', 'way')
    if element.tag not in targetElement:
        return

    attribs = element.attrib
    # OSM data may omit metadata attributes (e.g. user/uid); write an empty
    # field for those instead of aborting the whole export with a KeyError.
    elem = {name: attribs.get(name, '') for name in fields[element.tag]}
    writeElem(element.tag, elem, fileDesc)

    # Counts only 'nd' children, so positions are contiguous within a way.
    position = 0
    for child in element:
        if child.tag == 'nd':
            node = {'id': elem['id'], 'node_id': child.attrib['ref'], 'position': position}
            position += 1
            writeElem('way_node', node, fileDesc)
        elif child.tag == 'tag':
            tag = parseTag(elem['id'], child.attrib)
            writeElem(element.tag + '_tags', tag, fileDesc)
        
def parseOSMfile(fileName):
    """Parse the OSM XML file *fileName* and export it to the CSV files.

    The CSV files are created first, then every direct child of the XML root
    is handed to ``parseElement``. The CSV files are always closed, even if
    reading the XML or exporting an element fails.

    Args:
        fileName: path of the OSM XML file to read.

    Raises:
        Exception: errors from opening or parsing *fileName*, or from
            exporting an element, propagate after the CSV files are closed.
    """
    fileDesc = initCSVfiles()
    try:
        root = xml.etree.ElementTree.parse(fileName).getroot()
        for element in root:
            parseElement(element, fileDesc)
    finally:
        # Release the output handles on success and on failure alike.
        closeCSVfiles(fileDesc)

In [146]:
# Run the export on the input file named by dataFileName.
parseOSMfile(dataFileName)