In [2]:
"""
Your task in this exercise has two steps:

- audit the OSMFILE and change the variable 'mapping' to reflect the changes needed to fix 
    the unexpected street types to the appropriate ones in the expected list.
    You have to add mappings only for the actual problems you find in this OSMFILE,
    not a generalized solution, since that may and will depend on the particular area you are auditing.
- write the update_name function, to actually fix the street name.
    The function takes a string with the street name as an argument and should return the fixed name.
    We have provided a simple test so that you can see exactly what is expected.
"""
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint
import os
# Hard-coded, machine-specific working directory. Changing into it at import
# time means the relative OSMFILE name below is resolved inside this folder.
path='C:\\Users\\aemra\\Documents\\GitHub\\Wrangle-OpenStreetMap-Data'
os.chdir(path)
# OSM extract that test() passes to audit().
OSMFILE = "houston_texas.osm"


# Street types that are already in their canonical form. audit_street_type()
# ignores any street name whose matched street type appears in this list.
expected = ["Street", "Avenue", "Boulevard", "Drive", "Court", "Place", "Square", "Lane", "Road",
            "Trail", "Parkway", "Commons", "Freeway", "Way"]



def audit_street_type(street_types, street_name):
    """Record street_name under its street type when that type is unexpected.

    The street type is whatever the module-level pattern ``street_type_re``
    matches in ``street_name``.
    NOTE(review): ``street_type_re`` is not defined in the visible part of
    this file -- presumably it comes from an earlier notebook cell; confirm.

    Args:
        street_types: mapping of street type -> set of street names; it is
            mutated in place (indexing must create missing entries, as a
            ``defaultdict(set)`` does).
        street_name: the full street name to inspect.

    Returns:
        None. Nothing is recorded when the pattern does not match or when the
        matched type is listed in ``expected``.
    """
    match = street_type_re.search(street_name)
    if not match:
        return
    found_type = match.group()
    if found_type in expected:
        return
    street_types[found_type].add(street_name)


def is_street_name(elem):
    """Return True when the element's 'k' attribute is exactly "addr:street".

    Raises:
        KeyError: if the element has no 'k' attribute.
    """
    key = elem.attrib['k']
    return key == "addr:street"

def update_name(name, mapping):
    """Return name with abbreviated street-type words expanded.

    The name is split into tokens (maximal runs of characters that are neither
    whitespace nor a comma). Every token that is exactly equal to a key of
    ``mapping`` is replaced by the corresponding value; everything else,
    including the separators, is left untouched.

    Matching whole tokens matters: treating the keys as substrings (or as
    regular expressions, where the "." in "St." matches any character) would
    corrupt names such as "Stafford Rd" -> "Streetafford Rd" or
    "Memorial Drive, Ste 110" -> "Memorial Drive, Streete 110".

    NOTE: a token is expanded wherever it appears, so a leading "St." meaning
    "Saint" is expanded too if "St." is a key of ``mapping``.

    Args:
        name: the street name to clean.
        mapping: dict of abbreviation -> full street type.

    Returns:
        The cleaned street name; the original string is returned unchanged
        when no token needs replacing.
    """
    def _expand(match):
        # Look the token up verbatim; unknown tokens map to themselves.
        token = match.group()
        return mapping.get(token, token)

    return re.sub(r"[^\s,]+", _expand, name)
# Abbreviated, lower-cased or misspelled street types found in this OSM file,
# mapped to their canonical spelling. Each key appears exactly once.
mapping = {
    "St": "Street",
    "St.": "Street",
    "Ave": "Avenue",
    "Rd.": "Road",
    "Ave.": "Avenue",
    "Blvd": "Boulevard",
    "Blvd.": "Boulevard",
    "Dr": "Drive",
    "Frwy": "Freeway",
    "Pkwy": "Parkway",
    "Rd": "Road",
    "Stree": "Street",
    "blvd": "Boulevard",
    "street": "Street",
}

def get_element(osm_file):
    """Lazily yield every element of an XML file as soon as it is closed.

    The file is parsed incrementally. The first event (the start of the root
    element) is consumed to obtain the root; afterwards each element is
    yielded on its 'end' event, and the root is cleared once the consumer
    asks for the next element.

    Args:
        osm_file: a filename or file object accepted by ``ET.iterparse``.

    Yields:
        Each element, in the order their closing tags are encountered.
    """
    events = ET.iterparse(osm_file, events=('start', 'end'))
    root = next(events)[1]
    for kind, element in events:
        if kind != 'end':
            continue
        yield element
        # Drop what has been attached to the root so far.
        root.clear()
   
def audit(osmfile):
    """Collect the unexpected street types found in an OSM file.

    Every "node" and "way" element produced by get_element() is scanned for
    "tag" descendants; the value ('v') of each tag accepted by
    is_street_name() is handed to audit_street_type().

    Args:
        osmfile: the OSM file, passed straight to get_element().

    Returns:
        A defaultdict(set) as filled in by audit_street_type().
    """
    street_types = defaultdict(set)
    for elem in get_element(osmfile):
        # Only nodes and ways are inspected.
        if elem.tag not in ("node", "way"):
            continue
        for tag in elem.iter("tag"):
            if is_street_name(tag):
                audit_street_type(street_types, tag.attrib['v'])
    return street_types




def test():
    """Audit OSMFILE, pretty-print the findings, then print every fix.

    For each street name collected by audit(), the line
    ``<name> => <result of update_name(name, mapping)>`` is printed.
    """
    st_types = audit(OSMFILE)
    pprint.pprint(dict(st_types))

    # Only the collected street names are needed, not the street-type keys.
    for names in st_types.values():
        for original in names:
            print (original, "=>", update_name(original, mapping))


if __name__ == '__main__':
    test()

{'1/2': {'Avenue M 1/2', 'Avenue R 1/2'},
 '10': {'I 10'},
 '101': {'FM 78, Suite 101'},
 '110': {'Memorial Drive, Ste 110'},
 '1142': {'Lake Woodlands Drive #1142'},
 '125': {'798 Sorella Court Suite 125'},
 '146': {'TX 146', 'Texas 146'},
 '1464': {'FM 1464'},
 '1488': {'Farm-to-Market Road 1488', 'FM 1488'},
 '150': {'Garth Road Suite 150'},
 '160': {'Nelson Way #160'},
 '1640': {'FM 1640'},
 '1663': {'FM 1663'},
 '1764': {'Farm-to-Market Road 1764', 'FM 1764'},
 '1774': {'Farm-to-Market Road 1774'},
 '18': {'800 W NASA Parkway #18'},
 '180': {'Katy Freeway #180'},
 '185': {'I-45 South, Suite 185'},
 '1960': {'Fm 1960', 'FM 1960'},
 '200': {'College Park Drive Ste 200'},
 '2100': {'FM 2100'},
 '216': {'Briarhills Pkwy #216'},
 '240': {'Bissonnet St #240'},
 '242': {'SH 242'},
 '249': {'TX 249'},
 '270': {'FM 270'},
 '290': {'Highway 290', 'Windfern Rd #290', 'US 290'},
 '2920': {'FM 2920', 'Fm 2920'},
 '300': {'Town & Country Blvd #300'},
 '332': {'West Highway 332', 'TX 332'},
 '35