In [None]:
"""
Converting NYC Street names to Zip Codes
"""

In [2]:
import pandas as pd
import numpy as np

In [22]:
# Load the street table, then replace missing cells with '' so that an
# unknown zip code is represented by an empty string rather than NaN.
df = pd.read_csv('data/example_streets.csv')
df = df.fillna('')
df

Unnamed: 0,streets,zipcodes
0,beach street,
1,little clove road,
2,narrows road south,
3,ocean terrace,
4,bay st,
...,...,...
892,west 53rd street,
893,west 55th street,
894,west 56th street,
895,west 57th street,


In [38]:
def street_to_zip(street_names, verbose=False, df311=None, translate_df=None):
    """Translate a list of street names into a list of zip codes.

    Each street is first looked up in the manually maintained street/zip
    table. If that table has no zip code for it, the first 311 service-request
    address that contains the street name supplies the zip code.

    Street names are matched as literal substrings: characters such as '.',
    '(' or '+' have no regular-expression meaning.

    Args:
        street_names: list of street names to translate.
        verbose: when True, print a progress message and every street that
            could not be resolved.
        df311: optional DataFrame with "incident_zip" and "incident_address"
            columns. When None, at most 82,800 rows are downloaded from the
            NYC 311 open-data endpoint. Pass a frame to avoid the download
            (e.g. when calling the function repeatedly or offline).
        translate_df: optional DataFrame with "streets" and "zipcodes"
            columns. When None, 'data/example_streets.csv' is read.

    Returns:
        A list with one entry per input street: the zip code exactly as it is
        stored in the table that matched, or "" when nothing matched (blank
        or non-string street names are never matched).

    Raises:
        AssertionError: if street_names is not a list.
    """
    assert isinstance(street_names, list), "Input streets should be in a list format for efficiency reasons"

    # init 311 zips (https://data.cityofnewyork.us/Social-Services/311-Service-Requests-from-2010-to-Present/erm2-nwe9)
    if df311 is None:
        df311 = pd.read_json("https://data.cityofnewyork.us/resource/erm2-nwe9.json?$limit=82800")
    # keep only rows that have both a zip code and an address, and only those two columns
    has_both = df311["incident_zip"].notna() & df311["incident_address"].notna()
    # .copy() so the normalization below never writes into a slice or into the caller's frame
    df311 = df311.loc[has_both, ["incident_zip", "incident_address"]].copy()
    df311["incident_address"] = df311["incident_address"].astype(str).str.lower().str.strip()

    # init existing streets/zipcodes (possibly from manual entry)
    if translate_df is None:
        translate_df = pd.read_csv('data/example_streets.csv')
    translate_df = translate_df.fillna('')

    if verbose:
        print("database done loading, translating streets in progress\n")

    known_streets = translate_df["streets"]
    addresses = df311["incident_address"]

    ret = []
    for street in street_names:
        # A blank name is a substring of every row and would yield an arbitrary zip code.
        if not isinstance(street, str) or not street.strip():
            if verbose:
                print("Could not find", street)
            ret.append("")
            continue

        # manual version: regex=False so the name is matched literally
        search_df = translate_df[known_streets.str.contains(street, regex=False, na=False)]
        if len(search_df) > 0 and search_df.iloc[0]["zipcodes"] != '':
            ret.append(search_df.iloc[0]["zipcodes"])
            continue

        # automatic: first 311 address containing the street name
        search_df = df311[addresses.str.contains(street, regex=False, na=False)]
        if len(search_df) > 0:
            ret.append(search_df.iloc[0]["incident_zip"])
        else:
            if verbose:
                print("Could not find", street)
            ret.append("")
    return ret

In [39]:
# Resolve every street once up front (verbose, so misses are listed) and
# keep the result so later cells can reuse it instead of repeating the lookup.
streets = [street for street in df["streets"]]
zips = street_to_zip(streets, verbose=True)

database done loading, translating streets in progress

Could not find little clove road
Could not find clove rd
Could not find hylan blvd
Could not find manor rd
Could not find todt hill rd
Could not find malcolm x blvd
Could not find marcus garvey blvd
Could not find veterans avenue
Could not find richmond rd
Could not find amboy rd
Could not find 4th ave
Could not find shore blvd
Could not find shore pkwy north
Could not find shore pkwy south
Could not find shore rd
Could not find shore pkwy
Could not find ave h
Could not find so conduit ave
Could not find east 174 street
Could not find murdock ave nue
Could not find seagirt blvd
Could not find east 167 street
Could not find east 170 street
Could not find east 188 street
Could not find east 165 street
Could not find east 169 street
Could not find east 181 street
Could not find east 134 street
Could not find cross island pkwy sr south
Could not find conner street
Could not find east 138 street
Could not find east 149 street
Could not

Could not find 9th avenue
Could not find 94th avenue
Could not find 6th avenue
Could not find colonial rd
Could not find east 135th street
Could not find east 138th street
Could not find east 144th street
Could not find east 149th street
Could not find east 161st street
Could not find east 163rd street
Could not find east 169th street
Could not find east 170th street
Could not find east 174th street
Could not find east 177th street
Could not find east 18th street
Could not find e 204th st
Could not find east 23rd street
Could not find east 233rd street
Could not find east 241st street
Could not find honeywell st
Could not find pelham bridge rd
Could not find queens blvd
Could not find southern blvd
Could not find st. anns ave
Could not find west 125th street
Could not find west 135th street
Could not find west 145th street
Could not find west 155th street
Could not find west 167th street
Could not find west 30th street
Could not find west 34th street
Could not find west 39th street
Cou

In [42]:
# Share of streets for which a zip code was found ("" marks a miss).
actual_zips = list(filter(lambda found: found != "", zips))
resolved_count, total_count = len(actual_zips), len(zips)
ratio = resolved_count / total_count
print("Percent Zipcode Yield:", ratio*100)

Percent Zipcode Yield: 67.11259754738016


In [40]:
# Write the looked-up zip codes (one per row, "" where the lookup found
# nothing) into the "zipcodes" column, then display the updated frame.
df["zipcodes"] = zips
df

Unnamed: 0,streets,zipcodes
0,beach street,10304.0
1,little clove road,
2,narrows road south,10305.0
3,ocean terrace,10301.0
4,bay st,10304.0
...,...,...
892,west 53rd street,
893,west 55th street,
894,west 56th street,
895,west 57th street,


In [41]:
# Persist the street/zip table without the index column. This is the same
# file the lookup reads as its manual table, so saved zip codes are reused.
street_loc = "data/example_streets.csv"
df.to_csv(path_or_buf=street_loc, index=False)
print("streets saved for later at:", street_loc)

streets saved for later at: data/example_streets.csv
