In [1]:
import googlemaps
import numpy as np
import pandas as pd
import pickle

In [2]:
# Read the private Google Maps API key from the first line of the key file.
# NOTE(review): `keyfile` must be defined before this cell runs; its assignment
# is left commented out below, as in the original notebook.
# keyfile = '/dartfs/rc/lab/P/Psych81.09/course/shared/gmaps.api.key'
with open(keyfile, 'r') as fd:
    # strip surrounding whitespace instead of slicing off the last character,
    # so a key file without a trailing newline does not lose part of the key
    key = fd.readline().strip()

In [3]:
# create the Google Maps client used for the geocoding requests;
# the API key read from the key file is passed in for authentication
gmaps = googlemaps.Client(key=key)

In [4]:
# load the violations table, using the saved 'Unnamed: 0' column as the index;
# 'Issue Date' and 'Violation Time' are parsed together into 'Issue Datetime'
# NOTE(review): combining date columns via a dict in parse_dates appears to be
# deprecated in recent pandas releases — confirm against the installed version
df = pd.read_csv(
    'violations_info_full.csv',
    index_col='Unnamed: 0',
    parse_dates={'Issue Datetime': ['Issue Date', 'Violation Time']},
)

In [5]:
# geocoding results, keyed by dataframe row number: {row number : geodata}
geodata = {}

In [35]:
def _format_address(row):
    """Build the address string to geocode for one row of the violations table.

    Two forms are produced:
      * '<House Number> <Street Name>, <City & State>' when the house number
        field is a string containing at least one digit
      * '<Street Name> and <cross street>, <City & State>' when the row has an
        'Intersecting Street' string instead

    Returns None when the row does not hold the string fields needed for
    either form (e.g. only a street name, or no location info at all).
    """
    house = row['House Number']
    street = row['Street Name']
    city = row['City & State']

    # every form needs a street and a city; missing fields are not str
    if not (isinstance(street, str) and isinstance(city, str)):
        return None

    # The house number field can hold a non-numeric marker (e.g. 'W') on rows
    # that are located by an intersection, so only use it when it has a digit.
    # (The previous code called .isdigit() but discarded the result.)
    if isinstance(house, str) and any(ch.isdigit() for ch in house):
        return f'{house} {street}, {city}'

    cross = row['Intersecting Street']
    if isinstance(cross, str):
        # keep what follows the first 'of' ('07ft N/of Jefferson' -> 'Jefferson');
        # splitting on every 'of' would truncate street names that contain it
        cross_street = cross.split('of', 1)[-1].strip()
        return f'{street} and {cross_street}, {city}'

    return None


# Geocode every row that does not yet have an entry in `geodata`.
# NOTE: this issues one Google Maps API request per remaining row.
for ix, row in df.iterrows():

    # skip rows already done
    if ix in geodata:
        continue

    if not ix % 100:
        print(f'running row {ix}')

    # format address for google maps API call
    fmt_addr = _format_address(row)

    # some entries just have a street name, no house number or intersecting
    # street. Some have no info at all
    if fmt_addr is None:
        print(f'No data for row {ix}')
        continue

    try:
        addr_info = gmaps.geocode(fmt_addr)

    # identify rows where the API call fails, but don't stop the loop;
    # `Exception` (not a bare except) so the loop can still be interrupted
    except Exception as err:
        print(f'Error in row {ix}: {err!r}')
        continue

    # an empty result is treated as "no matching address found"
    if addr_info:
        geodata[ix] = addr_info
    else:
        print(f'No data returned for row {ix}')


No data for row 2805
No data for row 4256
No data for row 4259
running row 4300
running row 4400
running row 4500
running row 4600
No data returned for row 4645
running row 4700
running row 4800
running row 4900
running row 5000
running row 5100
running row 5200
running row 5300
running row 5400
running row 5500
running row 5600


In [36]:
# save out dictionary with geodata
# with open('new_geodata.p', 'wb') as f:
#     pickle.dump(geodata, f)

In [5]:
# restore the geodata dict from its pickle file on disk
# NOTE: unpickling can execute arbitrary code — only load files you trust
with open('new_geodata.p', mode='rb') as f:
    geodata = pickle.load(f)

In [7]:
# inspect the geodata stored under key 0
geodata[0]

[{'address_components': [{'long_name': 'Saint Nicholas Avenue & Jefferson Street',
    'short_name': 'St Nicholas Ave & Jefferson St',
    'types': ['intersection']},
   {'long_name': 'Brooklyn',
    'short_name': 'Brooklyn',
    'types': ['political', 'sublocality', 'sublocality_level_1']},
   {'long_name': 'Kings County',
    'short_name': 'Kings County',
    'types': ['administrative_area_level_2', 'political']},
   {'long_name': 'New York',
    'short_name': 'NY',
    'types': ['administrative_area_level_1', 'political']},
   {'long_name': 'United States',
    'short_name': 'US',
    'types': ['country', 'political']},
   {'long_name': '11237', 'short_name': '11237', 'types': ['postal_code']}],
  'formatted_address': 'St Nicholas Ave & Jefferson St, Brooklyn, NY 11237, USA',
  'geometry': {'location': {'lat': 40.708059, 'lng': -73.922539},
   'location_type': 'GEOMETRIC_CENTER',
   'viewport': {'northeast': {'lat': 40.70940798029149,
     'lng': -73.92119001970849},
    'southwest'

In [15]:
# copy zip code, latitude and longitude from the first geocoding match of
# each entry in `geodata` into the matching dataframe row
for row_id, matches in geodata.items():
    best_match = matches[0]

    # the zip code is the short name of the component typed 'postal_code'
    for part in best_match['address_components']:
        if 'postal_code' not in part['types']:
            continue
        df.loc[row_id, 'Zip Code'] = int(part['short_name'])

    coords = best_match['geometry']['location']
    df.loc[row_id, 'Lat'] = coords['lat']
    df.loc[row_id, 'Long'] = coords['lng']

In [16]:
# display the dataframe to check the 'Zip Code', 'Lat' and 'Long' columns
df

Unnamed: 0,Issue Datetime,Summons Number,Violation Code,Violation Location,Violation Precinct,House Number,Street Name,Intersecting Street,City & State,Ticket Cost,Ticket Type,Zip Code,Lat,Long
0,2019-01-14 12:00:00,8678224277,14,83,83,W,St Nicholas Ave,07ft N/of Jefferson,"Brooklyn, NY",115.0,General No Standing: Standing or parking where...,11237.0,40.708059,-73.922539
1,2019-01-11 11:29:00,8678223868,21,90,90,N,Wythe Ave,08ft W/of Rutledge S,"Brooklyn, NY",45.0,Street Cleaning: No parking where parking is n...,11249.0,40.700221,-73.959873
2,2019-01-13 15:05:00,8699568241,50,79,79,S,Macon St,0ft E/of Arlington P,"Brooklyn, NY",115.0,"Stopping, standing or parking in a crosswalk. ...",11216.0,40.680918,-73.951157
3,2019-01-11 08:37:00,8701817176,50,90,90,N,S 1st St,0ft E/of Driggs Ave,"Brooklyn, NY",115.0,"Stopping, standing or parking in a crosswalk. ...",11211.0,40.713345,-73.960163
4,2019-01-14 12:37:00,8695487570,50,90,90,S,Debevoise St,0ft E/of Graham Ave,"Brooklyn, NY",115.0,"Stopping, standing or parking in a crosswalk. ...",11206.0,40.701637,-73.942267
5,2019-01-12 06:52:00,8715522556,50,81,81,S,Jefferson Ave,0ft E/of Howard Ave,"Brooklyn, NY",115.0,"Stopping, standing or parking in a crosswalk. ...",11221.0,40.686648,-73.920757
6,2019-01-12 15:33:00,8598239392,50,79,79,N,Fulton St,0ft E/of Kingston Av,"Brooklyn, NY",115.0,"Stopping, standing or parking in a crosswalk. ...",11216.0,40.679938,-73.941209
7,2019-01-12 08:42:00,8582151032,50,81,81,S,Pulaski St,0ft E/of Marcus Garv,"Brooklyn, NY",115.0,"Stopping, standing or parking in a crosswalk. ...",11221.0,40.693415,-73.940090
8,2019-01-11 15:31:00,8611202790,50,90,90,S,Rutledge St,0ft E/of Marcy Ave,"Brooklyn, NY",115.0,"Stopping, standing or parking in a crosswalk. ...",11206.0,40.703747,-73.954442
9,2019-01-14 14:39:00,8689805205,50,90,90,S,Harrison Ave,0ft E/of Middleton S,"Brooklyn, NY",115.0,"Stopping, standing or parking in a crosswalk. ...",11206.0,40.703287,-73.950641


In [17]:
# write the dataframe, including the geodata columns, to
# 'violations_final.csv' in the working directory
df.to_csv('violations_final.csv')

In [11]:
# example of google maps API data format: the entry stored under key 4
geodata[4]

[{'address_components': [{'long_name': '133',
    'short_name': '133',
    'types': ['street_number']},
   {'long_name': 'Frost Street', 'short_name': 'Frost St', 'types': ['route']},
   {'long_name': 'Williamsburg',
    'short_name': 'Williamsburg',
    'types': ['neighborhood', 'political']},
   {'long_name': 'Brooklyn',
    'short_name': 'Brooklyn',
    'types': ['political', 'sublocality', 'sublocality_level_1']},
   {'long_name': 'Kings County',
    'short_name': 'Kings County',
    'types': ['administrative_area_level_2', 'political']},
   {'long_name': 'New York',
    'short_name': 'NY',
    'types': ['administrative_area_level_1', 'political']},
   {'long_name': 'United States',
    'short_name': 'US',
    'types': ['country', 'political']},
   {'long_name': '11211', 'short_name': '11211', 'types': ['postal_code']}],
  'formatted_address': '133 Frost St, Brooklyn, NY 11211, USA',
  'geometry': {'bounds': {'northeast': {'lat': 40.7181074, 'lng': -73.9462073},
    'southwest': {'

In [9]:
# sanity check: the first five rows should now carry the geodata columns
df.head()

Unnamed: 0,Summons Number,Violation Code,Violation Time,House Number,Street Name,City & State,Zip Code,Lat,Long
0,8725653684,71,0319P,235,Bedford Ave,"Brooklyn, NY",11211,40.715957,-73.959527
1,8725653696,20,0322P,235,Bedford Ave,"Brooklyn, NY",11211,40.715957,-73.959527
2,8725653702,20,0324P,235,Bedford Ave,"Brooklyn, NY",11211,40.715957,-73.959527
3,8670481194,21,1138A,133,Frost St,"Brooklyn, NY",11211,40.718053,-73.94625
4,8670481212,21,1140A,133,Frost St,"Brooklyn, NY",11211,40.718053,-73.94625
