# Geocoding
This notebook takes each entity's name and address and uses Google's Geocoding API to obtain latitude and longitude information.

In [1]:
import os
import pickle
import time

import googlemaps
import pandas as pd
from tqdm import tqdm

# Load the pickled entity table; the geocoding loop reads the 'name' and
# 'map_add' columns of each row to build its queries.
df = pd.read_pickle(filepath_or_buffer='DPH_entities_df.pkl')

In [15]:
# Geocode every entity with the Google Maps client and cache the raw responses.
#
# The API key is read from the environment instead of being written into the
# notebook: a key stored in a cell is exposed to anyone who can read the file
# or its history.
# NOTE(review): the key that was previously hardcoded here should be
# revoked/rotated, since it has already been shared with this notebook.
api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
if not api_key:
    raise RuntimeError(
        'Set the GOOGLE_MAPS_API_KEY environment variable before running this cell'
    )

GEOCODE_PAUSE_SECONDS = 30 / 1000  # There's a rate limit

gmaps = googlemaps.Client(key=api_key)
geocoded = []

for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    place = row['map_add']
    name = row['name']
    # Query by "name, address" so both fields are sent to the geocoder
    query = f'{name}, {place}'
    try:
        geocode_result = gmaps.geocode(query)
        # Keep the row label next to the raw response so the results can be
        # matched back to `df` later
        entry = {
            'index': index,
            'result': geocode_result
        }
        geocoded.append(entry)
    except Exception as e:
        # Best effort: report the failing row and carry on with the rest
        print(e, row)
    time.sleep(GEOCODE_PAUSE_SECONDS)

# Cache these results, so we don't have to call them again
with open('geocoded_ggl.pkl', 'wb') as pick:
    pickle.dump(geocoded, pick)

# Keep only the first (top) candidate of each geocoding response; that is
# considered good enough here.
top_res = []
for g in geocoded:
    candidates = g['result']
    if not candidates:
        # An empty candidate list has no top result; indexing it with [0]
        # would raise IndexError and abort the whole cell. Skip the entry so
        # the row simply ends up without coordinates.
        print('no geocoding result for index', g['index'])
        continue

    res = candidates[0]
    try:
        location = res['geometry']['location']
        top_res.append({
            'index': g['index'],
            'lat': location['lat'],
            'lng': location['lng'],
            'place_id': res['place_id'],
            'types': res['types'],
        })
    except (KeyError, TypeError) as err:
        # Report the malformed candidate and move on; stopping the loop here
        # would silently drop every entry that comes after it.
        print(err, res)

# Turn the extracted records into a frame labelled by the original row index,
# place its columns next to the entity columns, and persist the combined frame.
df_geocoded = pd.DataFrame.from_records(data=top_res, index='index')
new_df = pd.concat(objs=[df, df_geocoded], axis='columns')
new_df.to_pickle(path='geocoded_df.pkl')

100%|██████████| 7872/7872 [17:46<00:00,  7.38it/s]


In [3]:
# Display the combined frame (entity columns plus lat, lng, place_id, types)
new_df

Unnamed: 0,dph_id,map_add,name,score,date,lat,lng,place_id,types
0,MTIyOTQyMTA=,"175 MEAD RD, DECATUR, GA 30030",OAKHURST ELEMENTARY SCHOOL,100,03-28-2023,33.763843,-84.306035,ChIJRfrKdi4H9YgRwOiI0-YFgbI,"[establishment, point_of_interest, primary_sch..."
1,MTIyOTQyOTM=,"3100 MOUNT OLIVE DR, DECATUR, GA 30033",DRUID HILLS MIDDLE SCHOOL,100,03-28-2023,33.819316,-84.273158,ChIJI-EIKIsH9YgRtfevY250O74,"[establishment, point_of_interest, school]"
2,MTIyOTk3MzY=,"5330 SNAPFINGER WOODS DR, DECATUR, GA 30035","SPORTZ CENTER ACADEMY, INC FAK",90,03-28-2023,33.709944,-84.172128,ChIJc5eRemSs9YgRlfB8wal4mlQ,"[establishment, point_of_interest]"
3,MTIyOTk4MjU=,"1707 CHURCH STREET, SUITE C-7, DECATUR, GA 30033",CHAT PATTI INDIAN VEGETARIAN RESTAURANT,91,03-28-2023,33.797030,-84.280828,ChIJ8bIRYDkG9YgR7BY43mZj93Y,"[establishment, food, point_of_interest, resta..."
4,MTIzMDEzMzc=,"4306 LAWRENCEVILLE HIGHWAY, SUITE 140, TUCKER,...",FIVE GUYS,97,03-28-2023,33.855507,-84.209795,ChIJB4iT1T2m9YgRnW5VlvLqF_I,"[establishment, food, meal_takeaway, point_of_..."
...,...,...,...,...,...,...,...,...,...
7867,MzEzNjg1MQ==,"299 NORTH HIGHLAND AVE STE F, ATLANTA, GA 30307",Bread & Butterfly,99,05-28-2020,33.762429,-84.358166,ChIJHwhNYAEE9YgRCs-R7L-_uWI,"[cafe, establishment, food, point_of_interest,..."
7868,OTQ5MjM3NQ==,"730 BARNETT ST NE STE A, ATLANTA, GA 30306",Plant Based Pizzeria- Mobile,100,05-12-2020,33.774294,-84.358944,ChIJW31yfH8F9YgRdtbpWzsNBe0,"[establishment, food, point_of_interest, resta..."
7869,Nzk1NjQzMw==,"296 NORTHSIDE DR SE STE B, ATLANTA, GA 30315",Boston Fish Supreme,92,05-11-2020,33.746267,-84.405973,ChIJ59NmO3AD9YgRJsL3BTxHUVk,"[establishment, food, point_of_interest, resta..."
7870,Nzk1Mzc1MQ==,"1005 CRESTLINE PKWY, ATLANTA, GA 30328",SpringHill Suites By Marriott Atlanta Perimete...,99,05-08-2020,33.929352,-84.356719,ChIJM9VxgKwO9YgRyAdoKEnhMeU,"[establishment, lodging, point_of_interest]"
