#### Author: Arash Farahani
#### Date: 8/1/2017

### Usage
This program goes over all the Manhattan GBATed DOL-QCEW files and checks the error sheet in Google Maps API to retreive Formatted Addresses and Lat-Lon. The process is repeated twice: first with the original address, and next with the Trade/Legal name added to the address. 

In [232]:
import googlemaps
from datetime import datetime
import pandas as pd
import re, os
import configparser

config = configparser.RawConfigParser()
config.read('API_Keys.cfg')
google_api_key = config.get('Google', 'QCEW_API_Key')

gmaps = googlemaps.Client(key=google_api_key)

#test
geocode_result=gmaps.geocode("BETH ISRAEL MEDICAL CENTER, 1ST AVE AT 16TH ST, New York, NY 10003")
print(geocode_result[0]['geometry']['location']['lat'],geocode_result[0]['geometry']['location']['lng'])
print(geocode_result)


40.7333526 -73.9823138
[{'address_components': [{'long_name': 'First Avenue at 16th Street', 'short_name': 'First Avenue at 16th Street', 'types': ['route']}, {'long_name': 'Manhattan', 'short_name': 'Manhattan', 'types': ['political', 'sublocality', 'sublocality_level_1']}, {'long_name': 'New York', 'short_name': 'New York', 'types': ['locality', 'political']}, {'long_name': 'New York County', 'short_name': 'New York County', 'types': ['administrative_area_level_2', 'political']}, {'long_name': 'New York', 'short_name': 'NY', 'types': ['administrative_area_level_1', 'political']}, {'long_name': 'United States', 'short_name': 'US', 'types': ['country', 'political']}, {'long_name': '10003', 'short_name': '10003', 'types': ['postal_code']}], 'formatted_address': 'First Avenue at 16th Street, New York, NY 10003, USA', 'geometry': {'location': {'lat': 40.7333526, 'lng': -73.9823138}, 'location_type': 'GEOMETRIC_CENTER', 'viewport': {'northeast': {'lat': 40.73470158029149, 'lng': -73.980964

In [233]:
directory = os.fsencode("Z:/EAD/DOL Data/QCEW to RPAD address merge/forgbat/Manhattan/")

for file in os.listdir(directory):
    filename = os.fsdecode(file)
    if filename == "forgbatmn06.xlsx":    #.endswith(".xlsx"):
        print(os.path.join(directory,filename.encode()))
        adds=pd.ExcelFile(os.path.join(directory,filename.encode()).decode())
        print(adds.sheet_names)
        df = adds.parse('GBATErr')
        df.head()
        
        
        df.fillna('')
        # trade2 is trade name when available, and legal name when not.
        df['trade2'] = ""
        df.loc[df.trade.fillna('')!= '', 'trade2'] = df.trade 
        df.loc[df.trade.fillna('')== '', 'trade2'] = df.legal
        # trade(or legal) name + Original Address + City, State, Zip
        df['NameAddress']= df.trade2.fillna('')+', '+df.originaladdress.fillna('')+', New York, NY '+df.pzip.apply(str)
        df['Address']= df.originaladdress.fillna('')+', New York, NY '+df.pzip.apply(str)
        df['NameAddress'].head()
        
        #df.reset_index(inplace=True)
        df['Gformatted_address0']= ""
        df['Glat0']= 0
        df['Glon0']= 0
        df['GPartial0']= False
        df['Gtypes0']=""
        df['Gformatted_address1']= ""
        df['Glat1']= 0
        df['Glon1']= 0
        df['GPartial1']= False
        df['Gtypes1']=""
        i=-1
        for var in ['Address','NameAddress']:
            i+=1
            for index, row in df.iterrows():
                if index<10:     #df.count()['index']:
                    geocode_result=gmaps.geocode(row[var])
                    if len(geocode_result)>0:
                        if 'partial_match' in geocode_result:
                            df.loc[df.index == index,['GPartial'+ str(i),'Gformatted_address'+ str(i), 'Glat'+ str(i), 'Glon'+ str(i), 'Gtypes'+ str(i)]]=(
                                                 geocode_result[0]['partial_match'], geocode_result[0]['formatted_address']
                                                 ,geocode_result[0]['geometry']['location']['lat'],geocode_result[0]['geometry']['location']['lng'],geocode_result[0]['types'])
                        else:
                            df.loc[df.index == index,['Gformatted_address'+ str(i), 'Glat'+ str(i), 'Glon'+ str(i), 'Gtypes'+ str(i)]]=(
                                geocode_result[0]['formatted_address'],geocode_result[0]['geometry']['location']['lat']
                                ,geocode_result[0]['geometry']['location']['lng'],geocode_result[0]['types'])
                    else:
                        df.loc[df.index == index,['Gformatted_address'+ str(i), 'Glat'+ str(i), 'Glon'+ str(i), 'Gtypes'+ str(i)]]=(
                                                                                                    'Not Found',0,0,[0])

            # Prepare for Second Run of GBAT

            pat= r"([0-9\-]+)(.*?)(,.*)"
            repl1 = lambda m: m.group(1)
            repl2 = lambda m: m.group(2)
            df['Gnumber'+ str(i)]=df['Gformatted_address'+ str(i)].str.replace(pat, repl1)
            df['Gstreet'+ str(i)]=df['Gformatted_address'+ str(i)].str.replace(pat, repl2)
            df['boro']=1
            df[['Gnumber'+ str(i),'Gformatted_address'+ str(i)]].head(10)
        # Save The Results
        pd.ExcelFile(os.path.join(directory,filename.encode()).decode())
        writer = pd.ExcelWriter(os.path.join(directory,('_GoogOut_'+filename).encode()).decode())
        df.to_excel(writer,'Sheet1')
        print('Just Saved: ', '_GoogOut_'+filename )
        #df2.to_excel(writer,'Sheet2')
        writer.save()
    else:
        continue


## SOME OTHER STUFF
# Look up an address with reverse geocoding
#reverse_geocode_result = gmaps.reverse_geocode((40.714224, -73.961452))

# Request directions via public transit
#now = datetime.now()
#directions_result = gmaps.directions("Sydney Town Hall",
#                                     "Parramatta, NSW",
#                                     mode="transit",
#                                     departure_time=now)

## Some Other Tests
#geocode_result=gmaps.geocode("BETH ISRAEL MEDICAL CENTER, 1ST AVE AT 16TH ST, New York, NY 10003")
#print(geocode_result[0]['geometry']['location']['lat'],geocode_result[0]['geometry']['location']['lng'])
#print(geocode_result[0]['partial_match'])
#print(geocode_result[0])



b'Z:/EAD/DOL Data/QCEW to RPAD address merge/forgbat/Manhattan/forgbatmn06.xlsx'
['forgbatmn06', 'GBATOut', 'GBATErr', 'GBATExt']


ValueError: Length of values does not match length of index

In [235]:
#df.reset_index(inplace=True)
df['Gformatted_address0']= ""
df['Glat0']= 0
df['Glon0']= 0
df['GPartial0']= False
df['Gtypes0']=""
df['Gformatted_address1']= ""
df['Glat1']= 0
df['Glon1']= 0
df['GPartial1']= False
df['Gtypes1']=""
i=-1
for var in ['Address','NameAddress']:
    i+=1
    for index, row in df.iterrows():
        if index<10:     #df.count()['index']:
            geocode_result=gmaps.geocode(row[var])
            if len(geocode_result)>0:
                if 'partial_match' in geocode_result:
                    df.loc[df.index == index,['GPartial'+ str(i),'Gformatted_address'+ str(i), 'Glat'+ str(i), 'Glon'+ str(i), 'Gtypes'+ str(i)]]=(
                                         geocode_result[0]['partial_match'], geocode_result[0]['formatted_address']
                                         ,geocode_result[0]['geometry']['location']['lat'],geocode_result[0]['geometry']['location']['lng'],str(geocode_result[0]['types']))
                else:
                    df.loc[df.index == index,['Gformatted_address'+ str(i), 'Glat'+ str(i), 'Glon'+ str(i), 'Gtypes'+ str(i)]]=(
                        geocode_result[0]['formatted_address'],geocode_result[0]['geometry']['location']['lat']
                        ,geocode_result[0]['geometry']['location']['lng'],str(geocode_result[0]['types']))
            else:
                df.loc[df.index == index,['Gformatted_address'+ str(i), 'Glat'+ str(i), 'Glon'+ str(i), 'Gtypes'+ str(i)]]=(
                                                                                            'Not Found',0,0,str([0]))

    # Prepare for Second Run of GBAT

    pat= r"([0-9\-]+)(.*?)(,.*)"
    repl1 = lambda m: m.group(1)
    repl2 = lambda m: m.group(2)
    df['Gnumber'+ str(i)]=df['Gformatted_address'+ str(i)].str.replace(pat, repl1)
    df['Gstreet'+ str(i)]=df['Gformatted_address'+ str(i)].str.replace(pat, repl2)
    df['boro']=1
    df[['Gnumber'+ str(i),'Gformatted_address'+ str(i)]].head(10)
# Save The Results
pd.ExcelFile(os.path.join(directory,filename.encode()).decode())
writer = pd.ExcelWriter(os.path.join(directory,('_GoogOut_'+filename).encode()).decode())
df.to_excel(writer,'Sheet1')
print('Just Saved: ', '_GoogOut_'+filename )
#df2.to_excel(writer,'Sheet2')
writer.save()



## SOME OTHER STUFF
# Look up an address with reverse geocoding
#reverse_geocode_result = gmaps.reverse_geocode((40.714224, -73.961452))

# Request directions via public transit
#now = datetime.now()
#directions_result = gmaps.directions("Sydney Town Hall",
#                                     "Parramatta, NSW",
#                                     mode="transit",
#                                     departure_time=now)

## Some Other Tests
#geocode_result=gmaps.geocode("BETH ISRAEL MEDICAL CENTER, 1ST AVE AT 16TH ST, New York, NY 10003")
#print(geocode_result[0]['geometry']['location']['lat'],geocode_result[0]['geometry']['location']['lng'])
#print(geocode_result[0]['partial_match'])
#print(geocode_result[0])



Just Saved:  _GoogOut_forgbatmn06.xlsx
