In [1]:
import json
import os
import time
from pathlib import Path

import geocoder
import pandas as pd
from flatten_json import flatten
from geocodio.client import GeocodioClient
from pandas.io.json import json_normalize

# Never truncate cell contents when a frame is displayed. None is the
# supported "no limit" value; the legacy -1 sentinel was deprecated in
# pandas 1.0 and is rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)

# Take the Geocodio API key from the GEOCODIO_API_KEY environment variable so
# the secret does not have to be stored in the notebook. The literal
# placeholder is kept only as a fallback for the edit-in-place workflow.
client = GeocodioClient(os.environ.get('GEOCODIO_API_KEY', '{API Key}'))

In [6]:
"""
Variables to set
"""
# Name of the input CSV file; it is read from source_path and must contain
# 'Latitude' and 'Longitude' columns
source_filename = 'AllData.csv'

# Name of the output CSV file; it is written to report_path
output_filename = 'AllData_output.csv'

In [3]:
# Root folders (relative paths, resolved against the current working directory)
base_path = Path('../data/')
report_path = Path('../reports/')

# Per-stage sub-folders of the data directory
source_path = base_path / 'source/'
interim_path = base_path / 'interim/'
processed_path = base_path / 'processed/'

def get_latlong(df):
    """Collect the coordinates of every row as (Latitude, Longitude) tuples.

    Arguments:
        df {dataframe} -- source data; must have 'Latitude' and
        'Longitude' columns.  Any other columns are ignored.

    Returns:
        list -- one (Latitude, Longitude) tuple per row, in row order.
    """
    coordinates = df[['Latitude', 'Longitude']]
    # index=False / name=None yields plain tuples holding only the two values
    row_tuples = coordinates.itertuples(index=False, name=None)
    return list(row_tuples)

def get_geoinfo(latlong_list):
    """Reverse-geocode the lat/long list with Geocodio and save the response.

    The response is dumped as JSON to a timestamped file
    (addresses_YYYYMMDD-HHMMSS.json) inside interim_path.

    Arguments:
        latlong_list {list} -- returned list from get_latlong()

    Returns:
        str -- the name (not the full path) of the file that was saved, or
        None when the file could not be written.
    """
    addresses = client.reverse(latlong_list)

    address_name = 'addresses_' + time.strftime("%Y%m%d-%H%M%S") + '.json'
    address_path = interim_path / address_name

    try:
        with open(address_path, 'w') as outfile:
            json.dump(addresses, outfile)
    except Exception as err:
        # Saving stays best-effort, but only ordinary errors are handled:
        # KeyboardInterrupt / SystemExit now propagate, and the cause is
        # reported instead of being discarded.
        print('Error outputting file.', err)
        return None
    return address_name

def flat_zips(df_addresses):
    """Flatten the top Geocodio match of every row into a single-level dict.

    Each entry of the 'results' column is expected to be a list of nested
    result dictionaries.  Only the first result of each list is kept and
    flattened; per the original author's note, the top result is the most
    accurate.

    A row whose 'results' entry is empty or is not a list contributes an empty
    dict, so the returned list always has exactly one item per input row and
    stays positionally aligned with the source data.

    Arguments:
        df_addresses {dataframe} -- dataframe of the Geocodio information;
        must have a 'results' column.

    Returns:
        list -- one flattened dictionary per row of df_addresses.
    """
    address_list = []
    for results in df_addresses['results']:
        # Guard against "no match" (empty list) and missing values (None/NaN),
        # both of which used to abort the whole run.
        has_match = isinstance(results, (list, tuple)) and len(results) > 0
        # Flatten only the result that is kept instead of every candidate.
        address_list.append(flatten(results[0]) if has_match else {})
    return address_list

def flatzips_to_df(listofzips):
    """Build a dataframe from the flattened Geocodio results and shorten the
    column names.

    Arguments:
        listofzips {list} -- returned list from flat_zips()

    Returns:
        dataframe -- Geocodio results, one row per dictionary, with
        shortened column headers.
    """
    df = pd.DataFrame(listofzips)

    # Keep only the text after the last '_' of each header; headers without
    # an '_' come through unchanged.
    # NOTE(review): distinct keys that share a final segment end up with the
    # same column name -- confirm whether duplicate headers are acceptable.
    short_names = [header.rsplit('_', 1)[-1] for header in df.columns]
    df.columns = short_names
    return df

def create_report(source, zips, filename):
    """Join the source data with the Geocodio results column-wise and write
    the combined table as a csv file under report_path.

    Arguments:
        source {dataframe} -- dataframe of source data
        zips {dataframe} -- returned dataframe from flatzips_to_df()
        filename {str} -- name of the output file

    Returns:
        dataframe -- dataframe of combined results.
    """
    # axis=1 places the two frames side by side, matching rows on their index
    combined = pd.concat([source, zips], axis=1, sort=False)

    destination = report_path / filename
    combined.to_csv(destination)
    return combined

In [7]:
# Create DataFrame from source file
df_source = pd.read_csv(source_path / source_filename)

# Run list of lat longs through Geocodio API and save them to a file
# Retrieve filename
geofile_name = get_geoinfo(get_latlong(df_source))
if geofile_name is None:
    # get_geoinfo() returns None when the interim file could not be written;
    # stop here with a clear message instead of failing on the path join below.
    raise RuntimeError('Geocodio results were not saved; see the error printed above.')
geofile_path = interim_path / geofile_name

# Open file and import into a dataframe
df_geo = pd.read_json(geofile_path)

# Flatten json structure and return the most accurate record per lat/long
df_zips = flatzips_to_df(flat_zips(df_geo))

# Move interim file to processed
geofile_path.replace(processed_path / geofile_name)

# Create output file (left as the last expression so the combined frame is displayed)
create_report(df_source, df_zips, output_filename)

Unnamed: 0,Longitude,Latitude,vin cnt,accuracy,type,city,country,county,street,number,postdirectional,predirectional,state,street.1,suffix,zip,address,lat,lng,source
0,-75.211975,40.138763,101,0.99,nearest_street,Fort Washington,US,Montgomery County,Washington Ln,125,,,PA,Washington,Ln,19034,"125 Washington Ln, Fort Washington, PA 19034",40.139083,-75.211969,TIGER/Line® dataset from the US Census Bureau
1,-75.211805,40.136823,861,0.98,nearest_street,Fort Washington,US,Montgomery County,Pennsylvania Ave,352,,,PA,Pennsylvania,Ave,19034,"352 Pennsylvania Ave, Fort Washington, PA 19034",40.137473,-75.211738,TIGER/Line® dataset from the US Census Bureau
2,-75.209047,40.135473,24,0.97,nearest_street,Fort Washington,US,Montgomery County,Pennsylvania Ave,449,,,PA,Pennsylvania,Ave,19034,"449 Pennsylvania Ave, Fort Washington, PA 19034",40.134989,-75.207590,TIGER/Line® dataset from the US Census Bureau
3,-75.206928,40.133964,300,0.98,nearest_street,Fort Washington,US,Montgomery County,Pennsylvania Ave,507,,,PA,Pennsylvania,Ave,19034,"507 Pennsylvania Ave, Fort Washington, PA 19034",40.134554,-75.206862,TIGER/Line® dataset from the US Census Bureau
4,-75.174967,39.953187,87,1.00,rooftop,Philadelphia,US,Philadelphia County,S 21st St,11,,S,PA,21st,St,19103,"11 S 21st St, Philadelphia, PA 19103",39.953071,-75.174970,Philadelphia
5,-75.160820,39.825797,199,1.00,rooftop,Woodbury,US,Gloucester County,Mantua Pike,663,,,NJ,Mantua,Pike,08096,"663 Mantua Pike, Woodbury, NJ 08096",39.825612,-75.160222,"NJ Office of Information Technology (NJOIT), Office of Geographic Information Systems (OGIS)"
6,-75.159701,39.828153,157,1.00,rooftop,Woodbury,US,Gloucester County,Mantua Pike,555,,,NJ,Mantua,Pike,08096,"555 Mantua Pike, Woodbury, NJ 08096",39.828058,-75.159234,"NJ Office of Information Technology (NJOIT), Office of Geographic Information Systems (OGIS)"
7,-75.158825,39.830905,169,1.00,rooftop,Woodbury,US,Gloucester County,Mantua Pike,487,,,NJ,Mantua,Pike,08096,"487 Mantua Pike, Woodbury, NJ 08096",39.830720,-75.158156,"NJ Office of Information Technology (NJOIT), Office of Geographic Information Systems (OGIS)"
8,-75.157909,39.832010,645,1.00,rooftop,Woodbury,US,Gloucester County,Mantua Pike,439,,,NJ,Mantua,Pike,08096,"439 Mantua Pike, Woodbury, NJ 08096",39.831954,-75.157667,"NJ Office of Information Technology (NJOIT), Office of Geographic Information Systems (OGIS)"
9,-75.157338,40.458386,8,1.00,rooftop,Ottsville,US,Bucks County,Farm School Rd,3701,,,PA,Farm School,Rd,18942,"3701 Farm School Rd, Ottsville, PA 18942",40.458395,-75.157340,Bucks
