In [1]:
import json
import os
import time
from pathlib import Path

import geocoder
import pandas as pd
from flatten_json import flatten
from geocodio.client import GeocodioClient
from pandas.io.json import json_normalize

# Show full cell contents when displaying DataFrames (no column-width truncation).
# pandas >= 1.0 spells "no limit" as None and newer releases reject the old -1
# sentinel outright; older releases only accept an integer, so fall back to -1
# when None is refused.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

# Geocodio API client used by get_geoinfo(). The key is taken from the
# GEOCODIO_API_KEY environment variable so it does not have to be pasted into
# (and saved with) the notebook; the original placeholder is kept as the
# fallback so editing the literal in place still works.
client = GeocodioClient(os.environ.get('GEOCODIO_API_KEY', '<API CODE GOES HERE>'))

In [2]:
"""
Variables to set
"""
# Name of the source CSV file. It is read from source_path and must contain
# 'Latitude' and 'Longitude' columns (get_latlong() selects them by name).
source_filename = '2018data_sample.csv'

# Name of the output CSV file. It is written to report_path by create_report().
output_filename = '2018data_output.csv'

In [3]:
# Directory layout (relative paths): reports live beside the data folder, and
# the data folder is split into source / interim / processed stages.
base_path = Path('../data/')
report_path = Path('../reports/')
source_path, interim_path, processed_path = (
    base_path.joinpath(stage) for stage in ('source/', 'interim/', 'processed/')
)

def get_latlong(df):
    """Pull the coordinate pairs out of a dataframe.

    Arguments:
        df {dataframe} -- source data; must have 'Latitude' and 'Longitude'
        columns. Any other columns are ignored.

    Returns:
        list -- one (Latitude, Longitude) tuple per row, in row order.
    """
    coordinates = df[['Latitude', 'Longitude']]
    # name=None yields plain tuples; index=False keeps the row label out of them.
    return list(coordinates.itertuples(index=False, name=None))

def get_geoinfo(latlong_list):
    """Pass the lat long list to Geocodio to decode, then save the response as
    a timestamped JSON file in the interim directory.

    Arguments:
        latlong_list {list} -- returned list from get_latlong()

    Returns:
        str -- The name (not the full path) of the file that was saved.

    Raises:
        OSError -- the interim directory or the file could not be created
        or written.
        TypeError, ValueError -- the response could not be serialised to JSON.
    """
    addresses = client.reverse(latlong_list)

    # Second-resolution timestamp keeps successive runs from overwriting
    # each other's interim files.
    address_name = 'addresses_' + time.strftime("%Y%m%d-%H%M%S") + '.json'
    address_path = interim_path / address_name

    try:
        # Create the interim directory on first use instead of failing the write.
        interim_path.mkdir(parents=True, exist_ok=True)
        with open(address_path, 'w') as outfile:
            json.dump(addresses, outfile)
    except (OSError, TypeError, ValueError):
        # Keep the original message, but re-raise: returning None here would
        # only surface later as an unrelated error when the caller builds a
        # path from the missing filename.
        print('Error outputting file.')
        raise
    return address_name

def flat_zips(df_addresses):
    """Take the multi-level json results loaded into a dataframe from Geocodio
    and flatten them to a list of dictionary values.  Retrieve only the first
    result from each lat/long list, as each list can have multiple.  The top
    result is the most accurate.

    Arguments:
        df_addresses {dataframe} -- dataframe of the Geocodio information; must
        have a 'results' column holding, per row, the list of result
        dictionaries for one coordinate.

    Returns:
        list -- one flattened dictionary per input row, in row order.  A row
        whose 'results' value is empty or not a list contributes an empty
        dictionary, so the output always has as many entries as the input has
        rows and stays aligned with the source data.
    """
    address_list = []
    # Only the 'results' column is used, so iterate it directly rather than
    # building a full row object for every record.
    for results in df_addresses['results']:
        if isinstance(results, (list, tuple)) and len(results) > 0:
            # Flatten just the first candidate; the others are discarded anyway.
            address_list.append(flatten(results[0]))
        else:
            # No match for this coordinate: keep a placeholder instead of
            # raising IndexError so later rows are not shifted.
            address_list.append({})
    return address_list

def flatzips_to_df(listofzips):
    """Build a dataframe from the flattened Geocodio dictionaries and shorten
    its column names.

    Each column keeps only the text after its last '_' (a name without '_' is
    left as is).  Flattened keys that share the same final segment therefore
    end up with the same column name.

    Arguments:
        listofzips {list} -- returned list from flat_zips()

    Returns:
        dataframe -- Geocodio results with shortened column names.
    """
    def _last_segment(label):
        # Everything to the right of the final underscore, or the label itself.
        return label.rsplit('_', 1)[-1] if '_' in label else label

    frame = pd.DataFrame(listofzips)
    frame.columns = [_last_segment(label) for label in frame.columns]
    return frame

def create_report(source, zips, filename):
    """Join the source data and the Geocodio results side by side and write
    the result to the report directory as a CSV file.

    Arguments:
        source {dataframe} -- dataframe of source data
        zips {dataframe} -- returned dataframe from flatzips_to_df()
        filename {str} -- name of the output file, created under report_path

    Returns:
        dataframe -- the combined dataframe that was written.
    """
    # axis=1 places the Geocodio columns to the right of the source columns,
    # matching rows by index label.
    combined = pd.concat([source, zips], axis=1, sort=False)
    destination = report_path / filename
    combined.to_csv(destination)
    return combined

In [4]:
# Create DataFrame from source file
df_source = pd.read_csv(source_path / source_filename)

# Run list of lat longs through Geocodio API and save them to a file
# Retrieve filename
# (get_geoinfo() returns only the file name; the file itself is in interim_path)
geofile_name = get_geoinfo(get_latlong(df_source))
geofile_path = interim_path / geofile_name

# Open file and import into a dataframe
df_geo = pd.read_json(geofile_path)

# Flatten json structure and keep the first (most accurate) record per lat/long
df_zips = flatzips_to_df(flat_zips(df_geo))

# Move interim file to processed
# (done only after the file has been read into df_geo above)
geofile_path.replace(processed_path / geofile_name)

# Create output file
# create_report() also returns the combined dataframe; as the last expression
# in the cell it is what the notebook displays below.
create_report(df_source, df_zips, output_filename)

Unnamed: 0,fid,Latitude,Longitude,ep50k,ep40k,newsales,total_sale,accuracy,type,city,...,postdirectional,predirectional,state,street,suffix,zip,address,lat,lng,source
0,1,43.453270,-71.563725,934,797,1819,4628,1.00,rooftop,Tilton,...,,,NH,Tilton,Rd,03276,"40 Tilton Rd, Tilton, NH 03276",43.453220,-71.563417,Statewide
1,2,43.535919,-71.465683,1018,895,2466,5096,1.00,rooftop,Laconia,...,,,NH,Bisson,Ave,03246,"59 Bisson Ave, Laconia, NH 03246",43.535827,-71.465494,Statewide
2,3,43.189023,-71.502413,1736,1521,4181,6739,1.00,nearest_street,Concord,...,,,NH,Us Hwy 3,,03301,"143 Us Hwy 3, Concord, NH 03301",43.188451,-71.502693,TIGER/Line® dataset from the US Census Bureau
3,4,43.175952,-70.868042,549,507,1539,2557,1.00,rooftop,Dover,...,,,NH,Dover Point,Rd,03820,"1 Dover Point Rd, Dover, NH 03820",43.175961,-70.868043,Statewide
4,5,43.193601,-71.526021,293,264,1543,2399,1.00,nearest_street,Concord,...,,,NH,Us Hwy 3,,03301,"99 Us Hwy 3, Concord, NH 03301",43.193999,-71.527076,TIGER/Line® dataset from the US Census Bureau
5,6,43.174826,-71.528539,503,429,0,812,1.00,nearest_street,Concord,...,,,NH,I 93,,03301,"I 93, Concord, NH 03301",43.174440,-71.531834,TIGER/Line® dataset from the US Census Bureau
6,7,43.245885,-70.898873,452,214,0,739,1.00,rooftop,Somersworth,...,,,NH,Rt 108,,03878,"221 Rt 108, Somersworth, NH 03878",43.246272,-70.898092,Statewide
7,8,43.071079,-70.780773,275,230,682,1275,1.00,nearest_street,Portsmouth,...,,,NH,Us Hwy 1st,Byp,03801,"524 Us Hwy 1st Byp, Portsmouth, NH 03801",43.071418,-70.779595,TIGER/Line® dataset from the US Census Bureau
8,9,43.077191,-70.785097,242,205,570,1086,1.00,nearest_street,Portsmouth,...,,,NH,Us Hwy 4,,03801,"195 Us Hwy 4, Portsmouth, NH 03801",43.077100,-70.785834,TIGER/Line® dataset from the US Census Bureau
9,10,43.241101,-70.898323,270,234,430,868,0.99,rooftop,Somersworth,...,,,NH,Rt 108,,03878,"112 Rt 108, Somersworth, NH 03878",43.241144,-70.899569,Statewide
