### Import libraries

In [1]:
import pandas as pd
import numpy as np

### Read in IP address data

In [2]:
df = pd.read_csv('./data/ipaddresstrain.csv', index_col=0)
df = df.loc[df['ip'].notna()][['ip']]
df = df.sample(100)
df.head(5)

Unnamed: 0,ip
2094,83.254.20.53
6382,86.44.33.121
8807,76.89.129.139
1267,70.95.69.64
5609,96.255.237.170


### Define and run function to grab latitude and longitude from IP address

In [3]:
%%time
# pip install ip2geotools
# https://pypi.org/project/ip2geotools/

from ip2geotools.databases.noncommercial import DbIpCity

def add_lat_lon(df, ip_column):
    '''
    Add lat and long columns to datafram using an API
    '''
    lst_lat = []
    lst_lon = []

    for ip in df[ip_column].tolist():
        try:
            response = DbIpCity.get(ip, api_key='free')
        except:
            lst_lat.append(np.nan)
            lst_lon.append(np.nan)
            continue

        lst_lat.append(response.latitude)
        lst_lon.append(response.longitude)
    
    df['lat'] = lst_lat
    df['lon'] = lst_lon
    
    return df

df = add_lat_lon(df=df, ip_column='ip')
df.head(5)

CPU times: user 2.82 s, sys: 245 ms, total: 3.06 s
Wall time: 1min 15s


Unnamed: 0,ip,lat,lon
2094,83.254.20.53,59.4147,17.921818
6382,86.44.33.121,53.349764,-6.260273
8807,76.89.129.139,34.14835,-118.001452
1267,70.95.69.64,32.717421,-117.162771
5609,96.255.237.170,38.894985,-77.036571


### Define and run function to grab geographic information by latitude and longitude

In [4]:
%%time
# pip install geopy
import geopy

def add_geo_info(df, user_agent, lat_column, lon_column):
    '''
    Use geopy API to gather geographic information based on latitude and longitude
    '''

    def get_geo_info(df, geolocator, lat_field, lon_field):
        try:
            location = geolocator.reverse((df[lat_field], df[lon_field]))
            return location.raw['address']
        except:
            return np.nan

    # Instantiate geopy Nominatim object
    geolocator = geopy.Nominatim(user_agent=user_agent)
    
    # Create dataframe with dictionary of geographic info and concatenate to large original dataframe
    dct_geo_info = df.apply(get_geo_info, axis=1, geolocator=geolocator, lat_field=lat_column, lon_field=lon_column)
    df_geo_info = pd.DataFrame(dct_geo_info)
    df_concat = pd.concat([df, df_geo_info[0].apply(pd.Series)], axis=1, sort=False)
    
    # Rearrange and name columns
    lst_col = [name for name in df_concat.columns.tolist() if name not in ['postcode', 'city', 'country']]
    lst_col.insert(3,'postcode')
    lst_col.insert(4,'city')
    lst_col.insert(5,'country')
    df_concat = df_concat[lst_col]
    df_concat.rename(columns={"postcode": "zip_code"}, inplace=True)

    return df_concat

user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'
df = add_geo_info(df=df, user_agent=user_agent, lat_column='lat', lon_column='lon')
df.head(5)

CPU times: user 1.93 s, sys: 151 ms, total: 2.08 s
Wall time: 2min 51s


Unnamed: 0,ip,lat,lon,zip_code,city,country,0,amenity,borough,building,...,residential,retail,road,shop,state,state_district,suburb,tourism,town,village
2094,83.254.20.53,59.4147,17.921818,16432.0,,Sverige,,,,,...,,,Kotkagatan,,,,Akalla,,,
6382,86.44.33.121,53.349764,-6.260273,,Dublin,Ireland,,,,,...,,,,,,Leinster,,,,
8807,76.89.129.139,34.14835,-118.001452,91016.0,,United States of America,,,,,...,,,West Lime Avenue,,California,,,,Monrovia,
1267,70.95.69.64,32.717421,-117.162771,92101.0,San Diego,United States of America,,,,,...,,,Civic Center Plz,,California,,Banker's Hill,Juan Rodriguez Cabrillo's discovery of San Die...,,
5609,96.255.237.170,38.894985,-77.036571,,,United States of America,,,,,...,,,Ellipse Road Northwest,,District of Columbia,,,,,


### Summary
 - By using two API's, *ip2geotools* & *geopy*, we were able to:
     - Grab the latitude and longitude from the IP address
     - Grab the zip code and other geographical information from the latitude and longitude

In [5]:
df[['ip', 'lat', 'lon', 'zip_code', 'city', 'country']].head(5)

Unnamed: 0,ip,lat,lon,zip_code,city,country
2094,83.254.20.53,59.4147,17.921818,16432.0,,Sverige
6382,86.44.33.121,53.349764,-6.260273,,Dublin,Ireland
8807,76.89.129.139,34.14835,-118.001452,91016.0,,United States of America
1267,70.95.69.64,32.717421,-117.162771,92101.0,San Diego,United States of America
5609,96.255.237.170,38.894985,-77.036571,,,United States of America
