### Import libraries

In [1]:
import pandas as pd
import numpy as np

### Read in IP address data

In [2]:
df = pd.read_csv('./data/ipaddresstrain.csv', index_col=0)
df = df.loc[df['ip'].notna()][['ip']]
df = df.sample(100)
df.head(5)

Unnamed: 0,ip
7762,81.158.54.161
2882,66.185.200.1
6833,79.97.166.36
4549,70.194.84.171
6900,213.100.108.117


### Define and run function to grab latitude and longitude from IP address

In [3]:
# pip install ip2geotools
# https://pypi.org/project/ip2geotools/

from ip2geotools.databases.noncommercial import DbIpCity

def add_lat_lon(df, ip_column):
    '''
    Add lat and long columns to datafram using an API
    '''
    lst_lat = []
    lst_lon = []

    for ip in df[ip_column].tolist():
        try:
            response = DbIpCity.get(ip, api_key='free')
        except:
            lst_lat.append(np.nan)
            lst_lon.append(np.nan)
            continue

        lst_lat.append(response.latitude)
        lst_lon.append(response.longitude)
    
    df['lat'] = lst_lat
    df['lon'] = lst_lon
    
    return df

df = add_lat_lon(df=df, ip_column='ip')
df.head(5)

Unnamed: 0,ip,lat,lon
7762,81.158.54.161,51.590238,-0.241051
2882,66.185.200.1,46.581497,-81.068538
6833,79.97.166.36,51.897928,-8.470581
4549,70.194.84.171,39.768333,-86.15835
6900,213.100.108.117,59.325117,18.071093


### Define and run function to grab geographic information by latitude and longitude

In [4]:
# pip install geopy
import geopy

def add_geo_info(df, user_agent, lat_column, lon_column):
    '''
    Use geopy API to gather geographic information based on latitude and longitude
    Geographic information: building, house_number, neighbourhood,city, county, state, postcode, country, country_code
    '''

    def get_geo_info(df, geolocator, lat_field, lon_field):
        try:
            location = geolocator.reverse((df[lat_field], df[lon_field]))
            return location.raw['address']
        except:
            return np.nan

    # Instantiate geopy Nominatim object
    geolocator = geopy.Nominatim(user_agent=user_agent)
    
    # Create dataframe with dictionary of geographic info and concatenate to large original dataframe
    dct_geo_info = df.apply(get_geo_info, axis=1, geolocator=geolocator, lat_field=lat_column, lon_field=lon_column)
    df_geo_info = pd.DataFrame(dct_geo_info)
    df_concat = pd.concat([df, df_geo_info[0].apply(pd.Series)], axis=1, sort=False)
    
    # Rearrange and name columns
    lst_col = [name for name in df_concat.columns.tolist() if name not in ['postcode', 'city', 'country']]
    lst_col.insert(3,'postcode')
    lst_col.insert(4,'city')
    lst_col.insert(5,'country')
    df_concat = df_concat[lst_col]
    df_concat.rename(columns={"postcode": "zip_code"}, inplace=True)

    return df_concat

user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'
df = add_geo_info(df=df, user_agent=user_agent, lat_column='lat', lon_column='lon')
df.head(5)

Unnamed: 0,ip,lat,lon,zip_code,city,country,0,aeroway,amenity,borough,...,residential,retail,road,shop,state,state_district,suburb,tourism,town,village
7762,81.158.54.161,51.590238,-0.241051,NW9 6ED,London Borough of Barnet,United Kingdom,,,,,...,,,Colindeep Lane,,England,,,,,
2882,66.185.200.1,46.581497,-81.068538,P0M 1B0,,Canada,,,,,...,,,,,Ontario,Northeastern Ontario,,,Azilda,
6833,79.97.166.36,51.897928,-8.470581,T12 T853,Cork,Ireland,,,,,...,,,Oliver Plunkett Street,,,Munster,City Centre,,,
4549,70.194.84.171,39.768333,-86.15835,46254,Indianapolis,United States of America,,,,,...,,,Monument Circle,,Indiana,,,,,
6900,213.100.108.117,59.325117,18.071093,111 29,,Sverige,,,,,...,,,Stortorget,Grillska husets brödbod,,,Gamla stan,,,


### Summary
 - By using two API's, *ip2geotools* & *geopy*, we were able to:
     - Grab the latitude and longitude from the IP address
     - Grab the zip code and other geographical information from the latitude and longitude

In [5]:
df[['ip', 'lat', 'lon', 'zip_code', 'city', 'country']].head(5)

Unnamed: 0,ip,lat,lon,zip_code,city,country
7762,81.158.54.161,51.590238,-0.241051,NW9 6ED,London Borough of Barnet,United Kingdom
2882,66.185.200.1,46.581497,-81.068538,P0M 1B0,,Canada
6833,79.97.166.36,51.897928,-8.470581,T12 T853,Cork,Ireland
4549,70.194.84.171,39.768333,-86.15835,46254,Indianapolis,United States of America
6900,213.100.108.117,59.325117,18.071093,111 29,,Sverige
