![PyData_logo](./static/pydata-logo-madrid-2016.png)

# Remove Before Flight
## Analyzing Flight Safety Data with Python

### Getting missing latitude and longitude values with the Google Geocoding API

###### https://developers.google.com/maps/documentation/geocoding/intro?hl=es#geocoding 

In [1]:
import pandas as pd
import numpy as np

%matplotlib notebook
import matplotlib.pyplot as plt

In [2]:
# Display options: render up to 70 columns and 50 rows of a DataFrame.
# The fully qualified "display." keys are used on purpose: pandas resolves
# option names by pattern, and the bare 'max_columns' / 'max_rows' also match
# the "styler.render.*" options in recent releases, which makes set_option
# raise OptionError for an ambiguous key.
pd.set_option('display.max_columns', 70)
pd.set_option('display.max_rows', 50)

In [3]:
# Load the events whose coordinates are missing; the 'Unnamed: 0' column of
# the CSV becomes the DataFrame index.
need_location = pd.read_csv(
    filepath_or_buffer='./data/need_location.csv',
    index_col='Unnamed: 0',
)
# Summarise columns, non-null counts and dtypes.
need_location.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4307 entries, 46 to 76878
Data columns (total 5 columns):
ev_city       4285 non-null object
ev_country    4170 non-null object
ev_state      2936 non-null object
latitude      0 non-null float64
longitude     1 non-null object
dtypes: float64(1), object(4)
memory usage: 201.9+ KB


In [4]:
# Peek at the last five rows of the frame.
need_location.tail(n=5)

Unnamed: 0,ev_city,ev_country,ev_state,latitude,longitude
76812,Banbury,UK,,,
76836,VERMILLION 255B,,GM,,
76846,Kigali,RW,,,
76850,JUNEAU,USA,AK,,
76878,Cozumel,MX,,,


In [5]:
from utils.geoloc import GoogleMapsClient

In [6]:
# Read the Google Geocoding API key from a local file instead of hard-coding
# it in the notebook. Get a key at:
# https://developers.google.com/maps/documentation/geocoding/get-api-key
with open('./key.txt') as f:
    # Only the first line is used. strip() removes the newline and any stray
    # surrounding whitespace that would otherwise become part of the key.
    key = f.readline().strip()

# Fail here with a clear message rather than passing an empty key on to the
# geocoding client.
if not key:
    raise ValueError("./key.txt is empty: put your Google Geocoding API key on its first line")

In [7]:
# Geocoding client built with the API key read from key.txt.
Session = GoogleMapsClient(key=key)

# Row labels of the frame, used later to write results back by label.
index = need_location.index

# Address columns pulled out as arrays so the request loops can read them
# by position.
cities, states, countries = (
    need_location[column].values
    for column in ('ev_city', 'ev_state', 'ev_country')
)

As the number of requests is limited to 2,500 per day, the DataFrame needs to be split into two parts:

In [8]:
# Part 1: 0:1999 (row positions). Geocode each event and write the result
# back into need_location.
# The range is capped at the frame length so a shorter input cannot raise
# IndexError on cities[ii].
for ii in range(min(2000, len(need_location))):
    location = Session.get_lat_lon_from_city_country(cities[ii], countries[ii], states[ii])
    # Rows for which the helper returns an empty list keep their missing
    # coordinates (presumably the "wrong request" cases it prints — confirm
    # in utils.geoloc).
    if location != []:
        lat = location[0]
        lon = location[1]
        # DataFrame.set_value was removed in pandas 1.0; .at is the
        # label-based scalar setter that replaces it.
        need_location.at[index[ii], 'latitude'] = lat
        need_location.at[index[ii], 'longitude'] = lon

# Persist only the slice handled by this part.
need_location.iloc[0:2000].to_csv('./data/have_location_part1.csv')

wrong request: TANZANIA       , SU
wrong request: Makarakskiy, RS
wrong request: Egelsbach, GE
wrong request: W CAMERON BLK  , GM
wrong request: Sazava, EZ
wrong request: Reichelheim, GE
wrong request: GULF OF MEXICO , GM
wrong request: KAFOUTINE, SG
wrong request: SOUTH PELTO    , GM
wrong request: Sulaimaniya, IZ
wrong request: VOLCANOES NTL P, HI, USA
wrong request: JEREMIE        , HA
wrong request: BELL MOUNTAIN  , AK, USA
wrong request: Alcora, SP
wrong request: EUGENE ISLAND  , GM
wrong request: AGO-6, AY
wrong request: HIGH ISLAND BLK, GM
wrong request: WALKER'S CAY   , BF
wrong request: Alcorsia, Teruel, Spain, SP
wrong request: FREEPORT, BF
wrong request: MISSING ACFT, AO
wrong request: WEST DELTA 105 , GM
wrong request: Apoquindo, CI
wrong request: E CAMERON BLK 2, GM
wrong request: Steinfeld, GE
wrong request: MT MCKINLEY VIL, AK, USA
wrong request: Santa Juana, CI
wrong request: Moixent, SP
wrong request: MIAZAL         , EC
wrong request: Subic Bay, RP
wrong request: Mera

In [9]:
# First batch: rows at positions 0-1999, the ones processed by Part 1.
need_location.iloc[:2000]

Unnamed: 0,ev_city,ev_country,ev_state,latitude,longitude
46,NIGHTMUTE,USA,AK,60.479444,-164.724
50,UTICA,USA,NY,43.100903,-75.2327
56,Toronto,CA,,43.653226,-79.3832
92,Sydney,AS,,-33.972048,151.144
116,GRAND JUNCTION,USA,CO,39.063871,-108.551
120,JUNEAU,USA,AK,58.301944,-134.42
129,YUBA CITY,USA,CA,39.140448,-121.617
149,Sidney,USA,MT,47.716684,-104.156
153,Lake in the Woods,CA,ON,49.647675,-94.5553
163,KEMMERER,USA,WY,41.792447,-110.538


In [11]:
# Remaining rows (position 2000 onward), which the Part 1 loop did not process.
need_location.iloc[2000:]

Unnamed: 0,ev_city,ev_country,ev_state,latitude,longitude
35161,Miami,USA,FL,,
35187,CEDAR RAPIDS,USA,IA,,
35202,BURBANK,USA,CA,,
35212,BEDFORD PARK,USA,IL,,
35238,GARDNER,USA,KS,,
35240,FORT MYERS,USA,FL,,
35244,BETHEL,USA,AK,,
35250,"Naha, Okinawa",JA,,,
35272,Godensee,AU,,,
35275,MANILA,RP,,,


In [12]:
# Part 2: 2000:end (row positions). Geocode the rows left over after Part 1
# and write the result back into need_location.
for ii in range(2000, len(need_location)):
    location = Session.get_lat_lon_from_city_country(cities[ii], countries[ii], states[ii])
    # Rows for which the helper returns an empty list keep their missing
    # coordinates (presumably the "wrong request" cases it prints — confirm
    # in utils.geoloc).
    if location != []:
        lat = location[0]
        lon = location[1]
        # DataFrame.set_value was removed in pandas 1.0; .at is the
        # label-based scalar setter that replaces it.
        need_location.at[index[ii], 'latitude'] = lat
        need_location.at[index[ii], 'longitude'] = lon

# Persist only the slice handled by this part.
need_location.iloc[2000:].to_csv('./data/have_location_part2.csv')

wrong request: Kansai, JA
wrong request: Sucre, FN, BL
wrong request: Evora, FN, PO
wrong request: Googlong, NSW, AS
wrong request: Bosnia, BK
wrong request: TREASURE CAY   , BF
wrong request: W. CAMERON 617 , GM
wrong request: VERMILION 250  , GM
wrong request: TREASURE CAY   , BF
wrong request: DUNCAN TOWN    , BF
wrong request: EUCEMBUNE      , AS
wrong request: SHIP SHOAL 225 , GM
wrong request: TUCUTI , PM
wrong request: Makung Island, TW
wrong request: Rumbek, SU
wrong request: EC 270, GM, USA
wrong request: GULF-OF-MEXICO , GM
wrong request: GULF OF MEXICO , GM
wrong request: GULF OF MEXICO , GM
wrong request: STELLA MARIS   , BF
wrong request: Goma, CF
wrong request: SOTO CANO, HO
wrong request: PERDE BAY      , AK, USA
wrong request: RANCHO SANTA YN, MX
wrong request: Bourke , Australia, AS
wrong request: MIXCOLAJA      , GT


Timeout: 

In [13]:
# Same slice (position 2000 onward), displayed again after the Part 2 loop.
need_location.iloc[2000:]

Unnamed: 0,ev_city,ev_country,ev_state,latitude,longitude
2000,,,,25.761680,-80.1918
2001,,,,41.977880,-91.6656
2002,,,,34.180839,-118.309
2003,,,,41.764172,-87.7813
2004,,,,38.810841,-94.9272
2005,,,,26.640628,-81.8723
2006,,,,60.792222,-161.756
2007,,,,26.195134,127.67
2008,,,,48.143000,11.4249
2009,,,,14.599512,120.984


In [35]:
# Notebook style: render the contents of the project's style file as HTML.
# IPython.display is the documented public import path for the display classes.
from IPython.display import HTML

css_file = './static/style.css'
# Read inside a context manager so the file handle is closed immediately
# instead of being left to the garbage collector.
with open(css_file, "r") as css_handle:
    css_source = css_handle.read()

# Last expression of the cell, so this HTML object is what gets displayed.
HTML(css_source)