![PyData_logo](./static/pydata-logo-madrid-2016.png)

# Remove Before Flight
## Analyzing Flight Safety Data with Python

### Filling in missing latitude and longitude values with the Google Geocoding API

###### https://developers.google.com/maps/documentation/geocoding/intro?hl=es#geocoding 

In [1]:
import pandas as pd
import numpy as np

%matplotlib notebook
import matplotlib.pyplot as plt

In [2]:
# Some configuration options.
# The fully qualified 'display.*' keys are used because pandas resolves option names
# by pattern matching: on versions that also define 'styler.render.max_columns' /
# 'styler.render.max_rows', the short names are ambiguous and raise an OptionError.
pd.set_option('display.max_columns', 70)
pd.set_option('display.max_rows', 50)

In [3]:
# Load the events whose coordinates are missing; the 'Unnamed: 0' column of the
# CSV is used as the frame's index.
need_location = pd.read_csv(
    './data/need_location.csv',
    index_col='Unnamed: 0',
)
# Summarise columns, non-null counts and dtypes.
need_location.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4307 entries, 46 to 76878
Data columns (total 5 columns):
ev_city       4285 non-null object
ev_country    4170 non-null object
ev_state      2936 non-null object
latitude      0 non-null float64
longitude     1 non-null object
dtypes: float64(1), object(4)
memory usage: 201.9+ KB


In [4]:
# Preview the first five rows of the frame.
need_location.head(n=5)

Unnamed: 0,ev_city,ev_country,ev_state,latitude,longitude
46,NIGHTMUTE,USA,AK,,
50,UTICA,USA,NY,,
56,Toronto,CA,,,
92,Sydney,AS,,,
116,GRAND JUNCTION,USA,CO,,


In [5]:
from utils.geoloc import GoogleMapsClient

In [6]:
# Read the Google geocoding API key from the first line of ./key.txt.
# Get your own key at https://developers.google.com/maps/documentation/geocoding/get-api-key
with open('./key.txt') as key_file:
    # strip() instead of rstrip('\n'): stray spaces or a carriage return around
    # the key would otherwise be passed on as part of it.
    key = key_file.readline().strip()

# Fail early with a clear message instead of issuing every request with an empty key.
if not key:
    raise ValueError("./key.txt is empty: put your Google geocoding API key on its first line")

In [7]:
# Geocoding client, created with the API key read above.
Session = GoogleMapsClient(key=key)

# Row labels of the frame, plus the three lookup columns as plain arrays so the
# request loops below can address them by position.
index = need_location.index
cities, states, countries = (
    need_location[column].values
    for column in ('ev_city', 'ev_state', 'ev_country')
)

As the number of requests is limited to 2,500 per day, the DataFrame needs to be split into parts:

In [None]:
# Part 1: positions 0:1999
# `ii` is a *position* into the cities/countries/states arrays, whereas the frame is
# indexed by the original row labels (46, 50, 56, ...). The results are therefore
# written positionally with .iat; the previous set_value(ii, ...) call treated the
# position as a label (and set_value no longer exists in pandas >= 1.0).
lat_col = need_location.columns.get_loc('latitude')
lon_col = need_location.columns.get_loc('longitude')

# Never run past the end of the frame if it holds fewer than 2000 rows.
for ii in range(min(2000, len(need_location))):
    location = Session.get_lat_lon_from_city_country(cities[ii], countries[ii], states[ii])
    # An empty list is treated as "no result": the row keeps its missing values.
    if location != []:
        need_location.iat[ii, lat_col] = location[0]
        need_location.iat[ii, lon_col] = location[1]

# Persist only the rows handled in this part.
need_location.iloc[0:2000].to_csv('./data/have_location_part1.csv')

In [None]:
# Part 2: positions 2000:end
# `ii` is a *position* into the cities/countries/states arrays, whereas the frame is
# indexed by the original row labels (46, 50, 56, ...). The results are therefore
# written positionally with .iat; the previous set_value(ii, ...) call treated the
# position as a label (and set_value no longer exists in pandas >= 1.0).
lat_col = need_location.columns.get_loc('latitude')
lon_col = need_location.columns.get_loc('longitude')

for ii in range(2000, len(need_location)):
    location = Session.get_lat_lon_from_city_country(cities[ii], countries[ii], states[ii])
    # An empty list is treated as "no result": the row keeps its missing values.
    if location != []:
        need_location.iat[ii, lat_col] = location[0]
        need_location.iat[ii, lon_col] = location[1]

# Persist only the rows handled in this part.
need_location.iloc[2000:].to_csv('./data/have_location_part2.csv')

In [35]:
# Notebook style: inject the project's stylesheet into the rendered notebook.
# HTML is imported from the public IPython.display module (IPython.core.display
# is a deprecated location for it).
from IPython.display import HTML

css_file = './static/style.css'
# Read inside a context manager so the file handle is closed deterministically.
with open(css_file, "r") as css_handle:
    css_source = css_handle.read()

# Last expression of the cell, so the notebook renders it.
HTML(css_source)