In [216]:
%matplotlib inline
# Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import json
# from citipy import citipy

# https://pypi.org/project/pygeocoder/
from pygeocoder import Geocoder
import reverse_geocoder as rg

# Google developer API key
from config import gkey

In [217]:
# File to Load
data_to_load = "data/Meteorite_Landings.csv"
meteor_landings = pd.read_csv(data_to_load, parse_dates=True)

# Display the data table for preview
meteor_landings.head()

Unnamed: 0,name,id,nametype,recclass,mass (g),fall,year,reclat,reclong,GeoLocation,States,Counties
0,Aachen,1,Valid,L5,21.0,Fell,01/01/1880 12:00:00 AM,50.775,6.08333,"(50.775, 6.08333)",,
1,Aarhus,2,Valid,H6,720.0,Fell,01/01/1951 12:00:00 AM,56.18333,10.23333,"(56.18333, 10.23333)",,
2,Abee,6,Valid,EH4,107000.0,Fell,01/01/1952 12:00:00 AM,54.21667,-113.0,"(54.21667, -113.0)",,
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,01/01/1976 12:00:00 AM,16.88333,-99.9,"(16.88333, -99.9)",,
4,Achiras,370,Valid,L6,780.0,Fell,01/01/1902 12:00:00 AM,-33.16667,-64.95,"(-33.16667, -64.95)",,


In [218]:
# Refine source data into a starting DataFrame
meteor_landings_df = meteor_landings.filter(['name','mass (g)','year','reclat','reclong'], axis=1)
meteor_landings_df = meteor_landings_df.rename(columns={'mass (g)': 'mass',
                                                        'year': 'date',
                                                        'reclat': 'latitude',
                                                        'reclong': 'longitude'})

meteor_landings_df['admin_level_1'] = ''
meteor_landings_df['country'] = ''

meteor_landings_df.head()

Unnamed: 0,name,mass,date,latitude,longitude,admin_level_1,country
0,Aachen,21.0,01/01/1880 12:00:00 AM,50.775,6.08333,,
1,Aarhus,720.0,01/01/1951 12:00:00 AM,56.18333,10.23333,,
2,Abee,107000.0,01/01/1952 12:00:00 AM,54.21667,-113.0,,
3,Acapulco,1914.0,01/01/1976 12:00:00 AM,16.88333,-99.9,,
4,Achiras,780.0,01/01/1902 12:00:00 AM,-33.16667,-64.95,,


In [219]:
# Remove dates outside 584 year range and replace with NaT
meteor_landings_df['date'] = pd.to_datetime(meteor_landings_df['date'], errors='coerce').dt.year

meteor_landings_df.head()

Unnamed: 0,name,mass,date,latitude,longitude,admin_level_1,country
0,Aachen,21.0,1880.0,50.775,6.08333,,
1,Aarhus,720.0,1951.0,56.18333,10.23333,,
2,Abee,107000.0,1952.0,54.21667,-113.0,,
3,Acapulco,1914.0,1976.0,16.88333,-99.9,,
4,Achiras,780.0,1902.0,-33.16667,-64.95,,


In [220]:
# Clean-up DataFrame:
#     Remove rows with NaN/NaT values in year, reclat or reclong
#     Filter to remove landings below -59 to exlude Antarctic

print('Before: ', len(meteor_landings_df))

meteor_landings_df = meteor_landings_df.dropna(subset=['date','latitude','longitude'])
meteor_landings_df = meteor_landings_df[meteor_landings_df.latitude != 0]
meteor_landings_df = meteor_landings_df[meteor_landings_df.latitude > -59]

print('After: ', len(meteor_landings_df))

Before:  45716
After:  9693


In [221]:
keyed_geocoder = Geocoder(api_key = gkey)

In [222]:
# Create a new DataFrame of a sample of the source
sample_size = 50

rand_meteor_landings = meteor_landings_df.sample(n=sample_size)
rand_meteor_landings.reset_index(drop=True, inplace=True)

rand_meteor_landings

Unnamed: 0,name,mass,date,latitude,longitude,admin_level_1,country
0,Hammadah al Hamra 001,19418.0,1990.0,29.0,12.23333,,
1,Hammadah al Hamra 127,186.0,1995.0,28.48433,12.8575,,
2,Harper Dry Lake 017,16.4,2007.0,35.04102,-117.27465,,
3,Roosevelt County 108,4.75,2004.0,34.21667,-103.18333,,
4,Dar al Gani 849,933.0,1998.0,26.88333,16.68817,,
5,Nagaria,20.0,1875.0,26.98333,78.21667,,
6,Cat Mountain,2700.0,1981.0,32.14833,-111.11167,,
7,Ramlat as Sahmah 236,43.2,2005.0,20.56537,56.16462,,
8,El Médano 004,151.0,2010.0,-24.85,-70.53333,,
9,Delphos (c),885.0,1977.0,34.05,-103.63333,,


In [223]:
# Use Google Geocoding API to reverse lookup lat & long to get country and state

for row in range(len(rand_meteor_landings)):
    result = keyed_geocoder.reverse_geocode(rand_meteor_landings['latitude'][row],
                                            rand_meteor_landings['longitude'][row])
    rand_meteor_landings.loc[row,'admin_level_1'] = result.administrative_area_level_1
    rand_meteor_landings.loc[row,'country'] = result.country

print(len(rand_meteor_landings))

50


In [224]:
rand_meteor_landings

Unnamed: 0,name,mass,date,latitude,longitude,admin_level_1,country
0,Hammadah al Hamra 001,19418.0,1990.0,29.0,12.23333,Jabal al Gharbi District,Libya
1,Hammadah al Hamra 127,186.0,1995.0,28.48433,12.8575,Wadi al Shatii District,Libya
2,Harper Dry Lake 017,16.4,2007.0,35.04102,-117.27465,California,United States
3,Roosevelt County 108,4.75,2004.0,34.21667,-103.18333,New Mexico,United States
4,Dar al Gani 849,933.0,1998.0,26.88333,16.68817,Jufra District,Libya
5,Nagaria,20.0,1875.0,26.98333,78.21667,Uttar Pradesh,India
6,Cat Mountain,2700.0,1981.0,32.14833,-111.11167,Arizona,United States
7,Ramlat as Sahmah 236,43.2,2005.0,20.56537,56.16462,Al Wusta Governorate,Oman
8,El Médano 004,151.0,2010.0,-24.85,-70.53333,Región de Antofagasta,Chile
9,Delphos (c),885.0,1977.0,34.05,-103.63333,New Mexico,United States


In [174]:
# Plot x-axis = bins of half-centuries
# y-axis = sum of landings
# plot-points = size of point is sum of mass


In [175]:
# basemap world with landings

In [176]:
# Overlay California sum of landings & mass over map

In [212]:
rev_geocode = []

coordinates = meteor_landings.iloc[1]['GeoLocation']
print(coordinates)
result = rg.search(coordinates)
# for row in range(len(rand_meteor_landings)):
#     coordinates = [rand_meteor_landings.iloc[row]['latitude'],rand_meteor_landings.iloc[row]['longitude']]
#     result = rg.search(coordinates)
#     print(coordinates)
    
# #     rev_geocode = result
#     print(result)
# #     rev_geocode.append(result, ignore_index=True)
    
result

(56.18333, 10.23333)


TypeError: Expecting a tuple or a tuple/list of tuples