In [216]:
%matplotlib inline
# Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import json
# from citipy import citipy

# https://pypi.org/project/pygeocoder/
from pygeocoder import Geocoder
import reverse_geocoder as rg

# Google developer API key
from config import gkey

In [261]:
# File to Load
data_to_load = "data/Meteorite_Landings.csv"
meteor_landings = pd.read_csv(data_to_load, parse_dates=True)

# Display the data table for preview
meteor_landings.head()

Unnamed: 0,name,id,nametype,recclass,mass (g),fall,year,reclat,reclong,GeoLocation,States,Counties
0,Aachen,1,Valid,L5,21.0,Fell,01/01/1880 12:00:00 AM,50.775,6.08333,"(50.775, 6.08333)",,
1,Aarhus,2,Valid,H6,720.0,Fell,01/01/1951 12:00:00 AM,56.18333,10.23333,"(56.18333, 10.23333)",,
2,Abee,6,Valid,EH4,107000.0,Fell,01/01/1952 12:00:00 AM,54.21667,-113.0,"(54.21667, -113.0)",,
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,01/01/1976 12:00:00 AM,16.88333,-99.9,"(16.88333, -99.9)",,
4,Achiras,370,Valid,L6,780.0,Fell,01/01/1902 12:00:00 AM,-33.16667,-64.95,"(-33.16667, -64.95)",,


In [276]:
# Refine source data into a starting DataFrame
meteor_landings_df = meteor_landings.filter(['name','mass (g)','fall','year','reclat','reclong'], axis=1)
meteor_landings_df = meteor_landings_df.rename(columns={'mass (g)': 'mass',
                                                        'year': 'date',
                                                        'reclat': 'latitude',
                                                        'reclong': 'longitude'})

print(len(meteor_landings_df))
meteor_landings_df.head()

45716


Unnamed: 0,name,mass,fall,date,latitude,longitude
0,Aachen,21.0,Fell,01/01/1880 12:00:00 AM,50.775,6.08333
1,Aarhus,720.0,Fell,01/01/1951 12:00:00 AM,56.18333,10.23333
2,Abee,107000.0,Fell,01/01/1952 12:00:00 AM,54.21667,-113.0
3,Acapulco,1914.0,Fell,01/01/1976 12:00:00 AM,16.88333,-99.9
4,Achiras,780.0,Fell,01/01/1902 12:00:00 AM,-33.16667,-64.95


In [277]:
# Remove dates outside 584 year range and replace with NaT
meteor_landings_df['date'] = pd.to_datetime(meteor_landings_df['date'], errors='coerce').dt.year

In [278]:
# Create a copy of date cleared DataFrame for use for the scatter plot later
meteor_landings_nat = meteor_landings_df

# Remove 'date' Nat and zero mass
meteor_landings_nat = meteor_landings_nat.dropna(subset=['date'])
meteor_landings_nat = meteor_landings_nat[meteor_landings_nat.mass > 0]

print(len(meteor_landings_nat))
meteor_landings_nat.head()

45277


Unnamed: 0,name,mass,fall,date,latitude,longitude
0,Aachen,21.0,Fell,1880.0,50.775,6.08333
1,Aarhus,720.0,Fell,1951.0,56.18333,10.23333
2,Abee,107000.0,Fell,1952.0,54.21667,-113.0
3,Acapulco,1914.0,Fell,1976.0,16.88333,-99.9
4,Achiras,780.0,Fell,1902.0,-33.16667,-64.95


In [279]:
# Clean-up DataFrame:
#     Remove rows with NaN/NaT values in year, reclat or reclong
#     Filter to remove landings below -59 to exlude Antarctic

print('Before: ', len(meteor_landings_df))

meteor_landings_df = meteor_landings_df.dropna(subset=['date','latitude','longitude'])
meteor_landings_df = meteor_landings_df[meteor_landings_df.latitude != 0]
meteor_landings_df = meteor_landings_df[meteor_landings_df.latitude > -59]

print('After: ', len(meteor_landings_df))

Before:  45716
After:  9693


In [280]:
# Add two blank columns that we'll need for reverse gecoding
meteor_landings_df['admin_level_1'] = ''
meteor_landings_df['country'] = ''

meteor_landings_df.head()

Unnamed: 0,name,mass,fall,date,latitude,longitude,admin_level_1,country
0,Aachen,21.0,Fell,1880.0,50.775,6.08333,,
1,Aarhus,720.0,Fell,1951.0,56.18333,10.23333,,
2,Abee,107000.0,Fell,1952.0,54.21667,-113.0,,
3,Acapulco,1914.0,Fell,1976.0,16.88333,-99.9,,
4,Achiras,780.0,Fell,1902.0,-33.16667,-64.95,,


In [281]:
# Create a new DataFrame of a sample of the source
sample_size = 50

rand_meteor_landings = meteor_landings_df.sample(n=sample_size)
rand_meteor_landings.reset_index(drop=True, inplace=True)

rand_meteor_landings

Unnamed: 0,name,mass,fall,date,latitude,longitude,admin_level_1,country
0,Dar al Gani 719,62.0,Found,1999.0,27.15,16.2,,
1,Dhofar 416,281.0,Found,2001.0,18.76427,54.23325,,
2,Acfer 021,2140.0,Found,1989.0,27.55,3.61667,,
3,Sayh al Uhaymir 513,154.2,Found,2009.0,20.12538,56.55335,,
4,Jiddat al Harasis 446,210.578,Found,2007.0,19.60792,56.43172,,
5,Modoc (1948),1800.0,Found,1948.0,38.5,-101.1,,
6,Mount Margaret,850.0,Found,1972.0,-28.83333,122.18333,,
7,Kaba,3000.0,Fell,1857.0,47.35,21.3,,
8,Northwest Africa 027,705.0,Found,1999.0,30.33333,-5.83333,,
9,O'Malley 015,63.9,Found,2011.0,-30.53842,131.25525,,


In [282]:
keyed_geocoder = Geocoder(api_key = gkey)

In [223]:
# Use Google Geocoding API to reverse lookup lat & long to get country and state

for row in range(len(rand_meteor_landings)):
    result = keyed_geocoder.reverse_geocode(rand_meteor_landings['latitude'][row],
                                            rand_meteor_landings['longitude'][row])
    rand_meteor_landings.loc[row,'admin_level_1'] = result.administrative_area_level_1
    rand_meteor_landings.loc[row,'country'] = result.country

print(len(rand_meteor_landings))

50


In [225]:
rand_meteor_landings.head()

Unnamed: 0,name,mass,date,latitude,longitude,admin_level_1,country
0,Hammadah al Hamra 001,19418.0,1990.0,29.0,12.23333,Jabal al Gharbi District,Libya
1,Hammadah al Hamra 127,186.0,1995.0,28.48433,12.8575,Wadi al Shatii District,Libya
2,Harper Dry Lake 017,16.4,2007.0,35.04102,-117.27465,California,United States
3,Roosevelt County 108,4.75,2004.0,34.21667,-103.18333,New Mexico,United States
4,Dar al Gani 849,933.0,1998.0,26.88333,16.68817,Jufra District,Libya


In [355]:
# Create a DataFrame for the scatter plot
bubble_df = meteor_landings_nat.groupby(['date'], as_index=False)\
.agg({'mass': 'sum', 'name': 'count'})
bubble_df = bubble_df.rename(columns={'date': 'year', 'name': 'count'})

print(len(bubble_df))
bubble_df.head(10)

239


Unnamed: 0,year,mass,count
0,1688.0,695.0,1
1,1715.0,7000.0,1
2,1716.0,1700.0,1
3,1723.0,39.0,1
4,1724.0,98000.0,1
5,1740.0,24700.0,1
6,1741.0,14360.0,1
7,1749.0,700000.0,1
8,1751.0,49000.0,1
9,1753.0,21540.0,2


In [349]:
# Plot x-axis = bins of half-centuries
# y-axis = sum of landings

bins = [1675,1700,1725,1750,1775,1800,1825,1850,1875,1900,1925,1950,1975,2000,2025]
bin_labels = ['1676-1700','1701-1725','1726-1750','1751-1775','1776-1800',
              '1801-1825','1826-1850','1851-1875','1876-1900','1901-1925','1926-1950',
              '1951-1975','1976-2000','2001-2025']

In [358]:
# Bin into 25 year groups
bubble_df['years_bin'] = pd.cut(bubble_df['year'], bins, labels=bin_labels)

bubble_df.head()

Unnamed: 0,year,mass,count,years_bin
0,1688.0,695.0,1,1676-1700
1,1715.0,7000.0,1,1701-1725
2,1716.0,1700.0,1,1701-1725
3,1723.0,39.0,1,1701-1725
4,1724.0,98000.0,1,1701-1725


In [360]:
# Group by the bins
plot_df = bubble_df.groupby(['years_bin'], as_index=True)\
.agg({'mass': 'sum', 'count': 'sum'})

plot_df

Unnamed: 0_level_0,mass,count
years_bin,Unnamed: 1_level_1,Unnamed: 2_level_1
1676-1700,695.0,1
1701-1725,106739.0,4
1726-1750,739060.0,3
1751-1775,101940.0,8
1776-1800,10213580.0,17
1801-1825,65928240.0,69
1826-1850,32571920.0,100
1851-1875,77780320.0,206
1876-1900,77330860.0,306
1901-1925,124752400.0,372


In [361]:
# Variables to control alpha transparency an scatter plot point size
plot_alpha = 0.75 #alpha value
scatter_size = 1 #scatter size multiplier

# Build the scatter plots for each city types
plt.scatter(x=plot_df['years_bin'], y=plot_df['count'], marker="o", facecolors="coral", edgecolors="black",
            s=plot_df['mass']*scatter_size, alpha=plot_alpha)
plt.grid()

# Incorporate the other graph properties
plt.title('Meteor Landings Recorded Since 1600')
plt.xlabel('Grouped Years')
plt.ylabel('Number of Meteors')

# Create a legend
# lgnd_handle_size = 30
# lgnd = plt.legend(['Urban','Suburban','Rural'], title='City Types', fontsize=8)
# lgnd.legendHandles[0]._sizes = [lgnd_handle_size]
# lgnd.legendHandles[1]._sizes = [lgnd_handle_size]
# lgnd.legendHandles[2]._sizes = [lgnd_handle_size]

# Incorporate a text label regarding circle size
plt.text(0.92, .6, 'Note:\nCircle size correlates with total meteor mass.',\
         transform=plt.gcf().transFigure, fontsize=10, ha='left')

# Save Figure
# plt.savefig('bubble.png')

KeyError: 'years_bin'

In [175]:
# basemap world with landings

In [176]:
# Overlay California sum of landings & mass over map