# Use the API URL from NASA's Open Data Portal (https://data.nasa.gov/Space-Science/Meteorite-Landings/gh4g-9sfh) to request the data and bring it in as JSON.

## Then extract the following features from that data to create a DataFrame (columns should be in the order listed below):
ID
Year
Fall
Name
NameType
Mass
Latitude
Longitude

In [193]:
import requests  #similar to urllib, this library allows a computer to ping a website
import json      #library to handle JSON formatted data
import pandas as pd
import numpy as np
import re #library for regular expressions

In [194]:
# Sandbox database to practice getting data from their API.
# The dataset's landing page is
#   https://data.nasa.gov/Space-Science/Meteorite-Landings/gh4g-9sfh
# while the data itself is requested from the JSON resource endpoint below.
# (Earlier experiments read local files such as datasets/Meteorite_Landings.json.)
url = "https://data.nasa.gov/resource/gh4g-9sfh.json"

In [195]:
# One empty accumulator list per feature we need; the trailing comments give
# the JSON key each list is meant to hold, in the same left-to-right order.
ID, Year, Fall, Name = [], [], [], []   # id, year, fall, name
Type, Mass = [], []                     # nametype, mass
Lat, Lon = [], []                       # reclat, reclong

In [196]:
# Request the dataset from the API endpoint held in `url`. No API key is
# sent with this request.
# The timeout stops the cell from hanging indefinitely if the server stalls,
# and raise_for_status() turns an HTTP error status (4xx/5xx) into an
# exception here rather than a confusing failure when the body is parsed.
response = requests.get(url, timeout=30)
response.raise_for_status()
response

<Response [200]>

In [197]:
# Parse the body of the HTTP response as JSON into Python objects
nasadata = response.json()
#nasadata

In [198]:
# Verify that the parsed payload is a list
type(nasadata)

list

In [199]:
# Verify there are 1000 records
# NOTE(review): 1000 is presumably the API's default page size rather than
# the size of the full dataset - confirm against the API's paging docs.
len(nasadata)

1000

In [200]:
# Display the first record to see the key names (columns) and sample values
nasadata[0]

{'name': 'Aachen',
 'id': '1',
 'nametype': 'Valid',
 'recclass': 'L5',
 'mass': '21',
 'fall': 'Fell',
 'year': '1880-01-01T00:00:00.000',
 'reclat': '50.775000',
 'reclong': '6.083330',
 'geolocation': {'latitude': '50.775', 'longitude': '6.08333'}}

In [201]:
# Verify that an element of the list is a dictionary
type(nasadata[0])

dict

In [202]:
# Inspect the first record inside the nasadata list of dictionaries:
# print its values first, then its keys (the column names).
first_record = nasadata[0]
print(first_record.values())
print(first_record.keys())

dict_values(['Aachen', '1', 'Valid', 'L5', '21', 'Fell', '1880-01-01T00:00:00.000', '50.775000', '6.083330', {'latitude': '50.775', 'longitude': '6.08333'}])
dict_keys(['name', 'id', 'nametype', 'recclass', 'mass', 'fall', 'year', 'reclat', 'reclong', 'geolocation'])


In [None]:
# Not needed for the final result - scratch work on the record names.
# Collects the 'name' of every record that has one, prints the count and the
# list, then walks the list while keeping a one-entry dict. Each pass
# overwrites the same 'name' key, so `d` ends up holding only the last name.
d = {}
name_list = [record['name'] for record in nasadata if 'name' in record]
print(len(name_list))
print(name_list)
idx = 0
y = {}
x = {}
for name in name_list:
    x['name'] = name
    d = dict(x)     # fresh copy of the single-entry dict
    idx += 1

d

In [203]:
# Build a DataFrame directly from the list of record dictionaries
df = pd.DataFrame(nasadata)
#df.head()

In [205]:
# Show the DataFrame's column labels
df.columns

Index([':@computed_region_cbhk_fwbd', ':@computed_region_nnqa_25f4', 'fall',
       'geolocation', 'id', 'mass', 'name', 'nametype', 'recclass', 'reclat',
       'reclong', 'year'],
      dtype='object')

In [206]:
# Remove the two ':@computed_region_*' columns we don't want, in place,
# then preview the first rows of what is left.
unwanted_columns = [
    ':@computed_region_cbhk_fwbd',
    ':@computed_region_nnqa_25f4',
]
df.drop(columns=unwanted_columns, inplace=True)
df.head()

Unnamed: 0,fall,geolocation,id,mass,name,nametype,recclass,reclat,reclong,year
0,Fell,"{'latitude': '50.775', 'longitude': '6.08333'}",1,21,Aachen,Valid,L5,50.775,6.08333,1880-01-01T00:00:00.000
1,Fell,"{'latitude': '56.18333', 'longitude': '10.23333'}",2,720,Aarhus,Valid,H6,56.18333,10.23333,1951-01-01T00:00:00.000
2,Fell,"{'latitude': '54.21667', 'longitude': '-113.0'}",6,107000,Abee,Valid,EH4,54.21667,-113.0,1952-01-01T00:00:00.000
3,Fell,"{'latitude': '16.88333', 'longitude': '-99.9'}",10,1914,Acapulco,Valid,Acapulcoite,16.88333,-99.9,1976-01-01T00:00:00.000
4,Fell,"{'latitude': '-33.16667', 'longitude': '-64.95'}",370,780,Achiras,Valid,L6,-33.16667,-64.95,1902-01-01T00:00:00.000


In [207]:
# Show how many columns (DataFrame keys) remain
len(df.columns)

10

In [208]:
# Show how many rows the DataFrame holds
len(df.index)

1000

In [210]:
# Detect null values: count the missing entries in each column
df.isnull().sum()

fall            0
geolocation    12
id              0
mass           28
name            0
nametype        0
recclass        0
reclat         12
reclong        12
year            1
dtype: int64

In [211]:
# Count the missing entries per column again, this time via isna()
df.isna().sum()

fall            0
geolocation    12
id              0
mass           28
name            0
nametype        0
recclass        0
reclat         12
reclong        12
year            1
dtype: int64

In [212]:
# Discard, in place, every row that has at least one missing value, print
# how many rows are left, then re-check the per-column missing counts.
# (Filling the gaps with 0 via fillna was the alternative considered.)
df.dropna(axis=0, how='any', inplace=True)
print(df.shape[0])
df.isna().sum()

959


fall           0
geolocation    0
id             0
mass           0
name           0
nametype       0
recclass       0
reclat         0
reclong        0
year           0
dtype: int64

In [213]:
# Work on a copy of the cleaned DataFrame: turn it into a list with one
# dictionary per row, then display how many records that list holds.
copy_df = df.copy()
dataDict = copy_df.to_dict(orient='records')
len(dataDict)

959

In [214]:
# Loop through the rows of dataDict, appending each feature of the record to
# its list so that position i in every list describes the same record.
for rec in dataDict:
    ID.append(rec['id'])
    Year.append(rec['year'])
    Fall.append(rec['fall'])
    Name.append(rec['name'])
    Type.append(rec['nametype'])
    Mass.append(rec['mass'])
    geo_dict = rec['geolocation']
    # Always append a latitude and a longitude (None when the key is absent).
    # Appending them only when the dict has exactly two keys would leave Lat
    # and Lon shorter than the other six lists and shift every later row.
    Lat.append(geo_dict.get('latitude'))
    Lon.append(geo_dict.get('longitude'))

# Display the size of the last geolocation dict as a quick sanity check
len(geo_dict)

2

In [215]:
# Print the length of each feature list, one per line, to confirm they match
print(
    *(len(values) for values in (ID, Year, Fall, Name, Type, Mass, Lat, Lon)),
    sep="\n",
)

959
959
959
959
959
959
959
959


In [216]:
# Save data
# Assemble the extracted feature lists into a DataFrame whose columns follow
# the required order (ID, Year, Fall, Name, NameType, Mass, Latitude,
# Longitude) and write that to CSV. Saving the intermediate `df` instead
# would write a different column set (it still carries 'recclass' and the raw
# 'geolocation' dict) in a different order.
features_df = pd.DataFrame({
    'ID': ID,
    'Year': Year,
    'Fall': Fall,
    'Name': Name,
    'NameType': Type,
    'Mass': Mass,
    'Latitude': Lat,
    'Longitude': Lon,
})
features_df.to_csv('NASA_Data.csv')