In [1]:
import requests
from getpass import getpass
import pandas as pd
from datetime import datetime, timedelta
from elasticsearch import Elasticsearch, helpers

In [2]:
def connect_to_nasa():
    url = "https://api.nasa.gov/neo/rest/v1/feed"
    nasa_api_key = getpass("NASA API Key: ")
    today = datetime.now()
    params = {
        "api_key": nasa_api_key,
        "start_date": today - timedelta(days=7),
        "end_date": datetime.now(),
    }
    return requests.get(url, params).json()

In [3]:
response = connect_to_nasa()

NASA API Key:  ········


In [4]:
def create_df(response):
    all_objects = []
    for date, objects in response["near_earth_objects"].items():
        for obj in objects:
            obj["close_approach_date"] = date
            all_objects.append(obj)
    df = pd.json_normalize(all_objects)
    return df.drop("close_approach_data", axis=1)

In [5]:
df = create_df(response)
df.head()

Unnamed: 0,id,neo_reference_id,name,nasa_jpl_url,absolute_magnitude_h,is_potentially_hazardous_asteroid,is_sentry_object,close_approach_date,links.self,estimated_diameter.kilometers.estimated_diameter_min,estimated_diameter.kilometers.estimated_diameter_max,estimated_diameter.meters.estimated_diameter_min,estimated_diameter.meters.estimated_diameter_max,estimated_diameter.miles.estimated_diameter_min,estimated_diameter.miles.estimated_diameter_max,estimated_diameter.feet.estimated_diameter_min,estimated_diameter.feet.estimated_diameter_max,sentry_data
0,2016657,2016657,16657 (1993 UB),https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...,16.55,False,False,2024-10-27,http://api.nasa.gov/neo/rest/v1/neo/2016657?ap...,1.301832,2.910985,1301.832102,2910.985075,0.808921,1.808802,4271.102833,9550.476274,
1,2189700,2189700,189700 (2001 TA45),https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...,19.18,False,False,2024-10-27,http://api.nasa.gov/neo/rest/v1/neo/2189700?ap...,0.387753,0.867042,387.75283,867.041687,0.240938,0.538755,1272.154996,2844.625049,
2,3740809,3740809,(2016 BF1),https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...,25.4,False,False,2024-10-27,http://api.nasa.gov/neo/rest/v1/neo/3740809?ap...,0.022108,0.049436,22.108281,49.435619,0.013737,0.030718,72.533733,162.190357,
3,3774014,3774014,(2017 HG1),https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...,27.5,False,False,2024-10-27,http://api.nasa.gov/neo/rest/v1/neo/3774014?ap...,0.008405,0.018795,8.405334,18.794898,0.005223,0.011679,27.576556,61.663054,
4,3805271,3805271,(2018 HA1),https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...,22.1,False,False,2024-10-27,http://api.nasa.gov/neo/rest/v1/neo/3805271?ap...,0.101054,0.225964,101.054342,225.964377,0.062792,0.140408,331.543126,741.352967,


In [6]:
df = df.drop(['links.self', 'sentry_data'], axis=1)

In [7]:
df.isnull().values.any()

False

In [8]:
def connect_to_elastic():
    elasticsearch_endpoint = getpass("Host Address: ")
    elasticsearch_api_key = getpass("Elastic API Key: ")
    return Elasticsearch(hosts=elasticsearch_endpoint, api_key=elasticsearch_api_key)

In [9]:
es = connect_to_elastic()

Host Address:  ········
Elastic API Key:  ········


In [10]:
index_name = "ato"
es.indices.create(index=index_name)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'ato'})

In [11]:
def doc_generator(df, index_name):
    for index, document in df.iterrows():
        yield {
            "_index": index_name,
            "_id": f"{document['id']}",
            "_source": document.to_dict(),
        }

In [12]:
helpers.bulk(es, doc_generator(df, index_name))

(166, [])