In [1]:
import requests
from datetime import datetime, timedelta

def neosapi(api_key=None, timeout=30):
    """Fetch Near-Earth Objects (NEOs) from NASA's NEO Feed API.

    The request covers the UTC calendar dates that fall 12 hours before and
    12 hours after the current UTC time. Only dates (``YYYY-MM-DD``) are sent
    to the API and the records are not filtered by time of day afterwards.

    Args:
        api_key: NASA API key. When omitted, the ``NASA_API_KEY`` environment
            variable is used, falling back to NASA's public ``DEMO_KEY``.
            The key is deliberately not hard-coded so it is never saved with
            the notebook.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of dicts, one per NEO that has close-approach data, with keys:
            - asteroid_name: name of the NEO as returned by the API.
            - nasa_id: NASA identifier of the NEO.
            - nasa_site_url: NASA JPL URL for the NEO.
            - closest_approach_time_to_earth_IST: ``close_approach_date_full``
              of the first close-approach entry, exactly as returned by the
              API (no timezone conversion happens in this function).
            - closest_approach_distance_km: miss distance in kilometers (float).
            - velocity_kmph: relative velocity in kilometers per hour (float).
            - diameter_min_m / diameter_max_m: estimated diameter in meters (float).
            - is_potentially_hazardous: hazard flag as returned by the API.

    Raises:
        requests.HTTPError: If the API responds with an error status code.
    """
    # Local imports keep this function self-contained without touching the
    # notebook's import cell.
    import os
    from datetime import timezone

    if api_key is None:
        api_key = os.environ.get("NASA_API_KEY", "DEMO_KEY")

    # Timezone-aware replacement for the deprecated datetime.utcnow().
    now = datetime.now(timezone.utc)
    half_day = timedelta(days=0.5)
    start_date = (now - half_day).strftime("%Y-%m-%d")
    end_date = (now + half_day).strftime("%Y-%m-%d")

    url = "https://api.nasa.gov/neo/rest/v1/feed"
    params = {
        "start_date": start_date,
        "end_date": end_date,
        "api_key": api_key
    }

    # A timeout prevents the cell from hanging forever; raise_for_status()
    # surfaces HTTP errors (bad key, rate limit) instead of silently parsing
    # an error payload into an empty result.
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    neos_list = []

    for neos_on_date in data.get("near_earth_objects", {}).values():
        for neo in neos_on_date:
            approach_data = neo.get("close_approach_data", [])
            if not approach_data:
                # NEOs without close-approach data are skipped.
                continue

            # Only the first close-approach entry is used.
            approach = approach_data[0]
            diameter_m = neo["estimated_diameter"]["meters"]
            neos_list.append({
                "asteroid_name": neo["name"],
                "nasa_id": neo["id"],
                "nasa_site_url": neo["nasa_jpl_url"],
                "closest_approach_time_to_earth_IST": approach.get("close_approach_date_full"),
                "closest_approach_distance_km": float(approach["miss_distance"]["kilometers"]),
                "velocity_kmph": float(approach["relative_velocity"]["kilometers_per_hour"]),
                "diameter_min_m": float(diameter_m["estimated_diameter_min"]),
                "diameter_max_m": float(diameter_m["estimated_diameter_max"]),
                "is_potentially_hazardous": neo["is_potentially_hazardous_asteroid"]
            })

    return neos_list

# Example usage: calls neosapi() (a live HTTP request to the NASA API) and
# prints the entire list of returned records.
print(neosapi())








"""
    Fetches Near-Earth Object (NEO) data from NASA's public API for the UTC
    calendar dates that fall 12 hours before and 12 hours after the current
    UTC time. Only dates (YYYY-MM-DD) are sent in the request, so the results
    are not restricted to an exact 24-hour window.
    
    Returns:
        A list of dictionaries, each containing detailed information about
        a Near-Earth Object that has close-approach data in the requested date range.
    
    Each dictionary contains:
        - asteroid_name: The official name of the NEO.
        - nasa_id: Unique identifier of the NEO.
        - nasa_site_url: URL linking to NASA's Jet Propulsion Laboratory page for more info.
        - closest_approach_time_to_earth_IST: Date and time of the close approach as returned
          by the API (the notebook shifts this value from UTC to IST in a later cell).
        - closest_approach_distance_km: Closest distance the NEO will pass from Earth, in kilometers.
        - velocity_kmph: Speed of the NEO relative to Earth, in kilometers per hour.
        - diameter_min_m: Estimated minimum diameter of the NEO, in meters.
        - diameter_max_m: Estimated maximum diameter of the NEO, in meters.
        - is_potentially_hazardous: Boolean indicating if the NEO is classified as potentially hazardous.
    
    Note:
        - Requires a valid NASA API key.
        - Uses NASA's NEO Feed API endpoint.
"""

[{'asteroid_name': '(2012 TG53)', 'nasa_id': '3610030', 'nasa_site_url': 'https://ssd.jpl.nasa.gov/tools/sbdb_lookup.html#/?sstr=3610030', 'closest_approach_time_to_earth_IST': '2025-Jun-26 01:44', 'closest_approach_distance_km': 57245843.4919121, 'velocity_kmph': 76876.3804562626, 'diameter_min_m': 60.8912622106, 'diameter_max_m': 136.1570015386, 'is_potentially_hazardous': False}, {'asteroid_name': '(2015 FQ)', 'nasa_id': '3713320', 'nasa_site_url': 'https://ssd.jpl.nasa.gov/tools/sbdb_lookup.html#/?sstr=3713320', 'closest_approach_time_to_earth_IST': '2025-Jun-26 04:51', 'closest_approach_distance_km': 56614154.547927074, 'velocity_kmph': 70022.5041773768, 'diameter_min_m': 93.0154253961, 'diameter_max_m': 207.9888141418, 'is_potentially_hazardous': False}, {'asteroid_name': '(2015 XF261)', 'nasa_id': '3736497', 'nasa_site_url': 'https://ssd.jpl.nasa.gov/tools/sbdb_lookup.html#/?sstr=3736497', 'closest_approach_time_to_earth_IST': '2025-Jun-26 05:44', 'closest_approach_distance_km':

"\n    Fetches Near-Earth Object (NEO) data from NASA's public API within a\n    half-day window around the current UTC time (12 hours before and after now).\n    \n    Returns:\n        A list of dictionaries, each containing detailed information about\n        a Near-Earth Object expected to approach Earth in the specified time frame.\n    \n    Each dictionary contains:\n        - name: The official name of the NEO.\n        - id: Unique identifier of the NEO.\n        - nasa_jpl_url: URL linking to NASA's Jet Propulsion Laboratory page for more info.\n        - close_approach_time: Exact date and time when the NEO will closely approach Earth.\n        - miss_distance_km: Closest distance the NEO will pass from Earth, in kilometers.\n        - velocity_kph: Speed of the NEO relative to Earth, in kilometers per hour.\n        - diameter_min_m: Estimated minimum diameter of the NEO, in meters.\n        - diameter_max_m: Estimated maximum diameter of the NEO, in meters.\n        - is_p

In [2]:
import pandas as pd
from datetime import datetime
# from nasa_asteroid_tracker import neosapi

# Convert the API records to a DataFrame
df = pd.DataFrame(neosapi())

# Stop with a clear message when the feed is empty; otherwise the column
# lookups below would fail with an unrelated-looking KeyError.
if df.empty:
    raise ValueError("neosapi() returned no records; nothing to transform.")

# Data type optimization: identifiers/text as str, measurements rounded to 2 decimals.
# is_potentially_hazardous is deliberately left untouched here: casting it with
# astype(bool) would coerce missing values to booleans before determine_hazard()
# gets a chance to apply its fallback rules.
for column in ('asteroid_name', 'nasa_id', 'nasa_site_url'):
    df[column] = df[column].astype(str)
for column in ('closest_approach_distance_km', 'velocity_kmph', 'diameter_min_m', 'diameter_max_m'):
    df[column] = df[column].round(2)

# Parse the API timestamp (e.g. '2025-Jun-26 01:44') into a datetime
approach_time_utc = pd.to_datetime(df['closest_approach_time_to_earth_IST'], format='%Y-%b-%d %H:%M')

# Change UTC to IST (UTC+05:30); the result stays a naive (timezone-unaware) datetime
df['closest_approach_time_to_earth_IST'] = approach_time_utc + pd.Timedelta(hours=5, minutes=30)

# Clean asteroid_name: remove the brackets, then uppercase
df['asteroid_name'] = df['asteroid_name'].str.replace(r'[()]', '', regex=True).str.upper()

# Drop duplicate asteroids; reassign instead of inplace=True so the step is
# explicit and safe to re-run
df = df.drop_duplicates(subset=['nasa_id', 'asteroid_name'])

# unique data_id column: nasa_id combined with the load date and time (down to hundredths of a second)
def generate_data_id(nasa_id, now=None):
    """Build a load identifier of the form ``<nasa_id>-<DDMMYYHHMMSS><cc>``.

    Args:
        nasa_id: Identifier placed in front of the timestamp.
        now: Optional datetime to stamp the id with. Defaults to the current
            local time. Passing one shared value gives every row of a load
            the same timestamp.

    Returns:
        A string such as ``'3610030-26062504304853'``: day, month, year, hour,
        minute and second (two digits each) followed by two digits of
        hundredths of a second.
    """
    if now is None:
        now = datetime.now()
    # day, month, year, hour, minute, second (all 2 digits)
    stamp = now.strftime('%d%m%y%H%M%S')
    # microseconds -> hundredths of a second (0-99); integer division avoids a float round-trip
    centiseconds = now.microsecond // 10000
    return f"{nasa_id}-{stamp}{centiseconds:02d}"

# Tag every row with an id derived from its nasa_id and the time of the call
df['data_id'] = df['nasa_id'].map(generate_data_id)

# created_at records the date and time at which this data was loaded
df['created_at'] = datetime.now()


def _checked_nasa_url(url):
    """Return url unchanged if it is an https:// string containing 'nasa.gov', else 'url not found'."""
    if not isinstance(url, str):
        return 'url not found'
    if url.startswith('https://') and 'nasa.gov' in url:
        return url
    return 'url not found'


# Verify the URL column, replacing anything unexpected with a placeholder
df['nasa_site_url'] = df['nasa_site_url'].map(_checked_nasa_url)

#handle is_potentially_hazardous boolean value and fill if needed
def determine_hazard(row):
    """Resolve the hazard flag of one record to a plain True/False.

    The existing ``is_potentially_hazardous`` value is read as text
    (trimmed, case-insensitive): 'true'/'yes'/'1' give True and
    'false'/'no'/'0' give False. Any other value falls back to a size,
    distance and speed rule.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) indexed by column name.

    Returns:
        True or False.
    """
    flag = str(row['is_potentially_hazardous']).strip().lower()

    if flag in ('true', 'yes', '1'):
        return True
    if flag in ('false', 'no', '0'):
        return False

    # Unrecognised flag: treat as hazardous only when the object is large,
    # passes close and moves fast. The checks short-circuit left to right.
    meets_all_thresholds = (
        row['diameter_max_m'] > 150
        and row['closest_approach_distance_km'] < 1000000
        and row['velocity_kmph'] > 8000
    )
    return True if meets_all_thresholds else False
# Recompute the hazard flag row by row
df['is_potentially_hazardous'] = df.apply(determine_hazard, axis='columns')


# Make sure data_id is unique; when it is not, keep only the first row per data_id
if df['data_id'].is_unique:
    print("All 'data_id' values are unique.")
else:
    print("Duplicates found in 'data_id'. Removing duplicate rows, keeping the first occurrence.")
    df = df.drop_duplicates(subset=['data_id'], keep='first')

# for debugging
# print(df)

All 'data_id' values are unique.


In [3]:
# Preview the first rows of the transformed DataFrame
df.head()

Unnamed: 0,asteroid_name,nasa_id,nasa_site_url,closest_approach_time_to_earth_IST,closest_approach_distance_km,velocity_kmph,diameter_min_m,diameter_max_m,is_potentially_hazardous,data_id,created_at
0,2012 TG53,3610030,https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...,2025-06-26 07:14:00,57245843.49,76876.38,60.89,136.16,False,3610030-26062504304853,2025-06-26 04:30:48.530071
1,2015 FQ,3713320,https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...,2025-06-26 10:21:00,56614154.55,70022.5,93.02,207.99,False,3713320-26062504304853,2025-06-26 04:30:48.530071
2,2015 XF261,3736497,https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...,2025-06-26 11:14:00,46556904.2,38025.69,23.69,52.97,False,3736497-26062504304853,2025-06-26 04:30:48.530071
3,2017 BZ6,3767015,https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...,2025-06-27 00:54:00,27496195.33,13567.7,16.02,35.81,False,3767015-26062504304853,2025-06-26 04:30:48.530071
4,2017 BN32,3767261,https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...,2025-06-27 00:48:00,61579924.74,25996.88,29.14,65.17,False,3767261-26062504304853,2025-06-26 04:30:48.530071
