In [85]:
# Imports: standard library first, then third-party packages
import json
import os

import pandas as pd
import requests
from dotenv import load_dotenv

# Set environment variables from the .env in the local environment
load_dotenv()

# Retrieve API key and store as Python variable
api_key = os.getenv("NASA_API_KEY")

# Fail fast when the key is missing; otherwise the literal text "None"
# would be sent to the API as the key.
if not api_key:
    raise RuntimeError("NASA_API_KEY is not set; add it to your .env file")

# NASA DONKI endpoint for Coronal Mass Ejections (CMEs)
base_url = "https://api.nasa.gov/DONKI/CME"

# Date range for the CME search (start and end are the same day here)
start_date = "2024-05-05"
end_date = "2024-05-05"

# Latitude and longitude for North America.
# NOTE(review): these values are not referenced by the query built below.
latitude = 37.0902
longitude = -95.7129

# Build query URL
query = f"{base_url}?startDate={start_date}&endDate={end_date}&api_key={api_key}"

# Execute "GET" request with the query URL; the timeout (in seconds, value
# chosen arbitrarily) keeps the cell from hanging if the server never answers.
req = requests.get(url=query, timeout=30)

In [86]:
# Stop here when the request did not succeed: parsing the body of a failed
# response would either raise an unrelated decode error or treat an error
# payload as data.
if req.status_code != 200:
    raise RuntimeError(f"Error! {req.status_code}")

# Parse the response body as JSON
og_data = req.json()

# Use json.dumps with argument indent=4 to format data for reading
data = json.dumps(og_data, indent=4)
print(data)


[
    {
        "activityID": "2024-05-05T02:09:00-CME-001",
        "catalog": "M2M_CATALOG",
        "startTime": "2024-05-05T02:09Z",
        "instruments": [
            {
                "displayName": "STEREO A: SECCHI/COR2"
            },
            {
                "displayName": "SOHO: LASCO/C2"
            },
            {
                "displayName": "SOHO: LASCO/C3"
            }
        ],
        "sourceLocation": "N26W17",
        "activeRegionNum": 13663,
        "note": "[TRUE START TIME 2024-05-05T01:12Z IN LASCO C2 - RETAINING COR2A/T02:09Z START TIME AS NOTIFICATION SENT PRIOR TO UPDATE]. Narrow northern CME with filamentary structures which is similar to some previous CMEs from AR 3663. This CME is very likely associated with the M9.0 and M8.4 flares from AR 3663, peaking at 2024-05-04T23:48Z and 2024-05-05T01:27Z respectively, however no clear additional lower coronal signatures have been found in EUV imagery. This CME appears as two separately emerging fronts

In [93]:
# Build a Pandas DataFrame from the JSON body of the CME response
storms_df = pd.DataFrame(data=req.json())

# Preview two randomly selected rows
storms_df.sample(2)

Unnamed: 0,activityID,catalog,startTime,instruments,sourceLocation,activeRegionNum,note,submissionTime,versionId,link,cmeAnalyses,linkedEvents
3,2024-05-05T08:09:00-CME-001,M2M_CATALOG,2024-05-05T08:09Z,[{'displayName': 'STEREO A: SECCHI/COR2'}],,,Faint CME to the northeast closely following t...,2024-05-06T11:32Z,3,https://webtools.ccmc.gsfc.nasa.gov/DONKI/view...,"[{'isMostAccurate': True, 'time21_5': '2024-05...",[{'activityID': '2024-05-05T08:07:00-FLR-001'}]
6,2024-05-05T17:00:00-CME-001,M2M_CATALOG,2024-05-05T17:00Z,"[{'displayName': 'SOHO: LASCO/C2'}, {'displayN...",,,"Bright CME with H-alpha emissions, with the so...",2024-05-08T12:48Z,3,https://webtools.ccmc.gsfc.nasa.gov/DONKI/view...,"[{'isMostAccurate': True, 'time21_5': '2024-05...",


In [94]:
# Keep only the columns: activityID, startTime, linkedEvents.
# .copy() makes the working frame independent of storms_df, so later edits to
# it do not trigger SettingWithCopyWarning or write back into the source frame.
working_storms_df = storms_df[['activityID', 'startTime', 'linkedEvents']].copy()

# Preview two randomly selected rows
working_storms_df.sample(n=2)

Unnamed: 0,activityID,startTime,linkedEvents
2,2024-05-05T06:38:00-CME-001,2024-05-05T06:38Z,[{'activityID': '2024-05-05T05:47:00-FLR-001'}]
6,2024-05-05T17:00:00-CME-001,2024-05-05T17:00Z,


## 'for loop' and 'explode()'

Because the linkedEvents column sometimes contains multiple events per row, we want to spread these out into individual rows. We will illustrate two approaches for doing so:

1) a 'for loop' as illustrated before
2) the explode() function


In [43]:
# Initialize an empty list to store the expanded rows

# Iterate over each index in the DataFrame


# Create a new DataFrame from the expanded rows

# Use head() to show the dataframe


In [44]:
# Use the explode() function to expand the rows and drop missing observations

# Use head() to show the dataframe
