In [1]:
#%%capture output

# once finished, try on all HAPI records and export output to file by doing the top and bottom comments

from datetime import datetime, timedelta
import Scripts
from Scripts import create_sqlite_database, execution, executionALL
from hapiclient import hapi
from time import sleep
import signal

def timeout_handler(signum, frame):
    raise TimeoutError("Program execution timed out")

# check and see if data can be retrieved from HAPI server
def DataChecker(server, dataset, start, stop, parameters, attempts):
    # control loop
    dataFound = False
    timerTripped = False
    
    # register handler function called when alarm received
    signal.signal(signal.SIGALRM, timeout_handler)
    
    # place in a try/except to wait x seconds and try again if this fails
    try:
        # Get data
        #sleep(5.0)
        # set alarm to trigger after 10s
        signal.alarm(10)
        data, meta = hapi(server, dataset, parameters, start, stop)

    # if error(s) arise
    except TimeoutError:
        print("Data retrieval took longer than 10 sec")
        timerTripped = True
    except HAPIError:
        print("HAPI Error raised, trying again at next interval")

    # if no error arises
    else:
        # search for data
        while not dataFound:
            if len(data) == 0:
                attempts += 1
                return dataFound, attempts, timerTripped
            for entry in data:
                #print(type(data[0][1]))
                
                # once data is found, end loop
                if str(data[0][0]) != "":
                    dataFound = True
                    print("Data was successfully accessed")
                    print("Example data looks like " + str(data[0][0]))
                    attempts += 1
                    return dataFound, attempts, timerTripped
    finally:
        attempts += 1
        return dataFound, attempts, timerTripped

# retrieve prodKeys associated with HAPI URLs

# input abs path of database file you wish to query from
conn = create_sqlite_database("/home/jovyan/HDRL-Internship-2024/SPASE_Data_20240716.db")

# fails data checks 
HapiStmt = """SELECT prodKey FROM MetadataEntries WHERE url LIKE "%/hapi" LIMIT 2 OFFSET 7"""
#HapiStmt = """SELECT prodKey FROM MetadataEntries WHERE url LIKE "%/hapi" LIMIT 1 OFFSET 21"""
#HapiStmt = """SELECT prodKey FROM MetadataEntries WHERE url LIKE "%/hapi" LIMIT 1 OFFSET 58"""
#HapiStmt = """SELECT prodKey FROM MetadataEntries WHERE url LIKE "%/hapi" LIMIT 5 OFFSET 64"""


# example w multiple prodKeys
#HapiStmt = """SELECT prodKey FROM MetadataEntries WHERE url LIKE "%/hapi" LIMIT 1 OFFSET 45"""

#HapiStmt = """ SELECT prodKey FROM MetadataEntries WHERE url LIKE "%/hapi" LIMIT 1 OFFSET 67"""

"""spase://NASA/NumericalData/AMPTE-CCE/MEPA/PT0.1875S
spase://NASA/NumericalData/DE2/IDM/PT0.25S
spase://NASA/NumericalData/FAST/MAG/Fluxgate/PT7.8125MS
spase://NASA/NumericalData/ISIS1/SFS/Ionogram/PT29S
spase://NASA/NumericalData/MESSENGER/MAG/PT0.05S"""

prodKeys = []
prodKeys = execution(HapiStmt, conn)
print("The prodKeys are " + str(prodKeys))


server = 'https://cdaweb.gsfc.nasa.gov/hapi'
#dataset    = 'HELIOS1_E6_1HOUR_PARTICLE_FLUX'
#start      = '2011-08-06T00:00:00'
#stop       = '2011-08-06T00:30:00'
#parameters = 'SE_LAT'

# iterate thru prodKeys to assign as dataset
for prodKey in prodKeys:
    # check if multiple prodKeys for same URL (mult keys in one string)
    if ", " in prodKey:
        print("This HAPI URL has multiple product keys.")
        index = prodKeys.index(prodKey)
        prodKey = prodKey.replace("\'", "")
        # keep separating them until each prodKey is in own string
        # while ", " in prodKey:
        before, sep, after = prodKey.partition(", ")
        prodKeys[index] = before # remove this line if need to check all keys
        #prodKeys[index] = after
        #prodKeys.insert(index, before)
        prodKey = prodKeys[index]
    dataset = str(prodKey)
    
    # control loop
    dataFound = False
    timerTripped = False
    # list that holds all parameters for a given server
    paramNames =  []

    # retrieve all parameters and the start date from the server
    sleep(5.0)
    meta = hapi(server,dataset)
    # get parameters
    for k, v in meta.items():
        if k == "parameters":
            for params in v:
                for key, value in params.items():
                    if key == "name":
                        paramNames.append(value)
        # get start date
        elif k == "startDate":
            start = v

    #print(paramNames)
    
    # count of attempts to get data
    attempts = 0
    # dictionary that holds datetime obj for each interval
    intervals = {}
    intervals["1s"] = ""
    intervals["10s"] = ""
    intervals["1min"] = ""
    intervals["10min"] = ""
    intervals["1hr"] = ""
    intervals["1d"] = ""
    intervals["3d"] = ""
    intervals["1w"] = ""
    intervals["1mon"] = ""
    
    # create incremental intervals to test for data check
    date, sep, time = start.partition("T")
    time = time.replace("Z", "")
    dt_string = date + " " + time
    dt_obj = datetime.strptime(dt_string, "%Y-%m-%d %H:%M:%S")
    # 1 second
    intervals["1s"] = dt_obj + timedelta(seconds=1)
    # 10 seconds
    intervals["10s"] = dt_obj + timedelta(seconds=10)
    # 1 minute
    intervals["1min"] = dt_obj + timedelta(minutes=1)
    # 10 minutes
    intervals["10min"] = dt_obj + timedelta(minutes=10)
    # hour
    intervals["1hr"] = dt_obj + timedelta(hours=1)
    # day
    intervals["1d"] = dt_obj + timedelta(days=1)
    # 3 days
    intervals["3d"] = dt_obj + timedelta(days=3)
    # week
    intervals["1w"] = dt_obj + timedelta(weeks=1)
    # month
    intervals["1mon"] = dt_obj + timedelta(weeks=4,days=2)

    # to iterate thru all parameters in a server, use the for loop below to enclose the code below
    # for parameters in paramNames[1:]:
    
    # UNFINISHED: have data check occur in increasingly larger start/stop intervals until data is returned
    #              and only check for one parameter (Time is fine)
    parameter = paramNames[0]
    for k, v in intervals.items():
        if not dataFound and not timerTripped:
            stop = str(v)
            stop = stop.replace(" ", "T") + "Z"
            print("Checking parameter " + str(parameter) + " in HAPI record with id " + dataset +
                  " at the interval of " + str(k))
            dataFound, attempts, timerTripped = DataChecker(server, dataset, start, stop, parameter, attempts)
    # if all intervals fail or first interval takes too long -> no data
    if not dataFound:
    # inputs "HAPI info check passed after 1 attempt. HAPI data check failed after x attempts."
    #     into "Error" column in TestResults associated with that HAPI URL
        print("No data was found")
        HAPIErrorStmt = f""" UPDATE TestResults
                                SET Errors = 'HAPI info check passed after 1 attempt. HAPI data check \
                                failed after {attempts} attempts.'
                                FROM (SELECT SPASE_id, prodKey FROM TestResults
                                        INNER JOIN MetadataEntries USING (SPASE_id))
                                WHERE prodKey = '{dataset}' """
        Record_id = execution(f""" SELECT rowNum 
                                FROM (SELECT TestResults.rowNum, SPASE_id, prodKey FROM TestResults 
                                    INNER JOIN MetadataEntries USING (SPASE_id))
                                WHERE prodKey = '{dataset}';""", conn)
        executionALL(HAPIErrorStmt, conn)
        print(f"Sent error message to a TestResults entry with the row number {Record_id}")

#with open("../DatalinkCheckOutput.txt", "w") as file:
 #   file.write(output.stdout)

The prodKeys are ['PO_K0_PIX', 'PO_H0_UVI']
Checking parameter Time in HAPI record with id PO_K0_PIX at the interval of 1s
Checking parameter Time in HAPI record with id PO_K0_PIX at the interval of 10s
Checking parameter Time in HAPI record with id PO_K0_PIX at the interval of 1min
Checking parameter Time in HAPI record with id PO_K0_PIX at the interval of 10min
Checking parameter Time in HAPI record with id PO_K0_PIX at the interval of 1hr
Checking parameter Time in HAPI record with id PO_K0_PIX at the interval of 1d
Checking parameter Time in HAPI record with id PO_K0_PIX at the interval of 3d
Checking parameter Time in HAPI record with id PO_K0_PIX at the interval of 1w
Checking parameter Time in HAPI record with id PO_K0_PIX at the interval of 1mon
No data was found
Sent error message to a TestResults entry with the row number [2919, 2919]


TimeoutError: Program execution timed out

In [None]:
print("The program is done!")

In [16]:
# call .py file directly from notebook
#%run ./HAPICheck.py > ../DatalinkCheckOutput.txt