In [1]:
import itertools
import re
from pprint import pprint

import astropy.units as u
import sunpy.net.attr as attr
from astropy.time import Time, TimeDelta
from padre_meddea.util.util import parse_science_filename
from sunpy.extern.parse import parse
from sunpy.net import Fido
from sunpy.net import attrs as a
from sunpy.net.attr import SimpleAttr
from sunpy.net.dataretriever import GenericClient, QueryResponse
from sunpy.net.scraper import Scraper, get_timerange_from_exdict
from sunpy.time import TimeRange

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import sunpy

# Record the installed SunPy version: PADREClient below carries separate URL
# templates for SunPy >= 7.0.0 and SunPy < 7.0.0 (see the NOTE comments on the class).
sunpy.__version__

'6.0.6'

In [3]:
from typing import List


class DataType(SimpleAttr):
    """
    Search attribute selecting which kind of data product to query for.

    ``PADREClient.register_values`` registers the values ``spectrum``,
    ``photon`` and ``housekeeping`` for this attribute; queries in this
    notebook select one with e.g. ``DataType.spectrum``.

    Attributes
    ----------
    value : str
        The data type value.
    """


class PADREClient(GenericClient):
    """
    PADRE data client for accessing MeDDEA data files from the PADRE mission.

    Files are looked up on ``https://umbra.nascom.nasa.gov/padre/`` using two
    URL templates: one for FITS products (levels ``l0``, ``l1`` and above)
    and one for raw ``.DAT`` files.
    """

    # NOTE: Works for SunPy >= v7.0.0
    # pattern = (
    #     "https://umbra.nascom.nasa.gov/padre/padre-{{Instrument}}/{{Level}}/{{DataType}}/{{year:4d}}/{{month:2d}}/{{day:2d}}/"
    #     "padre_{{Instrument}}_{{Level}}_{{DataType}}_{{year:4d}}{{month:2d}}{{day:2d}}T{{hour:2d}}{{minute:2d}}{{second:2d}}_v0.1.0.fits"
    # )

    # NOTE: When working with SunPy < v7.0.0, use the following combination of baseurl and pattern
    # Used for Data Levels "l0", "l1" and above
    # Example: https://umbra.nascom.nasa.gov/padre/padre-meddea/l0/spectrum/2025/05/01/padre_meddea_l0_spectrum_20250501T000000_v0.1.0.fits
    baseurl_l0_plus = (
        r"https://umbra.nascom.nasa.gov/padre/padre-{{Instrument}}/{{Level}}/{{DataType}}/%Y/%m/%d/"
        r"padre_{{Instrument}}_{{Level}}(?:test)?_{{DataType}}_%Y%m%dT%H%M%S_v0.1.0.fits"
    )
    pattern_l0_plus = "{}/padre_{Instrument}_{Level}_{DataType}_{year:4d}{month:2d}{day:2d}T{hour:2d}{minute:2d}{second:2d}_v0.1.0.fits"

    # Used for Data Level "raw"
    # Example: https://umbra.nascom.nasa.gov/padre/padre-meddea/raw/2025/05/01/PADREMDA0_250504055133.DAT
    baseurl_raw = (
        r"https://umbra.nascom.nasa.gov/padre/padre-{{Instrument}}/{{Level}}/%Y/%m/%d/"
        r"PADREMD{{DataType}}_%y%m%d%H%M%S.DAT"
    )
    pattern_raw = "{}/PADREMD{DataType}_{year:2d}{month:2d}{day:2d}{hour:2d}{minute:2d}{second:2d}.DAT"

    # Raw file names carry a two-character code in place of the data type name
    # (e.g. "A0" in PADREMDA0_250504055133.DAT).
    _raw_datatype_codes = {
        "photon": "A0",
        "spectrum": "A2",
        "housekeeping": "U8",
    }

    @classmethod
    def register_values(cls):
        """
        Return the attribute values this client can be queried with.

        Returns
        -------
        `dict`
            Maps each attribute class to a list of ``(value, description)``
            tuples.
        """
        adict = {
            a.Provider: [("sdac", "The Solar Data Analysis Center.")],
            a.Source: [
                ("padre", "(The Solar Polarization and Directivity X-Ray Experiment)")
            ],
            a.Instrument: [
                (
                    "meddea",
                    "Measuring Directivity to Determine Electron Anisotropy (MeDDEA)",
                ),
            ],
            DataType: [
                ("spectrum", "Spectrum data from MeDDEA."),
                ("photon", "Photon data from MeDDEA."),
                ("housekeeping", "Housekeeping data from MeDDEA."),
            ],
            a.Level: [
                ("raw", "Raw Binary CCSDS Packet data"),
                ("l0", "Raw data, converted to FITS, not in physical units."),
                ("l1", "Processed data, not in physical units."),
            ],
        }
        return adict

    def search(self, *args, **kwargs):
        """
        Query this client for a list of results.

        The FITS template (``baseurl_l0_plus``) is always searched. The raw
        template (``baseurl_raw``) is searched as well when ``"raw"`` is among
        the ``Level`` constraints of the query.

        Parameters
        ----------
        \\*args: `tuple`
            `sunpy.net.attrs` objects representing the query.
        \\*\\*kwargs: `dict`
             Any extra keywords to refine the search.

        Returns
        -------
        A `QueryResponse` instance containing the query result.
        """
        matchdict = self._get_match_dict(*args, **kwargs)

        # Each entry is (URL template, filename extractor, matcher). The raw
        # entry gets its own adapted copy of the matcher, so the FITS search
        # never sees raw-specific substitutions regardless of search order.
        search_space = [(self.baseurl_l0_plus, self.pattern_l0_plus, matchdict)]
        level_constraints = matchdict.get("Level", {})
        if "raw" in level_constraints:
            search_space.append(
                (self.baseurl_raw, self.pattern_raw, self._raw_matchdict(matchdict))
            )

        # One row of metadata per file found
        metalist = []
        for baseurl, pattern, matcher in search_space:
            # The time window is the same for every URL built from one template
            timerange = TimeRange(matcher["Start Time"], matcher["End Time"])
            for scraper_url in self.generate_url_permutations(baseurl, matcher):
                # Scrape with the fully populated URL pattern
                scraper = Scraper(pattern=scraper_url)
                filesmeta = scraper._extract_files_meta(
                    timerange, extractor=pattern, matcher=matcher
                )
                for filemeta in sorted(filesmeta, key=lambda meta: meta["url"]):
                    rowdict = self.post_search_hook(filemeta, matcher)
                    if rowdict:
                        metalist.append(rowdict)
        return QueryResponse(metalist, client=self)

    def _raw_matchdict(self, matchdict):
        """
        Return a copy of ``matchdict`` adapted to the raw file naming scheme.

        The input dictionary is left untouched. In the copy, data type names
        are replaced by their two-character raw codes (unknown names pass
        through unchanged) and the start/end times are rewritten with a
        two-digit year.
        """
        raw_matchdict = matchdict.copy()
        raw_matchdict["DataType"] = [
            self._raw_datatype_codes.get(data_type, data_type)
            for data_type in matchdict.get("DataType", [])
        ]
        # NOTE(review): this moves the search window to year 00YY (2025 -> 0025),
        # presumably to line up with the two-digit year in raw file names.
        # baseurl_raw also contains a four-digit %Y directory component, so
        # confirm the scraper resolves directories as intended with this window.
        raw_matchdict["Start Time"] = self._with_two_digit_year(
            matchdict.get("Start Time")
        )
        raw_matchdict["End Time"] = self._with_two_digit_year(
            matchdict.get("End Time")
        )
        return raw_matchdict

    @staticmethod
    def _with_two_digit_year(time):
        """
        Rebuild ``time`` keeping only the last two digits of its year.

        The result is a new `Time` whose year is ``00YY``; sub-second
        precision is dropped.
        """
        dt = time.datetime
        year_2digit = str(dt.year)[-2:]
        return Time(
            f"00{year_2digit}-{dt.month:02d}-{dt.day:02d}"
            f"T{dt.hour:02d}:{dt.minute:02d}:{dt.second:02d}.000"
        )

    def generate_url_permutations(self, pattern, matchdict):
        """
        Expand ``{{Name}}`` placeholders in ``pattern`` into concrete URLs.

        Parameters
        ----------
        pattern : `str`
            URL template containing ``{{Name}}`` or ``{{Name:spec}}``
            placeholders.
        matchdict : `dict`
            Maps placeholder names to a value or a list of values.
            Placeholders without an entry are left in the URL unchanged.

        Returns
        -------
        `list` of `str`
            One URL per combination of placeholder values, in a deterministic
            order (placeholders in order of first appearance, values in the
            order given).
        """
        # Extract placeholder names from the pattern
        placeholder_regex = r"{{([^:}]+)(?::.*?)?}}"
        found = re.findall(placeholder_regex, pattern)

        # dict.fromkeys drops duplicates but keeps first-seen order, so the
        # generated URL order is reproducible from run to run.
        valid_placeholders = [
            name for name in dict.fromkeys(found) if name in matchdict
        ]

        # Normalise every entry to a list of candidate values
        placeholder_values = [
            matchdict[name] if isinstance(matchdict[name], list) else [matchdict[name]]
            for name in valid_placeholders
        ]

        # One compiled matcher per placeholder, covering both the bare form
        # and the form with a format specifier; the name is escaped so it is
        # always matched literally.
        matchers = {
            name: re.compile(r"\{\{" + re.escape(name) + r"(?::.*?)?\}\}")
            for name in valid_placeholders
        }

        urls = []
        for combo in itertools.product(*placeholder_values):
            url = pattern
            for name, value in zip(valid_placeholders, combo):
                text = str(value)
                # A callable replacement inserts the text verbatim (no
                # backslash/group-reference interpretation).
                url = matchers[name].sub(lambda _match, text=text: text, url)
            urls.append(url)

        return urls

## Test Getting Raw, Level 0, and Level 1 Data All Together

In [4]:
# Spectrum data for the first days of May 2025, with no Level constraint
spectrum_query = (
    a.Time("2025-05-01", "2025-05-05") & a.Instrument.meddea & DataType.spectrum
)
results = Fido.search(spectrum_query)
results



Start Time,End Time,Provider,Source,Instrument,DataType,Level
Time,Time,str4,str5,str6,str8,str2
2025-05-04 00:00:00.000,2025-05-04 00:00:00.999,SDAC,PADRE,meddea,spectrum,l1


## Test Getting All Level 1 Data Across All Data Types

In [5]:
# Level 1 data for the same window, with no DataType constraint
l1_query = a.Time("2025-05-01", "2025-05-05") & a.Instrument.meddea & a.Level.l1
results = Fido.search(l1_query)
results

Start Time,End Time,Provider,Source,Instrument,DataType,Level
Time,Time,str4,str5,str6,str12,str2
2025-05-04 00:00:00.000,2025-05-04 00:00:00.999,SDAC,PADRE,meddea,spectrum,l1
2025-05-04 00:00:00.000,2025-05-04 00:00:00.999,SDAC,PADRE,meddea,housekeeping,l1


## Test Getting L1 Photon Data (Does not exist yet)

In [6]:
# Level 1 photon data for the same window
l1_photon_query = (
    a.Time("2025-05-01", "2025-05-05")
    & a.Instrument.meddea
    & a.Level.l1
    & DataType.photon
)
results = Fido.search(l1_photon_query)
results

## Test Getting all RAW Data

In [7]:
# Raw-level data for the same window, with no DataType constraint
raw_query = a.Time("2025-05-01", "2025-05-05") & a.Instrument.meddea & a.Level.raw
results = Fido.search(raw_query)
results



In [8]:
import tempfile

# Download into a directory that outlives this cell. A TemporaryDirectory
# context deletes the directory and everything in it on exit, which would
# leave ``downloaded_files`` listing paths that no longer exist.
download_dir = tempfile.mkdtemp(prefix="padre_")
downloaded_files = Fido.fetch(results, path=download_dir)
downloaded_files

Files Downloaded: 0file [00:00, ?file/s]


<parfive.results.Results object at 0x125b14850>
[]