In [1]:
import re
import itertools
from pprint import pprint

from sunpy.net import Fido, attrs as a
from sunpy.net.attr import SimpleAttr
from sunpy.net.dataretriever import GenericClient, QueryResponse
from sunpy.net.scraper import Scraper
from sunpy.time import TimeRange

from padre_meddea.util.util import parse_science_filename

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import sunpy

sunpy.__version__

'6.0.6'

In [3]:
class DataType(SimpleAttr):
    """
    Attribute for specifying the data type for the search.

    Attributes
    ----------
    value : str
        The data type value.
    """


class PADREClient(GenericClient):
    """
    PADRE data client for accessing spectrum data from the PADRE mission.
    """
    
    # NOTE: Works for SunPy >= v7.0.0
    # pattern = (
    #     "https://umbra.nascom.nasa.gov/padre/padre-{{Instrument}}/{{Level}}/{{DataType}}/{{year:4d}}/{{month:2d}}/{{day:2d}}/"
    #     "padre_{{Instrument}}_{{Level}}_{{DataType}}_{{year:4d}}{{month:2d}}{{day:2d}}T{{hour:2d}}{{minute:2d}}{{second:2d}}_v0.1.0.fits"
    # )
    
    # NOTE: When working with SunPy < v7.0.0, use the following combination of baseurl and pattern
    baseurl = (
        r"https://umbra.nascom.nasa.gov/padre/padre-{{Instrument}}/{{Level}}/{{DataType}}/%Y/%m/%d/"
        r"padre_{{Instrument}}_{{Level}}_{{DataType}}_%Y%m%dT%H%M%S_v0.1.0.fits"
    )
    pattern = (
        "{}/padre_{Instrument}_{Level}_{DataType}_{year:4d}{month:2d}{day:2d}T{hour:2d}{minute:2d}{second:2d}_v0.1.0.fits"
    )

    @classmethod
    def register_values(cls):
        adict = {
            a.Provider: [("sdac", "The Solar Data Analysis Center.")],
            a.Source: [
                ("padre", "(The Solar Polarization and Directivity X-Ray Experiment)")
            ],
            a.Instrument: [
                (
                    "meddea",
                    "Measuring Directivity to Determine Electron Anisotropy (MeDDEA)",
                ),
            ],
            DataType: [
                ("spectrum", "Spectrum data from MeDDEA."),
                ("photon", "Photon data from MeDDEA."),
                ("housekeeping", "Housekeeping data from MeDDEA."),
            ],
            a.Level: [
                ("l0", "Raw data, converted to FITS, not in physical units."),
                ("l1", "Processed data, not in physical units."),
            ],
        }
        return adict

    def search(self, *args, **kwargs):
        """
        Query this client for a list of results.

        Parameters
        ----------
        \\*args: `tuple`
            `sunpy.net.attrs` objects representing the query.
        \\*\\*kwargs: `dict`
             Any extra keywords to refine the search.

        Returns
        -------
        A `QueryResponse` instance containing the query result.
        """
        # baseurl added for backwards compatibility purposes only
        baseurl, pattern, matchdict = self.pre_search_hook(*args, **kwargs)
        # print(
        #     f"Finished _pre_search_hook with baseurl: {baseurl}, pattern: {pattern}, matchdict: {matchdict}"
        # )
        scraper_urls = self.generate_url_permutations(baseurl, matchdict)
        # print(f"Generated URLs: {scraper_urls}")
        metalist = []
        for scraper_url in scraper_urls:
            # Create Scraper instance with the complete URL pattern
            scraper = Scraper(pattern=scraper_url)
            tr = TimeRange(matchdict["Start Time"], matchdict["End Time"])
            # Get File Metadata for the populated URL Pattern & Time Range
            filesmeta = scraper._extract_files_meta(tr, extractor=pattern, matcher=matchdict)
            # print(f"Finished _extract_files_meta with filesmeta: {filesmeta}")
            filesmeta = sorted(filesmeta, key=lambda k: k["url"])
            for i in filesmeta:
                rowdict = self.post_search_hook(i, matchdict)
                
                # NEW - use parse_science_filename to extract metadata from the filename
                # filename = i["url"].split("/")[-1]
                # filename_meta = parse_science_filename(filename)
                # print(f"Parsed filename metadata: {filename_meta}")
                # rowdict.update(filename_meta)
                
                if rowdict:
                    metalist.append(rowdict)
        pprint(f"Final metalist: {metalist}")
        return QueryResponse(metalist, client=self)

    def generate_url_permutations(self, pattern, matchdict):
        # Extract placeholders from the pattern
        placeholder_regex = r"{{([^:}]+)(?::.*?)?}}"
        placeholders = re.findall(placeholder_regex, pattern)

        # Filter to only placeholders we have in our dictionary
        valid_placeholders = [p for p in placeholders if p in matchdict]
        # make sure we don't have duplicates
        valid_placeholders = list(set(valid_placeholders))

        # Get all values for each placeholder
        placeholder_values = [
            matchdict[p] if isinstance(matchdict[p], list) else [matchdict[p]]
            for p in valid_placeholders
        ]

        # Generate all combinations
        urls = []
        for combo in itertools.product(*placeholder_values):
            url = pattern
            for placeholder, value in zip(valid_placeholders, combo):
                url = url.replace(f"{{{{{placeholder}}}}}", str(value))
                # Handle any format specifiers
                format_pattern = re.compile(f"{{{{{placeholder}:.*?}}}}")
                url = format_pattern.sub(str(value), url)
            urls.append(url)

        return urls

In [4]:
# Show Added Client to Fido
Fido

Client,Description
CDAWEBClient,Provides access to query and download from the Coordinated Data Analysis Web (CDAWeb).
ADAPTClient,Provides access to the ADvanced Adaptive Prediction Technique (ADAPT) products of the National Solar Observatory (NSO).
EVEClient,Provides access to Level 0CS Extreme ultraviolet Variability Experiment (EVE) data.
GBMClient,Provides access to data from the Gamma-Ray Burst Monitor (GBM) instrument on board the Fermi satellite.
XRSClient,Provides access to several GOES XRS files archive.
SUVIClient,Provides access to data from the GOES Solar Ultraviolet Imager (SUVI).
GONGClient,Provides access to the Magnetogram products of NSO-GONG synoptic Maps.
LYRAClient,Provides access to the LYRA/Proba2 data archive.
NOAAIndicesClient,Provides access to the NOAA solar cycle indices.
NOAAPredictClient,Provides access to the NOAA SWPC predicted sunspot Number and 10.7 cm radio flux values.


In [5]:
results = Fido.search(
    a.Time("2025-05-01", "2025-05-05") & a.Instrument.meddea & a.Level.l1
)

("Final metalist: [OrderedDict([('Start Time', <Time object: scale='utc' "
 "format='iso' value=2025-05-04 00:00:00.000>), ('End Time', <Time object: "
 "scale='utc' format='iso' value=2025-05-04 00:00:00.999>), ('Provider', "
 "'SDAC'), ('Source', 'PADRE'), ('Instrument', 'meddea'), ('DataType', "
 "'spectrum'), ('Level', 'l1'), ('url', "
 "'https://umbra.nascom.nasa.gov/padre/padre-meddea/l1/spectrum/2025/05/04/padre_meddea_l1_spectrum_20250504T000000_v0.1.0.fits')]), "
 "OrderedDict([('Start Time', <Time object: scale='utc' format='iso' "
 "value=2025-05-04 00:00:00.000>), ('End Time', <Time object: scale='utc' "
 "format='iso' value=2025-05-04 00:00:00.999>), ('Provider', 'SDAC'), "
 "('Source', 'PADRE'), ('Instrument', 'meddea'), ('DataType', 'housekeeping'), "
 "('Level', 'l1'), ('url', "
 "'https://umbra.nascom.nasa.gov/padre/padre-meddea/l1/housekeeping/2025/05/04/padre_meddea_l1_housekeeping_20250504T000000_v0.1.0.fits')])]")


In [6]:
results

Start Time,End Time,Provider,Source,Instrument,DataType,Level
Time,Time,str4,str5,str6,str12,str2
2025-05-04 00:00:00.000,2025-05-04 00:00:00.999,SDAC,PADRE,meddea,spectrum,l1
2025-05-04 00:00:00.000,2025-05-04 00:00:00.999,SDAC,PADRE,meddea,housekeeping,l1


In [7]:
import tempfile

with tempfile.TemporaryDirectory() as temp_dir:
    downloaded_files = Fido.fetch(results, path=temp_dir)
downloaded_files

Files Downloaded:   0%|          | 0/2 [00:00<?, ?file/s]
[A
[A
Files Downloaded: 100%|██████████| 2/2 [00:00<00:00,  3.12file/s]


<parfive.results.Results object at 0x10eee1ab0>
['/var/folders/5l/_5r0pdg15fxg1_rkgmd3c1dm0000gn/T/tmp3slsdp0l/padre_meddea_l1_spectrum_20250504T000000_v0.1.0.fits', '/var/folders/5l/_5r0pdg15fxg1_rkgmd3c1dm0000gn/T/tmp3slsdp0l/padre_meddea_l1_housekeeping_20250504T000000_v0.1.0.fits']