In [1]:
import importlib
import json
import os

import numpy as np
from netCDF4 import Dataset, Variable  # pylint: disable=E0611

import glob 
from pathlib import Path
from datetime import datetime, timedelta
from typing import Union, List, Dict, Any

In [2]:

# Names of the datasets AltDataset accepts: its constructor raises ValueError
# for any dataset name that is not listed here.
dataset_list = [
    "cryotempo_li",
    "ev_gdr",
    "e1_gdr",
    "e2_gdr",
    "cs2_l2i",
    "s3a_l2",
    "s3b_l2",
    "e1_fdr4alt",
    "e2_fdr4alt",
    "ev_fdr4alt",
]

class AltDataset:
    """Class to support Altimetry data sets."""

    def __init__(  # pylint :disable=R0917
        self,
        name: str,
        mission: str,
        level: str = "l2",
        overrides: dict | None = None,
        dataset_filename: str | None = None,
    ):
        """
        Class initialization.

        Args:
            name (str): Dataset name.
            **overrides: Optional keyword arguments to override default parameters.
        """
        self.name = name
        self.level = level
        self.mission = mission

        try:
            self.load_dataset(overrides, dataset_filename)
        except ImportError as exc:
            raise ImportError(f"Error loading dataset {name}") from exc

        if name not in dataset_list:
            raise ValueError(f"{name} is not a supported dataset in AltDataset class")

    def load_dataset(self, overrides: dict | None = None, dataset_filename: str | None = None):
        """Load dataset settings for current dataset name"""
        if dataset_filename is None:
            try:
                module = importlib.import_module(
                    f"cpom.altimetry.datasets.definitions.{self.level}.{self.mission}.{self.name}"
                )
            except ImportError as exc:
                raise ImportError(f"Could not load dataset definition {self.name}") from exc
        else:
            # Support passing a custom dataset definition file
            pass

        dataset_params = module.dataset_definition
        for k, v in dataset_params.items():
            if overrides:
                setattr(self, k, overrides.get(k, v))
            else:
                setattr(self, k, v)

        # Set any additional overrides not in the JSON
        if overrides:
            for k, v in overrides.items():
                if k not in dataset_params:
                    setattr(self, k, v)

    #------------------------------------------# 
    # Getters for L1 dirs, files and variables #
    #------------------------------------------#           

    #------------------------------------------# 
    # Getters for L2 dirs, files and variables #
    #------------------------------------------#
    def get_l2_dir(
        self, cyclenum: int | None = None, hemisphere: str | None = None, theme: str = "land_ice"
    ) -> str:
        """Get the L2 directory for a data product. If specified filters to specified cycle.
        For FDR4ALT data products filters to hemisphere, and theme.

        Args:
            cyclenum (int): Cycle number to filter by. Defaults to None.
            hemisphere (str): Hemisphere (FDR4ALT only). Defaults to None.
            theme (str): Theme (FDR4ALT only). Defaults to "land_ice".

        Returns:
            str: The L2 directory path.
        """
        if cyclenum is None:
            return self.l2_dir
        if "fdr4alt" in self.name:
            if hemisphere == "north":
                area = "/greenland"
            if hemisphere == "south":
                area = "/antarctica"
            else:
                area = "/*"
            return f"{self.l2_dir}/{area}/{theme}/Cycle_{cyclenum:03d}"

        if self.mission in ["ev"]:
            return f"{self.l2_dir}/cycle_{cyclenum:03d}"
        if self.mission in ["s3a", "s3b"]:
            return f"{self.l2_dir}/cycle{cyclenum:03d}"
        if self.mission in ["e1", "e2"]:
            return f"{self.l2_dir}/CYCLE{cyclenum:02d}"
        
    def get_product_startdate_from_filename(self, filename: str, yyyymm_str_fname_indices: list[int]) -> tuple:
        """
        Extract L2 product start date from the filename
        filename is the full path of a L2 file
        returns datetime and  integer (year, month, day)
        """
        filename = Path(filename)
        if ".SEN3" in filename.parent.name:
            fname = filename.parent.name
        else:
            fname = filename.name

        date_obj = datetime.strptime(fname[yyyymm_str_fname_indices[0]:yyyymm_str_fname_indices[1]],"%Y%m%d")

        if date_obj is None:
            raise ValueError(f"Could not extract date from filename {filename}")

        return date_obj, date_obj.year, date_obj.month, date_obj.day

    def get_files(
        self,
        min_dt_time=None,
        max_dt_time=None,
        cyclenum: int | None = None,
        modes: List = ["lrm", "sin"],
        hemisphere: str | None = None,  # Optional hemisphere filter for fdr4alt
        theme: str = "land_ice",  # Optional theme filter for fdr4alt
    ) -> List[Path]: # pylint: disable=R0917
        """
        Returns an array of files for dataset <self.name>.
        Options:
            - Select by cycle number.
            - Select by date range (min_dt_time, max_dt_time).

        Args:
            min_dt_time (datetime | str, optional): Min time datetime object or "YYYYMMDD" string.
            max_dt_time (datetime | str, optional): Max time datetime object or "YYYYMMDD" string.
            cyclenum (int | None, optional): Cycle number. Defaults to None.
            modes (List[str], optional): cs2 modes to load(cs2 only). Defaults to ["lrm", "sin"].
            hemisphere (str | None, optional): Hemisphere (FDR4ALT only). Defaults to None.
            theme (str, optional): Theme (FDR4ALT only). Defaults to "land_ice".
        Returns:
            List[Path]: List of L2 files matching the search criteria.
        """

        def _get_files_by_date(
            search_dir, search_pattern, yyyymm_str_fname_indices, min_dt_time, max_dt_time
        ):
            valid_files = []
            for file in Path(search_dir).rglob(search_pattern):
                if min_dt_time is None or max_dt_time is None:
                    valid_files.append(file)
                    continue
                date_obj, _, _, _ = self.get_product_startdate_from_filename(file, yyyymm_str_fname_indices)
                if date_obj is not None and min_dt_time <= date_obj <= max_dt_time:
                    valid_files.append(file)
            return valid_files

        base_dir = self.get_l2_dir(cyclenum, hemisphere, theme)
        
        if isinstance(min_dt_time, str):
            min_dt_time = datetime.strptime(min_dt_time, "%Y%m%d")
            max_dt_time = datetime.strptime(max_dt_time, "%Y%m%d")
        
        search_dir = (
            Path(base_dir) / f"{min_dt_time.year:04d}" / f"{min_dt_time.month:02d}"
            if min_dt_time.year == max_dt_time.year and min_dt_time.month == max_dt_time.month else
            Path(base_dir) / f"{min_dt_time.year:04d}"
            if min_dt_time.year == max_dt_time.year else
            Path(base_dir)
        )

        search_dir = search_dir if search_dir.is_dir() else base_dir

        if self.name in ["cs2_l2i"]:
            valid_files = []
            for mode in modes:
                mode_config = getattr(self, mode)
                mode_files = _get_files_by_date(
                    Path(search_dir) / mode.upper(),
                    mode_config["search_pattern"],
                    mode_config["yyyymm_str_fname_indices"],
                    min_dt_time,
                    max_dt_time,
                )
                valid_files.extend(mode_files)
        else:
            valid_files = _get_files_by_date(
                search_dir,
                self.search_pattern,
                self.yyyymm_str_fname_indices,
                min_dt_time,
                max_dt_time,
            )

        return valid_files

    def get_unified_time_epoch_offset(self, goal_epoch_str: str = "1991-01-01", this_epoch_str: str = None) -> float:
        """
        Convert a timestamp from one custom epoch to another.

        Parameters:
        - goal_epoch_str: str, the target epoch (default: "1991-01-01")
        - this_epoch_str: str, the original epoch (default: self.time_epoch)

        Returns:
        - float: the timestamp relative to `goal_epoch_str`
        """
        if this_epoch_str is None:
            this_epoch_str = self.time_epoch        

        this_epoch = datetime.fromisoformat(this_epoch_str)  # + timedelta(days=1)
        goal_epoch = datetime.fromisoformat(goal_epoch_str)

        # Calculate offset between epochs in seconds
        offset = (this_epoch - goal_epoch).total_seconds()

        return offset
    
    def get_variables_from_file(self, nc: Dataset, nc_var_paths: str) -> np.ndarray:
        """Retrieve variable from NetCDF file, handling groups if necessary.

        Args:
            nc (Dataset): The dataset object
            nc_var_paths (str or list[str]): The path(s) to variable(s) within the file,
            with groups separated by '/'.

        Raises:
            KeyError: If the variable or group is not found in the file.

        Returns:
            np.array|List[np.array]: The retrieved variable(s) as array(s).
        """
        def get_single_var(nc, nc_var_path):
            parts = nc_var_path.split("/")
            var = nc
            for part in parts:
                var = var[part]
                if var is None:
                    raise IndexError(f"NetCDF parameter '{nc_var_path}' not found.")
            return var[:]

        if isinstance(nc_var_paths, str):
            return get_single_var(nc, nc_var_paths)
        elif isinstance(nc_var_paths, (list, tuple)):
            return [get_single_var(nc, path) for path in nc_var_paths]
        else:
            raise TypeError("nc_var_paths must be a string or a list/tuple of strings.")

In [6]:
def report_l2_dataset(label, name, mission, min_dt_time, max_dt_time, cyclenum=6):
    """Load an L2 dataset, list its files for one cycle and date range, and print a summary.

    Args:
        label (str): Prefix used in the printed lines (e.g. "EN1").
        name (str): Dataset name passed to AltDataset.
        mission (str): Mission identifier passed to AltDataset.
        min_dt_time (datetime): Start of the date range.
        max_dt_time (datetime): End of the date range.
        cyclenum (int): Cycle number to search. Defaults to 6.

    Returns:
        tuple: (dataset, l2_dir, files, date) where date is the
        (datetime, year, month, day) tuple of the first file found,
        or None when no file matched.
    """
    dataset = AltDataset(name, mission, "l2")
    l2_dir = dataset.get_l2_dir(cyclenum)
    files = dataset.get_files(min_dt_time, max_dt_time, cyclenum)
    # Guard against an empty result: files[0] would raise IndexError.
    first_date = (
        dataset.get_product_startdate_from_filename(files[0], dataset.yyyymm_str_fname_indices)
        if files
        else None
    )

    print("------------------------------------")
    print(f"{label} GDR Directory: {l2_dir}")
    print(f"{label} GDR Date: {first_date}")
    print(f"{label} GDR Files: {files}")
    return dataset, l2_dir, files, first_date


min_dt_time = datetime.strptime("19910101", "%Y%m%d")
max_dt_time = datetime.strptime("19920101", "%Y%m%d")
en1_gdr, en1_dir, en1_files, date = report_l2_dataset(
    "EN1", "e1_gdr", "e1", min_dt_time, max_dt_time
)

min_dt_time = datetime.strptime("20020101", "%Y%m%d")
max_dt_time = datetime.strptime("20020701", "%Y%m%d")
ev, ev_dir, ev_files, date = report_l2_dataset("EV", "ev_gdr", "ev", min_dt_time, max_dt_time)

min_dt_time = datetime.strptime("19950101", "%Y%m%d")
max_dt_time = datetime.strptime("19951201", "%Y%m%d")
e2, e2_dir, e2_files, date = report_l2_dataset("E2", "e2_gdr", "e2", min_dt_time, max_dt_time)

min_dt_time = datetime.strptime("20160301", "%Y%m%d")
max_dt_time = datetime.strptime("20170101", "%Y%m%d")
s3a, s3a_dir, s3a_files, date = report_l2_dataset(
    "S3A", "s3a_l2", "s3a", min_dt_time, max_dt_time
)

min_dt_time = datetime.strptime("20180301", "%Y%m%d")
max_dt_time = datetime.strptime("20181031", "%Y%m%d")
s3b, s3b_dir, s3b_files, date = report_l2_dataset(
    "S3B", "s3b_l2", "s3b", min_dt_time, max_dt_time
)

------------------------------------
EN1 GDR Directory: /home/willisc3/luna/CPOM/archive/SATS/RA/ERS1/REAPER/L2/CYCLE06
EN1 GDR Date: (datetime.datetime(1991, 8, 14, 0, 0), 1991, 8, 14)
EN1 GDR Files: [PosixPath('/home/willisc3/luna/CPOM/archive/SATS/RA/ERS1/REAPER/L2/CYCLE06/E1_REAP_ERS_ALT_2__19910814T151446_19910814T165319_RP01.NC'), PosixPath('/home/willisc3/luna/CPOM/archive/SATS/RA/ERS1/REAPER/L2/CYCLE06/E1_REAP_ERS_ALT_2__19910814T115916_19910814T133727_RP01.NC'), PosixPath('/home/willisc3/luna/CPOM/archive/SATS/RA/ERS1/REAPER/L2/CYCLE06/E1_REAP_ERS_ALT_2__19910815T060858_19910815T080607_RP01.NC'), PosixPath('/home/willisc3/luna/CPOM/archive/SATS/RA/ERS1/REAPER/L2/CYCLE06/E1_REAP_ERS_ALT_2__19910815T080545_19910815T094730_RP01.NC'), PosixPath('/home/willisc3/luna/CPOM/archive/SATS/RA/ERS1/REAPER/L2/CYCLE06/E1_REAP_ERS_ALT_2__19910814T083943_19910814T102100_RP01.NC'), PosixPath('/home/willisc3/luna/CPOM/archive/SATS/RA/ERS1/REAPER/L2/CYCLE06/E1_REAP_ERS_ALT_2__19910816T105332_199

In [None]:
min_dt_time = datetime.strptime("20160301", "%Y%m%d")
max_dt_time = datetime.strptime("20170101", "%Y%m%d")
s3a = AltDataset("s3a_l2", "s3a", "l2")
s3a_dir = s3a.get_l2_dir(6)
s3a_files = s3a.get_files(min_dt_time, max_dt_time, 6)
# Derive the date from this cell's own files so the printed value cannot be a
# stale `date` left in the kernel by an earlier cell; None when nothing matched.
date = (
    s3a.get_product_startdate_from_filename(s3a_files[0], s3a.yyyymm_str_fname_indices)
    if s3a_files
    else None
)

print(f"S3A GDR Directory: {s3a_dir}")
print(f"S3A GDR Date: {date}")
print(f"S3A GDR Files: {s3a_files}")

ValueError: time data 'N____20160703T2102' does not match format '%Y%m%d'

In [17]:
# Print every *.SEN3 entry found (recursively) under the S3A cycle 006 directory.
s3a_cycle_006_dir = Path("/home/willisc3/luna/CPOM/archive/SATS/RA/S3A/L2/SR_2_LAN_NT/cycle_006")
sen3_entries = s3a_cycle_006_dir.rglob("*.SEN3")
for file in sen3_entries:
    print(file)

In [None]:
# Example of getting variables from the first file. The file is opened once and
# closed when the block exits, instead of opening a new, never-closed Dataset
# for every variable.
en1_param_names = [
    "latitude_param",
    "longitude_param",
    "latitude_nadir_param",
    "longitude_nadir_param",
    "lat_01_param",
    "lon_01_param",
    "power_param",
]
with Dataset(en1_files[0]) as en1_nc:
    en1_variables = {
        param_name: en1_gdr.get_variables_from_file(en1_nc, getattr(en1_gdr, param_name))
        for param_name in en1_param_names
    }

# Last expression of the cell: display the power variable, as before.
en1_variables["power_param"]

masked_array(
  data=[[--, 26.3, 14.58, ..., 12.47, 7.15, 10.22],
        [9.28, -108.34, -108.34, ..., 13.620000000000001, 10.06, 12.68],
        [17.080000000000002, 13.15, 17.05, ..., 5.16, 3.3200000000000003,
         5.15],
        ...,
        [16.3, 15.73, 15.870000000000001, ..., 14.71, 15.16, 14.71],
        [14.57, 14.73, 14.83, ..., 17.38, 17.69, 18.1],
        [18.39, 17.78, 18.1, ..., 20.64, 21.19, 21.22]],
  mask=[[ True, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ...,
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False]],
  fill_value=32767)