In [3]:
#| default_exp utils/speasy
#| export
import speasy as spz
import polars as pl

from pydantic import BaseModel, model_validator

from speasy.core.dataprovider import DataProvider
from speasy import SpeasyVariable
from speasy.core.inventory import DatasetIndex, ParameterIndex

In [197]:
#| export
def spzvar2pldf(var: SpeasyVariable):
    """Turn one SpeasyVariable into a polars LazyFrame.

    The frame has one column per entry of ``var.columns`` plus a ``time``
    column built from ``var.time``. Mirrors ``SpeasyVariable.to_dataframe``.
    """
    cleaned = var.replace_fillval_by_nan()

    eager_frame = pl.DataFrame(cleaned.values, schema=cleaned.columns)
    eager_frame = eager_frame.with_columns(time=pl.Series(cleaned.time))

    # Switch to lazy mode only as the very last step; going lazy earlier leads to
    # "ShapeError: unable to add a column of length xxxx to a DataFrame of height yyyy".
    return eager_frame.lazy()
    
    
def spzvars2pldf(vars: list[SpeasyVariable]):
    """Combine several SpeasyVariables into one polars LazyFrame.

    Each variable is converted with ``spzvar2pldf``; the resulting frames are
    combined with ``pl.concat(..., how='align')`` (they all carry a ``time``
    column). A single-element list skips the concat and returns that
    variable's frame directly.
    """
    if len(vars) != 1:
        lazy_frames = [spzvar2pldf(item) for item in vars]
        return pl.concat(lazy_frames, how='align')

    # Only one variable: nothing to align against.
    return spzvar2pldf(vars[0])

In [202]:
# | export
def get_provider(v: str) -> DataProvider:
    """Return the speasy data provider object registered under the name ``v``.

    Parameters
    ----------
    v : str
        Provider name, e.g. ``"cda"``, ``"amda"`` or ``"csa"``.

    Returns
    -------
    DataProvider
        The matching ``spz.<name>`` provider. Any unrecognised name falls back
        to ``spz.amda`` (the historical default of this helper).
    """
    # Previously every name other than "cda" silently resolved to AMDA, so a
    # "csa" request was looked up in the wrong inventory even though product
    # paths are built with the requested provider prefix.
    if v in ("cda", "amda", "csa"):
        return getattr(spz, v)
    return spz.amda


def get_dataset_index(v: str, provider: str) -> DatasetIndex:
    """Look up dataset ``v`` in the flat inventory of the named ``provider``.

    The provider name is resolved with ``get_provider``; the lookup is a plain
    item access on ``flat_inventory.datasets``.
    """
    source = get_provider(provider)
    known_datasets = source.flat_inventory.datasets
    return known_datasets[v]


class Variables(BaseModel):
    """A set of speasy products plus, once fetched, their data.

    Either pass ``products`` directly, or pass ``dataset`` (optionally with
    ``parameters``) and let ``check_products`` derive ``products`` for the
    chosen ``provider``.
    """

    # SpeasyVariable / ParameterIndex are not pydantic-aware types. Without this
    # pydantic raises: "no validator found for
    # <class 'speasy.products.variable.SpeasyVariable'>, see `arbitrary_types_allowed` in Config".
    model_config = {"arbitrary_types_allowed": True}

    # Product identifiers (path strings or inventory entries) handed to spz.get_data.
    products: list[str | ParameterIndex] | None = None
    # Time range handed to spz.get_data as-is.
    timerange: list[str] | None = None

    # Provider name used to build product paths / look up the dataset.
    provider: str = "cda"
    dataset: str | None = None
    parameters: list[str] | None = None

    # Filled in by get_data().
    data: list[SpeasyVariable] | None = None

    _disable_proxy: bool = True

    @model_validator(mode="after")
    def check_products(self):
        """Initialize ``products`` from ``provider`` and ``dataset`` if not provided.

        * With ``parameters``: products become ``"<provider>/<dataset>/<parameter>"``.
        * Without ``parameters``: every ``ParameterIndex`` member of the dataset's
          inventory entry is used, and ``parameters`` is filled from their
          ``spz_name()``.
        """
        if self.products is None and self.dataset:
            if self.parameters:
                self.products = [
                    f"{self.provider}/{self.dataset}/{var}" for var in self.parameters
                ]
            else:
                dataset_index = get_dataset_index(self.dataset, self.provider)
                self.products = [
                    member
                    for member in vars(dataset_index).values()
                    if isinstance(member, ParameterIndex)
                ]
                self.parameters = [member.spz_name() for member in self.products]
        # An "after" model validator must hand the instance back to pydantic.
        return self

    def get_data(self):
        """Fetch ``products`` over ``timerange`` via ``spz.get_data`` and store the result.

        Returns ``self`` (with ``data`` set) so calls can be chained.
        """
        self.data = spz.get_data(
            self.products, self.timerange, disable_proxy=self._disable_proxy
        )
        return self

    def to_polars(self):
        """Convert the fetched ``data`` with ``spzvars2pldf``.

        Raises
        ------
        RuntimeError
            If ``get_data()`` has not populated ``data`` yet.
        """
        if self.data is None:
            raise RuntimeError("No data loaded yet; call get_data() before to_polars().")
        return spzvars2pldf(self.data)

    def preview(self):
        """Return the collected ``head()`` of ``to_polars()``."""
        return self.to_polars().head().collect()

In [53]:
def data_provider_summary(data_provider: DataProvider = spz.cda):
    """Print a short summary of a data provider.

    Shows the provider's name followed by the number of datasets, parameters
    and catalogs found in its flat inventory. Defaults to ``spz.cda``.
    """
    flat = data_provider.flat_inventory
    print("Data Provider:", data_provider.provider_name)

    # (label, inventory attribute) pairs, printed in this order.
    sections = (
        ("Datasets:", "datasets"),
        ("Parameters:", "parameters"),
        ("Catalogs:", "catalogs"),
    )
    for label, attribute in sections:
        print(label, len(getattr(flat, attribute)))

# data_provider_summary(spz.cda)
# data_provider_summary(spz.amda)
# data_provider_summary(spz.csa)

Data Provider: cda
Datasets: 2608
Parameters: 58510
Catalogs: 0
Data Provider: amda
Datasets: 1074
Parameters: 5397
Catalogs: 24
Data Provider: csa
Datasets: 912
Parameters: 1993
Catalogs: 0


In [None]:
from fastcore.utils import patch
from speasy.products import SpeasyVariable
from humanize import naturalsize

In [None]:
@patch
def preview(self: SpeasyVariable):
    """Print a quick textual overview of this variable.

    Sections, in order: name / columns / unit / memory usage / axes labels,
    then the meta-data, then the first three entries of the time axis, then
    the first three entries of the values.
    """
    frame_rule = "==========================================="
    section_rule = "-------------------------------------------"

    # Header block: identity and size.
    print(frame_rule)
    print(f"Name:         {self.name}")
    print(f"Columns:      {self.columns}")
    print(f"Values Unit:  {self.unit}")
    print(f"Memory usage: {naturalsize(self.nbytes)}")
    print(f"Axes Labels:  {self.axes_labels}")

    print(section_rule)
    print(f"Meta-data:    {self.meta}")

    # Only a three-element peek at the time axis and values.
    print(section_rule)
    print(f"Time Axis:    {self.time[:3]}")

    print(section_rule)
    print(f"Values:       {self.values[:3]}")
    print(frame_rule)
