In [None]:
# | default_exp ds.ts.io
# | export
from xarray import DataArray
import polars as pl
import pandas as pd
import numpy as np

In [None]:
# | exporti
def create_data_array(data: np.ndarray, time, cols: list | str, **kwargs):
    # convert time from non-nanoseconds into nanoseconds precision
    # https://github.com/pydata/xarray/issues/7493
    if time.dtype != "datetime64[ns]":
        time = time.astype("datetime64[ns]")

    if len(data.shape) == 1:
        coords = {"time": time}
    else:
        if isinstance(cols, str) and data.shape[1] > 1:
            v_dim = [f"{cols}_{i}" for i in range(data.shape[1])]
        else:
            v_dim = cols
        coords = {"time": time, "v_dim": v_dim}

    return DataArray(data, coords=coords, **kwargs)

In [None]:
# | exporti
def pddf2ts(df: pd.DataFrame, cols: list | str = None, time=None, **kwargs):
    cols = cols or df.columns.tolist()
    data = df[cols].to_numpy()
    times = df.index if time is None else df[time]
    return create_data_array(data, times, cols, **kwargs)

In [None]:
# | exporti
def get_columns(df: pl.DataFrame | pl.LazyFrame):
    """Return the column names of an eager or lazy polars frame."""
    # A LazyFrame is asked for its names through ``collect_schema()``;
    # any other input is read through its ``columns`` attribute.
    is_lazy = isinstance(df, pl.LazyFrame)
    return df.collect_schema().names() if is_lazy else df.columns


def pldf2ts(
    df: pl.DataFrame | pl.LazyFrame,
    cols: list | str = None,
    time="time",
    **kwargs,
):
    """Convert a polars DataFrame/LazyFrame into a time-indexed ``DataArray``.

    Parameters
    ----------
    df : pl.DataFrame | pl.LazyFrame
        Source frame; a LazyFrame is collected before values are extracted.
    cols : list | str, optional
        Column(s) holding the values. When omitted (or empty), all columns of
        ``df`` are used. A string selects that single column as given.
    time : str
        Name of the column holding the time stamps. It is dropped from a
        list-valued ``cols`` so it is never treated as a value column.
    **kwargs
        Forwarded to ``create_data_array``.

    Returns
    -------
    The result of ``create_data_array`` for the selected values and times.
    """
    if not cols:
        cols = get_columns(df)
    if not isinstance(cols, str):
        # Build a fresh list instead of calling ``remove``: the caller's list
        # stays untouched, and a string ``cols`` is never subjected to a
        # substring test against ``time`` (which used to end in
        # ``str.remove`` -> AttributeError).
        cols = [c for c in cols if c != time]
    if isinstance(df, pl.LazyFrame):
        df = df.collect()
    data = df[cols].to_numpy()
    times = df[time].to_numpy()
    return create_data_array(data, times, cols, **kwargs)

In [None]:
# | code-summary: Test for pldf2ts function
def test_pldf2ts():
    """Check pldf2ts output shapes for list, scalar and array-valued columns."""
    # Five daily time stamps plus two scalar columns and one 2-wide array column
    stamps = np.arange("2005-02-01", "2005-02-06", dtype="datetime64[D]")
    frame = pl.DataFrame(
        {
            "time": stamps,
            "value1": np.random.rand(5),
            "value2": np.random.rand(5),
            "value3": np.random.rand(5, 2),
        }
    )

    # Two scalar columns -> (time, v_dim) array with the time stamps preserved
    result = pldf2ts(frame, cols=["value1", "value2"], time="time")
    assert result.shape == (5, 2)
    assert all(result.time == stamps)

    # A single scalar column -> 1-D series
    result = pldf2ts(frame, cols="value1", time="time")
    assert result.shape == (5,)

    # A single array-valued column -> one v_dim entry per array element
    result = pldf2ts(frame, cols="value3", time="time")
    assert result.shape == (5, 2)

    # Mixed scalar and array-valued columns
    result = pldf2ts(frame, cols=["value1", "value3"], time="time")
    assert result.shape == (5, 2)


test_pldf2ts()

In [None]:
# | export
def df2ts(
    df,
    cols: list | str = None,
    time: str = "time",
    **kwargs,
):
    """Convert DataFrame to TimeSeries"""
    if isinstance(df, (pl.DataFrame, pl.LazyFrame)):
        return pldf2ts(df, cols, time, **kwargs)
    elif isinstance(df, pd.DataFrame):
        return pddf2ts(df, cols, time, **kwargs)

In [None]:
# Test for df2ts function
# Create a sample pandas DataFrame with a daily "time" column and two value columns
time = pd.date_range(start="2023-01-01", periods=5, freq="D")
sample_df = pd.DataFrame(
    {"time": time, "value1": np.random.rand(5), "value2": np.random.rand(5)}
)

# Convert the DataFrame to a TimeSeries DataArray
ts_data_array = df2ts(sample_df, cols=["value1", "value2"], time="time")

# Check that the resulting time coordinate matches the input time stamps;
# the boolean is shown as the cell's output rather than asserted
all(ts_data_array.time == time)

True