In [1]:
from transfer_learning_publication.cleaners import trim_to_column, clip_columns, fill_na_columns
import polars as pl
import numpy as np
from datetime import datetime, timedelta

In [3]:
# Synthetic daily series covering 2024-01-01 through 2024-01-15 (15 rows).
# Each column has a different missing-data pattern so the cleaner helpers
# have something to act on.
dates = [datetime(2024, 1, day) for day in range(1, 16)]

time_series_df = pl.LazyFrame(
    {
        "date": dates,
        # Two leading nulls, then 13 readings.
        "temperature": [None] * 2 + [
            18.5, 19.2, 20.1, 21.3, 22.0, 23.5, 24.1, 22.8,
            21.5, 20.0, 19.5, 18.0, 17.5,
        ],
        # Fully populated; the first three days are 0.0.
        "precipitation": [0.0] * 3 + [
            5.2, 3.1, 0.0, 12.5, 8.3, 2.1, 0.0,
            1.5, 3.2, 4.5, 6.0, 8.5,
        ],
        # Four leading nulls, one NaN (sensor error), an interior null,
        # and a null / NaN / null run at the end.
        "streamflow": [None] * 4 + [
            float("nan"),
            120.5, 135.2, None, 142.3, 138.6, 125.4, 118.3,
            None, float("nan"), None,
        ],
    }
)

print("\n📊 Original Data:")
print(time_series_df.collect())

# Cleaning pipeline: trim on streamflow, clip, then fill. Every stage keeps
# its own name so intermediate frames can still be inspected.
trimmed_ts = trim_to_column(time_series_df, "streamflow")
# NOTE(review): the same [130, 140] bounds are passed for "temperature",
# whose values here lie between 17.5 and 24.1 — presumably they all end up
# at the lower bound. Confirm this is intended for the demo.
clipped_ts = clip_columns(
    trimmed_ts,
    ["streamflow", "temperature"],
    min_value=130,
    max_value=140,
)
filled_ts = fill_na_columns(
    clipped_ts,
    ["streamflow"],
    fill_value=0,
    add_binary_flag=True,
)

print("\n📊 Trimmed, Clipped, and Filled Data:")
print(filled_ts.collect())


📊 Original Data:
shape: (15, 4)
┌─────────────────────┬─────────────┬───────────────┬────────────┐
│ date                ┆ temperature ┆ precipitation ┆ streamflow │
│ ---                 ┆ ---         ┆ ---           ┆ ---        │
│ datetime[μs]        ┆ f64         ┆ f64           ┆ f64        │
╞═════════════════════╪═════════════╪═══════════════╪════════════╡
│ 2024-01-01 00:00:00 ┆ null        ┆ 0.0           ┆ null       │
│ 2024-01-02 00:00:00 ┆ null        ┆ 0.0           ┆ null       │
│ 2024-01-03 00:00:00 ┆ 18.5        ┆ 0.0           ┆ null       │
│ 2024-01-04 00:00:00 ┆ 19.2        ┆ 5.2           ┆ null       │
│ 2024-01-05 00:00:00 ┆ 20.1        ┆ 3.1           ┆ NaN        │
│ …                   ┆ …           ┆ …             ┆ …          │
│ 2024-01-11 00:00:00 ┆ 21.5        ┆ 1.5           ┆ 125.4      │
│ 2024-01-12 00:00:00 ┆ 20.0        ┆ 3.2           ┆ 118.3      │
│ 2024-01-13 00:00:00 ┆ 19.5        ┆ 4.5           ┆ null       │
│ 2024-01-14 00:00:00 ┆ 18.0 