In [1]:
from transfer_learning_publication.data import CaravanDataSource
from transfer_learning_publication.transforms import PipelineBuilder, Log, ZScore
from transfer_learning_publication.containers import TimeSeriesCollection

In [2]:
# Open the Caravan data source with the region set to "tajikkyrgyz".
# NOTE(review): base_path is an absolute, machine-specific path — consider
# reading it from a configurable data directory so the notebook runs elsewhere.
caravan = CaravanDataSource(
    region="tajikkyrgyz",
    base_path="/Users/cooper/Desktop/CARAVAN_CLEAN/train",
)

# All gauge ids returned by the data source.
basins = caravan.list_gauge_ids()

# Only the first two gauges are used; the same subset feeds both requests below.
demo_gauge_ids = basins[:2]
timeseries_columns = ["streamflow", "total_precipitation_sum"]
static_columns = ["area", "high_prec_freq"]

# Time-series columns for the selected gauges.
ts_data = caravan.get_timeseries(gauge_ids=demo_gauge_ids, columns=timeseries_columns)

# Static attribute columns for the same gauges.
static_data = caravan.get_static_attributes(gauge_ids=demo_gauge_ids, columns=static_columns)

In [3]:
# NOTE(review): despite its name, `static_collection` is built from `ts_data`
# (the time-series request), not from `static_data` — consider renaming once
# any later uses of this variable have been checked.
static_collection = caravan.to_time_series_collection(ts_data)
# Bare last expression so the notebook displays the feature-name -> index mapping.
static_collection.feature_indices

{'streamflow': 0, 'total_precipitation_sum': 1}

In [5]:
# Assemble the preprocessing pipeline one step at a time. Rows are grouped by
# the "gauge_id" column; Log is registered via add_per_basin on streamflow,
# then ZScore via add_global on streamflow and precipitation.
pipeline_builder = PipelineBuilder(group_identifier="gauge_id")
pipeline_builder = pipeline_builder.add_per_basin(Log(), columns=["streamflow"])
pipeline_builder = pipeline_builder.add_global(
    ZScore(),
    columns=["streamflow", "total_precipitation_sum"],
)
preprocessing_pipelines = pipeline_builder.build()

# Materialise the time series, fit + apply the pipeline, then invert it so the
# round trip can be compared against the raw values.
raw_frame = ts_data.collect()
data_transformed = preprocessing_pipelines.fit_transform(raw_frame)
data_inv = preprocessing_pipelines.inverse_transform(data_transformed)

In [7]:
# Show the first rows of the raw, transformed and inverse-transformed frames
# side by side. The cell's value is a 3-tuple, so the notebook prints its
# plain-text repr rather than three rich tables.
ts_data.collect().head(), data_transformed.head(), data_inv.head()

(shape: (5, 5)
 ┌────────────┬────────────┬─────────────┬───────────────────┬─────────────────────────┐
 │ streamflow ┆ date       ┆ REGION_NAME ┆ gauge_id          ┆ total_precipitation_sum │
 │ ---        ┆ ---        ┆ ---         ┆ ---               ┆ ---                     │
 │ f32        ┆ date       ┆ str         ┆ str               ┆ f32                     │
 ╞════════════╪════════════╪═════════════╪═══════════════════╪═════════════════════════╡
 │ 0.63       ┆ 2000-01-02 ┆ tajikkyrgyz ┆ tajikkyrgyz_15013 ┆ 5.97                    │
 │ 0.63       ┆ 2000-01-03 ┆ tajikkyrgyz ┆ tajikkyrgyz_15013 ┆ 20.43                   │
 │ 0.63       ┆ 2000-01-04 ┆ tajikkyrgyz ┆ tajikkyrgyz_15013 ┆ 20.200001               │
 │ 0.63       ┆ 2000-01-05 ┆ tajikkyrgyz ┆ tajikkyrgyz_15013 ┆ 1.24                    │
 │ 0.63       ┆ 2000-01-06 ┆ tajikkyrgyz ┆ tajikkyrgyz_15013 ┆ 0.04                    │
 └────────────┴────────────┴─────────────┴───────────────────┴─────────────────────────┘,
 shap

In [None]:
# Write the *transformed* time series out through the Caravan data source.
# NOTE(review): the destination is an absolute, machine-specific path, and the
# meaning of the positional `True` is not visible here (presumably an
# overwrite flag) — confirm against write_timeseries and pass it by keyword.
caravan.write_timeseries(
    data_transformed,
    "/Users/cooper/Desktop/transfer-learning-publication/data",
    True
)