# Using transformers for TimeBasedCesnetDataset

### Import

In [1]:
import numpy as np
import logging

from cesnet_tszoo.utils.enums import AgreggationType, SourceType, TransformerType, DatasetType
from cesnet_tszoo.datasets import CESNET_TimeSeries24
from cesnet_tszoo.configs import TimeBasedConfig # Time based dataset MUST use TimeBasedConfig

from cesnet_tszoo.utils.transformer import Transformer # For creating custom Transformer

### Setting logger

In [2]:
# Configure logging once for the whole notebook: records at INFO level and above are
# emitted as "[time][logger name][level] - message", which is the format of the
# library log lines visible in the cell outputs.
logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s][%(name)s][%(levelname)s] - %(message)s")

### Preparing dataset

In [3]:
# Load the 10-minute aggregation of the IP-address sample as a time-based dataset.
# "/some_directory/" is a placeholder path - replace it with your own data directory.
# display_details=True prints the "Dataset details" summary.
time_based_dataset = CESNET_TimeSeries24.get_dataset(data_root="/some_directory/", source_type=SourceType.IP_ADDRESSES_SAMPLE, aggregation=AgreggationType.AGG_10_MINUTES, dataset_type=DatasetType.TIME_BASED, display_details=True)

[2025-11-14 18:45:03,622][cesnet_dataset][INFO] - Dataset is time-based. Use cesnet_tszoo.configs.TimeBasedConfig



Dataset details:

    AgreggationType.AGG_10_MINUTES
        Time indices: range(0, 40297)
        Datetime: (datetime.datetime(2023, 10, 9, 0, 3, 49, tzinfo=datetime.timezone.utc), datetime.datetime(2024, 7, 14, 21, 50, 52, tzinfo=datetime.timezone.utc))

    SourceType.IP_ADDRESSES_SAMPLE
        Time series indices: [ 11  20 101 103 118 ... 2003134 2008461 2011839 2022235 2044888], Length=1000; use 'get_available_ts_indices' for full list
        Features with default values: {'n_flows': 0, 'n_packets': 0, 'n_bytes': 0, 'n_dest_ip': 0, 'n_dest_asn': 0, 'n_dest_ports': 0, 'tcp_udp_ratio_packets': 0.5, 'tcp_udp_ratio_bytes': 0.5, 'dir_ratio_packets': 0.5, 'dir_ratio_bytes': 0.5, 'avg_duration': 0, 'avg_ttl': 0}
        
        Additional data: ['ids_relationship', 'weekends_and_holidays']
        


### Transformers

- Transformers are implemented as classes.
    - You can create your own or use built-in one.
- Transformer must implement `transform`.
- Transformer can implement `inverse_transform`.
- Transformers are applied after `default_values` and fillers took care of missing values (default preprocess order).
- To use transformers, a train set must be defined (unless the transformers are already fitted).
- You can change the transformer later with `update_dataset_config_and_initialize` or `apply_transformer`.

#### Built-in

In [4]:
# Options: built-in transformers that can be passed as `transform_with`.
# The cell only lists the enum members; the last expression is what gets displayed.

## Support fit and partial_fit -> can be used with create_transformer_per_time_series=True or create_transformer_per_time_series=False
TransformerType.STANDARD_SCALER
TransformerType.L2_NORMALIZER
TransformerType.LOG_TRANSFORMER
TransformerType.MAX_ABS_SCALER
TransformerType.MIN_MAX_SCALER

## Support only fit -> can be used only when create_transformer_per_time_series=True
TransformerType.POWER_TRANSFORMER
TransformerType.QUANTILE_TRANSFORMER
TransformerType.ROBUST_SCALER

<TransformerType.ROBUST_SCALER: 'robust_scaler'>

##### Transformer per time series

- One transformer per time series is created.
- Transformer must implement `fit` (unless transformers are already fitted).

In [5]:
# Built-in MinMaxScaler with create_transformer_per_time_series=True:
# every time series in ts_ids gets its own transformer.
# The *_time_period values are fractions of the dataset's time indices
# (see "Time periods" in the printed config details).
config = TimeBasedConfig(ts_ids=[1367, 1368], train_time_period=0.5, val_time_period=0.2, test_time_period=0.1, features_to_take=['n_flows', 'n_packets'],
                         transform_with=TransformerType.MIN_MAX_SCALER, create_transformer_per_time_series=True, random_state=111)

# display_config_details="text" prints the "Config Details" summary after initialization.
time_based_dataset.set_dataset_config_and_initialize(config, display_config_details="text", workers=0)

[2025-11-14 18:45:03,635][time_config][INFO] - Quick validation succeeded.
[2025-11-14 18:45:03,716][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:03,716][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 173.85it/s]
[2025-11-14 18:45:03,734][cesnet_dataset][INFO] - Config initialized successfully.



Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_10_MINUTES
    Source: SourceType.IP_ADDRESSES_SAMPLE

    Time series
        Time series IDS: [1367 1368], Length=2
    Time periods
        Train time periods: range(0, 20149)
        Val time periods: range(20149, 28208)
        Test time periods: range(28208, 32237)
        All time periods: range(0, 32237)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Sliding window
        Sliding window size: None
        Sliding window prediction size: None
        Sliding window step size: 1
        Set shared size: 0
    Fillers
        Filler type: NoFiller
    Transformers
        Transformer type: MinMaxScaler
        Is transformer per Time series: True
        Are transformers premade: False
        Are premade transformers partial_f

In [6]:
# Preview the first 10 rows of the train set; n_flows and n_packets are already scaled.
time_based_dataset.get_train_df(workers=0).head(10)

Unnamed: 0,id_ip,id_time,n_flows,n_packets
0,1367.0,0.0,0.166391,0.008877
1,1367.0,1.0,0.19214,0.004397
2,1367.0,2.0,0.156302,0.006889
3,1367.0,3.0,0.148472,0.006699
4,1367.0,4.0,0.142448,0.01236
5,1367.0,5.0,0.16624,0.004723
6,1367.0,6.0,0.18973,0.00934
7,1367.0,7.0,0.192441,0.005277
8,1367.0,8.0,0.232646,0.021287
9,1367.0,9.0,0.213823,0.0309


In [7]:
# With one transformer per time series this returns an array of transformers (two here, one per id in ts_ids).
time_based_dataset.get_transformers()

array([<cesnet_tszoo.utils.transformer.transformer.MinMaxScaler object at 0x0000021BA15B9E80>,
       <cesnet_tszoo.utils.transformer.transformer.MinMaxScaler object at 0x0000021BA15B9EB0>],
      dtype=object)

Or later with:

In [8]:
# Two alternative ways to change the transformer on an already initialized dataset.
# Either call alone is enough; this cell runs both for demonstration, which is why
# the log shows two re-initializations.
# NOTE(review): partial_fit_initialized_transformers="config" presumably keeps the value
# already stored in the config - confirm against the API documentation.
time_based_dataset.update_dataset_config_and_initialize(transform_with=TransformerType.MIN_MAX_SCALER, create_transformer_per_time_series=True, partial_fit_initialized_transformers="config", workers=0)
# Or
time_based_dataset.apply_transformer(transform_with=TransformerType.MIN_MAX_SCALER, create_transformer_per_time_series=True, partial_fit_initialized_transformers="config", workers=0)

[2025-11-14 18:45:03,767][cesnet_dataset][INFO] - Re-initialization is required.
[2025-11-14 18:45:03,844][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:03,845][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 199.40it/s]
[2025-11-14 18:45:03,860][cesnet_dataset][INFO] - Config initialized successfully.
[2025-11-14 18:45:03,861][cesnet_dataset][INFO] - Configuration has been changed successfuly.
[2025-11-14 18:45:03,862][cesnet_dataset][INFO] - Re-initialization is required.
[2025-11-14 18:45:03,942][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:03,943][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 199.66it/s]
[2025-11-14 18:45:03,957][cesnet_dataset][INFO] - Config initialized successfully.
[2025-11-14 18:45:03,958][cesnet_dataset][INFO] - Configuration has been changed successfuly.
[2025-

##### One transformer for every time series

- One transformer is used for all time series.
- Transformer must implement `partial_fit` (unless transformer is already fitted).

In [9]:
# Same setup as the per-series example, but create_transformer_per_time_series=False:
# a single MinMaxScaler is shared by both time series.
config = TimeBasedConfig(ts_ids=[1367, 1368], train_time_period=0.5, val_time_period=0.2, test_time_period=0.1, features_to_take=['n_flows', 'n_packets'],
                         transform_with=TransformerType.MIN_MAX_SCALER, create_transformer_per_time_series=False, random_state=111)

time_based_dataset.set_dataset_config_and_initialize(config, display_config_details="text", workers=0)

[2025-11-14 18:45:03,963][time_config][INFO] - Quick validation succeeded.
[2025-11-14 18:45:04,043][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:04,043][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 249.61it/s]
[2025-11-14 18:45:04,056][cesnet_dataset][INFO] - Config initialized successfully.



Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_10_MINUTES
    Source: SourceType.IP_ADDRESSES_SAMPLE

    Time series
        Time series IDS: [1367 1368], Length=2
    Time periods
        Train time periods: range(0, 20149)
        Val time periods: range(20149, 28208)
        Test time periods: range(28208, 32237)
        All time periods: range(0, 32237)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Sliding window
        Sliding window size: None
        Sliding window prediction size: None
        Sliding window step size: 1
        Set shared size: 0
    Fillers
        Filler type: NoFiller
    Transformers
        Transformer type: MinMaxScaler
        Is transformer per Time series: False
        Are transformers premade: False
        Are premade transformers partial_

In [10]:
# Same rows as in the per-series preview, now scaled by the single shared transformer
# (the scaled values differ from the per-series result).
time_based_dataset.get_train_df(workers=0).head(10)

Unnamed: 0,id_ip,id_time,n_flows,n_packets
0,1367.0,0.0,0.044306,0.003506
1,1367.0,1.0,0.051163,0.001737
2,1367.0,2.0,0.04162,0.002721
3,1367.0,3.0,0.039535,0.002645
4,1367.0,4.0,0.037931,0.004881
5,1367.0,5.0,0.044266,0.001865
6,1367.0,6.0,0.050521,0.003689
7,1367.0,7.0,0.051243,0.002084
8,1367.0,8.0,0.061949,0.008406
9,1367.0,9.0,0.056937,0.012202


In [11]:
# With a shared transformer this returns the single transformer object instead of an array.
time_based_dataset.get_transformers()

<cesnet_tszoo.utils.transformer.transformer.MinMaxScaler at 0x21ba15d1f70>

#### Custom

- You can create your own custom transformer. It is recommended to derive from Transformer base class.
- Note that a custom transformer should be imported from a separate file when using this library in a Jupyter notebook. If it is defined in the notebook itself (not imported from another file), use `workers=0`.

In [12]:
class CustomTransformer(Transformer):
    """Min-max scaler that rescales each feature with the extremes seen while fitting.

    The per-feature minimum and maximum are kept in ``self.min`` / ``self.max``.
    A feature whose minimum equals its maximum (a constant feature) is scaled with
    a range of 1 instead of 0, so ``transform`` returns 0 for it rather than the
    NaN/inf a division by zero would produce.
    """

    def __init__(self):
        super().__init__()

        # Per-feature extremes observed so far; both stay None until the first fit.
        self.max = None
        self.min = None

    def _value_range(self):
        """Return the per-feature ``max - min`` with zero ranges replaced by 1."""
        value_range = self.max - self.min
        return np.where(value_range == 0, 1, value_range)

    def transform(self, data):
        """Scale ``data`` feature-wise as ``(data - min) / (max - min)``."""
        return (data - self.min) / self._value_range()

    def fit(self, data):
        """Fit on ``data``.

        Delegates to ``partial_fit``, so calling it on an already fitted instance
        widens the stored extremes instead of resetting them.
        """
        self.partial_fit(data)

    def partial_fit(self, data):
        """Update the stored per-feature extremes with those of ``data`` (reduced over axis 0)."""
        batch_max = np.max(data, axis=0)
        batch_min = np.min(data, axis=0)

        # First batch: nothing to merge with yet.
        if self.max is None or self.min is None:
            self.max = batch_max
            self.min = batch_min
            return

        # Element-wise merge of the stored extremes with this batch's extremes.
        self.max = np.maximum(self.max, batch_max)
        self.min = np.minimum(self.min, batch_min)

    def inverse_transform(self, transformed_data):
        """Map scaled values back to the original scale (inverse of ``transform``)."""
        return transformed_data * self._value_range() + self.min

##### Transformer per time series

- One transformer per time series is created.
- Transformer must implement `fit` (unless transformers are already fitted).

In [13]:
# Pass the CustomTransformer class itself (not an instance) as `transform_with`.
# With create_transformer_per_time_series=True one instance per time series is created
# (see the get_transformers() output further down).
config = TimeBasedConfig(ts_ids=[1367, 1368], train_time_period=0.5, val_time_period=0.2, test_time_period=0.1, features_to_take=['n_flows', 'n_packets'],
                         transform_with=CustomTransformer, create_transformer_per_time_series=True, random_state=111)

time_based_dataset.set_dataset_config_and_initialize(config, display_config_details="text", workers=0)

[2025-11-14 18:45:04,094][time_config][INFO] - Quick validation succeeded.
[2025-11-14 18:45:04,174][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:04,174][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 235.09it/s]
[2025-11-14 18:45:04,187][cesnet_dataset][INFO] - Config initialized successfully.



Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_10_MINUTES
    Source: SourceType.IP_ADDRESSES_SAMPLE

    Time series
        Time series IDS: [1367 1368], Length=2
    Time periods
        Train time periods: range(0, 20149)
        Val time periods: range(20149, 28208)
        Test time periods: range(28208, 32237)
        All time periods: range(0, 32237)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Sliding window
        Sliding window size: None
        Sliding window prediction size: None
        Sliding window step size: 1
        Set shared size: 0
    Fillers
        Filler type: NoFiller
    Transformers
        Transformer type: CustomTransformer (Custom)
        Is transformer per Time series: True
        Are transformers premade: False
        Are premade transfor

In [14]:
# Preview the first 10 train rows; the values match the built-in MIN_MAX_SCALER
# per-series result, since CustomTransformer implements the same min-max scaling.
time_based_dataset.get_train_df(workers=0).head(10)

Unnamed: 0,id_ip,id_time,n_flows,n_packets
0,1367.0,0.0,0.166391,0.008877
1,1367.0,1.0,0.19214,0.004397
2,1367.0,2.0,0.156302,0.006889
3,1367.0,3.0,0.148472,0.006699
4,1367.0,4.0,0.142448,0.01236
5,1367.0,5.0,0.16624,0.004723
6,1367.0,6.0,0.18973,0.00934
7,1367.0,7.0,0.192441,0.005277
8,1367.0,8.0,0.232646,0.021287
9,1367.0,9.0,0.213823,0.0309


In [15]:
# Returns an array with one CustomTransformer instance per time series.
time_based_dataset.get_transformers()

array([<__main__.CustomTransformer object at 0x0000021BA17AD250>,
       <__main__.CustomTransformer object at 0x0000021BA17AD130>],
      dtype=object)

Or later with:

In [16]:
# Two alternative ways to switch an already initialized dataset to the custom transformer.
# Either call alone is enough; this cell runs both for demonstration, which is why
# the log shows two re-initializations.
# NOTE(review): partial_fit_initialized_transformers="config" presumably keeps the value
# already stored in the config - confirm against the API documentation.
time_based_dataset.update_dataset_config_and_initialize(transform_with=CustomTransformer, create_transformer_per_time_series=True, partial_fit_initialized_transformers="config", workers=0)
# Or
time_based_dataset.apply_transformer(transform_with=CustomTransformer, create_transformer_per_time_series=True, partial_fit_initialized_transformers="config", workers=0)

[2025-11-14 18:45:04,216][cesnet_dataset][INFO] - Re-initialization is required.
[2025-11-14 18:45:04,294][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:04,295][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 267.15it/s]
[2025-11-14 18:45:04,307][cesnet_dataset][INFO] - Config initialized successfully.
[2025-11-14 18:45:04,308][cesnet_dataset][INFO] - Configuration has been changed successfuly.
[2025-11-14 18:45:04,309][cesnet_dataset][INFO] - Re-initialization is required.
[2025-11-14 18:45:04,438][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:04,438][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 250.00it/s]
[2025-11-14 18:45:04,450][cesnet_dataset][INFO] - Config initialized successfully.
[2025-11-14 18:45:04,451][cesnet_dataset][INFO] - Configuration has been changed successfuly.
[2025-

##### One transformer for every time series

- One transformer is used for all time series.
- Transformer must implement `partial_fit` (unless transformer is already fitted).

In [17]:
# Custom transformer with create_transformer_per_time_series=False:
# a single CustomTransformer instance is shared by both time series.
config = TimeBasedConfig(ts_ids=[1367, 1368], train_time_period=0.5, val_time_period=0.2, test_time_period=0.1, features_to_take=['n_flows', 'n_packets'],
                         transform_with=CustomTransformer, create_transformer_per_time_series=False, random_state=111)

time_based_dataset.set_dataset_config_and_initialize(config, display_config_details="text", workers=0)

[2025-11-14 18:45:04,456][time_config][INFO] - Quick validation succeeded.
[2025-11-14 18:45:04,536][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:04,537][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 285.75it/s]
[2025-11-14 18:45:04,549][cesnet_dataset][INFO] - Config initialized successfully.



Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_10_MINUTES
    Source: SourceType.IP_ADDRESSES_SAMPLE

    Time series
        Time series IDS: [1367 1368], Length=2
    Time periods
        Train time periods: range(0, 20149)
        Val time periods: range(20149, 28208)
        Test time periods: range(28208, 32237)
        All time periods: range(0, 32237)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Sliding window
        Sliding window size: None
        Sliding window prediction size: None
        Sliding window step size: 1
        Set shared size: 0
    Fillers
        Filler type: NoFiller
    Transformers
        Transformer type: CustomTransformer (Custom)
        Is transformer per Time series: False
        Are transformers premade: False
        Are premade transfo

In [18]:
# Preview the first 10 train rows; the values match the built-in shared MIN_MAX_SCALER result.
time_based_dataset.get_train_df(workers=0).head(10)

Unnamed: 0,id_ip,id_time,n_flows,n_packets
0,1367.0,0.0,0.044306,0.003506
1,1367.0,1.0,0.051163,0.001737
2,1367.0,2.0,0.04162,0.002721
3,1367.0,3.0,0.039535,0.002645
4,1367.0,4.0,0.037931,0.004881
5,1367.0,5.0,0.044266,0.001865
6,1367.0,6.0,0.050521,0.003689
7,1367.0,7.0,0.051243,0.002084
8,1367.0,8.0,0.061949,0.008406
9,1367.0,9.0,0.056937,0.012202


In [19]:
# Returns the single shared CustomTransformer instance.
time_based_dataset.get_transformers()

<__main__.CustomTransformer at 0x21ba154ef30>

#### Using already fitted transformer/s

- When `partial_fit_initialized_transformers` is False (the default), the provided transformer/s do not need to implement `partial_fit`, and no train set is required.

In [20]:
# Fit one CustomTransformer per time series (1367 and 1368) ...
config = TimeBasedConfig(ts_ids=[1367, 1368], train_time_period=0.5, val_time_period=0.2, test_time_period=0.1, features_to_take=['n_flows', 'n_packets'],
                         transform_with=CustomTransformer, create_transformer_per_time_series=True, random_state=111)

# display_config_details=None: no config summary is printed for this cell.
time_based_dataset.set_dataset_config_and_initialize(config, display_config_details=None, workers=0)

# ... and keep the fitted instances so later cells can pass them back in via `transform_with`.
list_of_prefitted_transformers = time_based_dataset.get_transformers()

[2025-11-14 18:45:04,579][time_config][INFO] - Quick validation succeeded.
[2025-11-14 18:45:04,660][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:04,660][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 266.47it/s]
[2025-11-14 18:45:04,673][cesnet_dataset][INFO] - Config initialized successfully.


In [21]:
# Fit a single CustomTransformer shared by series 1367 and 1368 ...
config = TimeBasedConfig(ts_ids=[1367, 1368], train_time_period=0.5, val_time_period=0.2, test_time_period=0.1, features_to_take=['n_flows', 'n_packets'],
                         transform_with=CustomTransformer, create_transformer_per_time_series=False, random_state=111)

# display_config_details=None: no config summary is printed for this cell.
time_based_dataset.set_dataset_config_and_initialize(config, display_config_details=None, workers=0)

# ... and keep the fitted instance so later cells can pass it back in via `transform_with`.
one_prefitted_transformer = time_based_dataset.get_transformers()

[2025-11-14 18:45:04,678][time_config][INFO] - Quick validation succeeded.
[2025-11-14 18:45:04,758][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:04,759][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 285.84it/s]
[2025-11-14 18:45:04,771][cesnet_dataset][INFO] - Config initialized successfully.


##### Transformer per time series

- One transformer per time series in `ts_ids`.
- All transformers in list must be of the same type.
- Must provide list of transformers with length equal to time series in `ts_ids`.

In [22]:
# Reuse the transformers fitted on series 1367/1368 for two different series (103 and 118).
# The list holds two transformers, matching the two ids in ts_ids.
# partial_fit_initialized_transformers is left at its default, so no additional
# fitting on the new train set is requested.
config = TimeBasedConfig(ts_ids=[103, 118], train_time_period=0.5, val_time_period=0.2, test_time_period=0.1, features_to_take=['n_flows', 'n_packets'],
                         transform_with=list_of_prefitted_transformers, create_transformer_per_time_series=True, random_state=111)

time_based_dataset.set_dataset_config_and_initialize(config, display_config_details=None, workers=0)

[2025-11-14 18:45:04,777][time_config][INFO] - Quick validation succeeded.
[2025-11-14 18:45:04,855][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:04,855][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 266.31it/s]
[2025-11-14 18:45:04,867][cesnet_dataset][INFO] - Config initialized successfully.


In [23]:
# Preview the first 10 train rows of series 103, scaled with the prefitted per-series transformers.
time_based_dataset.get_train_df(workers=0).head(10)

Unnamed: 0,id_ip,id_time,n_flows,n_packets
0,103.0,0.0,0.092305,0.019907
1,103.0,1.0,0.09095,0.012769
2,103.0,2.0,0.07785,0.032889
3,103.0,3.0,0.079958,0.009592
4,103.0,4.0,0.102997,0.011615
5,103.0,5.0,0.106309,0.029562
6,103.0,6.0,0.107966,0.037642
7,103.0,7.0,0.15585,0.012679
8,103.0,8.0,0.113085,0.025278
9,103.0,9.0,0.128445,0.03264


Below you can see that prefitted transformers work even without a train set.

In [24]:
# train_time_period=None: no train set is defined. The prefitted transformers are
# passed in ready to use, so the validation and test sets can still be transformed.
config = TimeBasedConfig(ts_ids=[103, 118], train_time_period=None, val_time_period=0.2, test_time_period=0.1, features_to_take=['n_flows', 'n_packets'],
                         transform_with=list_of_prefitted_transformers, create_transformer_per_time_series=True, random_state=111)

time_based_dataset.set_dataset_config_and_initialize(config, display_config_details=None, workers=0)

[2025-11-14 18:45:04,891][time_config][INFO] - Quick validation succeeded.
[2025-11-14 18:45:04,942][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:04,943][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 1000.07it/s]
[2025-11-14 18:45:04,948][cesnet_dataset][INFO] - Config initialized successfully.


In [25]:
# There is no train set in this config, so inspect the validation set instead;
# its feature columns are scaled by the prefitted transformers.
time_based_dataset.get_val_df(workers=0)

Unnamed: 0,id_ip,id_time,n_flows,n_packets
0,103.0,0.0,0.092305,0.019907
1,103.0,1.0,0.090950,0.012769
2,103.0,2.0,0.077850,0.032889
3,103.0,3.0,0.079958,0.009592
4,103.0,4.0,0.102997,0.011615
...,...,...,...,...
16113,118.0,8054.0,0.005259,0.001669
16114,118.0,8055.0,0.005178,0.007140
16115,118.0,8056.0,0.001254,0.004929
16116,118.0,8057.0,0.000647,0.000593


##### One transformer for every time series

- One transformer is used for all time series.
- Must provide one transformer.

In [26]:
# Reuse the single shared transformer fitted on series 1367/1368 for series 103 and 118.
# partial_fit_initialized_transformers is left at its default, so no additional
# fitting on the new train set is requested.
config = TimeBasedConfig(ts_ids=[103, 118], train_time_period=0.5, val_time_period=0.2, test_time_period=0.1, features_to_take=['n_flows', 'n_packets'],
                         transform_with=one_prefitted_transformer, create_transformer_per_time_series=False, random_state=111)

time_based_dataset.set_dataset_config_and_initialize(config, display_config_details=None, workers=0)

[2025-11-14 18:45:04,971][time_config][INFO] - Quick validation succeeded.
[2025-11-14 18:45:05,049][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:05,050][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 363.51it/s]
[2025-11-14 18:45:05,061][cesnet_dataset][INFO] - Config initialized successfully.


In [27]:
# Preview the first 10 train rows of series 103, scaled with the prefitted shared transformer.
time_based_dataset.get_train_df(workers=0).head(10)

Unnamed: 0,id_ip,id_time,n_flows,n_packets
0,103.0,0.0,0.024579,0.007862
1,103.0,1.0,0.024218,0.005043
2,103.0,2.0,0.02073,0.012988
3,103.0,3.0,0.021291,0.003788
4,103.0,4.0,0.027426,0.004587
5,103.0,5.0,0.028308,0.011674
6,103.0,6.0,0.028749,0.014865
7,103.0,7.0,0.0415,0.005007
8,103.0,8.0,0.030112,0.009983
9,103.0,9.0,0.034202,0.01289


Below you can see that a prefitted transformer works even without a train set.

In [28]:
# train_time_period=None: no train set is defined. The prefitted shared transformer is
# passed in ready to use, so the validation and test sets can still be transformed.
config = TimeBasedConfig(ts_ids=[103, 118], train_time_period=None, val_time_period=0.2, test_time_period=0.1, features_to_take=['n_flows', 'n_packets'],
                         transform_with=one_prefitted_transformer, create_transformer_per_time_series=False, random_state=111)

time_based_dataset.set_dataset_config_and_initialize(config, display_config_details=None, workers=0)

[2025-11-14 18:45:05,085][time_config][INFO] - Quick validation succeeded.
[2025-11-14 18:45:05,137][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:05,138][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 1000.19it/s]
[2025-11-14 18:45:05,143][cesnet_dataset][INFO] - Config initialized successfully.


In [29]:
# There is no train set in this config, so inspect the validation set instead;
# its feature columns are scaled by the prefitted shared transformer.
time_based_dataset.get_val_df(workers=0)

Unnamed: 0,id_ip,id_time,n_flows,n_packets
0,103.0,0.0,0.024579,0.007862
1,103.0,1.0,0.024218,0.005043
2,103.0,2.0,0.020730,0.012988
3,103.0,3.0,0.021291,0.003788
4,103.0,4.0,0.027426,0.004587
...,...,...,...,...
16113,118.0,8054.0,0.014034,0.002224
16114,118.0,8055.0,0.013953,0.007691
16115,118.0,8056.0,0.010064,0.005482
16116,118.0,8057.0,0.009463,0.001148


##### Partial fitting on train set

Additionally fits the already fitted transformer/s on the new train set. The transformer/s must implement `partial_fit`.

In [30]:
# partial_fit_initialized_transformers=True: the already fitted shared transformer is
# additionally partial-fitted on the train set of series 103 and 118.
config = TimeBasedConfig(ts_ids=[103, 118], train_time_period=0.5, val_time_period=0.2, test_time_period=0.1, features_to_take=['n_flows', 'n_packets'],
                         transform_with=one_prefitted_transformer, create_transformer_per_time_series=False, partial_fit_initialized_transformers=True, random_state=111)

time_based_dataset.set_dataset_config_and_initialize(config, display_config_details=None, workers=0)

[2025-11-14 18:45:05,167][time_config][INFO] - Quick validation succeeded.
[2025-11-14 18:45:05,245][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:05,246][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 264.25it/s]
[2025-11-14 18:45:05,259][cesnet_dataset][INFO] - Config initialized successfully.


In [31]:
# Preview the first 10 train rows; the scaled values differ from the earlier preview of
# series 103 because the shared transformer has also been fitted on this train set.
time_based_dataset.get_train_df(workers=0).head(10)

Unnamed: 0,id_ip,id_time,n_flows,n_packets
0,103.0,0.0,0.021068,0.007761
1,103.0,1.0,0.020778,0.004986
2,103.0,2.0,0.017976,0.012806
3,103.0,3.0,0.018427,0.003752
4,103.0,4.0,0.023355,0.004538
5,103.0,5.0,0.024064,0.011513
6,103.0,6.0,0.024419,0.014653
7,103.0,7.0,0.034663,0.004951
8,103.0,8.0,0.025514,0.009848
9,103.0,9.0,0.0288,0.012709


In [32]:
# partial_fit_initialized_transformers=True with one transformer per time series:
# the already fitted transformers in the list are additionally fitted on the
# train set of series 103 and 118.
config = TimeBasedConfig(ts_ids=[103, 118], train_time_period=0.5, val_time_period=0.2, test_time_period=0.1, features_to_take=['n_flows', 'n_packets'],
                         transform_with=list_of_prefitted_transformers, create_transformer_per_time_series=True, partial_fit_initialized_transformers=True, random_state=111)

time_based_dataset.set_dataset_config_and_initialize(config, display_config_details=None, workers=0)

[2025-11-14 18:45:05,283][time_config][INFO] - Quick validation succeeded.
[2025-11-14 18:45:05,363][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:05,364][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 266.47it/s]
[2025-11-14 18:45:05,376][cesnet_dataset][INFO] - Config initialized successfully.


In [33]:
# Preview the first 10 train rows of series 103 after the per-series transformers
# were additionally fitted on this train set.
time_based_dataset.get_train_df(workers=0).head(10)

Unnamed: 0,id_ip,id_time,n_flows,n_packets
0,103.0,0.0,0.019837,0.007737
1,103.0,1.0,0.019546,0.004963
2,103.0,2.0,0.01674,0.012783
3,103.0,3.0,0.017192,0.003728
4,103.0,4.0,0.022127,0.004514
5,103.0,5.0,0.022836,0.01149
6,103.0,6.0,0.023191,0.01463
7,103.0,7.0,0.033448,0.004928
8,103.0,8.0,0.024288,0.009825
9,103.0,9.0,0.027578,0.012686


#### Getting pre-transform value

- You can call `inverse_transform` on the transformer/s returned by `get_transformers()` to recover the pre-transform values.
- `inverse_transform` expects a numpy array of shape `(times, features)` where the features do not contain ids.

##### One transformer for every time series

In [34]:
# One MinMaxScaler shared by both time series; it is used afterwards to map
# scaled values back to their original scale.
config = TimeBasedConfig(ts_ids=[1367, 1368], train_time_period=0.5, val_time_period=0.2, test_time_period=0.1, features_to_take=['n_flows', 'n_packets'],
                         transform_with=TransformerType.MIN_MAX_SCALER, create_transformer_per_time_series=False, random_state=111)

time_based_dataset.set_dataset_config_and_initialize(config, display_config_details="text", workers=0)

[2025-11-14 18:45:05,402][time_config][INFO] - Quick validation succeeded.
[2025-11-14 18:45:05,482][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:05,482][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 234.86it/s]
[2025-11-14 18:45:05,496][cesnet_dataset][INFO] - Config initialized successfully.



Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_10_MINUTES
    Source: SourceType.IP_ADDRESSES_SAMPLE

    Time series
        Time series IDS: [1367 1368], Length=2
    Time periods
        Train time periods: range(0, 20149)
        Val time periods: range(20149, 28208)
        Test time periods: range(28208, 32237)
        All time periods: range(0, 32237)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Sliding window
        Sliding window size: None
        Sliding window prediction size: None
        Sliding window step size: 1
        Set shared size: 0
    Fillers
        Filler type: NoFiller
    Transformers
        Transformer type: MinMaxScaler
        Is transformer per Time series: False
        Are transformers premade: False
        Are premade transformers partial_

In [35]:
# With create_transformer_per_time_series=False a single shared transformer is returned.
transformer = time_based_dataset.get_transformers()

# Only the first batch is needed, so fetch it directly instead of looping and breaking.
batch = next(iter(time_based_dataset.get_train_dataloader()))

# Take the first entry of the batch and drop the two leading columns (id_ip, id_time):
# inverse_transform expects only the feature columns.
# NOTE(review): assumes the batch is laid out as (time series, times, columns) - confirm.
data = batch[0, :, 2:]

# Recover the pre-transform values and show the first 10 time steps.
transformer.inverse_transform(data)[:10]

[2025-11-14 18:45:05,501][cesnet_dataset][INFO] - Created new cached train_dataloader.


array([[  1146.,  30133.],
       [  1317.,  15029.],
       [  1079.,  23431.],
       [  1027.,  22788.],
       [   987.,  41873.],
       [  1145.,  16126.],
       [  1301.,  31694.],
       [  1319.,  17996.],
       [  1586.,  71972.],
       [  1461., 104379.]])

##### Transformer per time series

In [36]:
# One MinMaxScaler per time series; the transformer of the first series is used
# afterwards to map scaled values back to their original scale.
config = TimeBasedConfig(ts_ids=[1367, 1368], train_time_period=0.5, val_time_period=0.2, test_time_period=0.1, features_to_take=['n_flows', 'n_packets'],
                         transform_with=TransformerType.MIN_MAX_SCALER, create_transformer_per_time_series=True, random_state=111)

time_based_dataset.set_dataset_config_and_initialize(config, display_config_details="text", workers=0)

[2025-11-14 18:45:09,354][time_config][INFO] - Quick validation succeeded.
[2025-11-14 18:45:09,975][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:09,976][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 244.34it/s]
[2025-11-14 18:45:09,989][cesnet_dataset][INFO] - Config initialized successfully.



Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_10_MINUTES
    Source: SourceType.IP_ADDRESSES_SAMPLE

    Time series
        Time series IDS: [1367 1368], Length=2
    Time periods
        Train time periods: range(0, 20149)
        Val time periods: range(20149, 28208)
        Test time periods: range(28208, 32237)
        All time periods: range(0, 32237)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Sliding window
        Sliding window size: None
        Sliding window prediction size: None
        Sliding window step size: 1
        Set shared size: 0
    Fillers
        Filler type: NoFiller
    Transformers
        Transformer type: MinMaxScaler
        Is transformer per Time series: True
        Are transformers premade: False
        Are premade transformers partial_f

In [37]:
# With one transformer per time series, get_transformers() returns an array;
# index 0 is the transformer paired with the first entry taken from the batch below.
transformer = time_based_dataset.get_transformers()[0]

# Only the first batch is needed, so fetch it directly instead of looping and breaking.
batch = next(iter(time_based_dataset.get_train_dataloader()))

# Take the first entry of the batch and drop the two leading columns (id_ip, id_time):
# inverse_transform expects only the feature columns.
# NOTE(review): assumes the batch is laid out as (time series, times, columns) - confirm.
data = batch[0, :, 2:]

# Recover the pre-transform values and show the first 10 time steps.
transformer.inverse_transform(data)[:10]

[2025-11-14 18:45:09,996][cesnet_dataset][INFO] - Created new cached train_dataloader.


array([[  1146.,  30133.],
       [  1317.,  15029.],
       [  1079.,  23431.],
       [  1027.,  22788.],
       [   987.,  41873.],
       [  1145.,  16126.],
       [  1301.,  31694.],
       [  1319.,  17996.],
       [  1586.,  71972.],
       [  1461., 104379.]])

#### Changing when the transformer is applied

- You can change when the transformer is applied with the `preprocess_order` parameter.

In [38]:
# preprocess_order lists the preprocessing steps in the order they should run;
# here "transforming" comes last, after anomaly handling and gap filling.
# NOTE(review): this looks like the default order described in the Transformers section
# (transformers applied after fillers) - confirm.
config = TimeBasedConfig(ts_ids=[1367, 1368], train_time_period=0.5, val_time_period=0.2, test_time_period=0.1, features_to_take=['n_flows', 'n_packets'],
                         transform_with=TransformerType.MIN_MAX_SCALER, create_transformer_per_time_series=True, random_state=111, preprocess_order=["handling_anomalies", "filling_gaps", "transforming"])

time_based_dataset.set_dataset_config_and_initialize(config, display_config_details="text", workers=0)

[2025-11-14 18:45:13,823][time_config][INFO] - Quick validation succeeded.
[2025-11-14 18:45:14,468][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:14,468][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 234.99it/s]
[2025-11-14 18:45:14,481][cesnet_dataset][INFO] - Config initialized successfully.



Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_10_MINUTES
    Source: SourceType.IP_ADDRESSES_SAMPLE

    Time series
        Time series IDS: [1367 1368], Length=2
    Time periods
        Train time periods: range(0, 20149)
        Val time periods: range(20149, 28208)
        Test time periods: range(28208, 32237)
        All time periods: range(0, 32237)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Sliding window
        Sliding window size: None
        Sliding window prediction size: None
        Sliding window step size: 1
        Set shared size: 0
    Fillers
        Filler type: NoFiller
    Transformers
        Transformer type: MinMaxScaler
        Is transformer per Time series: True
        Are transformers premade: False
        Are premade transformers partial_f

Or later with:

In [39]:
# Move "transforming" ahead of "filling_gaps" on an already initialized dataset.
# The two calls are alternatives - either one alone is enough; this cell runs both
# for demonstration, which is why the log shows two re-initializations.
time_based_dataset.update_dataset_config_and_initialize(preprocess_order=["handling_anomalies", "transforming", "filling_gaps"], workers=0)
# Or
time_based_dataset.set_preprocess_order(preprocess_order=["handling_anomalies", "transforming", "filling_gaps"], workers=0)

[2025-11-14 18:45:14,487][cesnet_dataset][INFO] - Re-initialization is required.
[2025-11-14 18:45:14,567][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:14,567][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 249.97it/s]
[2025-11-14 18:45:14,581][cesnet_dataset][INFO] - Config initialized successfully.
[2025-11-14 18:45:14,582][cesnet_dataset][INFO] - Configuration has been changed successfuly.
[2025-11-14 18:45:14,583][cesnet_dataset][INFO] - Re-initialization is required.
[2025-11-14 18:45:14,667][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time series.
[2025-11-14 18:45:14,667][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 2/2 [00:00<00:00, 235.07it/s]
[2025-11-14 18:45:14,680][cesnet_dataset][INFO] - Config initialized successfully.
[2025-11-14 18:45:14,681][cesnet_dataset][INFO] - Configuration has been changed successfuly.
[2025-