# Choosing data for SeriesBasedCesnetDataset

### Import

In [1]:
import logging
from datetime import datetime

from cesnet_tszoo.utils.enums import AgreggationType, SourceType, TimeFormat, DatasetType
from cesnet_tszoo.datasets import CESNET_TimeSeries24
from cesnet_tszoo.configs import SeriesBasedConfig # Series based dataset MUST use SeriesBasedConfig

### Setting logger

In [2]:
# Show INFO-level messages, each prefixed with timestamp, logger name and level.
logging.basicConfig(
    format="[%(asctime)s][%(name)s][%(levelname)s] - %(message)s",
    level=logging.INFO,
)

### Preparing dataset

In [3]:
# Build the series-based variant of CESNET-TimeSeries24 (institution subnets, 1-hour aggregation).
# `data_root` is a placeholder path - replace it with your own dataset directory.
# `display_details=True` prints the "Dataset details" summary below.
series_based_dataset = CESNET_TimeSeries24.get_dataset(
    data_root="/some_directory/",
    source_type=SourceType.INSTITUTION_SUBNETS,
    aggregation=AgreggationType.AGG_1_HOUR,
    dataset_type=DatasetType.SERIES_BASED,
    display_details=True,
)

[2025-11-14 18:37:28,510][cesnet_dataset][INFO] - Dataset is series-based. Use cesnet_tszoo.configs.SeriesBasedConfig



Dataset details:

    AgreggationType.AGG_1_HOUR
        Time indices: range(0, 6717)
        Datetime: (datetime.datetime(2023, 10, 9, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime(2024, 7, 14, 21, 0, tzinfo=datetime.timezone.utc))

    SourceType.INSTITUTION_SUBNETS
        Time series indices: [0 1 2 3 4 ... 543 544 545 546 547], Length=548; use 'get_available_ts_indices' for full list
        Features with default values: {'n_flows': 0, 'n_packets': 0, 'n_bytes': 0, 'tcp_udp_ratio_packets': 0.5, 'tcp_udp_ratio_bytes': 0.5, 'dir_ratio_packets': 0.5, 'dir_ratio_bytes': 0.5, 'avg_duration': 0, 'avg_ttl': 0, 'sum_n_dest_asn': 0, 'avg_n_dest_asn': 0, 'std_n_dest_asn': 0, 'sum_n_dest_ports': 0, 'avg_n_dest_ports': 0, 'std_n_dest_ports': 0, 'sum_n_dest_ip': 0, 'avg_n_dest_ip': 0, 'std_n_dest_ip': 0}
        
        Additional data: ['ids_relationship', 'weekends_and_holidays']
        


### Selecting time period

- `time_period` sets the time period used for all sets (i.e. for every time series that is used).

#### Setting time period as "all"

- Sets the time period of the time series to the whole time period available in the dataset.

In [4]:
# "all" selects the dataset's whole time period for every time series.
config = SeriesBasedConfig(time_period="all")

# Apply the config; the resulting configuration is printed as text.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    workers=0,
    display_config_details="text",
)

[2025-11-14 18:37:28,515][series_config][INFO] - Quick validation succeeded.
[2025-11-14 18:37:28,516][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-11-14 18:37:28,528][cesnet_dataset][INFO] - Updating config for all set.
100%|██████████| 548/548 [00:00<00:00, 803.38it/s]
[2025-11-14 18:37:29,230][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-14 18:37:29,230][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 6718)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

#### Setting time period with time indices

- Sets the time period of the time series as a range of time indices.

In [5]:
# Restrict the time period to the time indices given by range(0, 2000).
config = SeriesBasedConfig(
    time_period=range(0, 2000),
)

# Apply the config; the resulting configuration is printed as text.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    workers=0,
    display_config_details="text",
)

[2025-11-14 18:37:29,240][series_config][INFO] - Quick validation succeeded.
[2025-11-14 18:37:29,242][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-11-14 18:37:29,250][cesnet_dataset][INFO] - Updating config for all set.
100%|██████████| 548/548 [00:00<00:00, 2008.01it/s]
[2025-11-14 18:37:29,542][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-14 18:37:29,542][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 2000)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

#### Setting time period with datetime

- Sets the time period of the time series with a tuple of two datetime objects (start, end).
- Datetime objects are expected to be in UTC.

In [6]:
# Time period given as a (start, end) tuple of datetimes.
# The datetimes carry no tzinfo; they are expected to represent UTC.
config = SeriesBasedConfig(
    time_period=(
        datetime(2023, 10, 9, 0),
        datetime(2023, 11, 9, 23),
    ),
)

# Apply the config; the resulting configuration is printed as text.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    workers=0,
    display_config_details="text",
)

[2025-11-14 18:37:29,552][series_config][INFO] - Quick validation succeeded.
[2025-11-14 18:37:29,554][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-11-14 18:37:29,558][cesnet_dataset][INFO] - Updating config for all set.
100%|██████████| 548/548 [00:00<00:00, 2320.18it/s]
[2025-11-14 18:37:29,812][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-14 18:37:29,812][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 767)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: N

#### Setting time period with percentage

- Sets the time period of the time series as a percentage of the whole time period in the dataset (given as a fraction, e.g. `0.5` for 50 %).
- Always starts from the first time index.
- Must satisfy: 0 < `time_period` <= 1.

In [7]:
# A float time_period selects that share of the dataset's time period, starting from the first time.
config = SeriesBasedConfig(
    time_period=0.5,
)

# Apply the config; the resulting configuration is printed as text.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    workers=0,
    display_config_details="text",
)

[2025-11-14 18:37:29,875][series_config][INFO] - Quick validation succeeded.
[2025-11-14 18:37:29,877][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-11-14 18:37:29,885][cesnet_dataset][INFO] - Updating config for all set.
100%|██████████| 548/548 [00:00<00:00, 1535.19it/s]
[2025-11-14 18:37:30,261][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-14 18:37:30,262][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

### Creating train/val/test sets

- Sets how many time series will be in each set.
- Any of the sets can be left as `None`.
- `nan_threshold` can be used to set how many NaN values will be tolerated in a time series.
    - `nan_threshold` = 1.0 means that a time series can be completely empty.
    - It is applied after the sets have been created.

#### Setting sets with count of time series

- Sets the time series in each set by count.
- Each set will contain unique time series.
- Each count must be greater than zero.
- The total number of time series across all sets must be smaller than the number of time series in the dataset.
- The selection is affected by `random_state`.
    - When `random_state` is set, the sets will contain the same time series on every run.

In [8]:
# Integer set sizes: 54 train, 25 validation and 10 test time series.
# random_state is left as None here; nan_threshold=1.0 tolerates completely empty time series.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    random_state=None,
    nan_threshold=1.0,
)

# Apply the config; the resulting configuration is printed as text.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    workers=0,
    display_config_details="text",
)

[2025-11-14 18:37:30,273][series_config][INFO] - Quick validation succeeded.
[2025-11-14 18:37:30,279][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-14 18:37:30,280][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 54/54 [00:00<00:00, 1294.18it/s]
[2025-11-14 18:37:30,335][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 25/25 [00:00<00:00, 1189.77it/s]
[2025-11-14 18:37:30,364][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 10/10 [00:00<00:00, 1175.43it/s]
[2025-11-14 18:37:30,375][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-14 18:37:30,375][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [218 450  10 529 373 ... 312  68   3 121 264], Length=54
        Val time series IDS: [236 495 536 424 142 ... 530 174 345  96 331], Length=25
        Test time series IDS [144 411 492 214 406  74 192  89 240 378], Length=10
        All time series IDS [218 450  10 529 373 ...  74 192  89 240 378], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 

#### Setting sets with percentage of time series in dataset

- Sets the time series in each set as a percentage of the time series in the dataset (given as a fraction, e.g. `0.5` for 50 %).
- Each set will contain unique time series.
- Each percentage must be greater than 0.
- The total sum of the set percentages must be less than or equal to 1.0.
- The selection is affected by `random_state`.
    - When `random_state` is set, the sets will contain the same time series on every run.

In [9]:
# Float set sizes: shares of the dataset's time series for train / validation / test.
# random_state is left as None here; nan_threshold=1.0 tolerates completely empty time series.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=0.5,
    val_ts=0.2,
    test_ts=0.1,
    random_state=None,
    nan_threshold=1.0,
)

# Apply the config; the resulting configuration is printed as text.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    workers=0,
    display_config_details="text",
)

[2025-11-14 18:37:30,384][series_config][INFO] - Quick validation succeeded.
[2025-11-14 18:37:30,390][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-14 18:37:30,391][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 274/274 [00:00<00:00, 1396.70it/s]
[2025-11-14 18:37:30,607][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 109/109 [00:00<00:00, 1280.10it/s]
[2025-11-14 18:37:30,704][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 54/54 [00:00<00:00, 1284.50it/s]
[2025-11-14 18:37:30,752][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-14 18:37:30,752][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [421 230 461 116 309 ...   4 393  34 219 436], Length=274
        Val time series IDS: [484  89 115 218 135 ... 251  63 176 201 226], Length=109
        Test time series IDS [417  97 390  99 362 ... 143 327 237 158 170], Length=54
        All time series IDS [421 230 461 116 309 ... 143 327 237 158 170], Length=437
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0

#### Setting sets with specific time series indices

- Each set must have unique time series

In [10]:
# Explicit time series indices for each set; no index appears in more than one list.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=[0, 1, 2, 3, 4],
    val_ts=[5, 6, 7, 8, 9],
    test_ts=[10, 11, 12, 13, 14],
    nan_threshold=1.0,
)

# Apply the config; the resulting configuration is printed as text.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    workers=0,
    display_config_details="text",
)

[2025-11-14 18:37:30,765][series_config][INFO] - Quick validation succeeded.
[2025-11-14 18:37:30,771][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-14 18:37:30,771][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 5/5 [00:00<00:00, 1250.39it/s]
[2025-11-14 18:37:30,787][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 5/5 [00:00<00:00, 1427.51it/s]
[2025-11-14 18:37:30,794][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 5/5 [00:00<00:00, 1250.17it/s]
[2025-11-14 18:37:30,800][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-14 18:37:30,800][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [0 1 2 3 4], Length=5
        Val time series IDS: [5 6 7 8 9], Length=5
        Test time series IDS [10 11 12 13 14], Length=5
        All time series IDS [0 1 2 3 4 ... 10 11 12 13 14], Length=15
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.

### Selecting features

- Affects which features are returned when loading data.
- Setting `include_time` to `True` adds the time to the features returned when loading data.
- Setting `include_ts_id` to `True` adds the time series ID to the features returned when loading data.

#### Setting features to take as "all"

In [11]:
# features_to_take="all" keeps every feature of the dataset.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    features_to_take="all",
)

# Apply the config; the resulting configuration is printed as text.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    workers=0,
    display_config_details="text",
)

[2025-11-14 18:37:30,806][series_config][INFO] - Quick validation succeeded.
[2025-11-14 18:37:30,812][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-14 18:37:30,812][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 54/54 [00:00<00:00, 1300.18it/s]
[2025-11-14 18:37:30,865][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 25/25 [00:00<00:00, 1105.80it/s]
[2025-11-14 18:37:30,895][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 10/10 [00:00<00:00, 1175.04it/s]
[2025-11-14 18:37:30,908][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-14 18:37:30,908][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [435 482 162 475  64 ... 472  30 347 322 121], Length=54
        Val time series IDS: [279 335 359 276 200 ...  58 538 273 403 292], Length=25
        Test time series IDS [  2  83 493 303 297 285 170 220 507 439], Length=10
        All time series IDS [435 482 162 475  64 ... 285 170 220 507 439], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 

#### Setting features via list

In [12]:
# Only the listed features are taken.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    features_to_take=["n_flows", "n_packets"],
)

# Apply the config; the resulting configuration is printed as text.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    workers=0,
    display_config_details="text",
)

[2025-11-14 18:37:30,916][series_config][INFO] - Quick validation succeeded.
[2025-11-14 18:37:30,922][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-14 18:37:30,923][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 54/54 [00:00<00:00, 1254.37it/s]
[2025-11-14 18:37:30,977][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 25/25 [00:00<00:00, 1386.88it/s]
[2025-11-14 18:37:31,002][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 10/10 [00:00<00:00, 1243.35it/s]
[2025-11-14 18:37:31,013][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-14 18:37:31,013][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [174 290 461  84 464 ... 189 183 332 369 107], Length=54
        Val time series IDS: [484 393 253 436 238 ...  79 156 298 382 524], Length=25
        Test time series IDS [127 214 317 207 489 385 227 434 202 500], Length=10
        All time series IDS [174 290 461  84 464 ... 385 227 434 202 500], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: NoFiller
    Transformers
        Transformer type: NoTransformer
    Anomaly handler
        Anomaly handler type (train set): NoAnomalyHandler   
    Batch sizes
        Train batch size: 32
   

#### Including time and time series id

In [13]:
# Besides the two selected features, request the time (in ID_TIME format)
# and the time series id to be included.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    features_to_take=["n_flows", "n_packets"],
    include_time=True,
    include_ts_id=True,
    time_format=TimeFormat.ID_TIME,
)

# Apply the config; the resulting configuration is printed as text.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    workers=0,
    display_config_details="text",
)

[2025-11-14 18:37:31,020][series_config][INFO] - Quick validation succeeded.
[2025-11-14 18:37:31,026][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-14 18:37:31,027][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 54/54 [00:00<00:00, 1520.39it/s]
[2025-11-14 18:37:31,074][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 25/25 [00:00<00:00, 1387.68it/s]
[2025-11-14 18:37:31,099][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 10/10 [00:00<00:00, 1396.19it/s]
[2025-11-14 18:37:31,107][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-14 18:37:31,108][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [261 428 436 217 525 ... 514 515 494 346 141], Length=54
        Val time series IDS: [526 537 200 387 240 ... 455 512 314  19 232], Length=25
        Test time series IDS [407 225 393 115 460 378 532 379 355 474], Length=10
        All time series IDS [261 428 436 217 525 ... 378 532 379 355 474], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: NoFiller
    Transformers
        Transformer type: NoTransformer
    Anomaly handler
        Anomaly handler type (train set): NoAnomalyHandler   
    Batch sizes
        Train batch size: 32
   

### Selecting all set

#### All set when other sets are None

- The all set will contain every time series from the dataset.

In [14]:
# With train/val/test all None, the all set is built from every time series in the dataset.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=None,
    val_ts=None,
    test_ts=None,
)

# Apply the config; the resulting configuration is printed as text.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    workers=0,
    display_config_details="text",
)

[2025-11-14 18:37:31,116][series_config][INFO] - Quick validation succeeded.
[2025-11-14 18:37:31,117][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-11-14 18:37:31,125][cesnet_dataset][INFO] - Updating config for all set.
100%|██████████| 548/548 [00:00<00:00, 1606.63it/s]
[2025-11-14 18:37:31,486][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-14 18:37:31,487][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

#### All set when at least one other set is not None

- The all set will contain every time series selected by the other sets.

In [15]:
# With train/val/test given, the all set is made up of the time series chosen for those sets.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
)

# Apply the config; the resulting configuration is printed as text.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    workers=0,
    display_config_details="text",
)

[2025-11-14 18:37:31,496][series_config][INFO] - Quick validation succeeded.
[2025-11-14 18:37:31,502][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-14 18:37:31,503][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 54/54 [00:00<00:00, 1173.10it/s]
[2025-11-14 18:37:31,560][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 25/25 [00:00<00:00, 1189.71it/s]
[2025-11-14 18:37:31,587][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 10/10 [00:00<00:00, 1176.26it/s]
[2025-11-14 18:37:31,599][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-14 18:37:31,599][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [333 264  78 442 166 ... 374  37 124 327 483], Length=54
        Val time series IDS: [429 171 410 449 260 ...  64 345 435 143 350], Length=25
        Test time series IDS [200 423 546 472 488 323 349  42 253 426], Length=10
        All time series IDS [333 264  78 442 166 ... 323 349  42 253 426], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 