# Choosing data for SeriesBasedCesnetDataset

### Import

In [1]:
import logging
from datetime import datetime

from cesnet_tszoo.utils.enums import AgreggationType, SourceType, TimeFormat, DatasetType
from cesnet_tszoo.datasets import CESNET_TimeSeries24
from cesnet_tszoo.configs import SeriesBasedConfig # Series based dataset MUST use SeriesBasedConfig

### Setting logger

In [2]:
# Emit INFO-level records in a "[time][logger][level] - message" layout.
logging.basicConfig(
    format="[%(asctime)s][%(name)s][%(levelname)s] - %(message)s",
    level=logging.INFO,
)

### Preparing dataset

In [3]:
# Build a series-based view of CESNET-TimeSeries24 (institution subnets, 1-hour aggregation).
# "/some_directory/" is a placeholder for the dataset root directory.
series_based_dataset = CESNET_TimeSeries24.get_dataset(
    data_root="/some_directory/",
    source_type=SourceType.INSTITUTION_SUBNETS,
    aggregation=AgreggationType.AGG_1_HOUR,
    dataset_type=DatasetType.SERIES_BASED,
    display_details=True,
)

[2025-11-28 18:01:46,430][cesnet_dataset][INFO] - Dataset is series-based. Use cesnet_tszoo.configs.SeriesBasedConfig



Dataset details:

    AgreggationType.AGG_1_HOUR
        Time indices: range(0, 6717)
        Datetime: (datetime.datetime(2023, 10, 9, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime(2024, 7, 14, 21, 0, tzinfo=datetime.timezone.utc))

    SourceType.INSTITUTION_SUBNETS
        Time series indices: [0 1 2 3 4 ... 543 544 545 546 547], Length=548; use 'get_available_ts_indices' for full list
        Features with default values: {'n_flows': 0, 'n_packets': 0, 'n_bytes': 0, 'tcp_udp_ratio_packets': 0.5, 'tcp_udp_ratio_bytes': 0.5, 'dir_ratio_packets': 0.5, 'dir_ratio_bytes': 0.5, 'avg_duration': 0, 'avg_ttl': 0, 'sum_n_dest_asn': 0, 'avg_n_dest_asn': 0, 'std_n_dest_asn': 0, 'sum_n_dest_ports': 0, 'avg_n_dest_ports': 0, 'std_n_dest_ports': 0, 'sum_n_dest_ip': 0, 'avg_n_dest_ip': 0, 'std_n_dest_ip': 0}
        
        Additional data: ['ids_relationship', 'weekends_and_holidays']
        


### Selecting time period

- `time_period` sets the time period that is used for all sets (i.e. for every used time series).

#### Setting time period as "all"

- Sets the time period for the time series to the whole time period available in the dataset.

In [4]:
# "all" requests the dataset's entire time period.
config = SeriesBasedConfig(
    time_period="all",
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:01:46,435][series_config][INFO] - Quick validation succeeded.
[2025-11-28 18:01:46,437][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-11-28 18:01:46,448][cesnet_dataset][INFO] - Updating config for all set.
100%|██████████| 548/548 [00:00<00:00, 794.45it/s]
[2025-11-28 18:01:47,158][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:01:47,158][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 6718)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

#### Setting time period with time indices

- Sets the time period for the time series as a range of time indices.

In [5]:
# Select the time period by time indices: 0 up to (but not including) 2000.
config = SeriesBasedConfig(
    time_period=range(0, 2000),
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:01:47,173][series_config][INFO] - Quick validation succeeded.
[2025-11-28 18:01:47,174][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-11-28 18:01:47,185][cesnet_dataset][INFO] - Updating config for all set.
100%|██████████| 548/548 [00:00<00:00, 1924.31it/s]
[2025-11-28 18:01:47,490][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:01:47,490][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 2000)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

#### Setting time period with datetime

- Sets the time period for the time series with a tuple of datetime objects.
- Datetime objects are expected to be in UTC.

In [6]:
# Select the time period with a (start, end) tuple of datetimes.
# NOTE(review): these datetimes are naive; presumably they are interpreted as UTC — confirm.
config = SeriesBasedConfig(
    time_period=(
        datetime(2023, 10, 9, 0),
        datetime(2023, 11, 9, 23),
    ),
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:01:47,501][series_config][INFO] - Quick validation succeeded.
[2025-11-28 18:01:47,503][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-11-28 18:01:47,508][cesnet_dataset][INFO] - Updating config for all set.
100%|██████████| 548/548 [00:00<00:00, 2246.63it/s]
[2025-11-28 18:01:47,773][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:01:47,774][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 767)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: N

#### Setting time period with percentage

- Sets the time period for the time series as a fraction of the whole time period of the dataset.
- Always starts from the first time index.
- Must satisfy: 0 < `time_period` <= 1.

In [7]:
# 0.5 selects the first half of the dataset's time period.
config = SeriesBasedConfig(
    time_period=0.5,
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:01:47,833][series_config][INFO] - Quick validation succeeded.
[2025-11-28 18:01:47,835][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-11-28 18:01:47,844][cesnet_dataset][INFO] - Updating config for all set.
100%|██████████| 548/548 [00:00<00:00, 1544.30it/s]
[2025-11-28 18:01:48,219][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:01:48,219][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

### Creating train/val/test sets

- Sets how many time series will be in each set.
- You can leave any of the sets as None.
- You can use `nan_threshold` to set how many NaN values will be tolerated.
    - `nan_threshold` = 1.0 means that a time series can be completely empty.
    - It is applied after the sets have been created.

#### Setting sets with count of time series

- Sets the time series in each set by count.
- Each set will contain unique time series.
- The count must be greater than zero.
- The total number of time series across all sets must be smaller than the number of time series in the dataset.
- Is affected by `random_state`.
    - When `random_state` is set, the sets will contain the same time series on every run.

In [8]:
# Split by absolute counts: 54 train, 25 validation and 10 test time series.
# random_state=None: no fixed seed is given, so the picked series presumably differ between runs.
# nan_threshold=1.0 tolerates time series that are completely empty.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    random_state=None,
    nan_threshold=1.0,
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:01:48,228][series_config][INFO] - Quick validation succeeded.
[2025-11-28 18:01:48,235][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-28 18:01:48,236][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 54/54 [00:00<00:00, 1269.38it/s]
[2025-11-28 18:01:48,290][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 25/25 [00:00<00:00, 1202.55it/s]
[2025-11-28 18:01:48,317][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 10/10 [00:00<00:00, 1175.83it/s]
[2025-11-28 18:01:48,328][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:01:48,328][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [210 172  48  83 264 ... 512  49   8 196  29], Length=54
        Val time series IDS: [159 333  87 224 457 ... 372 205 491 446 347], Length=25
        Test time series IDS [410  12 478 212  30 398 281 161 307 349], Length=10
        All time series IDS [210 172  48  83 264 ... 398 281 161 307 349], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 

#### Setting sets with percentage of time series in dataset

- Sets the time series in each set as a percentage of the time series in the dataset.
- Each set will contain unique time series.
- The percentage must be greater than 0.
- The total sum of the set percentages must be less than or equal to 1.0.
- Is affected by `random_state`.
    - When `random_state` is set, the sets will contain the same time series on every run.

In [9]:
# Split by fractions of the dataset's time series: 50% train, 20% validation, 10% test.
# nan_threshold=1.0 tolerates time series that are completely empty.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=0.5,
    val_ts=0.2,
    test_ts=0.1,
    random_state=None,
    nan_threshold=1.0,
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:01:48,336][series_config][INFO] - Quick validation succeeded.
[2025-11-28 18:01:48,343][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-28 18:01:48,344][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 274/274 [00:00<00:00, 1440.58it/s]
[2025-11-28 18:01:48,555][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 109/109 [00:00<00:00, 1335.50it/s]
[2025-11-28 18:01:48,647][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 54/54 [00:00<00:00, 1077.07it/s]
[2025-11-28 18:01:48,702][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:01:48,702][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [547 449  25 187 348 ... 224 249 493  91 207], Length=274
        Val time series IDS: [ 13 121 519 258 255 ... 110 368 298 245 342], Length=109
        Test time series IDS [372  92 331  10 428 ... 109 235 545 400  82], Length=54
        All time series IDS [547 449  25 187 348 ... 109 235 545 400  82], Length=437
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0

#### Setting sets with specific time series indices

- Each set must contain unique time series.

In [10]:
# Assign explicit time series indices to each set.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=[0,1,2,3,4],
    val_ts=[5,6,7,8,9],
    test_ts=[10,11,12,13,14],
    nan_threshold=1.0,
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:01:48,718][series_config][INFO] - Quick validation succeeded.
[2025-11-28 18:01:48,723][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-28 18:01:48,724][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 5/5 [00:00<00:00, 1109.72it/s]
[2025-11-28 18:01:48,738][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 5/5 [00:00<00:00, 1250.54it/s]
[2025-11-28 18:01:48,747][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 5/5 [00:00<00:00, 1250.69it/s]
[2025-11-28 18:01:48,753][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:01:48,753][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [0 1 2 3 4], Length=5
        Val time series IDS: [5 6 7 8 9], Length=5
        Test time series IDS [10 11 12 13 14], Length=5
        All time series IDS [0 1 2 3 4 ... 10 11 12 13 14], Length=15
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.

### Selecting features

- Affects which features will be returned when loading data.
- Setting `include_time` to True will add time to the features that are returned when loading data.
- Setting `include_ts_id` to True will add the time series id to the features that are returned when loading data.

#### Setting features to take as "all"

In [11]:
# features_to_take="all" keeps every feature of the dataset.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    features_to_take="all",
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:01:48,758][series_config][INFO] - Quick validation succeeded.
[2025-11-28 18:01:48,764][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-28 18:01:48,765][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 54/54 [00:00<00:00, 1207.15it/s]
[2025-11-28 18:01:48,821][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 25/25 [00:00<00:00, 1135.63it/s]
[2025-11-28 18:01:48,850][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 10/10 [00:00<00:00, 1175.90it/s]
[2025-11-28 18:01:48,860][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:01:48,861][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [371 434 441 448 259 ... 130 303 363 191 218], Length=54
        Val time series IDS: [374 104 425 296  50 ... 297 542  37 106 131], Length=25
        Test time series IDS [547 156 184 401 173   5 360  55  59  18], Length=10
        All time series IDS [371 434 441 448 259 ...   5 360  55  59  18], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 

#### Setting features via list

In [12]:
# Restrict the returned features to an explicit list of feature names.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    features_to_take=["n_flows", "n_packets"],
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:01:48,869][series_config][INFO] - Quick validation succeeded.
[2025-11-28 18:01:48,875][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-28 18:01:48,876][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 54/54 [00:00<00:00, 1511.87it/s]
[2025-11-28 18:01:48,923][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 25/25 [00:00<00:00, 1315.11it/s]
[2025-11-28 18:01:48,948][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 10/10 [00:00<00:00, 1332.33it/s]
[2025-11-28 18:01:48,959][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:01:48,959][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [ 95  71 508 256 471 ... 255  82  19 320 314], Length=54
        Val time series IDS: [442 515 229 175 293 ... 113 449 425  63 134], Length=25
        Test time series IDS [496 297 153   3 201 196  49  35 514 504], Length=10
        All time series IDS [ 95  71 508 256 471 ... 196  49  35 514 504], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: NoFiller
    Transformers
        Transformer type: NoTransformer
    Anomaly handler
        Anomaly handler type (train set): NoAnomalyHandler   
    Batch sizes
        Train batch size: 32
   

#### Including time and time series id

In [13]:
# Return time and time series id alongside the two selected features;
# time is requested in the TimeFormat.ID_TIME format.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    features_to_take=["n_flows", "n_packets"],
    include_time=True,
    include_ts_id=True,
    time_format=TimeFormat.ID_TIME,
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:01:48,966][series_config][INFO] - Quick validation succeeded.
[2025-11-28 18:01:48,973][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-28 18:01:48,973][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 54/54 [00:00<00:00, 1348.84it/s]
[2025-11-28 18:01:49,026][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 25/25 [00:00<00:00, 1245.15it/s]
[2025-11-28 18:01:49,053][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 10/10 [00:00<00:00, 1332.58it/s]
[2025-11-28 18:01:49,064][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:01:49,065][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [285 205  69 339  22 ... 102 476 287  17 163], Length=54
        Val time series IDS: [309 231 537 289 544 ... 261 363 503 192  75], Length=25
        Test time series IDS [294 161 390 179 128 431 337 426 229 540], Length=10
        All time series IDS [285 205  69 339  22 ... 431 337 426 229 540], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: NoFiller
    Transformers
        Transformer type: NoTransformer
    Anomaly handler
        Anomaly handler type (train set): NoAnomalyHandler   
    Batch sizes
        Train batch size: 32
   

### Selecting all set

#### All set when other sets are None

- The all set will contain all time series from the dataset.

In [14]:
# With train/val/test all left as None, the all set covers every time series in the dataset.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=None,
    val_ts=None,
    test_ts=None,
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:01:49,072][series_config][INFO] - Quick validation succeeded.
[2025-11-28 18:01:49,074][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-11-28 18:01:49,085][cesnet_dataset][INFO] - Updating config for all set.
100%|██████████| 548/548 [00:00<00:00, 1623.52it/s]
[2025-11-28 18:01:49,440][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:01:49,440][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

#### All set when at least one other set is not None

- The all set will contain every time series that was selected by the other sets.

In [15]:
# With train/val/test given, the all set is made up of the series chosen for those sets.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:01:49,450][series_config][INFO] - Quick validation succeeded.
[2025-11-28 18:01:49,457][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-28 18:01:49,457][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 54/54 [00:00<00:00, 1250.28it/s]
[2025-11-28 18:01:49,513][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 25/25 [00:00<00:00, 1211.76it/s]
[2025-11-28 18:01:49,540][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 10/10 [00:00<00:00, 1246.97it/s]
[2025-11-28 18:01:49,550][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:01:49,550][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [358  79 191 542  10 ... 141 268 432 363 178], Length=54
        Val time series IDS: [340  93 269 131 143 ... 507  37 453  34 386], Length=25
        Test time series IDS [354 411 525 479 247 229 456 445 461 538], Length=10
        All time series IDS [358  79 191 542  10 ... 229 456 445 461 538], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 