# Choosing data for SeriesBasedCesnetDataset

### Import

In [1]:
import logging
from datetime import datetime

from cesnet_tszoo.utils.enums import AgreggationType, SourceType, TimeFormat, DatasetType
from cesnet_tszoo.datasets import CESNET_TimeSeries24
from cesnet_tszoo.configs import SeriesBasedConfig # Series based dataset MUST use SeriesBasedConfig

### Setting logger

In [2]:
# Configure the root logger: INFO level, records rendered as
# "[timestamp][logger name][level] - message".
logging.basicConfig(
    format="[%(asctime)s][%(name)s][%(levelname)s] - %(message)s",
    level=logging.INFO,
)

### Preparing dataset

In [3]:
# Build the series-based view of CESNET-TimeSeries24 (institution subnets,
# 1-hour aggregation). display_details=True prints the dataset summary.
# NOTE(review): "/some_directory/" looks like a placeholder data root — point it
# at your own data directory.
series_based_dataset = CESNET_TimeSeries24.get_dataset(
    data_root="/some_directory/",
    source_type=SourceType.INSTITUTION_SUBNETS,
    aggregation=AgreggationType.AGG_1_HOUR,
    dataset_type=DatasetType.SERIES_BASED,
    display_details=True,
)

[2025-08-26 20:06:44,462][wrapper_dataset][INFO] - Dataset is series-based. Use cesnet_tszoo.configs.SeriesBasedConfig



Dataset details:

    AgreggationType.AGG_1_HOUR
        Time indices: range(0, 6717)
        Datetime: (datetime.datetime(2023, 10, 9, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime(2024, 7, 14, 21, 0, tzinfo=datetime.timezone.utc))

    SourceType.INSTITUTION_SUBNETS
        Time series indices: [0 1 2 3 4 ... 543 544 545 546 547], Length=548; use 'get_available_ts_indices' for full list
        Features with default values: {'n_flows': 0, 'n_packets': 0, 'n_bytes': 0, 'tcp_udp_ratio_packets': 0.5, 'tcp_udp_ratio_bytes': 0.5, 'dir_ratio_packets': 0.5, 'dir_ratio_bytes': 0.5, 'avg_duration': 0, 'avg_ttl': 0, 'sum_n_dest_asn': 0, 'avg_n_dest_asn': 0, 'std_n_dest_asn': 0, 'sum_n_dest_ports': 0, 'avg_n_dest_ports': 0, 'std_n_dest_ports': 0, 'sum_n_dest_ip': 0, 'avg_n_dest_ip': 0, 'std_n_dest_ip': 0}
        
        Additional data: ['ids_relationship', 'weekends_and_holidays']
        


### Selecting time period

- `time_period` sets the time period shared by all sets, i.e. by every used time series.

#### Setting time period as "all"

- Uses the whole time period available in the dataset for the time series.

In [4]:
# time_period="all": use the whole time period of the dataset.
config = SeriesBasedConfig(
    time_period="all",
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-26 20:06:44,468][series_config][INFO] - Quick validation succeeded.
[2025-08-26 20:06:44,475][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-08-26 20:06:44,482][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-26 20:06:44,486][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 548/548 [00:00<00:00, 634.41it/s]
[2025-08-26 20:06:45,369][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-26 20:06:45,369][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 6718)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

#### Setting time period with time indices

- Sets the time period for the time series as a range of time indices.

In [5]:
# Time period given as a range of time indices (0 up to, but not including, 2000).
config = SeriesBasedConfig(
    time_period=range(0, 2000),
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-26 20:06:45,375][series_config][INFO] - Quick validation succeeded.
[2025-08-26 20:06:45,381][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-08-26 20:06:45,386][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-26 20:06:45,390][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 548/548 [00:00<00:00, 1713.26it/s]
[2025-08-26 20:06:45,729][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-26 20:06:45,730][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 2000)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

#### Setting time period with datetime

- Sets the time period for the time series with a tuple of datetime objects.
- Datetime objects are expected to be in UTC.

In [6]:
# Time period given as a (start, end) tuple of datetime objects.
# NOTE(review): both datetimes are naive (no tzinfo); the tutorial text says
# they are expected to be UTC — confirm how the library interprets naive values.
config = SeriesBasedConfig(
    time_period=(
        datetime(2023, 10, 9, hour=0),
        datetime(2023, 11, 9, hour=23),
    ),
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-26 20:06:45,734][series_config][INFO] - Quick validation succeeded.
[2025-08-26 20:06:45,741][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-08-26 20:06:45,742][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-26 20:06:45,745][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 548/548 [00:00<00:00, 1886.14it/s]
[2025-08-26 20:06:46,056][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-26 20:06:46,057][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 767)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: N

#### Setting time period with percentage

- Sets the time period for the time series as a percentage of the whole time period of the dataset.
- Always starts from the first time index.
- Must satisfy: 0 < `time_period` <= 1.

In [7]:
# Time period given as a fraction: 0.5 takes the first half of the dataset's time period.
config = SeriesBasedConfig(
    time_period=0.5,
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-26 20:06:46,062][series_config][INFO] - Quick validation succeeded.
[2025-08-26 20:06:46,067][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-08-26 20:06:46,072][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-26 20:06:46,077][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 548/548 [00:00<00:00, 1410.95it/s]
[2025-08-26 20:06:46,485][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-26 20:06:46,486][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

### Creating train/val/test sets

- Sets how many time series will be in each set.
- Any set can be left as None.
- `nan_threshold` can be used to set how many NaN values will be tolerated.
    - `nan_threshold` = 1.0 means that a time series can be completely empty.
    - The threshold is applied after the sets are selected.

#### Setting sets with count of time series

- Sets the time series in each set by count.
- Each set will contain unique time series.
- The count must be greater than zero.
- The total number of time series across the sets must be smaller than the number of time series in the dataset.
- Is affected by `random_state`.
    - When `random_state` is set, the sets will contain the same time series each time.

In [8]:
# Set sizes given as counts of time series: 54 train, 25 validation, 10 test.
# nan_threshold=1.0 tolerates time series that are completely empty.
# NOTE(review): random_state=None presumably leaves the selection unseeded,
# so the chosen IDs differ between runs — confirm against the library docs.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    random_state=None,
    nan_threshold=1.0,
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-26 20:06:46,491][series_config][INFO] - Quick validation succeeded.
[2025-08-26 20:06:46,502][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-26 20:06:46,507][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 89/89 [00:00<00:00, 1217.79it/s]
[2025-08-26 20:06:46,586][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-26 20:06:46,586][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [429  42 395  79 199 ... 222 202 116  38 430], Length=54
        Val time series IDS: [276  99 285 108 251 ... 230 408 316 280 312], Length=25
        Test time series IDS [216  55 433   1 166 442 117 147 338 493], Length=10
        All time series IDS [429  42 395  79 199 ... 442 117 147 338 493], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 

#### Setting sets with percentage of time series in dataset

- Sets the time series in each set as a percentage of the time series in the dataset.
- Each set will contain unique time series.
- The percentage must be greater than 0.
- The total sum of the set percentages must be smaller than or equal to 1.0.
- Is affected by `random_state`.
    - When `random_state` is set, the sets will contain the same time series each time.

In [9]:
# Set sizes given as fractions of the dataset's time series:
# 0.5 train, 0.2 validation, 0.1 test.
# nan_threshold=1.0 tolerates time series that are completely empty.
# NOTE(review): random_state=None presumably leaves the selection unseeded,
# so the chosen IDs differ between runs — confirm against the library docs.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=0.5,
    val_ts=0.2,
    test_ts=0.1,
    random_state=None,
    nan_threshold=1.0,
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-26 20:06:46,591][series_config][INFO] - Quick validation succeeded.
[2025-08-26 20:06:46,602][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-26 20:06:46,606][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 437/437 [00:00<00:00, 1368.29it/s]
[2025-08-26 20:06:46,943][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-26 20:06:46,944][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [440 272  79   8 245 ...  36 280 427 335 189], Length=274
        Val time series IDS: [520  41 453  59 435 ... 174 273 145 282 152], Length=109
        Test time series IDS [445 362 509 418 375 ... 491 113 271 199 484], Length=54
        All time series IDS [440 272  79   8 245 ... 491 113 271 199 484], Length=437
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0

#### Setting sets with specific time series indices

- Each set must contain unique time series.

In [10]:
# Sets given as explicit, non-overlapping lists of time series indices.
# nan_threshold=1.0 tolerates time series that are completely empty.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=[0, 1, 2, 3, 4],
    val_ts=[5, 6, 7, 8, 9],
    test_ts=[10, 11, 12, 13, 14],
    nan_threshold=1.0,
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-26 20:06:46,949][series_config][INFO] - Quick validation succeeded.
[2025-08-26 20:06:46,960][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-26 20:06:46,964][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 15/15 [00:00<00:00, 1497.64it/s]
[2025-08-26 20:06:46,976][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-26 20:06:46,977][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [0 1 2 3 4], Length=5
        Val time series IDS: [5 6 7 8 9], Length=5
        Test time series IDS [10 11 12 13 14], Length=5
        All time series IDS [0 1 2 3 4 ... 10 11 12 13 14], Length=15
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.

### Selecting features

- Affects which features will be returned when loading data.
- Setting `include_time` to True adds time to the features returned when loading data.
- Setting `include_ts_id` to True adds the time series id to the features returned when loading data.

#### Setting features to take as "all"

In [11]:
# features_to_take="all": keep every feature available in the dataset.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    features_to_take="all",
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-26 20:06:46,981][series_config][INFO] - Quick validation succeeded.
[2025-08-26 20:06:46,991][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-26 20:06:46,995][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 89/89 [00:00<00:00, 1181.08it/s]
[2025-08-26 20:06:47,080][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-26 20:06:47,080][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [265  32 498 268 472 ... 322 325 342  42 153], Length=54
        Val time series IDS: [116 220 327 463  56 ...  67  99 466 306 467], Length=25
        Test time series IDS [ 90 412 276  86 449 353 393 243 380 369], Length=10
        All time series IDS [265  32 498 268 472 ... 353 393 243 380 369], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 

#### Setting features via list

In [12]:
# features_to_take as a list: keep only the named features.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    features_to_take=["n_flows", "n_packets"],
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-26 20:06:47,086][series_config][INFO] - Quick validation succeeded.
[2025-08-26 20:06:47,097][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-26 20:06:47,101][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 89/89 [00:00<00:00, 1256.06it/s]
[2025-08-26 20:06:47,180][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-26 20:06:47,181][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [270 403 111 370 267 ... 128 455 213 441 524], Length=54
        Val time series IDS: [253 122 510 413 527 ... 104 531  92  10 540], Length=25
        Test time series IDS [396  45 146 151 293  73 320 451 534 464], Length=10
        All time series IDS [270 403 111 370 267 ...  73 320 451 534 464], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: None
    Transformers
        Transformer type: None
    Anomaly handler
        Anomaly handler type (train set): None   
    Batch sizes
        Train batch size: 32
        Val batch size: 64
 

#### Including time and time series id

In [13]:
# Explicitly include the time and the time series id alongside the selected
# features; time_format selects how the time is represented (ID_TIME here).
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    features_to_take=["n_flows", "n_packets"],
    include_time=True,
    include_ts_id=True,
    time_format=TimeFormat.ID_TIME,
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-26 20:06:47,186][series_config][INFO] - Quick validation succeeded.
[2025-08-26 20:06:47,196][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-26 20:06:47,200][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 89/89 [00:00<00:00, 1245.46it/s]
[2025-08-26 20:06:47,280][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-26 20:06:47,280][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [  4 342 325 101 299 ... 291 362 154 478 432], Length=54
        Val time series IDS: [521  75  31 151 128 ... 372  52 449  19 150], Length=25
        Test time series IDS [206 265  43  73 241 192 421 288  97 267], Length=10
        All time series IDS [  4 342 325 101 299 ... 192 421 288  97 267], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: None
    Transformers
        Transformer type: None
    Anomaly handler
        Anomaly handler type (train set): None   
    Batch sizes
        Train batch size: 32
        Val batch size: 64
 

### Selecting all set

#### All set when other sets are None

- The all set will contain every time series from the dataset.

In [14]:
# With train/val/test all left as None, the all set covers every time series
# in the dataset.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=None,
    val_ts=None,
    test_ts=None,
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-26 20:06:47,290][series_config][INFO] - Quick validation succeeded.
[2025-08-26 20:06:47,298][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-08-26 20:06:47,304][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-26 20:06:47,311][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 548/548 [00:00<00:00, 1399.04it/s]
[2025-08-26 20:06:47,721][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-26 20:06:47,722][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

#### All set when at least one other set is not None

- The all set will contain every time series assigned to the other sets.

In [15]:
# With train/val/test given, the all set is made up of the time series
# assigned to those sets (54 + 25 + 10 = 89 here).
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
)
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-26 20:06:47,726][series_config][INFO] - Quick validation succeeded.
[2025-08-26 20:06:47,738][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-26 20:06:47,741][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 89/89 [00:00<00:00, 1199.43it/s]
[2025-08-26 20:06:47,822][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-26 20:06:47,823][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [392 388 308 275 500 ... 220   7 433  85  27], Length=54
        Val time series IDS: [398 479 306 130 266 ... 519 221 454  83 522], Length=25
        Test time series IDS [238 508 334 272 230 469  94 401  39 430], Length=10
        All time series IDS [392 388 308 275 500 ... 469  94 401  39 430], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 