# Choosing data for SeriesBasedCesnetDataset

### Import

In [1]:
import logging
from datetime import datetime

from cesnet_tszoo.utils.enums import AgreggationType, SourceType, TimeFormat, DatasetType
from cesnet_tszoo.datasets import CESNET_TimeSeries24
from cesnet_tszoo.configs import SeriesBasedConfig # Series based dataset MUST use SeriesBasedConfig

### Setting logger

In [2]:
# Emit INFO-and-above log records, each prefixed with timestamp, logger name and level,
# so the library's progress messages show up in the cell outputs.
logging.basicConfig(
    format="[%(asctime)s][%(name)s][%(levelname)s] - %(message)s",
    level=logging.INFO,
)

### Preparing dataset

In [3]:
# Build the series-based view of CESNET-TimeSeries24 (institution subnets, 1-hour aggregation).
# `data_root` is a placeholder path - point it at your own data directory.
# `display_details=True` prints the dataset summary after loading.
series_based_dataset = CESNET_TimeSeries24.get_dataset(
    data_root="/some_directory/",
    source_type=SourceType.INSTITUTION_SUBNETS,
    aggregation=AgreggationType.AGG_1_HOUR,
    dataset_type=DatasetType.SERIES_BASED,
    display_details=True,
)

[2025-09-15 11:33:30,437][wrapper_dataset][INFO] - Dataset is series-based. Use cesnet_tszoo.configs.SeriesBasedConfig



Dataset details:

    AgreggationType.AGG_1_HOUR
        Time indices: range(0, 6717)
        Datetime: (datetime.datetime(2023, 10, 9, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime(2024, 7, 14, 21, 0, tzinfo=datetime.timezone.utc))

    SourceType.INSTITUTION_SUBNETS
        Time series indices: [0 1 2 3 4 ... 543 544 545 546 547], Length=548; use 'get_available_ts_indices' for full list
        Features with default values: {'n_flows': 0, 'n_packets': 0, 'n_bytes': 0, 'tcp_udp_ratio_packets': 0.5, 'tcp_udp_ratio_bytes': 0.5, 'dir_ratio_packets': 0.5, 'dir_ratio_bytes': 0.5, 'avg_duration': 0, 'avg_ttl': 0, 'sum_n_dest_asn': 0, 'avg_n_dest_asn': 0, 'std_n_dest_asn': 0, 'sum_n_dest_ports': 0, 'avg_n_dest_ports': 0, 'std_n_dest_ports': 0, 'sum_n_dest_ip': 0, 'avg_n_dest_ip': 0, 'std_n_dest_ip': 0}
        
        Additional data: ['ids_relationship', 'weekends_and_holidays']
        


### Selecting time period

- `time_period` sets the time period that is applied to all sets (i.e. to all used time series).

#### Setting time period as "all"

- Sets the time period of the time series to the whole time period available in the dataset.

In [4]:
# "all" keeps the complete time period available in the dataset.
config = SeriesBasedConfig(time_period="all")

# Apply the config and print the resolved configuration.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-09-15 11:33:30,443][series_config][INFO] - Quick validation succeeded.
[2025-09-15 11:33:30,449][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-09-15 11:33:30,455][series_config][INFO] - Finalization and validation completed successfully.
[2025-09-15 11:33:30,461][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 548/548 [00:00<00:00, 738.94it/s]
[2025-09-15 11:33:31,223][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-09-15 11:33:31,223][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 6718)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

#### Setting time period with time indices

- Sets the time period of the time series as a range of time indices.

In [5]:
# Restrict the time period to the time indices 0..1999.
config = SeriesBasedConfig(
    time_period=range(0, 2000),
)

# Apply the config and print the resolved configuration.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-09-15 11:33:31,230][series_config][INFO] - Quick validation succeeded.
[2025-09-15 11:33:31,236][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-09-15 11:33:31,242][series_config][INFO] - Finalization and validation completed successfully.
[2025-09-15 11:33:31,249][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 548/548 [00:00<00:00, 1602.86it/s]
[2025-09-15 11:33:31,611][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-09-15 11:33:31,612][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 2000)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

#### Setting time period with datetime

- Sets the time period of the time series with a tuple of datetime objects.
- The datetime objects are expected to be in UTC.

In [6]:
# (start, end) given as datetime objects; they carry no tzinfo and are expected to be UTC.
config = SeriesBasedConfig(
    time_period=(
        datetime(2023, 10, 9, 0),
        datetime(2023, 11, 9, 23),
    ),
)

# Apply the config and print the resolved configuration.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-09-15 11:33:31,618][series_config][INFO] - Quick validation succeeded.
[2025-09-15 11:33:31,627][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-09-15 11:33:31,628][series_config][INFO] - Finalization and validation completed successfully.
[2025-09-15 11:33:31,633][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 548/548 [00:00<00:00, 2216.41it/s]
[2025-09-15 11:33:31,899][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-09-15 11:33:31,900][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 767)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: n

#### Setting time period with percentage

- Sets the time period of the time series as a percentage of the whole time period of the dataset.
- It always starts from the first time.
- Must satisfy: 0 < `time_period` <= 1.

In [7]:
# A float in (0, 1] keeps that share of the dataset's time period, counted from the start.
config = SeriesBasedConfig(
    time_period=0.5,
)

# Apply the config and print the resolved configuration.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-09-15 11:33:31,906][series_config][INFO] - Quick validation succeeded.
[2025-09-15 11:33:31,913][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-09-15 11:33:31,918][series_config][INFO] - Finalization and validation completed successfully.
[2025-09-15 11:33:31,924][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 548/548 [00:00<00:00, 1506.84it/s]
[2025-09-15 11:33:32,308][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-09-15 11:33:32,308][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

### Creating train/val/test sets

- Sets how many time series will be in each set.
- Any set can be left as `None`.
- Use `nan_threshold` to set how many NaN values will be tolerated.
    - `nan_threshold` = 1.0 means that a time series can be completely empty.
    - It is applied after the sets are created.

#### Setting sets with count of time series

- Sets the time series in each set by count.
- Each set will contain unique time series.
- The count must be greater than zero.
- The total number of time series across the sets must be smaller than the number of time series in the dataset.
- The selection is affected by `random_state`.
    - When `random_state` is set, the sets will contain the same time series on every run.

In [8]:
# Integer values request that many time series per set.
# random_state=None leaves the selection unseeded; nan_threshold=1.0 tolerates fully empty series.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    random_state=None,
    nan_threshold=1.0,
)

# Apply the config and print the resolved configuration.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-09-15 11:33:32,314][series_config][INFO] - Quick validation succeeded.
[2025-09-15 11:33:32,326][series_config][INFO] - Finalization and validation completed successfully.
[2025-09-15 11:33:32,384][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 89/89 [00:00<00:00, 1136.63it/s]
[2025-09-15 11:33:32,470][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-09-15 11:33:32,470][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [365 328 363 271 317 ... 348 361 229 156 444], Length=54
        Val time series IDS: [  7 504 264 412  75 ... 152 385 241 534 366], Length=25
        Test time series IDS [ 29  46 143  67 505  50 112 432 493 209], Length=10
        All time series IDS [365 328 363 271 317 ...  50 112 432 493 209], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 

#### Setting sets with percentage of time series in dataset

- Sets the time series in each set as a percentage of the time series in the dataset.
- Each set will contain unique time series.
- The percentage must be greater than 0.
- The total sum of the set percentages must be smaller than or equal to 1.0.
- The selection is affected by `random_state`.
    - When `random_state` is set, the sets will contain the same time series on every run.

In [9]:
# Float values request that share of the dataset's time series per set.
# random_state=None leaves the selection unseeded; nan_threshold=1.0 tolerates fully empty series.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=0.5,
    val_ts=0.2,
    test_ts=0.1,
    random_state=None,
    nan_threshold=1.0,
)

# Apply the config and print the resolved configuration.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-09-15 11:33:32,476][series_config][INFO] - Quick validation succeeded.
[2025-09-15 11:33:32,487][series_config][INFO] - Finalization and validation completed successfully.
[2025-09-15 11:33:32,492][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 437/437 [00:00<00:00, 1380.31it/s]
[2025-09-15 11:33:32,828][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-09-15 11:33:32,829][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [345 367 543 480 438 ... 213 471 536 374 254], Length=274
        Val time series IDS: [ 64 278 400  48 322 ... 309 525 283 123 233], Length=109
        Test time series IDS [ 27 258  97  71  56 ... 311   8 524 538  16], Length=54
        All time series IDS [345 367 543 480 438 ... 311   8 524 538  16], Length=437
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0

#### Setting sets with specific time series indices

- Each set must contain unique time series.

In [10]:
# Lists name the exact time series indices that go into each set.
# nan_threshold=1.0 tolerates fully empty series.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=[0, 1, 2, 3, 4],
    val_ts=[5, 6, 7, 8, 9],
    test_ts=[10, 11, 12, 13, 14],
    nan_threshold=1.0,
)

# Apply the config and print the resolved configuration.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-09-15 11:33:32,837][series_config][INFO] - Quick validation succeeded.
[2025-09-15 11:33:32,849][series_config][INFO] - Finalization and validation completed successfully.
[2025-09-15 11:33:32,853][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 15/15 [00:00<00:00, 1426.73it/s]
[2025-09-15 11:33:32,867][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-09-15 11:33:32,868][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [0 1 2 3 4], Length=5
        Val time series IDS: [5 6 7 8 9], Length=5
        Test time series IDS [10 11 12 13 14], Length=5
        All time series IDS [0 1 2 3 4 ... 10 11 12 13 14], Length=15
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.

### Selecting features

- Affects which features are returned when loading data.
- Setting `include_time` to True adds the time to the features returned when loading data.
- Setting `include_ts_id` to True adds the time series id to the features returned when loading data.

#### Setting features to take as "all"

In [11]:
# features_to_take="all" selects every feature the dataset offers.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    features_to_take="all",
)

# Apply the config and print the resolved configuration.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-09-15 11:33:32,872][series_config][INFO] - Quick validation succeeded.
[2025-09-15 11:33:32,883][series_config][INFO] - Finalization and validation completed successfully.
[2025-09-15 11:33:32,888][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 89/89 [00:00<00:00, 1163.41it/s]
[2025-09-15 11:33:32,972][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-09-15 11:33:32,973][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [192 222 433  39 230 ... 474 539 438 347 143], Length=54
        Val time series IDS: [430 101 521 152 388 ... 416 218  34 354 387], Length=25
        Test time series IDS [112 392 125 401 371  94  85 286 452 169], Length=10
        All time series IDS [192 222 433  39 230 ...  94  85 286 452 169], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 

#### Setting features via list

In [12]:
# A list of feature names restricts the returned data to exactly those features.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    features_to_take=["n_flows", "n_packets"],
)

# Apply the config and print the resolved configuration.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-09-15 11:33:32,979][series_config][INFO] - Quick validation succeeded.
[2025-09-15 11:33:32,989][series_config][INFO] - Finalization and validation completed successfully.
[2025-09-15 11:33:32,995][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 89/89 [00:00<00:00, 1381.63it/s]
[2025-09-15 11:33:33,065][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-09-15 11:33:33,066][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [ 22  56 477 265 308 ...  50  11 518  53 500], Length=54
        Val time series IDS: [540 156 350 276  29 ... 111 173 448 206 188], Length=25
        Test time series IDS [107 128 109  41 451 394 326  54 177 212], Length=10
        All time series IDS [ 22  56 477 265 308 ... 394 326  54 177 212], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: no_filler
    Transformers
        Transformer type: no_transformer
    Anomaly handler
        Anomaly handler type (train set): no_anomaly_handler   
    Batch sizes
        Train batch size: 32

#### Including time and time series id

In [13]:
# Ask for time and time series id to be returned with the selected features;
# time_format picks how the time is represented.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    features_to_take=["n_flows", "n_packets"],
    include_time=True,
    include_ts_id=True,
    time_format=TimeFormat.ID_TIME,
)

# Apply the config and print the resolved configuration.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-09-15 11:33:33,072][series_config][INFO] - Quick validation succeeded.
[2025-09-15 11:33:33,082][series_config][INFO] - Finalization and validation completed successfully.
[2025-09-15 11:33:33,086][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 89/89 [00:00<00:00, 1351.27it/s]
[2025-09-15 11:33:33,159][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-09-15 11:33:33,159][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [392 352 249 491 535 ... 470 406  38 168  52], Length=54
        Val time series IDS: [106 358 504 103 299 ... 210  56  44 484  53], Length=25
        Test time series IDS [127  27 486 479 133 360 447 499 105  62], Length=10
        All time series IDS [392 352 249 491 535 ... 360 447 499 105  62], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: no_filler
    Transformers
        Transformer type: no_transformer
    Anomaly handler
        Anomaly handler type (train set): no_anomaly_handler   
    Batch sizes
        Train batch size: 32

### Selecting all set

#### All set when other sets are None

- The all set will contain all time series from the dataset.

In [14]:
# With train/val/test all left as None, the all set covers every time series in the dataset.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=None,
    val_ts=None,
    test_ts=None,
)

# Apply the config and print the resolved configuration.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-09-15 11:33:33,166][series_config][INFO] - Quick validation succeeded.
[2025-09-15 11:33:33,172][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-09-15 11:33:33,178][series_config][INFO] - Finalization and validation completed successfully.
[2025-09-15 11:33:33,183][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 548/548 [00:00<00:00, 1454.45it/s]
[2025-09-15 11:33:33,582][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-09-15 11:33:33,583][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

#### All set when at least one other set is not None

- The all set will contain all time series that were selected by the other sets.

In [15]:
# Once any of train/val/test is given, the all set is made up of the series those sets selected.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
)

# Apply the config and print the resolved configuration.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-09-15 11:33:33,591][series_config][INFO] - Quick validation succeeded.
[2025-09-15 11:33:33,601][series_config][INFO] - Finalization and validation completed successfully.
[2025-09-15 11:33:33,606][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 89/89 [00:00<00:00, 1174.27it/s]
[2025-09-15 11:33:33,689][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-09-15 11:33:33,689][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [427 240 231  76 346 ... 504  81 129 100 538], Length=54
        Val time series IDS: [375 343 285 230 413 ... 283 244 137  47 319], Length=25
        Test time series IDS [246 473 330 455  29 486 341 526  23 507], Length=10
        All time series IDS [427 240 231  76 346 ... 486 341 526  23 507], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 