# Choosing data for SeriesBasedCesnetDataset

### Import

In [1]:
import logging
from datetime import datetime

from cesnet_tszoo.utils.enums import AgreggationType, SourceType, TimeFormat, DatasetType
from cesnet_tszoo.datasets import CESNET_TimeSeries24
from cesnet_tszoo.configs import SeriesBasedConfig # Series based dataset MUST use SeriesBasedConfig

### Setting logger

In [2]:
# Configure the root logger so that INFO-level messages emitted while the
# dataset is prepared are shown, prefixed with timestamp, logger name and level.
logging.basicConfig(
    format="[%(asctime)s][%(name)s][%(levelname)s] - %(message)s",
    level=logging.INFO,
)

### Preparing dataset

In [3]:
# Build the series-based dataset from hourly aggregated institution-subnet data.
# NOTE: "/some_directory/" is a placeholder path; point data_root at your own location.
# display_details=True prints the "Dataset details" summary shown below.
series_based_dataset = CESNET_TimeSeries24.get_dataset(
    data_root="/some_directory/",
    source_type=SourceType.INSTITUTION_SUBNETS,
    aggregation=AgreggationType.AGG_1_HOUR,
    dataset_type=DatasetType.SERIES_BASED,
    display_details=True,
)

[2025-08-17 13:56:42,408][wrapper_dataset][INFO] - Dataset is series-based. Use cesnet_tszoo.configs.SeriesBasedConfig



Dataset details:

    AgreggationType.AGG_1_HOUR
        Time indices: range(0, 6717)
        Datetime: (datetime.datetime(2023, 10, 9, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime(2024, 7, 14, 21, 0, tzinfo=datetime.timezone.utc))

    SourceType.INSTITUTION_SUBNETS
        Time series indices: [0 1 2 3 4 ... 543 544 545 546 547], Length=548; use 'get_available_ts_indices' for full list
        Features with default values: {'n_flows': 0, 'n_packets': 0, 'n_bytes': 0, 'tcp_udp_ratio_packets': 0.5, 'tcp_udp_ratio_bytes': 0.5, 'dir_ratio_packets': 0.5, 'dir_ratio_bytes': 0.5, 'avg_duration': 0, 'avg_ttl': 0, 'sum_n_dest_asn': 0, 'avg_n_dest_asn': 0, 'std_n_dest_asn': 0, 'sum_n_dest_ports': 0, 'avg_n_dest_ports': 0, 'std_n_dest_ports': 0, 'sum_n_dest_ip': 0, 'avg_n_dest_ip': 0, 'std_n_dest_ip': 0}
        
        Additional data: ['ids_relationship', 'weekends_and_holidays']
        


### Selecting time period

- `time_period` sets the time period that is used for the time series in all sets.

#### Setting time period as "all"

- Sets the time period for the time series to the whole time period available in the dataset.

In [4]:
# "all" requests the whole time period available in the dataset.
config = SeriesBasedConfig(
    time_period="all",
)
# Apply the config; display_config_details=True prints the "Config Details" summary.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-17 13:56:42,413][series_config][INFO] - Quick validation succeeded.
[2025-08-17 13:56:42,419][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-08-17 13:56:42,424][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-17 13:56:42,430][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 548/548 [00:00<00:00, 800.31it/s]
[2025-08-17 13:56:43,133][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-17 13:56:43,134][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 6718)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

#### Setting time period with time indices

- Sets the time period for the time series as a range of time indices.

In [5]:
# Restrict the time period to the time indices 0..1999.
config = SeriesBasedConfig(
    time_period=range(0, 2000),
)
# Apply the config; display_config_details=True prints the "Config Details" summary.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-17 13:56:43,140][series_config][INFO] - Quick validation succeeded.
[2025-08-17 13:56:43,146][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-08-17 13:56:43,151][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-17 13:56:43,154][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 548/548 [00:00<00:00, 1978.46it/s]
[2025-08-17 13:56:43,450][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-17 13:56:43,451][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 2000)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

#### Setting time period with datetime

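- Sets the time period for the time series with a tuple of two datetime objects (start, end).
- Datetime objects are expected to be in UTC.
- Judging by the output below (`range(0, 767)` for an end of 2023-11-09 23:00 at hourly aggregation), the end datetime is exclusive.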

In [6]:
# Select the time period by a (start, end) tuple of datetimes.
# NOTE(review): these datetimes are naive (no tzinfo); the notebook text states
# that datetimes are expected to be UTC — confirm how the library interprets them.
config = SeriesBasedConfig(
    time_period=(
        datetime(2023, 10, 9, 0),
        datetime(2023, 11, 9, 23),
    ),
)
# Apply the config; display_config_details=True prints the "Config Details" summary.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-17 13:56:43,456][series_config][INFO] - Quick validation succeeded.
[2025-08-17 13:56:43,463][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-08-17 13:56:43,464][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-17 13:56:43,467][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 548/548 [00:00<00:00, 2313.66it/s]
[2025-08-17 13:56:43,722][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-17 13:56:43,723][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 767)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: N

#### Setting time period with percentage

- Sets the time period for the time series as a fraction of the whole time period available in the dataset.
- The selected period always starts at the first time index.
- Must satisfy: 0 < `time_period` <= 1.

In [7]:
# 0.5 selects the first half of the dataset's time period.
config = SeriesBasedConfig(
    time_period=0.5,
)
# Apply the config; display_config_details=True prints the "Config Details" summary.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-17 13:56:43,728][series_config][INFO] - Quick validation succeeded.
[2025-08-17 13:56:43,735][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-08-17 13:56:43,740][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-17 13:56:43,744][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 548/548 [00:00<00:00, 1647.30it/s]
[2025-08-17 13:56:44,095][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-17 13:56:44,096][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

### Creating train/val/test sets

- Sets how many time series will be in each set.
- Any of the sets can be left as None.
- `nan_threshold` can be used to set what fraction of NaN values in a time series will be tolerated.
    - `nan_threshold` = 1.0 means that a time series can be completely empty.
    - It is applied after the sets have been selected.

#### Setting sets with count of time series

- Sets the number of time series in each set by count.
- Each set will contain unique time series.
- The count must be greater than zero.
- The total number of time series across all sets must be smaller than the number of time series in the dataset.
- Is affected by `random_state`.
    - When `random_state` is set, the sets will contain the same time series on every run.

In [8]:
# Choose the sets by count: 54 train, 25 validation and 10 test time series.
# random_state=None fixes no seed — presumably the sampled IDs vary between runs
# (TODO confirm). nan_threshold=1.0 tolerates completely empty time series.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    random_state=None,
    nan_threshold=1.0,
)
# Apply the config; display_config_details=True prints the "Config Details" summary.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-17 13:56:44,101][series_config][INFO] - Quick validation succeeded.
[2025-08-17 13:56:44,112][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-17 13:56:44,116][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 89/89 [00:00<00:00, 1253.57it/s]
[2025-08-17 13:56:44,192][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-17 13:56:44,193][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [486 415 279 529 489 ... 306 412 346  81 330], Length=54
        Val time series IDS: [ 44  39 122 512  45 ... 160 136 296  50 228], Length=25
        Test time series IDS [451 255 376 318 316 153 428 465 244 467], Length=10
        All time series IDS [486 415 279 529 489 ... 153 428 465 244 467], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 

#### Setting sets with percentage of time series in dataset

- Sets the number of time series in each set as a fraction of the time series in the dataset.
- Each set will contain unique time series.
- Each fraction must be greater than 0.
- The sum of the set fractions must be smaller than or equal to 1.0.
- Is affected by `random_state`.
    - When `random_state` is set, the sets will contain the same time series on every run.

In [9]:
# Choose the sets by fraction of the dataset: 50 % train, 20 % validation, 10 % test.
# random_state=None fixes no seed — presumably the sampled IDs vary between runs
# (TODO confirm). nan_threshold=1.0 tolerates completely empty time series.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=0.5,
    val_ts=0.2,
    test_ts=0.1,
    random_state=None,
    nan_threshold=1.0,
)
# Apply the config; display_config_details=True prints the "Config Details" summary.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-17 13:56:44,200][series_config][INFO] - Quick validation succeeded.
[2025-08-17 13:56:44,211][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-17 13:56:44,215][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 437/437 [00:00<00:00, 1542.74it/s]
[2025-08-17 13:56:44,515][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-17 13:56:44,516][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [501 444 221 505  52 ...  94 463 338 155 354], Length=274
        Val time series IDS: [378  18 125 405 344 ... 261 538  64  82 246], Length=109
        Test time series IDS [370 529 158 264 324 ... 230 184 134 116 156], Length=54
        All time series IDS [501 444 221 505  52 ... 230 184 134 116 156], Length=437
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0

#### Setting sets with specific time series indices

- Each set must contain unique time series, i.e. a time series index may appear in only one set.

In [10]:
# Choose the sets explicitly by time series index; the three lists do not overlap.
# nan_threshold=1.0 tolerates completely empty time series.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=[0, 1, 2, 3, 4],
    val_ts=[5, 6, 7, 8, 9],
    test_ts=[10, 11, 12, 13, 14],
    nan_threshold=1.0,
)
# Apply the config; display_config_details=True prints the "Config Details" summary.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-17 13:56:44,521][series_config][INFO] - Quick validation succeeded.
[2025-08-17 13:56:44,531][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-17 13:56:44,534][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 15/15 [00:00<00:00, 1427.44it/s]
[2025-08-17 13:56:44,547][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-17 13:56:44,547][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [0 1 2 3 4], Length=5
        Val time series IDS: [5 6 7 8 9], Length=5
        Test time series IDS [10 11 12 13 14], Length=5
        All time series IDS [0 1 2 3 4 ... 10 11 12 13 14], Length=15
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.

### Selecting features

- Affects which features are returned when loading data.
- Setting `include_time` to True adds the time to the features that are returned when loading data.
- Setting `include_ts_id` to True adds the time series ID to the features that are returned when loading data.

#### Setting features to take as "all"

In [11]:
# features_to_take="all" keeps every feature available in the dataset.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    features_to_take="all",
)
# Apply the config; display_config_details=True prints the "Config Details" summary.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-17 13:56:44,552][series_config][INFO] - Quick validation succeeded.
[2025-08-17 13:56:44,562][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-17 13:56:44,566][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 89/89 [00:00<00:00, 1252.25it/s]
[2025-08-17 13:56:44,643][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-17 13:56:44,643][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [340  18 121 465 518 ... 476 160 166 504 381], Length=54
        Val time series IDS: [432 483 131  24 299 ... 213 353 529 358 460], Length=25
        Test time series IDS [ 94 275 311 437 401 110 356  22 291  17], Length=10
        All time series IDS [340  18 121 465 518 ... 110 356  22 291  17], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 

#### Setting features via list

In [12]:
# Keep only the explicitly listed features.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    features_to_take=["n_flows", "n_packets"],
)
# Apply the config; display_config_details=True prints the "Config Details" summary.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-17 13:56:44,649][series_config][INFO] - Quick validation succeeded.
[2025-08-17 13:56:44,660][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-17 13:56:44,663][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 89/89 [00:00<00:00, 1546.41it/s]
[2025-08-17 13:56:44,726][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-17 13:56:44,727][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [224 356 166  31 133 ... 241 128 503 300 192], Length=54
        Val time series IDS: [120 499 532 210 207 ... 187 418  19  52 355], Length=25
        Test time series IDS [ 72 451 290 216 189 219 135   2  68 497], Length=10
        All time series IDS [224 356 166  31 133 ... 219 135   2  68 497], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: None
    Transformers
        Transformer type: None
    Batch sizes
        Train batch size: 32
        Val batch size: 64
        Test batch size: 128
        All batch size: 128
    Default wo

#### Including time and time series id

In [13]:
# Besides the two selected features, also return the time (in ID_TIME format)
# and the time series ID when loading data.
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
    features_to_take=["n_flows", "n_packets"],
    include_time=True,
    include_ts_id=True,
    time_format=TimeFormat.ID_TIME,
)
# Apply the config; display_config_details=True prints the "Config Details" summary.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-17 13:56:44,732][series_config][INFO] - Quick validation succeeded.
[2025-08-17 13:56:44,743][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-17 13:56:44,747][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 89/89 [00:00<00:00, 1411.61it/s]
[2025-08-17 13:56:44,817][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-17 13:56:44,817][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [198 410 329 537  22 ...  24 248 510 263 116], Length=54
        Val time series IDS: [211 350 392 311 541 ... 447 296 313 345 133], Length=25
        Test time series IDS [187  41 375 371  96  68 218  93 255 486], Length=10
        All time series IDS [198 410 329 537  22 ...  68 218  93 255 486], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: None
    Transformers
        Transformer type: None
    Batch sizes
        Train batch size: 32
        Val batch size: 64
        Test batch size: 128
        All batch size: 128
    Default wo

### Selecting all set

#### All set when other sets are None

- The all set will contain every time series from the dataset.

In [14]:
# With train/val/test all left as None, the all set falls back to every time
# series in the dataset (see the logged "Using all time series for all_ts" message).
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=None,
    val_ts=None,
    test_ts=None,
)
# Apply the config; display_config_details=True prints the "Config Details" summary.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-17 13:56:44,822][series_config][INFO] - Quick validation succeeded.
[2025-08-17 13:56:44,828][series_config][INFO] - Using all time series for all_ts because train_ts, val_ts, and test_ts are all set to None.
[2025-08-17 13:56:44,833][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-17 13:56:44,838][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 548/548 [00:00<00:00, 1584.50it/s]
[2025-08-17 13:56:45,201][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-17 13:56:45,202][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: None
        Val time series IDS: None
        Test time series IDS None
        All time series IDS [0 1 2 3 4 ... 543 544 545 546 547], Length=548
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Fillers         
        Filler type: 

#### All set when at least one other set is not None

- The all set will contain only the time series that were selected for the other sets (train, val and test).

In [15]:
# With train/val/test given, the all set is made up of the time series selected
# for those sets (54 + 25 + 10 = 89 in the output below).
config = SeriesBasedConfig(
    time_period=0.5,
    train_ts=54,
    val_ts=25,
    test_ts=10,
)
# Apply the config; display_config_details=True prints the "Config Details" summary.
series_based_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details=True,
    workers=0,
)

[2025-08-17 13:56:45,207][series_config][INFO] - Quick validation succeeded.
[2025-08-17 13:56:45,217][series_config][INFO] - Finalization and validation completed successfully.
[2025-08-17 13:56:45,221][cesnet_dataset][INFO] - Updating config on train/val/test/all and selected time period.
100%|██████████| 89/89 [00:00<00:00, 1236.26it/s]
[2025-08-17 13:56:45,301][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-08-17 13:56:45,302][cesnet_dataset][INFO] - Config initialized successfully.



Config Details:
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDS: [338 157 547  49 145 ...  75 421 193 336  82], Length=54
        Val time series IDS: [ 45 143 380 290  59 ... 317  86 425  83 136], Length=25
        Test time series IDS [194 205 295 132 277 252 387 289 432 249], Length=10
        All time series IDS [338 157 547  49 145 ... 252 387 289 432 249], Length=89
    Time periods
        Time period: range(0, 3359)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 