# Choosing data for DisjointTimeBasedCesnetDataset

### Import

In [1]:
import logging
from datetime import datetime

from cesnet_tszoo.utils.enums import AgreggationType, SourceType, TimeFormat, DatasetType
from cesnet_tszoo.datasets import CESNET_TimeSeries24
from cesnet_tszoo.configs import DisjointTimeBasedConfig # Disjoint dataset MUST use DisjointTimeBasedConfig

### Setting logger

In [2]:
# Root logger setup: emit INFO and above, tagged with timestamp, logger name and level.
LOG_FORMAT = "[%(asctime)s][%(name)s][%(levelname)s] - %(message)s"
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

### Preparing dataset

In [3]:
# Request the disjoint time-based variant of the dataset:
# institution subnets, aggregated per hour. Details are printed on creation.
disjoint_dataset = CESNET_TimeSeries24.get_dataset(
    data_root="/some_directory/",
    source_type=SourceType.INSTITUTION_SUBNETS,
    aggregation=AgreggationType.AGG_1_HOUR,
    dataset_type=DatasetType.DISJOINT_TIME_BASED,
    display_details=True,
)

[2025-11-28 18:00:24,436][cesnet_dataset][INFO] - Dataset is disjoint_time_based. Use cesnet_tszoo.configs.DisjointTimeBasedConfig



Dataset details:

    AgreggationType.AGG_1_HOUR
        Time indices: range(0, 6717)
        Datetime: (datetime.datetime(2023, 10, 9, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime(2024, 7, 14, 21, 0, tzinfo=datetime.timezone.utc))

    SourceType.INSTITUTION_SUBNETS
        Time series indices: [0 1 2 3 4 ... 543 544 545 546 547], Length=548; use 'get_available_ts_indices' for full list
        Features with default values: {'n_flows': 0, 'n_packets': 0, 'n_bytes': 0, 'tcp_udp_ratio_packets': 0.5, 'tcp_udp_ratio_bytes': 0.5, 'dir_ratio_packets': 0.5, 'dir_ratio_bytes': 0.5, 'avg_duration': 0, 'avg_ttl': 0, 'sum_n_dest_asn': 0, 'avg_n_dest_asn': 0, 'std_n_dest_asn': 0, 'sum_n_dest_ports': 0, 'avg_n_dest_ports': 0, 'std_n_dest_ports': 0, 'sum_n_dest_ip': 0, 'avg_n_dest_ip': 0, 'std_n_dest_ip': 0}
        
        Additional data: ['ids_relationship', 'weekends_and_holidays']
        


### Selecting which time series to load for each set

- Sets time series that will be used for train/val/test/all sets

#### Setting time series with number

- Sets time series used in sets with number.
- Count must be greater than zero.
- The total number of time series in `train_ts`, `val_ts` and `test_ts` must be smaller than the number of time series in the dataset.
- Is affected by `random_state`.
    - When `random_state` is set, `train_ts`, `val_ts` and `test_ts` will contain the same time series on repeated runs, and no time series will appear in more than one of them.

In [4]:
# Time series given as absolute counts per set; random_state pins the random selection.
config = DisjointTimeBasedConfig(
    train_ts=100,
    val_ts=50,
    test_ts=20,
    train_time_period=0.7,
    val_time_period=0.2,
    test_time_period=0.1,
    random_state=111,
)
disjoint_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:00:24,442][disjoint_time_based_config][INFO] - Quick validation succeeded.
[2025-11-28 18:00:24,456][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-28 18:00:24,457][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 100/100 [00:00<00:00, 1073.33it/s]
[2025-11-28 18:00:24,567][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 50/50 [00:00<00:00, 1350.30it/s]
[2025-11-28 18:00:24,613][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 20/20 [00:00<00:00, 1160.28it/s]
[2025-11-28 18:00:24,633][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:00:24,633][cesnet_dataset][INFO] - Config initialized successfully.



Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDs: [302 520 513 387 543 ...   7 118 322 275  86], Length=100
        Val time series IDs: [245 145  33 541 399 ... 277 370 309 421 539], Length=50
        Test time series IDs: [247 482  30 252 256 ... 188 529 257 407 478], Length=20
    Time periods
        Train time periods: range(0, 4702)
        Val time periods: range(4702, 6045)
        Test time periods: range(6045, 6716)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  

#### Setting time series with percentage

- Sets time series used in sets with percentage of time series in dataset.
- Percentages must be greater than 0.
- Sum of percentages must be smaller than 1.0.
- Is affected by `random_state`.
    - When `random_state` is set, `train_ts`, `val_ts` and `test_ts` will contain the same time series on repeated runs, and no time series will appear in more than one of them.

In [5]:
# Time series given as fractions of the dataset's time series; random_state pins the random selection.
config = DisjointTimeBasedConfig(
    train_ts=0.5,
    val_ts=0.2,
    test_ts=0.1,
    train_time_period=0.7,
    val_time_period=0.2,
    test_time_period=0.1,
    random_state=111,
)
disjoint_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:00:24,639][disjoint_time_based_config][INFO] - Quick validation succeeded.
[2025-11-28 18:00:24,655][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-28 18:00:24,655][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 274/274 [00:00<00:00, 1203.60it/s]
[2025-11-28 18:00:24,905][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 109/109 [00:00<00:00, 1261.12it/s]
[2025-11-28 18:00:25,009][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 54/54 [00:00<00:00, 1330.75it/s]
[2025-11-28 18:00:25,056][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:00:25,056][cesnet_dataset][INFO] - Config initialized successfully.



Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDs: [253 277 132 114  76 ...   7 118 322 275  86], Length=274
        Val time series IDs: [434  61  38  53 522 ... 540  52 111 370 309], Length=109
        Test time series IDs: [339 133 457  97  18 ... 359 505 328 192 181], Length=54
    Time periods
        Train time periods: range(0, 4702)
        Val time periods: range(4702, 6045)
        Test time periods: range(6045, 6716)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. 

#### Setting time series with specific indices

In [6]:
# Time series given explicitly by index: one series per set.
config = DisjointTimeBasedConfig(
    train_ts=[0],
    val_ts=[1],
    test_ts=[2],
    train_time_period=0.7,
    val_time_period=0.2,
    test_time_period=0.1,
    random_state=111,
)
disjoint_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:00:25,064][disjoint_time_based_config][INFO] - Quick validation succeeded.
[2025-11-28 18:00:25,078][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-28 18:00:25,079][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 1/1 [00:00<00:00, 939.16it/s]
[2025-11-28 18:00:25,088][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 1/1 [00:00<00:00, 998.88it/s]
[2025-11-28 18:00:25,094][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 1/1 [00:00<00:00, 998.88it/s]
[2025-11-28 18:00:25,096][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:00:25,096][cesnet_dataset][INFO] - Config initialized successfully.



Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDs: [0], Length=1
        Val time series IDs: [1], Length=1
        Test time series IDs: [2], Length=1
    Time periods
        Train time periods: range(0, 4702)
        Val time periods: range(4702, 6045)
        Test time periods: range(6045, 6716)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.5 0.5 0.  0. ]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_

### Selecting which time period to use for each set

- Sets time period for every set and their time series
- `train_time_period` is used for `train_ts`
- `val_time_period` is used for `val_ts`
- `test_time_period` is used for `test_ts`
- For each set, either both the time series and their time period must be set, or both have to be None.
- Use `nan_threshold` to set what share of NaN values is tolerated for a time series within its time period.
    - `nan_threshold = 1.0` means that a time series can be completely empty.
    - Is applied after the sets are created.
    - Is checked separately for every set.

#### Setting time periods with time indices

- Defines the sets as ranges of time indices.
- Sets must follow these rules:
    - Used time periods must be connected.
    - Sets can share subset of times.
    - start of `train_time_period` < start of `val_time_period` < start of `test_time_period`.

In [7]:
# Time periods given as ranges of time indices; each range begins where the previous one stops.
config = DisjointTimeBasedConfig(
    train_ts=0.5,
    val_ts=0.2,
    test_ts=0.1,
    train_time_period=range(0, 2000),
    val_time_period=range(2000, 4000),
    test_time_period=range(4000, 5000),
)
disjoint_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:00:25,101][disjoint_time_based_config][INFO] - Quick validation succeeded.
[2025-11-28 18:00:25,116][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-28 18:00:25,117][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 274/274 [00:00<00:00, 1681.66it/s]
[2025-11-28 18:00:25,298][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 109/109 [00:00<00:00, 1331.32it/s]
[2025-11-28 18:00:25,391][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 54/54 [00:00<00:00, 1358.19it/s]
[2025-11-28 18:00:25,437][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:00:25,437][cesnet_dataset][INFO] - Config initialized successfully.



Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDs: [256 222  14  78 135 ... 496  39 365 302 495], Length=274
        Val time series IDs: [ 64  17 316 434 383 ... 405 408   2 362 116], Length=109
        Test time series IDs: [ 57  24 236 293 233 ...  83 185 494 360 367], Length=54
    Time periods
        Train time periods: range(0, 2000)
        Val time periods: range(2000, 4000)
        Test time periods: range(4000, 5000)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. 

#### Setting time periods with datetime

- Defines the sets as tuples of datetime objects.
- Datetime objects are expected to be in UTC.
- Sets must follow these rules:
    - Used time periods must be connected.
    - Sets can share subset of times.
    - start of `train_time_period` < start of `val_time_period` < start of `test_time_period`.

In [8]:
# Time periods given as (start, end) datetime tuples.
# Each period starts at the datetime where the previous one ends.
train_period = (datetime(2023, 10, 9, 0), datetime(2023, 11, 9, 23))
val_period = (datetime(2023, 11, 9, 23), datetime(2023, 12, 9, 23))
test_period = (datetime(2023, 12, 9, 23), datetime(2023, 12, 25, 23))

config = DisjointTimeBasedConfig(
    train_ts=0.5,
    val_ts=0.2,
    test_ts=0.1,
    train_time_period=train_period,
    val_time_period=val_period,
    test_time_period=test_period,
)
disjoint_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:00:25,496][disjoint_time_based_config][INFO] - Quick validation succeeded.
[2025-11-28 18:00:25,498][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-28 18:00:25,499][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 274/274 [00:00<00:00, 2181.06it/s]
[2025-11-28 18:00:25,646][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 109/109 [00:00<00:00, 1811.79it/s]
[2025-11-28 18:00:25,717][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 54/54 [00:00<00:00, 1768.59it/s]
[2025-11-28 18:00:25,751][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:00:25,751][cesnet_dataset][INFO] - Config initialized successfully.



Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDs: [477 228 352 414 528 ... 522 229 505 431 385], Length=274
        Val time series IDs: [346 102  18 372 364 ... 355 134 502 386  57], Length=109
        Test time series IDs: [250 310 498 354 541 ... 452 395 235 378 408], Length=54
    Time periods
        Train time periods: range(0, 767)
        Val time periods: range(767, 1487)
        Test time periods: range(1487, 1871)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0

#### Setting time periods with percentage

- Defines the sets as percentages of the whole time period of the dataset.
- Always starts from the first time in the dataset.
- Must be: 0 < sum of percentages of set time periods <= 1.

In [9]:
# Time periods given as fractions of the dataset's whole time span.
config = DisjointTimeBasedConfig(
    train_ts=0.5,
    val_ts=0.2,
    test_ts=0.1,
    train_time_period=0.5,
    val_time_period=0.3,
    test_time_period=0.2,
)
disjoint_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:00:25,759][disjoint_time_based_config][INFO] - Quick validation succeeded.
[2025-11-28 18:00:25,773][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-28 18:00:25,774][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 274/274 [00:00<00:00, 1425.64it/s]
[2025-11-28 18:00:25,988][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 109/109 [00:00<00:00, 1246.09it/s]
[2025-11-28 18:00:26,088][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 54/54 [00:00<00:00, 1225.81it/s]
[2025-11-28 18:00:26,137][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:00:26,138][cesnet_dataset][INFO] - Config initialized successfully.



Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDs: [469 335 157  56 124 ...  62 212 311 519 460], Length=274
        Val time series IDs: [360 343 190 179  44 ... 446 166 299 181   8], Length=109
        Test time series IDs: [232 523 109 444 198 ... 228 324 473 463 530], Length=54
    Time periods
        Train time periods: range(0, 3359)
        Val time periods: range(3359, 5374)
        Test time periods: range(5374, 6717)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. 

### Selecting features

- Affects which features will be returned when loading data.
- Setting `include_time` to True adds time to the features returned when loading data.
- Setting `include_ts_id` to True adds the time series id to the features returned when loading data.

#### Setting features to take as "all"

In [10]:
# Feature selection with the "all" keyword instead of an explicit list.
config = DisjointTimeBasedConfig(
    train_ts=0.5,
    val_ts=0.2,
    test_ts=0.1,
    train_time_period=0.5,
    val_time_period=0.3,
    test_time_period=0.2,
    features_to_take="all",
)
disjoint_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:00:26,146][disjoint_time_based_config][INFO] - Quick validation succeeded.
[2025-11-28 18:00:26,161][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-28 18:00:26,161][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 274/274 [00:00<00:00, 1455.74it/s]
[2025-11-28 18:00:26,373][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 109/109 [00:00<00:00, 1275.52it/s]
[2025-11-28 18:00:26,470][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 54/54 [00:00<00:00, 1170.93it/s]
[2025-11-28 18:00:26,523][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:00:26,523][cesnet_dataset][INFO] - Config initialized successfully.



Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDs: [429 207  46 523 299 ...   5 120 379 134 163], Length=274
        Val time series IDs: [499 525 337 491 203 ...  25  85 440 191 438], Length=109
        Test time series IDs: [318 545 103 108 245 ... 496 121 536 416 537], Length=54
    Time periods
        Train time periods: range(0, 3359)
        Val time periods: range(3359, 5374)
        Test time periods: range(5374, 6717)
    Features
        Taken features: ['n_flows', 'n_packets', 'n_bytes', 'sum_n_dest_asn', 'avg_n_dest_asn', 'std_n_dest_asn', 'sum_n_dest_ports', 'avg_n_dest_ports', 'std_n_dest_ports', 'sum_n_dest_ip', 'avg_n_dest_ip', 'std_n_dest_ip', 'tcp_udp_ratio_packets', 'tcp_udp_ratio_bytes', 'dir_ratio_packets', 'dir_ratio_bytes', 'avg_duration', 'avg_ttl']
        Default values: [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. 

#### Setting features via list

In [11]:
# Feature selection with an explicit list of feature names.
config = DisjointTimeBasedConfig(
    train_ts=0.5,
    val_ts=0.2,
    test_ts=0.1,
    train_time_period=0.5,
    val_time_period=0.3,
    test_time_period=0.2,
    features_to_take=["n_flows", "n_packets"],
)
disjoint_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:00:26,531][disjoint_time_based_config][INFO] - Quick validation succeeded.
[2025-11-28 18:00:26,547][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-28 18:00:26,548][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 274/274 [00:00<00:00, 1609.16it/s]
[2025-11-28 18:00:26,738][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 109/109 [00:00<00:00, 1396.39it/s]
[2025-11-28 18:00:26,828][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 54/54 [00:00<00:00, 1148.17it/s]
[2025-11-28 18:00:26,882][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:00:26,883][cesnet_dataset][INFO] - Config initialized successfully.



Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDs: [189 338  43 245 499 ... 307 496 478 512 290], Length=274
        Val time series IDs: [452 513 278 108 302 ... 148 443 510 206 134], Length=109
        Test time series IDs: [ 42 119  11 324 225 ... 393  27 215 461 407], Length=54
    Time periods
        Train time periods: range(0, 3359)
        Val time periods: range(3359, 5374)
        Test time periods: range(5374, 6717)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Sliding window
        Sliding window size: None
        Sliding window prediction size: None
        Sliding window step size: 1
    Fillers
        Filler type: NoFiller
    Transformers
        Transformer type: No

#### Including time and time series id

In [12]:
# Two selected features, with time and time series id explicitly requested
# and the time format set to TimeFormat.ID_TIME.
config = DisjointTimeBasedConfig(
    train_ts=0.5,
    val_ts=0.2,
    test_ts=0.1,
    train_time_period=0.5,
    val_time_period=0.3,
    test_time_period=0.2,
    features_to_take=["n_flows", "n_packets"],
    include_time=True,
    include_ts_id=True,
    time_format=TimeFormat.ID_TIME,
)
disjoint_dataset.set_dataset_config_and_initialize(
    config,
    display_config_details="text",
    workers=0,
)

[2025-11-28 18:00:26,891][disjoint_time_based_config][INFO] - Quick validation succeeded.
[2025-11-28 18:00:26,906][cesnet_dataset][INFO] - Updating config for train set and fitting values.
[2025-11-28 18:00:26,907][cesnet_dataset][INFO] - Starting fitting cycle 1/1.
100%|██████████| 274/274 [00:00<00:00, 1534.00it/s]
[2025-11-28 18:00:27,110][cesnet_dataset][INFO] - Updating config for val set.
100%|██████████| 109/109 [00:00<00:00, 1349.86it/s]
[2025-11-28 18:00:27,201][cesnet_dataset][INFO] - Updating config for test set.
100%|██████████| 54/54 [00:00<00:00, 1104.93it/s]
[2025-11-28 18:00:27,257][cesnet_dataset][INFO] - Dataset initialization complete. Configuration updated.
[2025-11-28 18:00:27,258][cesnet_dataset][INFO] - Config initialized successfully.



Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_HOUR
    Source: SourceType.INSTITUTION_SUBNETS

    Time series
        Train time series IDs: [ 54 447  99 166  94 ... 514 369 421 321  97], Length=274
        Val time series IDs: [510 377 195 150 101 ... 275 302  21 226 278], Length=109
        Test time series IDs: [106  51 312 334 382 ... 379 534 494 455 306], Length=54
    Time periods
        Train time periods: range(0, 3359)
        Val time periods: range(3359, 5374)
        Test time periods: range(5374, 6717)
    Features
        Taken features: ['n_flows', 'n_packets']
        Default values: [0. 0.]
        Time series ID included: True
        Time included: True    
        Time format: TimeFormat.ID_TIME
    Sliding window
        Sliding window size: None
        Sliding window prediction size: None
        Sliding window step size: 1
    Fillers
        Filler type: NoFiller
    Transformers
        Transformer type: No