feat: Add config backward compatibility support

milankures · milankures · commit 43161d4d5aae · 2025-08-03T18:58:59.000+02:00
diff --git a/cesnet_tszoo/benchmarks.py b/cesnet_tszoo/benchmarks.py
@@ -190,6 +190,7 @@ def _get_built_in_benchmark(identifier: str, data_root: str) -> Benchmark:
     logger.debug("Loading config file from '%s'.", config_file_path)
     config = pickle_load(config_file_path)
     config.import_identifier = export_benchmark.config_identifier
+    config._try_update_version()
 
     # Check and load annotations if available
     if export_benchmark.annotations_ts_identifier is not None:
@@ -254,6 +255,7 @@ def _get_custom_benchmark(identifier: str, data_root: str) -> Benchmark:
         config = pickle_load(config_file_path)
 
     config.import_identifier = export_benchmark.config_identifier
+    config._try_update_version()
 
     # Load annotations if available
     if export_benchmark.annotations_ts_identifier is not None:
diff --git a/cesnet_tszoo/configs/base_config.py b/cesnet_tszoo/configs/base_config.py
@@ -7,8 +7,10 @@
 
 import numpy as np
 import numpy.typing as npt
+from packaging.version import Version
 from sklearn.model_selection import train_test_split
 
+import cesnet_tszoo.version as version
 from cesnet_tszoo.utils.constants import ROW_END, ROW_START, ID_TIME_COLUMN_NAME, TIME_COLUMN_NAME
 from cesnet_tszoo.utils.enums import AgreggationType, FillerType, TimeFormat, ScalerType, DataloaderOrder
 from cesnet_tszoo.utils.scaler import Scaler
@@ -60,6 +62,8 @@ class DatasetConfig(ABC):
         test_fillers: Fillers used in the test set. `None` if no filler is used or test set is not used.
         all_fillers: Fillers used for the all set. `None` if no filler is used or all set is not used.
         is_initialized: Flag indicating if the configuration has already been initialized. If true, config initialization will be skipped.  
+        version: Version of cesnet-tszoo this config was made in.
+        export_update_needed: Whether config was updated to newer version and should be exported.
 
     # Configuration options
 
@@ -184,6 +188,8 @@ def __init__(self,
         self.test_fillers = None
         self.all_fillers = None
         self.is_initialized = False
+        self.version = version.current_version
+        self.export_update_needed = False
 
         self._validate_construction()
 
@@ -617,3 +623,26 @@ def _set_fillers(self) -> None:
     def _validate_finalization(self) -> None:
         """Performs final validation of the configuration. """
         ...
+
+    def _try_update_version(self) -> None:
+        """Compares the version stored in this config with the running cesnet-tszoo version; flags older configs for re-export and raises ValueError for newer ones. """
+
+        self.logger.debug("Trying to update config if necessary.")
+        # An instance that has no 'version' attribute is treated as version.DEFAULT_VERSION.
+        if not hasattr(self, "version"):
+            self.logger.warning("Config attribute 'version' is missing in this instance. Default version '%s' will be set.", version.DEFAULT_VERSION)
+            self.version = version.DEFAULT_VERSION
+        # Versions are compared as packaging Version objects, not as plain strings.
+        if Version(self.version) < Version(version.current_version):
+            self.logger.warning("Imported config was made for cesnet-tszoo package of version '%s', but current used cesnet-tszoo package version is '%s'!", self.version, version.current_version)
+            self.logger.warning("Package will try to update the config. It is recommended to recreate this config or at least export this config alone or through benchmark to create updated config file.")
+            self.export_update_needed = True  # Older config: mark it as needing a fresh export.
+        elif Version(self.version) > Version(version.current_version):
+            self.logger.error("Imported config was made for cesnet-tszoo package of version '%s', but current used cesnet-tszoo package version is '%s'!", self.version, version.current_version)
+            self.logger.error("Update cesnet-tszoo package to use this config.")
+            raise ValueError(f"Imported config was made for cesnet-tszoo package of version '{self.version}', but current used cesnet-tszoo package version is '{version.current_version}'!")
+        else:
+            self.export_update_needed = False  # Versions are equal: nothing to re-export.
+        # Reached only when no error was raised; the config is stamped with the current package version.
+        self.logger.debug("Setting config version to current used cesnet-tszoo package version.")
+        self.version = version.current_version
diff --git a/cesnet_tszoo/configs/series_based_config.py b/cesnet_tszoo/configs/series_based_config.py
@@ -414,6 +414,9 @@ def _validate_finalization(self) -> None:
             self.logger.error("Overlap detected! Train, Val, and Test sets can't have the same IDs.")
             raise ValueError("Train, Val, and Test can't have the same IDs.")
 
+    def _try_update_version(self) -> None:
+        super()._try_update_version()  # Delegates entirely to the parent implementation; nothing subclass-specific is done here.
+
     def __str__(self) -> str:
 
         if self.scale_with is None:
@@ -465,5 +468,6 @@ def __str__(self) -> str:
     Other
         Nan threshold: {str(self.nan_threshold)}
         Random state: {self.random_state}
-        Train dataloader order {str(self.train_dataloader_order)}
+        Train dataloader order: {str(self.train_dataloader_order)}
+        Version: {self.version}
                 '''
diff --git a/cesnet_tszoo/configs/time_based_config.py b/cesnet_tszoo/configs/time_based_config.py
@@ -631,6 +631,9 @@ def _validate_finalization(self) -> None:
                 self.logger.error("ts_ids and test_ts_ids can't have the same IDs!")
                 raise ValueError(f"ts_ids and test_ts_ids can't have the same IDs. Overlapping IDs: {self.ts_ids[mask]}")
 
+    def _try_update_version(self) -> None:
+        super()._try_update_version()  # Delegates entirely to the parent implementation; nothing subclass-specific is done here.
+
     def __str__(self) -> str:
 
         if self.scale_with is None:
@@ -689,4 +692,5 @@ def __str__(self) -> str:
     Other
         Nan threshold: {str(self.nan_threshold)}
         Random state: {self.random_state}
+        Version: {self.version}
                 '''
diff --git a/cesnet_tszoo/datasets/cesnet_dataset.py b/cesnet_tszoo/datasets/cesnet_dataset.py
@@ -16,6 +16,7 @@
 from torch.utils.data import DataLoader, BatchSampler, SequentialSampler, Dataset, RandomSampler
 import torch
 
+import cesnet_tszoo.version as version
 from cesnet_tszoo.files.utils import get_annotations_path_and_whether_it_is_built_in, get_config_path_and_whether_it_is_built_in, exists_built_in_annotations, exists_built_in_benchmark, exists_built_in_config
 from cesnet_tszoo.configs.base_config import DatasetConfig
 from cesnet_tszoo.annotation import Annotations
@@ -260,8 +261,8 @@ def get_train_dataloader(self, ts_id: int | None = None, workers: int | Literal[
 
         assert self.train_dataset is not None, "The train_dataset must be initialized before accessing data from training set."
 
-        defaultKwargs = {'take_all': False, "cache_loader": True}
-        kwargs = {**defaultKwargs, **kwargs}
+        default_kwargs = {'take_all': False, "cache_loader": True}
+        kwargs = {**default_kwargs, **kwargs}
 
         if ts_id is not None:
 
@@ -364,8 +365,8 @@ def get_val_dataloader(self, ts_id: int | None = None, workers: int | Literal["c
 
         assert self.val_dataset is not None, "The val_dataset must be initialized before accessing data from validation set."
 
-        defaultKwargs = {'take_all': False, "cache_loader": True}
-        kwargs = {**defaultKwargs, **kwargs}
+        default_kwargs = {'take_all': False, "cache_loader": True}
+        kwargs = {**default_kwargs, **kwargs}
 
         if ts_id is not None:
 
@@ -463,13 +464,13 @@ def get_test_dataloader(self, ts_id: int | None = None, workers: int | Literal["
         if self.dataset_config is None or not self.dataset_config.is_initialized:
             raise ValueError("Dataset is not initialized. Please call set_dataset_config_and_initialize() before attempting to access test_dataloader.")
 
-        if not self.dataset_config.has_all:
+        if not self.dataset_config.has_test:
             raise ValueError("Dataloader for test set is not available in the dataset configuration.")
 
         assert self.test_dataset is not None, "The test_dataset must be initialized before accessing data from test set."
 
-        defaultKwargs = {'take_all': False, "cache_loader": True}
-        kwargs = {**defaultKwargs, **kwargs}
+        default_kwargs = {'take_all': False, "cache_loader": True}
+        kwargs = {**default_kwargs, **kwargs}
 
         if ts_id is not None:
 
@@ -572,8 +573,8 @@ def get_all_dataloader(self, ts_id: int | None = None, workers: int | Literal["c
 
         assert self.all_dataset is not None, "The all_dataset must be initialized before accessing data from all set."
 
-        defaultKwargs = {'take_all': False, "cache_loader": True}
-        kwargs = {**defaultKwargs, **kwargs}
+        default_kwargs = {'take_all': False, "cache_loader": True}
+        kwargs = {**default_kwargs, **kwargs}
 
         if ts_id is not None:
 
@@ -705,7 +706,7 @@ def get_test_df(self, workers: int | Literal["config"] = "config", as_single_dat
         if self.dataset_config is None or not self.dataset_config.is_initialized:
             raise ValueError("Dataset is not initialized. Please call set_dataset_config_and_initialize() before attempting to access test_dataloader.")
 
-        if not self.dataset_config.has_all:
+        if not self.dataset_config.has_test:
             raise ValueError("Dataloader for test set is not available in the dataset configuration.")
 
         assert self.test_dataset is not None, "The test_dataset must be initialized before accessing data from test set."
@@ -823,7 +824,7 @@ def get_test_numpy(self, workers: int | Literal["config"] = "config") -> np.ndar
         if self.dataset_config is None or not self.dataset_config.is_initialized:
             raise ValueError("Dataset is not initialized. Please call set_dataset_config_and_initialize() before attempting to access test_dataloader.")
 
-        if not self.dataset_config.has_all:
+        if not self.dataset_config.has_test:
             raise ValueError("Dataloader for test set is not available in the dataset configuration.")
 
         assert self.test_dataset is not None, "The test_dataset must be initialized before accessing data from test set."
@@ -1544,6 +1545,8 @@ def import_config(self, identifier: str, display_config_details: bool = True, wo
             self.logger.info("Custom config found: %s. Loading it.", identifier)
             config = pickle_load(config_file_path)
 
+        config._try_update_version()
+
         self.logger.info("Initializing dataset configuration with the imported config.")
         self.set_dataset_config_and_initialize(config, display_config_details, workers)
 
@@ -1638,6 +1641,7 @@ def save_config(self, identifier: str, create_with_details_file: bool = True, fo
             self.logger.info("Config details saved to %s", path_details)
 
         self._update_config_imported_status(identifier)
+        self.dataset_config.export_update_needed = False
         self.logger.info("Config successfully saved")
 
     def save_benchmark(self, identifier: str, force_write: bool = False) -> None:
@@ -1677,8 +1681,8 @@ def save_benchmark(self, identifier: str, force_write: bool = False) -> None:
         else:
             annotations_both_name = None
 
-        # Use the imported identifier if available, otherwise default to the current identifier
-        config_name = self.dataset_config.import_identifier if self.dataset_config.import_identifier is not None else identifier
+        # Use the imported identifier if available and update is not necessary, otherwise default to the current identifier
+        config_name = self.dataset_config.import_identifier if (self.dataset_config.import_identifier is not None and not self.dataset_config.export_update_needed) else identifier
 
         export_benchmark = ExportBenchmark(self.database_name,
                                            self.is_series_based,
@@ -1687,10 +1691,11 @@ def save_benchmark(self, identifier: str, force_write: bool = False) -> None:
                                            config_name,
                                            annotations_ts_name,
                                            annotations_time_name,
-                                           annotations_both_name)
+                                           annotations_both_name,
+                                           version=version.current_version)
 
         # If the config was not imported, save it
-        if self.dataset_config.import_identifier is None:
+        if self.dataset_config.import_identifier is None or self.dataset_config.export_update_needed:
             self.save_config(export_benchmark.config_identifier, force_write=force_write)
         else:
             self.logger.info("Using already existing config with identifier: %s", self.dataset_config.import_identifier)
diff --git a/cesnet_tszoo/datasets/series_based_cesnet_dataset.py b/cesnet_tszoo/datasets/series_based_cesnet_dataset.py
@@ -498,7 +498,7 @@ def _get_singular_time_series_dataset(self, parent_dataset: SeriesBasedDataset,
     def _get_dataloader(self, dataset: SeriesBasedDataset, workers: int | Literal["config"], take_all: bool, batch_size: int, **kwargs) -> DataLoader:
         """Set series based dataloader for this dataset. """
 
-        defaultKwargs = {'order': DataloaderOrder.SEQUENTIAL}
-        kwargs = {**defaultKwargs, **kwargs}
+        default_kwargs = {'order': DataloaderOrder.SEQUENTIAL}
+        kwargs = {**default_kwargs, **kwargs}
 
         return self._get_series_based_dataloader(dataset, workers, take_all, batch_size, kwargs["order"])
diff --git a/cesnet_tszoo/datasets/time_based_cesnet_dataset.py b/cesnet_tszoo/datasets/time_based_cesnet_dataset.py
@@ -235,13 +235,13 @@ def get_test_other_dataloader(self, ts_id: int | None = None, workers: int | Lit
         if self.dataset_config is None or not self.dataset_config.is_initialized:
             raise ValueError("Dataset is not initialized. Please call set_dataset_config_and_initialize() before attempting to access test_other_dataloader.")
 
-        if not self.dataset_config.has_all:
+        if not self.dataset_config.has_test or not self.dataset_config.has_test_ts_ids:
             raise ValueError("Dataloader for test_other set is not available in the dataset configuration.")
 
         assert self.test_dataset is not None, "The test_other_dataset must be initialized before accessing data from test_other set."
 
-        defaultKwargs = {'take_all': False, "cache_loader": True}
-        kwargs = {**defaultKwargs, **kwargs}
+        default_kwargs = {'take_all': False, "cache_loader": True}
+        kwargs = {**default_kwargs, **kwargs}
 
         if ts_id is not None:
 
@@ -313,7 +313,7 @@ def get_test_other_df(self, workers: int | Literal["config"] = "config", as_sing
         if self.dataset_config is None or not self.dataset_config.is_initialized:
             raise ValueError("Dataset is not initialized. Please call set_dataset_config_and_initialize() before attempting to access test_other_dataloader.")
 
-        if not self.dataset_config.has_all:
+        if not self.dataset_config.has_test or not self.dataset_config.has_test_ts_ids:
             raise ValueError("Dataloader for test_other set is not available in the dataset configuration.")
 
         assert self.test_dataset is not None, "The test_other_dataset must be initialized before accessing data from test_other set."
diff --git a/cesnet_tszoo/files/utils.py b/cesnet_tszoo/files/utils.py
@@ -21,7 +21,7 @@ def get_benchmark_path_and_whether_it_is_built_in(identifier: str, data_root: st
     is_built_in = exists_built_in_benchmark(identifier)
 
     if not is_built_in:
-        logger.warning("Built-in benchmark %s not found.", identifier)
+        logger.info("Built-in benchmark %s not found.", identifier)
 
     if is_built_in:
         return path_for_built_in_benchmark, True
@@ -31,7 +31,7 @@ def get_benchmark_path_and_whether_it_is_built_in(identifier: str, data_root: st
     is_custom = os.path.exists(path_for_custom_benchmark)
 
     if not is_custom:
-        logger.warning("No benchmark with identifier %s found at expected path: %s.", identifier, path_for_custom_benchmark)
+        logger.info("No benchmark with identifier %s found at expected path: %s.", identifier, path_for_custom_benchmark)
 
     if is_custom:
         return path_for_custom_benchmark, False
diff --git a/cesnet_tszoo/utils/utils.py b/cesnet_tszoo/utils/utils.py
@@ -45,6 +45,7 @@ class ExportBenchmark:
     annotations_time_identifier: str
     annotations_both_identifier: str
     related_results_identifier: Optional[str] = None
+    version: Optional[str] = None  # Package version supplied by the exporter; None when not provided.
     description: Optional[str] = None
 
     def to_dict(self):
diff --git a/cesnet_tszoo/version.py b/cesnet_tszoo/version.py
@@ -0,0 +1,4 @@
+import importlib.metadata
+
+current_version = importlib.metadata.version("cesnet-tszoo")  # Version string of the installed cesnet-tszoo distribution.
+DEFAULT_VERSION = "0.1.2"  # Assigned to configs that have no 'version' attribute (see _try_update_version in configs/base_config.py).
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "cesnet_tszoo"
-version = "0.1.2"
+version = "0.1.3"
 authors = [
     { name = "Milan Kureš", email = "kuresmil@fit.cvut.cz" },
     { name = "Josef Koumar", email = "josef.koumar@fit.cvut.cz" },
@@ -44,6 +44,8 @@ dependencies = [
     "PyYAML",
     "requests",
     "matplotlib",
+    "packaging",
+    # importlib.metadata is part of the Python standard library; the PyPI "importlib" backport must not be listed here.
 ]
 
 [project.optional-dependencies]