In [4]:
from omegaconf import OmegaConf
import os
import polars as pl
import pandas as pd
from omegaconf import OmegaConf


In [5]:
def load_data_paths(path_type):
    """Load dataset paths from config.yaml based on the given path type.

    The config file is read from the parent of the current working
    directory, and every configured path is joined onto that same parent
    directory.

    Parameters
    ----------
    path_type : str
        Top-level key of config.yaml to read, e.g. 'in_paths' for input
        files or 'out_paths' for output files.

    Returns
    -------
    dict
        A dictionary where each key is a dataset name and each value is its
        full file path, with backslashes replaced by forward slashes.

    Raises
    ------
    ValueError
        If 'path_type' is not found in the config file.
    """
    # Named base_dir (not dir) so the builtin dir() is not shadowed.
    base_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
    config = OmegaConf.load(os.path.join(base_dir, "config.yaml"))

    if path_type not in config:
        raise ValueError(
            f"Invalid path_type '{path_type}'. Choose 'in_paths' or 'out_paths'."
        )

    # Replace Windows separators so the returned paths look the same on
    # every platform.
    return {
        key: os.path.join(base_dir, value).replace("\\", "/")
        for key, value in config[path_type].items()
    }


In [11]:
def load_data(path_type,lib="pd"):
    """Read the DoS, fuzzy and attack-free CSV files into DataFrames.

    The file locations are obtained from ``load_data_paths(path_type)``;
    any exception raised there propagates to the caller.

    Parameters
    ----------
    path_type : str
        Either 'in_paths' or 'out_paths' to specify dataset locations.
    lib : str, optional
        Reader backend: 'pd' for Pandas or 'pl' for Polars, by default 'pd'.

    Returns
    -------
    tuple of DataFrames
        (dos_df, fuzzy_df, attack_free_df), read in that order.

    Raises
    ------
    KeyError
        If any of the required dataset paths is missing in config.yaml.
        This is checked before ``lib`` is validated.
    ValueError
        If ``lib`` is neither 'pl' nor 'pd'.
    """
    required = ("dos_df", "fuzzy_df", "attack_free_df")
    locations = load_data_paths(path_type)

    absent = [name for name in required if name not in locations]
    if absent:
        raise KeyError(f"Missing dataset paths in config: {absent}")

    # Pick the CSV reader once; both backends are then driven identically.
    if lib == "pl":
        read_csv = pl.read_csv
    elif lib == "pd":
        read_csv = pd.read_csv
    else:
        raise ValueError("Invalid library abbreviation! Use 'pl' for Polars or 'pd' for Pandas.")

    dos_df, fuzzy_df, attack_free_df = (read_csv(locations[name]) for name in required)
    return dos_df, fuzzy_df, attack_free_df



