# State

In [16]:
from typing import Self
from numpy import ndarray
import numpy as np
import torch
from torch import Tensor

from linguaml.tolearn.performance import PerformanceResult
from linguaml.tolearn.families.base import Family
from linguaml.tolearn.hp import HPConfig, CategoricalHP
from linguaml.tolearn.hp.bounds import NumericHPBounds
from linguaml.rl.state.config import StateConfig

# Number of performance results that make up one state.
# State subclasses StateConfig, and State.from_performance_results checks the
# length of its input list against this class attribute.
StateConfig.n_time_steps = 2

def encode_hp_config_and_accuracy(
        hp_config: HPConfig, 
        accuracy: float,
        numeric_hp_bounds: NumericHPBounds
    ) -> ndarray:
    """Encode a hyperparameter configuration and its accuracy into a row of the state matrix.

    The row is laid out as
    ``[rescaled numeric HPs | one-hot categorical HPs | accuracy]``,
    following the order of ``hp_config.numeric_hp_names()`` and
    ``hp_config.categorical_hp_names()``.

    Parameters
    ----------
    hp_config : HPConfig
        Hyperparameter configuration.
    accuracy : float
        Accuracy of the hyperparameter configuration on the validation set.
    numeric_hp_bounds : NumericHPBounds
        The bounds of the numerical hyperparameters.

    Returns
    -------
    ndarray
        A row of the state matrix.

    Notes
    -----
    Bounds with ``max == min`` are not handled: the rescaling divides by
    ``bounds.max - bounds.min``.
    """

    # Continuous part
    # Each numerical hyperparameter is rescaled so that bounds.min maps to 0
    # and bounds.max maps to 1.
    rescaled_values = []
    for hp_name in hp_config.numeric_hp_names():
        raw_value = getattr(hp_config, hp_name)
        bounds = numeric_hp_bounds.get_bounds(hp_name)
        rescaled_values.append(
            (raw_value - bounds.min) / (bounds.max - bounds.min)
        )
    continuous_part = np.array(rescaled_values)

    # Discrete part
    # One-hot encoding vectors of the categorical hyperparameters
    one_hot_arrays = [
        getattr(hp_config, hp_name).one_hot
        for hp_name in hp_config.categorical_hp_names()
    ]
    # np.concatenate rejects an empty sequence, so a configuration without
    # categorical hyperparameters contributes an empty segment instead.
    if one_hot_arrays:
        discrete_part = np.concatenate(one_hot_arrays)
    else:
        discrete_part = np.array([])

    # Reward
    # It is a singleton array containing the accuracy
    reward_singleton_array = np.array([accuracy])

    # Concatenate continuous part, discrete part, and accuracy
    return np.concatenate(
        (continuous_part, discrete_part, reward_singleton_array)
    )

def encode_performance_result(
        performance_result: PerformanceResult,
        numeric_hp_bounds: NumericHPBounds
    ) -> ndarray:
    """Turn one performance result into a row of the state matrix.

    The result's ``hp_config`` and ``accuracy`` are forwarded, together with
    the bounds, to ``encode_hp_config_and_accuracy``.

    Parameters
    ----------
    performance_result : PerformanceResult
        The result whose configuration and accuracy are encoded.
    numeric_hp_bounds : NumericHPBounds
        The bounds of the numerical hyperparameters.

    Returns
    -------
    ndarray
        A row of the state matrix.
    """

    return encode_hp_config_and_accuracy(
        performance_result.hp_config,
        performance_result.accuracy,
        numeric_hp_bounds,
    )

def decode_state_row(
        encoded_array: ndarray, 
        family: Family, 
        numeric_hp_bounds: NumericHPBounds
    ) -> tuple[HPConfig, float]:
    """Decodes a row of the state matrix.

    The row is read left to right: one entry per numerical hyperparameter,
    then one one-hot segment per categorical hyperparameter, then the
    accuracy.

    Parameters
    ----------
    encoded_array : ndarray
        A row of the state matrix.
    family : Family
        The family of models.
    numeric_hp_bounds : NumericHPBounds
        The bounds of the numerical hyperparameters.

    Returns
    -------
    tuple[HPConfig, float]
        The hyperparameter configuration and the accuracy.
    """

    # family.hp() does not change between iterations, so resolve it once.
    hp_config_type = family.hp()

    # Decoded hyperparameter values, keyed by name
    hps = {}

    # Index of the next unread entry in the encoded array
    cursor = 0

    # Decode continuous part: undo the rescaling applied with the bounds
    for hp_name in hp_config_type.numeric_hp_names():
        bounds = numeric_hp_bounds.get_bounds(hp_name)
        rescaled = encoded_array[cursor]
        hps[hp_name] = rescaled * (bounds.max - bounds.min) + bounds.min
        cursor += 1

    # Decode discrete part: each categorical hyperparameter occupies
    # n_levels consecutive entries holding its one-hot vector
    for hp_name in hp_config_type.categorical_hp_names():
        n_levels = hp_config_type.n_levels_in_category(hp_name)
        one_hot = encoded_array[cursor : cursor + n_levels]
        hp_type: CategoricalHP = hp_config_type.hp_type(hp_name)
        hps[hp_name] = hp_type.from_one_hot(one_hot)
        cursor += n_levels

    # Recover the hyperparameter configuration
    hp_config = hp_config_type(**hps)

    # Decode reward; convert the array scalar to a built-in float so the
    # returned value matches the annotated return type.
    accuracy = float(encoded_array[cursor])

    return hp_config, accuracy
    
class State(StateConfig):
    """Wrapper around the array that represents one state.

    When built with ``from_performance_results`` the array has one encoded
    row per performance result.
    """

    def __init__(self, data: ndarray) -> None:
        """Store the array backing this state.

        Parameters
        ----------
        data : ndarray
            The state matrix. It is stored as given, without copying.
        """

        self._data = data

    def __repr__(self) -> str:
        # Without this, displaying a state only shows the object address.
        return f"{type(self).__name__}(shape={self._data.shape})"

    @property
    def shape(self) -> tuple[int, ...]:
        """Shape of the underlying array."""

        return self._data.shape

    @classmethod
    def from_performance_results(
            cls, 
            performance_results: list[PerformanceResult],
            numeric_hp_bounds: NumericHPBounds,
        ) -> Self:
        """Build a state from a list of performance results.

        Parameters
        ----------
        performance_results : list[PerformanceResult]
            The results to encode, one row each. Their number must equal
            ``cls.n_time_steps``.
        numeric_hp_bounds : NumericHPBounds
            The bounds of the numerical hyperparameters.

        Returns
        -------
        Self
            A state whose rows are the encoded results, in input order.
        """

        # NOTE(review): assert statements are stripped under ``python -O``,
        # so this length check disappears in optimized runs.
        assert len(performance_results) == cls.n_time_steps,\
            f"Expected {cls.n_time_steps} performance results, got {len(performance_results)}"

        encoded_rows = [
            encode_performance_result(performance_result, numeric_hp_bounds)
            for performance_result in performance_results
        ]

        return cls(np.array(encoded_rows))

    def to_performance_results(self, family: Family, numeric_hp_bounds: NumericHPBounds) -> list[PerformanceResult]:
        """Decode every row of the state back into a performance result.

        Parameters
        ----------
        family : Family
            The family of models the rows were encoded for.
        numeric_hp_bounds : NumericHPBounds
            The bounds of the numerical hyperparameters.

        Returns
        -------
        list[PerformanceResult]
            One result per row, in row order.
        """

        decoded_rows = (
            decode_state_row(encoded_array, family, numeric_hp_bounds)
            for encoded_array in self._data
        )

        return [
            PerformanceResult(hp_config=hp_config, accuracy=accuracy)
            for hp_config, accuracy in decoded_rows
        ]

    def to_tensor(self) -> Tensor:
        """Return the state as a ``torch.float32`` tensor."""

        return torch.tensor(self._data, dtype=torch.float32)
    
 

In [17]:
from linguaml.tolearn.families import SVCFamily

# Two SVC evaluations, each given as (hyperparameters, accuracy).
svc_evaluations = [
    (
        {
            "C": 0.1,
            "tol": 0.01,
            "gamma": 0.01,
            "kernel": "linear",
            "decision_function_shape": "ovo",
        },
        0.6,
    ),
    (
        {
            "C": 10,
            "tol": 0.1,
            "gamma": 0.001,
            "kernel": "rbf",
            "decision_function_shape": "ovr",
        },
        0.7,
    ),
]

result1, result2 = (
    PerformanceResult(hp_config=SVCFamily.hp()(**hps), accuracy=accuracy)
    for hps, accuracy in svc_evaluations
)

# Bounds used to rescale the numerical hyperparameters when encoding.
encoding_bounds = NumericHPBounds.model_validate({
    "C": (0.1, 100),
    "gamma": (1e-3, 0.1),
    "tol": (1e-5, 1e-1),
})

state = State.from_performance_results(
    performance_results=[result1, result2],
    numeric_hp_bounds=encoding_bounds,
)

state

<__main__.State at 0x17acb46d0>

In [18]:
# Display the state as a float32 tensor, one row per performance result.
state.to_tensor()

tensor([[0.0000, 0.0909, 0.0999, 1.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000,
         0.6000],
        [0.0991, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000,
         0.7000]])

In [19]:
# Decode the state back into performance results, using the same bounds
# that were passed when the state was encoded.
decoding_bounds = NumericHPBounds.model_validate({
    "C": (0.1, 100),
    "gamma": (1e-3, 0.1),
    "tol": (1e-5, 1e-1)
})

state.to_performance_results(SVCFamily, numeric_hp_bounds=decoding_bounds)

[PerformanceResult(hp_config=SVCConfig(C=0.1, kernel='linear', gamma=0.010000000000000002, tol=0.01, decision_function_shape='ovo'), accuracy=0.6),
 PerformanceResult(hp_config=SVCConfig(C=10.0, kernel='rbf', gamma=0.001, tol=0.1, decision_function_shape='ovr'), accuracy=0.7)]