# enums

> Module containing enums referenced throughout the project

In [15]:
#| default_exp enums 

In [16]:
#| hide 
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
#| export 
from enum import Enum, auto

In [18]:
#| export
class SleepOrWake(Enum):
    """
    Canonical two-class labelling: the one place where the meaning of class 0 and class 1 is pinned down.
    Prefer these members over bare integers wherever that makes the code clearer.
    """

    # Negative values are bookkeeping markers, not real classes.
    DO_NOT_USE = -2  # excluded post-hoc, e.g. padding
    UNSCORED = -1

    # The two actual classes.
    WAKE = 0
    SLEEP = 1

    
class SleepStagesWLDR(Enum):
    """
    Four-stage labelling (Wake / Light / Deep / REM), the coarser counterpart of the
    N-stage SleepStages enum.
    """

    DO_NOT_USE = -2  # excluded post-hoc, e.g. padding
    UNSCORED = -1
    WAKE = 0
    LIGHT = 1
    DEEP = 2
    REM = 3

    def to_masked_sleep_wake(self) -> SleepOrWake:
        """
        Collapse this stage to a SleepOrWake label.

        DO_NOT_USE, UNSCORED and WAKE map to the SleepOrWake member of the same name;
        every other stage maps to SleepOrWake.SLEEP.
        """
        same_named = {
            SleepStagesWLDR.DO_NOT_USE: SleepOrWake.DO_NOT_USE,
            SleepStagesWLDR.UNSCORED: SleepOrWake.UNSCORED,
            SleepStagesWLDR.WAKE: SleepOrWake.WAKE,
        }
        # Anything not listed above counts as sleep.
        return same_named.get(self, SleepOrWake.SLEEP)
    
class SleepStages(Enum):
    """
    Very simple enum for fixing, once-and-for-all, meaning of integer sleep stages.
    Use this enum when possible for clarity in code.
    """

    DO_NOT_USE = -2  # excluded post-hoc, e.g. padding
    UNSCORED = -1
    WAKE = 0
    N1 = 1
    N2 = 2
    N3 = 3
    N4 = 4
    REM = 5

    def to_masked_sleep_wake(self) -> SleepOrWake:
        return self.to_WLDR().to_masked_sleep_wake()

    def to_WLDR(self) -> SleepStagesWLDR:
        match self:
            case SleepStages.DO_NOT_USE:
                return SleepOrWake.DO_NOT_USE
            case SleepStages.UNSCORED:
                return SleepStagesWLDR.UNSCORED
            case SleepStages.WAKE:
                return SleepStagesWLDR.WAKE
            case SleepStages.N1 | SleepStages.N2:
                return SleepStagesWLDR.LIGHT
            case SleepStages.N3 | SleepStages.N4:
                return SleepStagesWLDR.DEEP
            case SleepStages.REM:
                return SleepStagesWLDR.REM


# Union of the three label enum classes defined above; used as a type annotation.
PSG_Enums = SleepOrWake | SleepStagesWLDR | SleepStages


class SleepClassificationProblem(Enum):
    """
    Names the labelling scheme a classification task uses; each member
    corresponds to one of the label enums in this module.
    """

    SLEEP_OR_WAKE = auto()
    SLEEP_STAGES_WLDR = auto()
    SLEEP_STAGES = auto()

    def type_enum(self) -> PSG_Enums:
        """
        Return the label enum for this problem.

        Note that the enum *class* itself is returned (e.g. SleepOrWake), not one of its members.
        """
        if self is SleepClassificationProblem.SLEEP_OR_WAKE:
            return SleepOrWake
        if self is SleepClassificationProblem.SLEEP_STAGES_WLDR:
            return SleepStagesWLDR
        if self is SleepClassificationProblem.SLEEP_STAGES:
            return SleepStages


In [19]:
#| export

class KnownFeatures(Enum):
    """
    Tells the system which features of the data should be included.
    It is defined in this module for cyclic import reasons.
    """

    ACTIVITY = auto()
    ACCELEROMETER = auto()
    HEARTRATE = auto()
    PSG = auto()

    # Not settable, derived
    ACCEL_SPECTRO = auto()
    MO_SPECTRO = auto()  # Mads Olsen preprocessing

    @property
    def base_feature(self) -> "KnownFeatures":
        """The underlying feature this one is based on; `self` when none of the `is_*_based` checks match."""
        if self.is_accelerometer_based:
            return KnownFeatures.ACCELEROMETER
        if self.is_activity_based:
            return KnownFeatures.ACTIVITY
        if self.is_heartrate_based:
            return KnownFeatures.HEARTRATE
        return self

    @property
    def is_derived(self) -> bool:
        """True for every member except ACTIVITY, ACCELEROMETER, HEARTRATE and PSG."""
        not_derived = (
            KnownFeatures.ACTIVITY,
            KnownFeatures.ACCELEROMETER,
            KnownFeatures.HEARTRATE,
            KnownFeatures.PSG,
        )
        return self not in not_derived

    @property
    def is_spectral(self) -> bool:
        """True for ACCEL_SPECTRO and MO_SPECTRO."""
        return self is KnownFeatures.ACCEL_SPECTRO or self is KnownFeatures.MO_SPECTRO

    @property
    def is_accelerometer_based(self) -> bool:
        """True for ACCELEROMETER and for the two spectrogram features."""
        # Readability shortcut so callers need not list the members themselves.
        accelerometer_family = (
            KnownFeatures.ACCELEROMETER,
            KnownFeatures.ACCEL_SPECTRO,
            KnownFeatures.MO_SPECTRO,
        )
        return self in accelerometer_family

    @property
    def is_activity_based(self) -> bool:
        """True only for ACTIVITY."""
        return self is KnownFeatures.ACTIVITY

    @property
    def is_heartrate_based(self) -> bool:
        """True only for HEARTRATE."""
        return self is KnownFeatures.HEARTRATE



In [20]:
#| export


class KnownModel(Enum):
    """
    Enumerates known ML models. Allows for programmatic selection via JSON, producing Python classes as needed.

    Lookup by name works as for any Enum: KnownModel['COLE_KRIPKE'] gives KnownModel.COLE_KRIPKE.
    A JSON file can therefore specify "COLE_KRIPKE" as the model to use; that string is turned into an
    enum value which a factory can switch on to build the actual, trainable model.
    TODO?: Mixin the trainable model class, so the enum can be trained?
    """

    COLE_KRIPKE = auto()
    LOG_REG_SKLEARN = auto()

    # Members that are currently disabled:
    # LOG_REG_CONV = auto()
    # SVM = auto()
    # XGBOOST = auto()
    # UPDOWN = auto()  # UpDownTimeSeriesClassifier
    # CONV_SPECTRO = auto()
    # SPECTRO_AUTOENCODER = auto()
    # LREP_LOG_REG = auto()  # LatentRepresentationLogRegClassifier
    # LREP_XGB = auto()  # Latent Representation with XGBoosted tree latent rep classifier
    # TRIAX_AUTOENC = auto()
    # ### Mads Olsen et al 2022 networks below
    # # PyTorch translation
    # MO_UNET = auto()
    # # Trainable TensorFlow model. Code copy-pasted from MO, wrapped into SleepWakeClassifier
    # MO_UNET_TF = auto()
    # # Pretrained weights from Mads Olsen's GitHub. TensorFlow Lite.
    # PRETRAINED_MO_UNET = auto()

    def __str__(self) -> str:
        """Render as the bare member name, e.g. "COLE_KRIPKE", without the class prefix."""
        member_name = self.name
        return member_name

In [None]:
#| export

from typing import List, Optional
from sklearn.model_selection import LeaveOneOut


class ValidationMethod(Enum):
    """
    Describes to a pipeline how it should iterate over the data provided, splitting into training and testing sets.
    We want to avoid splitting a subject's data across train and test folds! This leaks too much info to the classifiers
    """

    LOOX = -1
    LEAVE_ONE_OUT = 0

    def make_splits(
        self, param: Optional[float | int | str], data_record_set_names: List[str]
    ) -> List[List[List[int]]]:
        """
        Using the validation method represented by self, produce list-of-lists of train/test splits by
        :param param: Parameter used in the method.
         - For LEAVE_ONE_OUT this is ignored.
         - For K_FOLD this is K
         - For RANDOM_PERCENTILE this is the training fraction
        :param samples: List of identifiers of the data records being split. An id can be anything, up to you.
        :return: List of lists of length 2: [..., [[split j training indices], [split j testing indices]], ...]
        """
        if self == ValidationMethod.LOOX:
            """==================================================
            START: Confusing boilderplate to parse the train and test set
            =================================================="""
            try:
                train_set, test_set = param.split(",")
            except Exception as e:
                print(e)
                print(
                    'Specify "<train set name>,<test set name>" as "param" value in validation config.'
                )
                print(
                    'or "train:<train set name>,test:<test set name>" with "train:" and "test" in either order, depending on your preference.'
                )
            if ":" in train_set or ":" in test_set:
                *tr_or_tst, first_set = train_set.split(":")
                *tst_or_tr, second_set = test_set.split(":")

                # One of these branches should catch based on enclosing `if`
                if tr_or_tst:
                    if tr_or_tst[0] == "train":
                        train_set = first_set
                        test_set = second_set
                    elif tr_or_tst[0] == "test":
                        train_set = second_set
                        test_set = first_set
                elif tst_or_tr:
                    if tst_or_tr[0] == "test":
                        train_set = first_set
                        test_set = second_set
                    elif tst_or_tr[0] == "train":
                        train_set = second_set
                        test_set = first_set

            """==================================================
            END: Confusing boilderplate to parse the train and test set
            =================================================="""

            print(
                f"training LOOX: Leave-one-out, swapping left-out {train_set} for {test_set}"
            )

            tests = [
                j for j in range(len(data_record_set_names)) if data_record_set_names[j] == test_set
            ]
            trains = [
                j for j in range(len(data_record_set_names)) if data_record_set_names[j] == train_set
            ]
            splits = [
                [skipping_index(trains, index=j), [tests[j]]] for j in range(len(tests))
            ]

            return splits

        if self == ValidationMethod.LEAVE_ONE_OUT:
            return LeaveOneOut().split(X=range(len(data_record_set_names)))


In [21]:
#| hide
import nbdev; nbdev.nbdev_export()