In [9]:
class SKDescriptors:
    """Namespace of constants describing collected data files, their labels, classes and tags.

    Every member is defined at class level, and the validators are called on the class
    itself (e.g. `SKDescriptors.validate_all_descriptors()`).

    This class is separated into 6 sections, separated by long rows of asterisks. These sections are:
        1. File Specifiers -- These describe key details of collected data and its labels
        2. Short Descriptions -- These are values or dictionaries that describe certain file specifier values (e.g. num of classes in Type 5)
        3. Collections of Tags and Classes -- These list all currently used tags and classes
        4. Pairings of Classes with Tags -- This section defines ClassTagSet and pairs classes with tags and with other classes to inherit from
        5. Label Types' ClassTagSets -- This section specifically lists which classes and tags each label type has
        6. Validating Functions -- This section holds all the validators that this class provides
    """





    # 1. File Specifiers *****************************************************************************************************************************



    # file specification list:
    # NOTE: These should appear in listed order for the code to work
        # (though the code will run well even if some are missing)
    # NOTE: 0 should (almost) always be treated as the unknown value.
        # if you add something that uses the value 0, consider what
        # might need to be changed in the code to make it work

        # (1) Type (ClassType, LabelType, OutputType):
            # This specifies how the data is stored.
            # USAGE: This appears ONLY in Labeled specifier lists
            # INFO: The current types' definitions can be found in the files
                # DataTranslations.docx
                # DataCollectionAndLabelingTechniquesDocumentation.docx

        # (2) WithClassNum: this specifies how many output classes a type has
            # so one immediately knows without having to look it up.
            # USAGE: This appears ONLY in Labeled specifier lists
            # NOTE: WithClassNum should not change separately from Type.
                # This is just here to better convey classification info

        # (3) Freq: this specifies the frequency the data collector
            # was set to when collecting that set of data.
            # USAGE: This appears BOTH in Labeled and Unlabeled specifier lists
            # NOTE: there are currently no functions to convert between frequencies
                # as we have only used one frequency so far,
                # and as we will have to test whether the values change with
                # frequency in some way due to how the measurements are taken
            # NOTE: if you ever change the Freq of the file, be sure to adjust
                # BufferNum accordingly

        # (4) BufferType: this specifies how and where buffers are added to labeled data.
            # USAGE: This appears ONLY in Labeled specifier lists
            # NOTE: there are currently no functions to convert between buffer types
                # as there is only one buffer type.

        # (5) BufferNum: this specifies how many measurements before and after
            # certain activities should be labeled the same as that activity
            # USAGE: This appears ONLY in Labeled specifier lists

        # (6) UserType: this states who collected the data --
            # us researchers (marked as 1) or manual wheelchair users (marked as 2).
            # Data from researchers who are also manual wheelchair users should be
            # marked as 3. Data combined from files marked with ((1 and/or 3) AND 2)
            # should be marked as 4 (combining less-biased data with more-biased data)
            # USAGE: This appears BOTH in Labeled and Unlabeled specifier lists
            # NOTE: Unmarked files are automatically labeled with a 0 when passing
                # through the converter and must be manually remarked. If you are
                # unsure which mark is correct, leave it as 0 and treat it as a 1
                # when using the data

        # (7) UserID: This allows us to loosely tell which data files were made by the
            # same person. The UserID may be allowed to change occasionally if keeping
            # it the same might be unfeasible. UserID of 0 means unknown.
            # USAGE: This appears BOTH in Labeled and Unlabeled specifier lists.
            # INFO: Researchers' UserIDs will be stored in [not-yet-created-document name]
            # NOTE: UserID should NOT be stored alongside and with correspondence to
                # the user's personal information unless Dr. Fu (or other person heading
                # the project if that changes) says otherwise as that may have legal
                # implications. In other words, data we store should NOT be directly
                # traceable back to the user who created that data (unless it's data
                # we made ourselves)

        # (#) Labeled OR Unlabeled: This tells us whether the file has been labeled yet.
            # An unlabeled file will only have the Freq, UserType, and UserID specifiers
            # These file readers are not equipped to detect whether a file is Labeled or
                # Unlabeled and instead treat all as Labeled

    CLASSIFICATION_TYPE_FS = "Type"
    WITH_CLASS_NUMBER_FS = "WithClassNum"
    FREQUENCE_FS = "Freq"
    BUFFER_TYPE_FS = "BufferType"
    BUFFER_NUMBER_FS = "BufferNum"
    USER_TYPE_FS = "UserType"
    USER_ID_FS = "UserID"
    LABELED_FS = "Labeled"
    UNLABELED_FS = "Unlabeled"
    # tuple to reduce dynamic changes possible
    LABELED_FILE_SPECIFIERS = (
        CLASSIFICATION_TYPE_FS,
        WITH_CLASS_NUMBER_FS,
        FREQUENCE_FS,
        BUFFER_TYPE_FS,
        BUFFER_NUMBER_FS,
        USER_TYPE_FS,
        USER_ID_FS,
        LABELED_FS # ,
        # UNLABELED_FS
    )
    UNLABELED_FILE_SPECIFIERS = (
        FREQUENCE_FS,
        USER_TYPE_FS,
        USER_ID_FS,
        # LABELED_FS,
        UNLABELED_FS
    )
    # Backward-compatible alias: the file name reader still looks up
        # SKDescriptors.FILE_SPECIFIERS. Because the file readers treat every
        # file as Labeled (see the note above), it points at the labeled list.
    FILE_SPECIFIERS = LABELED_FILE_SPECIFIERS




    # 2. Short Descriptions *****************************************************************************************************************************



    # names of the input columns, keyed by input layout number
    INPUT_NAMES = {
        1: ("x", "y", "z")
    }


    # BEGINNING_DESCRIPTORS and ENDING_DESCRIPTORS will be used in file names.
    # The "<School Year>" ending descriptor is supposed to later be dynamically replaced by something like "23-24", and
        # "<Semester>" is supposed to later be dynamically replaced by either "Fall", "Spring", or "Summer".
        # If it is across multiple school years, use the start and end years.
            # For example, if it was from 23-24 to 26-27, use "23-27".
        # If it is across multiple semesters, just don't include the semester ending descriptor


    # BEGINNING_DESCRIPTORS = np.array(["COMBINED"])
    # ENDING_DESCRIPTORS = np.array(["Motion-Sessions", "<School Year>", "<Semester>"])


    NUM_OF_LABEL_TYPES = 8
    NUM_OF_BUFFER_TYPES = 1

    NUM_OF_INPUTS_PER_TYPE = {
        # if any of these are ever not 3, switch code will need to be different
            # between two different numbers of inputs
        1: 3,
        2: 3,
        3: 3,
        4: 3,
        5: 3,
        6: 3,
        7: 3,
        8: 3
    }
    NUM_OF_CLASSES_PER_TYPE = {
        1: 5,
        2: 10,
        3: 16,
        4: 4,
        5: 8,
        6: 4,
        7: 3,
        8: 2
    }
    # if 1, the output is a one-hot vector
    NUM_OF_OUTPUTS_PER_TYPE = {
        1: 1,
        2: 1,
        3: 1,
        4: 1,
        5: 1,
        6: 1,
        7: 1,
        8: 1
    }





    # 3. Collections of Tags and Classes *****************************************************************************************************************************



    # these are explicitly defined to lower the chance for typed mistakes
        # while also allowing descriptive use
    EXERCISE_TAG = "Exercise"
    NONEXERCISE_TAG = "Non-Exercise"
    AMBIGUOUS_EXERCISE_TAG = "Ambiguous Exercise" # this would be used for things that may or may not be part of an exercise (certain stationary classes)
    FULL_TAG = "Full"
    BORDER_TAG = "Border" # Start or End -- different to "Boundary" if used
    START_TAG = "Start"
    END_TAG = "End"
    FORWARD_LEAN_TAG = "Forward Lean"
    FORWARD_KNEE_LEAN_TAG = "Forward Knee Lean"
    FORWARD_TABLE_LEAN_TAG = "Forward Table Lean"
    LATERAL_LEAN_TAG = "Lateral Lean"
    LATERAL_PUSH_LEAN_TAG = "Lateral Push Lean"
    LATERAL_HOLD_LEAN_TAG = "Lateral Hold Lean"
    LEFT_LEAN_TAG = "Left Lean"
    RIGHT_LEAN_TAG = "Right Lean"
    PUSHUP_TAG = "Pushup"
    #boundary_tag = "Boundary" # This might later be used for the edges of data files to note where they are glued together
    OTHER_TAG = "Other"
    STATIONARY_TAG = "Stationary"
    NONSTATIONARY_TAG = "Non-Stationary"
    AMBIGUOUS_STATIONARY_TAG = "Ambiguous Stationary" # "Other" and exercise classes that also contain Stationary portions would use this

    # set
    CLASS_TAGS = {
        EXERCISE_TAG,
        NONEXERCISE_TAG,
        AMBIGUOUS_EXERCISE_TAG,
        FULL_TAG,
        BORDER_TAG,
        START_TAG,
        END_TAG,
        FORWARD_LEAN_TAG,
        FORWARD_KNEE_LEAN_TAG,
        FORWARD_TABLE_LEAN_TAG,
        LATERAL_LEAN_TAG,
        LATERAL_PUSH_LEAN_TAG,
        LATERAL_HOLD_LEAN_TAG,
        LEFT_LEAN_TAG,
        RIGHT_LEAN_TAG,
        PUSHUP_TAG,
        OTHER_TAG,
        STATIONARY_TAG,
        NONSTATIONARY_TAG,
        AMBIGUOUS_STATIONARY_TAG
    }


    # jump to the set below if you want the list of class names
    FORWARD_LEAN_CLASS              = FORWARD_LEAN_TAG                          # "Forward Lean"
    FORWARD_LEAN_START_CLASS        = FORWARD_LEAN_TAG + ' ' + START_TAG        # "Forward Lean Start"
    FORWARD_LEAN_END_CLASS          = FORWARD_LEAN_TAG + ' ' + END_TAG          # "Forward Lean End"
    FORWARD_KNEE_LEAN_START_CLASS   = FORWARD_KNEE_LEAN_TAG + ' ' + START_TAG   # "Forward Knee Lean Start"
    FORWARD_KNEE_LEAN_END_CLASS     = FORWARD_KNEE_LEAN_TAG + ' ' + END_TAG     # "Forward Knee Lean End"
    FORWARD_TABLE_LEAN_START_CLASS  = FORWARD_TABLE_LEAN_TAG + ' ' + START_TAG  # "Forward Table Lean Start"
    FORWARD_TABLE_LEAN_END_CLASS    = FORWARD_TABLE_LEAN_TAG + ' ' + END_TAG    # "Forward Table Lean End"
    LATERAL_LEAN_CLASS              = LATERAL_LEAN_TAG                          # "Lateral Lean"
    LATERAL_LEAN_START_CLASS        = LATERAL_LEAN_TAG + ' ' + START_TAG        # "Lateral Lean Start"
    LATERAL_LEAN_END_CLASS          = LATERAL_LEAN_TAG + ' ' + END_TAG          # "Lateral Lean End"
    LEFT_LEAN_CLASS                 = LEFT_LEAN_TAG                             # "Left Lean"
    LEFT_LEAN_START_CLASS           = LEFT_LEAN_TAG + ' ' + START_TAG           # "Left Lean Start"
    LEFT_LEAN_END_CLASS             = LEFT_LEAN_TAG + ' ' + END_TAG             # "Left Lean End"
    LEFT_PUSH_LEAN_START_CLASS      = "Left Push Lean Start"
    LEFT_PUSH_LEAN_END_CLASS        = "Left Push Lean End"
    LEFT_HOLD_LEAN_START_CLASS      = "Left Hold Lean Start"
    LEFT_HOLD_LEAN_END_CLASS        = "Left Hold Lean End"
    RIGHT_LEAN_CLASS                = RIGHT_LEAN_TAG                            # "Right Lean"
    RIGHT_LEAN_START_CLASS          = RIGHT_LEAN_TAG + ' ' + START_TAG          # "Right Lean Start"
    RIGHT_LEAN_END_CLASS            = RIGHT_LEAN_TAG + ' ' + END_TAG            # "Right Lean End"
    RIGHT_PUSH_LEAN_START_CLASS     = "Right Push Lean Start"
    RIGHT_PUSH_LEAN_END_CLASS       = "Right Push Lean End"
    RIGHT_HOLD_LEAN_START_CLASS     = "Right Hold Lean Start"
    RIGHT_HOLD_LEAN_END_CLASS       = "Right Hold Lean End"
    PUSHUP_CLASS                    = PUSHUP_TAG                                # "Pushup"
    PUSHUP_START_CLASS              = PUSHUP_TAG + ' ' + START_TAG              # "Pushup Start"
    PUSHUP_END_CLASS                = PUSHUP_TAG + ' ' + END_TAG                # "Pushup End"
    STATIONARY_CLASS                = STATIONARY_TAG                            # "Stationary"
    MOVING_CLASS                    = "Moving"
    OTHER_CLASS                     = OTHER_TAG                                 # "Other"

    CURRENTLY_USED_CLASS_NAMES = {
        FORWARD_LEAN_CLASS,
        FORWARD_LEAN_START_CLASS,
        FORWARD_LEAN_END_CLASS,
        FORWARD_KNEE_LEAN_START_CLASS,
        FORWARD_KNEE_LEAN_END_CLASS,
        FORWARD_TABLE_LEAN_START_CLASS,
        FORWARD_TABLE_LEAN_END_CLASS,
        LATERAL_LEAN_CLASS,
        LATERAL_LEAN_START_CLASS,
        LATERAL_LEAN_END_CLASS,
        LEFT_LEAN_CLASS,
        LEFT_LEAN_START_CLASS,
        LEFT_LEAN_END_CLASS,
        LEFT_PUSH_LEAN_START_CLASS,
        LEFT_PUSH_LEAN_END_CLASS,
        LEFT_HOLD_LEAN_START_CLASS,
        LEFT_HOLD_LEAN_END_CLASS,
        RIGHT_LEAN_CLASS,
        RIGHT_LEAN_START_CLASS,
        RIGHT_LEAN_END_CLASS,
        RIGHT_PUSH_LEAN_START_CLASS,
        RIGHT_PUSH_LEAN_END_CLASS,
        RIGHT_HOLD_LEAN_START_CLASS,
        RIGHT_HOLD_LEAN_END_CLASS,
        PUSHUP_CLASS,
        PUSHUP_START_CLASS,
        PUSHUP_END_CLASS,
        STATIONARY_CLASS,
        MOVING_CLASS,
        OTHER_CLASS
    }





    # 4. Pairings of Classes with Tags *****************************************************************************************************************************



    # class-tag sets
    class ClassTagSet:
        """A class name paired with the set of tags that apply to it.

        classname        -- the name of the class this set describes
        tagset           -- iterable of tags that belong directly to the class
        inherited_ctsets -- iterable of tag collections (ClassTagSets or any other
                            iterable of tags) whose tags are merged into this one

        Iterating, len() and `in` all operate on the merged tag set;
        str() gives the class name.
        """

        # immutable defaults: a shared list default could be mutated by accident
        def __init__(self, classname="", tagset=(), inherited_ctsets=()):
            self.classname = classname
            self.tagset = set(tagset)
            for inherited in inherited_ctsets:
                # iterating a ClassTagSet yields its tags, so this merges inherited.tagset
                self.tagset.update(inherited)

        def __len__(self):
            return len(self.tagset)

        def __iter__(self):
            return iter(self.tagset)

        def __contains__(self, item):
            return item in self.tagset

        def __str__(self):
            return str(self.classname)

        def __repr__(self):
            return f"ClassTagSet({self.classname!r}, {sorted(self.tagset)!r})"

        def __eq__(self, other):
            # two ClassTagSets must agree on both the name and the tags;
                # anything else (e.g. a plain string) is compared by class name only
            if isinstance(other, SKDescriptors.ClassTagSet):
                return self.classname == other.classname and self.tagset == other.tagset
            return str(self.classname) == str(other)

        def __hash__(self):
            # defining __eq__ alone would make instances unhashable; hashing the
                # class name keeps the hash equal to that of a matching name string
            return hash(str(self.classname))


    # ABS is abstract... DO NOT USE except to build others
    ABS_EXERCISE_CTS                = ClassTagSet("ABS " + EXERCISE_TAG,
                                                  [EXERCISE_TAG])
    ABS_OTHER_CTS                   = ClassTagSet("ABS " + OTHER_TAG,
                                                  [NONEXERCISE_TAG, OTHER_TAG])

    ABS_EXERCISE_FULL_CTS           = ClassTagSet("ABS " + EXERCISE_TAG + " " + FULL_TAG,
                                                  [FULL_TAG, AMBIGUOUS_STATIONARY_TAG],
                                                  [ABS_EXERCISE_CTS])


    ABS_FORWARD_LEAN_CTS            = ClassTagSet("ABS " + FORWARD_LEAN_CLASS,
                                                  [FORWARD_LEAN_TAG],
                                                  [ABS_EXERCISE_CTS])
    ABS_LATERAL_LEAN_CTS            = ClassTagSet("ABS " + LATERAL_LEAN_CLASS,
                                                  [LATERAL_LEAN_TAG],
                                                  [ABS_EXERCISE_CTS])
    ABS_LEFT_LEAN_CTS               = ClassTagSet("ABS " + LEFT_LEAN_CLASS,
                                                  [LEFT_LEAN_TAG],
                                                  [ABS_LATERAL_LEAN_CTS])
    ABS_RIGHT_LEAN_CTS              = ClassTagSet("ABS " + RIGHT_LEAN_CLASS,
                                                  [RIGHT_LEAN_TAG],
                                                  [ABS_LATERAL_LEAN_CTS])
    ABS_PUSHUP_CTS                  = ClassTagSet("ABS " + PUSHUP_CLASS,
                                                  [PUSHUP_TAG],
                                                  [ABS_EXERCISE_CTS])


    ABS_FORWARD_KNEE_LEAN_CTS       = ClassTagSet("ABS Forward Knee Lean",
                                                  [FORWARD_KNEE_LEAN_TAG],
                                                  [ABS_FORWARD_LEAN_CTS])
    ABS_FORWARD_TABLE_LEAN_CTS      = ClassTagSet("ABS Forward Table Lean",
                                                  [FORWARD_TABLE_LEAN_TAG],
                                                  [ABS_FORWARD_LEAN_CTS])
    ABS_LATERAL_PUSH_LEAN_CTS       = ClassTagSet("ABS Lateral Push Lean",
                                                  [LATERAL_PUSH_LEAN_TAG],
                                                  [ABS_LATERAL_LEAN_CTS])
    ABS_LATERAL_HOLD_LEAN_CTS       = ClassTagSet("ABS Lateral Hold Lean",
                                                  [LATERAL_HOLD_LEAN_TAG],
                                                  [ABS_LATERAL_LEAN_CTS])


    EXERCISE_BORDER_CTS             = ClassTagSet(EXERCISE_TAG + " " + BORDER_TAG,
                                                  [BORDER_TAG, NONSTATIONARY_TAG],
                                                  [ABS_EXERCISE_CTS])
    EXERCISE_START_CTS              = ClassTagSet(EXERCISE_TAG + " " + START_TAG,
                                                  [START_TAG],
                                                  [EXERCISE_BORDER_CTS])
    EXERCISE_END_CTS                = ClassTagSet(EXERCISE_TAG + " " + END_TAG,
                                                  [END_TAG],
                                                  [EXERCISE_BORDER_CTS])


    FORWARD_LEAN_FULL_CTS           = ClassTagSet(FORWARD_LEAN_CLASS, [],
                                                  [ABS_FORWARD_LEAN_CTS, ABS_EXERCISE_FULL_CTS])
    FORWARD_LEAN_START_CTS          = ClassTagSet(FORWARD_LEAN_START_CLASS, [],
                                                  [ABS_FORWARD_LEAN_CTS, EXERCISE_START_CTS])
    FORWARD_LEAN_END_CTS            = ClassTagSet(FORWARD_LEAN_END_CLASS, [],
                                                  [ABS_FORWARD_LEAN_CTS, EXERCISE_END_CTS])

    FORWARD_KNEE_LEAN_START_CTS     = ClassTagSet(FORWARD_KNEE_LEAN_START_CLASS, [],
                                                  [FORWARD_LEAN_START_CTS, ABS_FORWARD_KNEE_LEAN_CTS])
    FORWARD_KNEE_LEAN_END_CTS       = ClassTagSet(FORWARD_KNEE_LEAN_END_CLASS, [],
                                                  [FORWARD_LEAN_END_CTS, ABS_FORWARD_KNEE_LEAN_CTS])
    FORWARD_TABLE_LEAN_START_CTS    = ClassTagSet(FORWARD_TABLE_LEAN_START_CLASS, [],
                                                  [FORWARD_LEAN_START_CTS, ABS_FORWARD_TABLE_LEAN_CTS])
    FORWARD_TABLE_LEAN_END_CTS      = ClassTagSet(FORWARD_TABLE_LEAN_END_CLASS, [],
                                                  [FORWARD_LEAN_END_CTS, ABS_FORWARD_TABLE_LEAN_CTS])


    LATERAL_LEAN_FULL_CTS           = ClassTagSet(LATERAL_LEAN_CLASS, [],
                                                  [ABS_LATERAL_LEAN_CTS, ABS_EXERCISE_FULL_CTS])
    LATERAL_LEAN_START_CTS          = ClassTagSet(LATERAL_LEAN_START_CLASS, [],
                                                  [ABS_LATERAL_LEAN_CTS, EXERCISE_START_CTS])
    LATERAL_LEAN_END_CTS            = ClassTagSet(LATERAL_LEAN_END_CLASS, [],
                                                  [ABS_LATERAL_LEAN_CTS, EXERCISE_END_CTS])


    LEFT_LEAN_FULL_CTS              = ClassTagSet(LEFT_LEAN_CLASS, [],
                                                  [ABS_LEFT_LEAN_CTS, LATERAL_LEAN_FULL_CTS])
    LEFT_LEAN_START_CTS             = ClassTagSet(LEFT_LEAN_START_CLASS, [],
                                                  [ABS_LEFT_LEAN_CTS, LATERAL_LEAN_START_CTS])
    LEFT_LEAN_END_CTS               = ClassTagSet(LEFT_LEAN_END_CLASS, [],
                                                  [ABS_LEFT_LEAN_CTS, LATERAL_LEAN_END_CTS])

    LEFT_PUSH_LEAN_START_CTS        = ClassTagSet(LEFT_PUSH_LEAN_START_CLASS, [],
                                                  [ABS_LATERAL_PUSH_LEAN_CTS, LEFT_LEAN_START_CTS])
    LEFT_PUSH_LEAN_END_CTS          = ClassTagSet(LEFT_PUSH_LEAN_END_CLASS, [],
                                                  [ABS_LATERAL_PUSH_LEAN_CTS, LEFT_LEAN_END_CTS])
    LEFT_HOLD_LEAN_START_CTS        = ClassTagSet(LEFT_HOLD_LEAN_START_CLASS, [],
                                                  [ABS_LATERAL_HOLD_LEAN_CTS, LEFT_LEAN_START_CTS])
    LEFT_HOLD_LEAN_END_CTS          = ClassTagSet(LEFT_HOLD_LEAN_END_CLASS, [],
                                                  [ABS_LATERAL_HOLD_LEAN_CTS, LEFT_LEAN_END_CTS])


    RIGHT_LEAN_FULL_CTS             = ClassTagSet(RIGHT_LEAN_CLASS, [],
                                                  [ABS_RIGHT_LEAN_CTS, LATERAL_LEAN_FULL_CTS])
    RIGHT_LEAN_START_CTS            = ClassTagSet(RIGHT_LEAN_START_CLASS, [],
                                                  [ABS_RIGHT_LEAN_CTS, LATERAL_LEAN_START_CTS])
    RIGHT_LEAN_END_CTS              = ClassTagSet(RIGHT_LEAN_END_CLASS, [],
                                                  [ABS_RIGHT_LEAN_CTS, LATERAL_LEAN_END_CTS])

    RIGHT_PUSH_LEAN_START_CTS       = ClassTagSet(RIGHT_PUSH_LEAN_START_CLASS, [],
                                                  [ABS_LATERAL_PUSH_LEAN_CTS, RIGHT_LEAN_START_CTS])
    RIGHT_PUSH_LEAN_END_CTS         = ClassTagSet(RIGHT_PUSH_LEAN_END_CLASS, [],
                                                  [ABS_LATERAL_PUSH_LEAN_CTS, RIGHT_LEAN_END_CTS])
    RIGHT_HOLD_LEAN_START_CTS       = ClassTagSet(RIGHT_HOLD_LEAN_START_CLASS, [],
                                                  [ABS_LATERAL_HOLD_LEAN_CTS, RIGHT_LEAN_START_CTS])
    RIGHT_HOLD_LEAN_END_CTS         = ClassTagSet(RIGHT_HOLD_LEAN_END_CLASS, [],
                                                  [ABS_LATERAL_HOLD_LEAN_CTS, RIGHT_LEAN_END_CTS])


    PUSHUP_FULL_CTS                 = ClassTagSet(PUSHUP_CLASS, [],
                                                  [ABS_PUSHUP_CTS, ABS_EXERCISE_FULL_CTS])
    PUSHUP_START_CTS                = ClassTagSet(PUSHUP_START_CLASS, [],
                                                  [ABS_PUSHUP_CTS, EXERCISE_START_CTS])
    PUSHUP_END_CTS                  = ClassTagSet(PUSHUP_END_CLASS, [],
                                                  [ABS_PUSHUP_CTS, EXERCISE_END_CTS])


    STATIONARY_CTS                  = ClassTagSet(STATIONARY_CLASS,
                                                  [AMBIGUOUS_EXERCISE_TAG, STATIONARY_TAG])
    NONSTATIONARY_CTS = MOVING_CTS  = ClassTagSet(MOVING_CLASS,
                                                  [AMBIGUOUS_EXERCISE_TAG, NONSTATIONARY_TAG])
    OTHER_AMBSTAT_CTS               = ClassTagSet(OTHER_CLASS,
                                                  [AMBIGUOUS_STATIONARY_TAG],
                                                  [ABS_OTHER_CTS])
    OTHER_NONSTAT_CTS               = ClassTagSet(OTHER_CLASS,
                                                  [NONSTATIONARY_TAG],
                                                  [ABS_OTHER_CTS])





    # 5. Label Types' ClassTagSets *****************************************************************************************************************************



    CTS_PER_TYPE = {
        1: (
            FORWARD_LEAN_FULL_CTS,          # "Forward Lean"
            LEFT_LEAN_FULL_CTS,             # "Left Lean"
            RIGHT_LEAN_FULL_CTS,            # "Right Lean"
            PUSHUP_FULL_CTS,                # "Pushup"
            OTHER_AMBSTAT_CTS               # "Other"
        ),
        2: (
            FORWARD_LEAN_START_CTS,         # "Forward Lean Start"
            FORWARD_LEAN_END_CTS,           # "Forward Lean End"
            LEFT_LEAN_START_CTS,            # "Left Lean Start"
            LEFT_LEAN_END_CTS,              # "Left Lean End"
            RIGHT_LEAN_START_CTS,           # "Right Lean Start"
            RIGHT_LEAN_END_CTS,             # "Right Lean End"
            PUSHUP_START_CTS,               # "Pushup Start"
            PUSHUP_END_CTS,                 # "Pushup End"
            STATIONARY_CTS,                 # "Stationary"
            OTHER_NONSTAT_CTS               # "Other"
        ),
        3: (
            FORWARD_KNEE_LEAN_START_CTS,    # "Forward Knee Lean Start"
            FORWARD_KNEE_LEAN_END_CTS,      # "Forward Knee Lean End"
            FORWARD_TABLE_LEAN_START_CTS,   # "Forward Table Lean Start"
            FORWARD_TABLE_LEAN_END_CTS,     # "Forward Table Lean End"
            LEFT_PUSH_LEAN_START_CTS,       # "Left Push Lean Start"
            LEFT_PUSH_LEAN_END_CTS,         # "Left Push Lean End"
            LEFT_HOLD_LEAN_START_CTS,       # "Left Hold Lean Start"
            LEFT_HOLD_LEAN_END_CTS,         # "Left Hold Lean End"
            RIGHT_PUSH_LEAN_START_CTS,      # "Right Push Lean Start"
            RIGHT_PUSH_LEAN_END_CTS,        # "Right Push Lean End"
            RIGHT_HOLD_LEAN_START_CTS,      # "Right Hold Lean Start"
            RIGHT_HOLD_LEAN_END_CTS,        # "Right Hold Lean End"
            PUSHUP_START_CTS,               # "Pushup Start"
            PUSHUP_END_CTS,                 # "Pushup End"
            STATIONARY_CTS,                 # "Stationary"
            OTHER_NONSTAT_CTS               # "Other"
        ),
        4: (
            FORWARD_LEAN_FULL_CTS,          # "Forward Lean"
            LATERAL_LEAN_FULL_CTS,          # "Lateral Lean"
            PUSHUP_FULL_CTS,                # "Pushup"
            OTHER_AMBSTAT_CTS               # "Other"
        ),
        5: (
            FORWARD_LEAN_START_CTS,         # "Forward Lean Start"
            FORWARD_LEAN_END_CTS,           # "Forward Lean End"
            LATERAL_LEAN_START_CTS,         # "Lateral Lean Start"
            LATERAL_LEAN_END_CTS,           # "Lateral Lean End"
            PUSHUP_START_CTS,               # "Pushup Start"
            PUSHUP_END_CTS,                 # "Pushup End"
            STATIONARY_CTS,                 # "Stationary"
            OTHER_NONSTAT_CTS               # "Other"
        ),
        6: (
            EXERCISE_START_CTS,             # "Exercise Start"
            EXERCISE_END_CTS,               # "Exercise End"
            STATIONARY_CTS,                 # "Stationary"
            OTHER_NONSTAT_CTS               # "Other"
        ),
        7: (
            EXERCISE_BORDER_CTS,            # "Exercise Border"
            STATIONARY_CTS,                 # "Stationary"
            OTHER_NONSTAT_CTS               # "Other"
        ),
        8: (
            MOVING_CTS,                     # "Moving"
            STATIONARY_CTS                  # "Stationary"
        )
    }


    # each buffer type has a dictionary;
        # the keys of each dictionary have precedence over the values;
        # rules (key-value pairs) listed sooner have priority over later rules
    PRECEDENCE_OF_TAGS_PER_BUFFER_TYPE = {
        1: {
            # exercises should have precedence over "Other" and "Stationary"
            EXERCISE_TAG: (NONEXERCISE_TAG, AMBIGUOUS_EXERCISE_TAG),
            # we actually don't want the following line because then in Type 3 and Type 5,
                # Stationary would have precedence over Other (which would be incorrect)
            #AMBIGUOUS_EXERCISE_TAG: (NONEXERCISE_TAG,) # the comma here is to make it a tuple #
            # "Other" and "Moving" should have precedence over "Stationary"
            NONSTATIONARY_TAG: (STATIONARY_TAG, AMBIGUOUS_STATIONARY_TAG)
        }
    }





    # 6. Validating Functions *****************************************************************************************************************************



    @staticmethod
    def validate_all_descriptors():
        """Check that the descriptor tables of this class agree with one another.

        Verifies that
            * the per-type dictionaries each have NUM_OF_LABEL_TYPES entries,
            * CTS_PER_TYPE lists exactly NUM_OF_CLASSES_PER_TYPE[type] class-tag
              sets for every label type (matched by key),
            * PRECEDENCE_OF_TAGS_PER_BUFFER_TYPE has NUM_OF_BUFFER_TYPES entries,
            * every input layout has exactly three inputs.

        Raises AssertionError when any of these checks fails; returns None otherwise.
        """
        # validating type values' and type dictionaries' consistency
        # all label type dictionaries have the same length
        per_type_dictionaries = (
            SKDescriptors.NUM_OF_INPUTS_PER_TYPE,
            SKDescriptors.NUM_OF_CLASSES_PER_TYPE,
            SKDescriptors.NUM_OF_OUTPUTS_PER_TYPE,
        )
        can_use_class_dictionaries = all(
            len(d) == SKDescriptors.NUM_OF_LABEL_TYPES for d in per_type_dictionaries
        )

        # compare by label type rather than by position, so that a missing or
            # extra type in either dictionary is detected instead of being skipped
        classes_per_type = SKDescriptors.NUM_OF_CLASSES_PER_TYPE
        cts_per_type = SKDescriptors.CTS_PER_TYPE
        can_use_classtag_dictionary = (
            cts_per_type.keys() == classes_per_type.keys()
            and all(classes_per_type[labeltype] == len(classtuple)
                    for labeltype, classtuple in cts_per_type.items())
        )

        can_use_buffer_dictionary = (
            SKDescriptors.NUM_OF_BUFFER_TYPES == len(SKDescriptors.PRECEDENCE_OF_TAGS_PER_BUFFER_TYPE)
        )

        # both parts need all(); a bare generator expression is always truthy
        input_is_three = (
            all(len(names) == 3 for names in SKDescriptors.INPUT_NAMES.values())
            and all(n == 3 for n in SKDescriptors.NUM_OF_INPUTS_PER_TYPE.values())
        )

        # DO NOT comment the checks below or use makeshift bypass code UNLESS you have a backup of a version that actually runs;
            # preferably, you would instead work out some additional logic that takes into account your case
        if not (can_use_class_dictionaries and can_use_classtag_dictionary and can_use_buffer_dictionary):
            raise AssertionError("Descriptor dictionary lengths do not match.")
        if not input_is_three:
            raise AssertionError("Woah buddy, it looks like you have a lot of reworking stuff to do before you can use anything but x, y, and z for inputs...\
                                 \n\tStart with adding 'InputType' to the File Specifiers list \
                                 \n\tThen you'll have to rework stuff that requires or assumes that the first element is always there or always 'Type'\
                                 \n\t\tFor instance, the file name reader toward the bottom of HelperFunctions.ipynb\
                                 \n\tConsider also renaming 'Type' to 'LabelType' (or something similar) to keep things from being more confusing\
                                 \n\t\tFor this, you may want to make something that the rest of the code can check for old values in\
                                 \n\t\t\t\tsince files may still have 'Type', so this would allow backwards compatibility")

    # you should pass False to this function only if the code using it properly handles an invalid classtype and/or classnum
    @staticmethod
    def validate_class_type(classtype, classnum=0, assertion=True):
        """Check that a label type (and, optionally, its class count) is valid.

        classtype -- label type to check; valid values are 1..NUM_OF_LABEL_TYPES
        classnum  -- optional class count; 0 (the default) means "not provided",
                     any other value must equal NUM_OF_CLASSES_PER_TYPE[classtype]
        assertion -- when True (the default), an invalid combination raises
                     AssertionError instead of returning False

        Returns True when the combination is valid, False otherwise.
        """
        # is classtype in the valid range?
        type_in_range = 0 < classtype <= SKDescriptors.NUM_OF_LABEL_TYPES
        # if classnum is provided, does it correspond with classtype?
            # (-1 can never match, so an unknown classtype fails this check too)
        classnum_matches = (
            classnum == 0
            or classnum == SKDescriptors.NUM_OF_CLASSES_PER_TYPE.get(classtype, -1)
        )
        validity = type_in_range and classnum_matches

        if assertion and not validity:
            raise AssertionError(f"classtype {classtype} is invalid or does not correspond with classnum {classnum}.")
        return validity

    # you should pass False to this function only if the code using it properly handles an invalid buffertype and/or buffernum
    @staticmethod
    def validate_buffer_type(buffertype, buffernum=0, assertion=True):
        """Check that a buffer type (and, optionally, a buffer size) is valid.

        buffertype -- buffer type to check; valid values are 1..NUM_OF_BUFFER_TYPES
        buffernum  -- optional buffer size; must not be negative
        assertion  -- when True (the default), an invalid combination raises
                      AssertionError instead of returning False

        Returns True when the combination is valid, False otherwise.
        """
        # is buffertype in the valid range?
        type_in_range = 0 < buffertype <= SKDescriptors.NUM_OF_BUFFER_TYPES
        # if buffernum is provided, is it nonnegative?
        validity = type_in_range and not (buffernum < 0)

        if assertion and not validity:
            raise AssertionError(f"buffertype {buffertype} or buffernum {buffernum} is invalid.")
        return validity

# NEVER comment out the `validate_all_descriptors()` call below. NO exceptions.
    # if you really want to use some makeshift bypass code, go do so elsewhere
# It runs as soon as the class has been defined and raises AssertionError
    # when the descriptor dictionaries are inconsistent with one another
SKDescriptors.validate_all_descriptors()

In [2]:
class SKFileNameHandler:
    # this returns:
        # the input file's directory (where it is in the computer),
        # the beginning descriptors (the file descriptors that come before the file specifiers),
        # the file name (commented out code corrected it if it was missing any file specifiers,
            # but this is functionality that could be -- and has been -- replaced by build_file_name)
            # (the file name includes everything but the directory),
        # the ending descriptors (the file descriptors that come after the file specifiers),
        # the file's extension (the file type), and
        # the values for all file specifiers (as a dict)
            # (this does not yet include the "Labeled" or "Unlabeled" file specifiers)
    def read_data_file_name(file_path):
        """Split a data file path into directory, descriptors, extension and file specifier values.

        The file name (the part after the last '/') is read as:
            <begin>_..._<begin>_<specifier><int>-<specifier><int>-...-<LABELED_FS>_<end>_..._<end><.ext>
        where the beginning and ending descriptors are optional.

        file_path -- path string; '/' is the only directory separator recognized

        Returns the tuple
            (file_directory, beginning_descriptors, file_name,
             ending_descriptors, file_extension, specifier_values)
        where
            file_directory        -- everything up to and including the last '/' ('' if there is none)
            beginning_descriptors -- list of the underscore-terminated descriptors before the first specifier
            file_name             -- file_path without the directory, exactly as given
            ending_descriptors    -- list of the underscore-led descriptors after the specifier section
            file_extension        -- from the '.' that ends the name to the end of the path ('' if there is no '.')
            specifier_values      -- dict with one int per entry of SKDescriptors.FILE_SPECIFIERS;
                                     absent specifiers get 0, except WithClassNum, which is
                                     looked up from the Type value

        Raises AssertionError (from the SKDescriptors validators) when the type values
        are invalid, and ValueError when a specifier's value is not an integer.
        Prints the directory, the file name and the rebuilt file name as a side effect.
        """
        specifier_values = {}
        beginning_descriptors = []
        ending_descriptors = []

        # rfind() finds the right-most instance;
            # using separator_index, we separate the directory and the file name
        separator_index = file_path.rfind('/')
        name_start = separator_index + 1
        file_directory = file_path[ : name_start]
        print(file_directory)
        file_name = file_path[name_start : ]

        # look for the first specifier inside the file name only, so that a
            # directory containing the same text cannot be mistaken for it
        temp_index = file_path.find(SKDescriptors.FILE_SPECIFIERS[0], name_start)

        # while we haven't reached the beginning of the file specifier list;
            # separator_index points to the separator just before each descriptor
        while (separator_index + 1) < temp_index:
            # find() includes its start position, so we search from one past the
                # current separator (otherwise the same underscore is found again
                # and the loop never ends), and no further than the first specifier
            next_underscore = file_path.find('_', separator_index + 1, temp_index)
            if next_underscore < 0:
                # the text before the first specifier is not terminated by an
                    # underscore; stop here instead of looping forever
                break
            beginning_descriptors.append(file_path[(separator_index + 1) : next_underscore])
            separator_index = next_underscore

        for fs in SKDescriptors.FILE_SPECIFIERS:
            # the first specifier has already been located above
            if fs != SKDescriptors.FILE_SPECIFIERS[0]:
                # a negative start would be counted from the END of the string, so clamp it
                temp_index = file_path.find(fs, max(separator_index, 0))

            # if fs was found in file_path
            if temp_index >= 0:
                # temp_index becomes the index of the start of the number value for the specifier
                temp_index += len(fs)
                # a dash is expected after each file specifier value; this includes
                    # the last one since LABELED_FS should follow as a final,
                    # value-less file specifier for every labeled data set
                separator_index = file_path.find('-', temp_index)
                # slicing up to the dash makes sure we get the full (multi-digit) number
                specifier_values[fs] = int(file_path[temp_index : separator_index])
            elif fs == SKDescriptors.WITH_CLASS_NUMBER_FS:
                # WithClassNum may not be included in older files;
                    # we fill it in since Type carries the necessary information
                specifier_values[fs] = SKDescriptors.NUM_OF_CLASSES_PER_TYPE[specifier_values[SKDescriptors.CLASSIFICATION_TYPE_FS]]
            else:
                specifier_values[fs] = 0

        # THIS ASSUMES THE FILE IS LABELED;
            # IF YOU ARE NOW PROCESSING UNLABELED FILES, ADJUST THIS
        SKDescriptors.validate_class_type(specifier_values[SKDescriptors.CLASSIFICATION_TYPE_FS], specifier_values[SKDescriptors.WITH_CLASS_NUMBER_FS])
        # if buffertype != 0
        if specifier_values[SKDescriptors.BUFFER_TYPE_FS]:
            SKDescriptors.validate_buffer_type(specifier_values[SKDescriptors.BUFFER_TYPE_FS], specifier_values[SKDescriptors.BUFFER_NUMBER_FS])

        # this keeps track of the end of the file specifier section
        temp_index = separator_index + len(SKDescriptors.LABELED_FS)

        # the first underscore after the specifier section (if any) starts the ending descriptors
        separator_index_2 = file_path.find('_', max(separator_index, 0))
        if separator_index_2 > 0:
            # separator_index_2 points to just before each descriptor,
                # and separator_index points to just after
            while True:
                # again, search from one past the current underscore
                separator_index = file_path.find('_', separator_index_2 + 1)
                if separator_index >= 0:
                    ending_descriptors.append(file_path[(separator_index_2 + 1) : separator_index])
                    separator_index_2 = separator_index
                    continue
                # no further underscore: this is the last ending descriptor, and
                    # it must not include the extension (.csv or the like)
                extension_index = file_path.find('.', separator_index_2)
                if extension_index < 0:
                    # no extension at all: the descriptor runs to the end of the path
                    extension_index = len(file_path)
                ending_descriptors.append(file_path[(separator_index_2 + 1) : extension_index])
                break
        else:
            # no ending descriptors: the extension starts at the first '.' after the specifiers
            extension_index = file_path.find('.', temp_index)
            if extension_index < 0:
                extension_index = len(file_path)

        file_extension = file_path[extension_index : ]

        print(f'File name is "{file_name}".')
        print(f'File name read as "{SKFileNameHandler.build_data_file_name(specifier_values, beginning_descriptors, ending_descriptors, file_extension)}".')
        return file_directory, beginning_descriptors, file_name, ending_descriptors, file_extension, specifier_values



    # beginning_descriptors and ending_descriptors do not include the separating underscores;
        # ending_descriptors do not include the value-less file specifier "Labeled";
        # both should be numpy ARRAYS (though they can be empty)
    def build_data_file_name(file_specifier_values, beginning_descriptors = [], ending_descriptors = [], file_extension = '.csv'):
        """Assemble a labeled data file name from its parts.

        file_specifier_values -- mapping of file specifier name to value; each entry is
                                 written as '<name><value>-' in the mapping's iteration order
        beginning_descriptors -- descriptors placed before the specifiers, each followed by '_'
        ending_descriptors    -- descriptors placed after SKDescriptors.LABELED_FS, each led by '_'
        file_extension        -- appended last, as given (so it should include the '.')

        Returns the file name as a string (no directory).
        """
        leading_part = ''.join(bd + '_' for bd in beginning_descriptors)

        # this part will need to be changed if we ever want to use this function
            # on Unlabeled files to convert their data
        specifier_part = ''.join(f'{fs}{file_specifier_values[fs]}-' for fs in file_specifier_values)
        labeled_marker = SKDescriptors.LABELED_FS

        trailing_part = ''.join('_' + ed for ed in ending_descriptors)

        return leading_part + specifier_part + labeled_marker + trailing_part + file_extension



    def read_file_extension(file_name):
        """Return the text from the last '.' of file_name to its end, or '' when there is no '.'.

        The returned extension includes the leading dot (e.g. '.csv').
        """
        # rpartition splits on the right-most '.'; the separator comes back
            # empty when file_name has no '.' at all
        _, dot, after_dot = file_name.rpartition('.')
        if not dot:
            return ''
        return dot + after_dot