This file is currently for converting Type 1 Input, Type 3 Output (Type (1,3) or Type 3 Labeling) data spreadsheets into Type 1 Input, Type 5 Output (Type (1,5) or Type 5 Labeling) data spreadsheets. In the code, "input_label_type" refers to the initial spreadsheet's labeling scheme, and "output_label_type" refers to the produced spreadsheet's labeling scheme.

You will have to MANUALLY remove the column/row labels in the spreadsheet

In [4]:
import pandas as pd
import numpy as np
# I would use jdc for splitting the class into readable sections,
    # but the author of that library hasn't provided a license
    # in their GitHub repository.
    # I have opened an issue on the GitHub requesting they add a license. 
    # If they do not see and respond to it, I may use
        # "Class Converter(Converter)" to accomplish splitting
        # the code into different cells for readability
#import jdc

In [5]:
class Converter():
    # tuple to reduce dynamic changes possible
    # Names of the specifiers that can appear in a data file's name, in the order
        # read_file_name searches for them. Each is followed by an integer value
        # in the file name (see the "file specification list" comments below).
    FILE_SPECIFIERS = (
        "Type",
        "WithClassNum",
        "Freq",
        "BufferType",
        "BufferNum",
        "UserType",
        "UserID"
    )
    # NOTE(review): this executes once, while the class body is being evaluated,
        # and prints the number of specifiers -- looks like leftover debug output;
        # confirm nothing relies on it before removing
    print(len(FILE_SPECIFIERS))

    # CLASS_TAGS (a set) and the two descriptor ndarrays below hold tags and descriptors you can pull from if needed.
        # If they are commented, they are not currently in use.
        # CLASS_TAGS will be used to test for certain types of classes, so later we don't have to 
            # manually type in the index of each applicable class, reducing potential for mistakes.
        # BEGINNING_DESCRIPTORS and ENDING_DESCRIPTORS will be used in file names.
    # The "<School Year>" ending descriptor is supposed to later be dynamically replaced by something like "23-24", and
        # "<Semester>" is supposed to later be dynamically replaced by either "Fall", "Spring", or "Summer".
        # If it is across multiple school years, use the start and end years.
            # For example, if it was from 23-24 to 26-27, use "23-27".
        # If it is across multiple semesters, just don't include the semester ending descriptor
    
    # these are explicitly defined to lower the chance for typed mistakes
        # while also allowing descriptive use
    exercise_tag = "Exercise"
    nonexercise_tag = "Non-Exercise"
    ambiguous_exercise_tag = "Ambiguous Exercise" # this would be used for things that may or may not be part of an exercise (certain stationary classes)
    full_tag = "Full"
    border_tag = "Border" # Start or End -- different to "Boundary" if used
    start_tag = "Start"
    end_tag = "End"
    forward_lean_tag = "Forward Lean"
    forward_knee_lean_tag = "Forward Knee Lean"
    forward_table_lean_tag = "Forward Table Lean"
    lateral_lean_tag = "Lateral Lean"
    lateral_push_lean_tag = "Lateral Push Lean"
    lateral_hold_lean_tag = "Lateral Hold Lean"
    left_lean_tag = "Left Lean"
    right_lean_tag = "Right Lean"
    pushup_tag = "Pushup"
    #boundary_tag = "Boundary" # This might later be used for the edges of data files to note where they are glued together
    other_tag = "Other"
    stationary_tag = "Stationary"
    nonstationary_tag = "Non-Stationary"
    ambiguous_stationary_tag = "Ambiguous Stationary" # ("Other" class that also contains Stationary portions would use this)

    CLASS_TAGS = {
        exercise_tag,
        nonexercise_tag,
        ambiguous_exercise_tag,
        full_tag,
        border_tag,
        start_tag,
        end_tag,
        forward_lean_tag,
        forward_knee_lean_tag,
        forward_table_lean_tag,
        lateral_lean_tag,
        lateral_push_lean_tag,
        lateral_hold_lean_tag,
        left_lean_tag,
        right_lean_tag,
        pushup_tag,
        other_tag,
        stationary_tag,
        nonstationary_tag,
        ambiguous_stationary_tag
    }
    # BEGINNING_DESCRIPTORS = np.array(["COMBINED"])
    # ENDING_DESCRIPTORS = np.array(["Motion-Sessions", "<School Year>", "<Semester>"])


    # file specification list:
    # NOTE: These should appear in order for the code to work
        # (though the code will run well even if some are missing)
    # NOTE: 0 should (almost) always be treated as the unknown value.
        # if you add something that uses the value 0, consider what
        # might need changed in the code to make it work

        # (1) Type (ClassType, LabelType, OutputType):
            # This specifies how the data is stored.
            # USAGE: This appears ONLY in Labeled specifier lists
            # INFO: The current types' definitions can be found in the files
                # DataTranslations.docx
                # DataCollectionAndLabelingTechniquesDocumentation.docx

        # (2) WithClassNum: this specifies how many output classes a type has
            # so one immediately knows without having to look it up.
            # USAGE: This appears ONLY in Labeled specifier lists
            # NOTE: WithClassNum should not change separately to Type.
                # This is just here to better convey classification info

        # (3) Freq: this specifies the frequency the data collector
            # was set to when collecting that set of data.
            # USAGE: This appears BOTH in Labeled and Unlabeled specifier lists
            # NOTE: there are currently no functions to convert between frequencies
                # as we have only used one frequency so far,
                # and as we will have to test whether the values change with
                # frequency in some way due to how the measurements are taken
            # NOTE: if you ever change the Freq of the file, be sure to adjust
                # BufferNum accordingly

        # (4) BufferType: this specifies how and where buffers are added to labeled data.
            # USAGE: This appears ONLY in Labeled specifier lists
            # NOTE: there are currently no functions to convert between buffer types
                # as there is only one buffer type.

        # (5) BufferNum: this specifies how many measurements before and after
            # certain activities should be labeled the same as that activity
            # USAGE: This appears ONLY in Labeled specifier lists

        # (6) UserType: this states who collected the data --
            # us researchers (marked as 1) or manual wheelchair users (marked as 2).
            # Data from researchers who are also manual wheelchair users should be
            # marked as 3. Data combined from files marked with ((1 and/or 3) AND 2)
            # should be marked as 4 (combining less-biased data with more-biased data)
            # USAGE: This appears BOTH in Labeled and Unlabeled specifier lists
            # NOTE: Unmarked files are automatically labeled with a 0 when passing
                # through the converter and must be manually remarked. If you are
                # unsure which mark is correct, leave it as 0 and treat it as a 1
                # when using the data
    
        # (7) UserID: This allows us to loosely tell which data files were made by the
            # same person. The UserID may be allowed to change occasionally if keeping
            # it the same might be unfeasible. UserID of 0 means unknown.
            # USAGE: This appears BOTH in Labeled and Unlabeled specifier lists.
            # INFO: Researchers' UserIDs will be stored in [not-yet-created-document name]
            # NOTE: UserID should NOT be stored alongside and with correspondence to
                # the user's personal information unless Dr. Fu (or other person heading
                # the project if that changes) says otherwise as that may have legal
                # implications. In other words, data we store should NOT be directly
                # traceable back to the user who created that data (unless it's data
                # we made ourselves)
    
        # (#) Labeled OR Unlabeled: This tells us whether the file has been labeled yet.
            # An unlabeled file will only have the Freq, UserType, and UserID specifiers
            # This Converter is not equipped to detect whether a file is Labeled or 
                # Unlabeled and instead treats all as Labeled


    # Tuples and Dictionaries describing Type and WithClassNum (and, later, InputType)
    NUM_OF_LABEL_TYPES = 5
    VALID_TYPE_CONVERSIONS = (
        (3, 5),
    )
    OVERRIDE_TYPE_VALIDATION = (
        # This is here only for conversions that 
            # fail validate_class_type_conversion() but not for simple reasons
            # (simple reasons like accidentally choosing the wrong input/output types
            # or not listing the correct values in the below dictionaries)
        # If you add an entry here you may have to change logic of other parts of the code
            # for instance if the number of columns of the output spreadsheet will be more
            # than the input spreadsheet, you may have to change the dataframe.drop line at the end
    )

    
    LIST_OF_CLASSES_AND_TAGS_PER_TYPE = {
        1: {
            # Type 1, Class 1
            "Forward Lean": (
                exercise_tag,
                full_tag,
                forward_lean_tag,
                ambiguous_stationary_tag
            ),
            # Type 1, Class 2
            "Left Lean": (
                exercise_tag,
                full_tag,
                lateral_lean_tag,
                left_lean_tag,
                ambiguous_stationary_tag
            ),
            # Type 1, Class 3
            "Right Lean": (
                exercise_tag,
                full_tag,
                lateral_lean_tag,
                right_lean_tag,
                ambiguous_stationary_tag
            ),
            # Type 1, Class 4
            "Pushup": (
                exercise_tag,
                full_tag,
                pushup_tag,
                ambiguous_stationary_tag
            ),
            # Type 1, Class 5
            "Other": (
                nonexercise_tag,
                other_tag,
                ambiguous_stationary_tag
            )
        },
        2: {
            # Type 2, Class 1
            "Forward Lean Start": (
                exercise_tag,
                border_tag,
                start_tag,
                forward_lean_tag,
                nonstationary_tag
            ),
            # Type 2, Class 2
            "Forward Lean End": (
                exercise_tag,
                border_tag,
                end_tag,
                forward_lean_tag,
                nonstationary_tag,
            ),
            # Type 2, Class 3
            "Left Lean Start": (
                exercise_tag,
                border_tag,
                start_tag,
                lateral_lean_tag,
                left_lean_tag,
                nonstationary_tag
            ),
            # Type 2, Class 4
            "Left Lean End": (
                exercise_tag,
                border_tag,
                end_tag,
                lateral_lean_tag,
                left_lean_tag,
                nonstationary_tag
            ),
            # Type 2, Class 5
            "Right Lean Start": (
                exercise_tag,
                border_tag,
                start_tag,
                lateral_lean_tag,
                right_lean_tag,
                nonstationary_tag
            ),
            # Type 2, Class 6
            "Right Lean End": (
                exercise_tag,
                border_tag,
                end_tag,
                lateral_lean_tag,
                right_lean_tag,
                nonstationary_tag
            ),
            # Type 2, Class 7
            "Pushup Start": (
                exercise_tag,
                border_tag,
                start_tag,
                pushup_tag,
                nonstationary_tag
            ),
            # Type 2, Class 8
            "Pushup End": (
                exercise_tag,
                border_tag,
                end_tag,
                pushup_tag,
                nonstationary_tag
            ),
            # Type 2, Class 9
            "Stationary": (
                ambiguous_exercise_tag,
                stationary_tag
            ),
            # Type 2, Class 10
            "Other": (
                nonexercise_tag,
                other_tag,
                nonstationary_tag
            )
        },
        3: {
            # Type 3, Class 1
            "Forward Knee Lean Start": (
                exercise_tag,
                border_tag,
                start_tag,
                forward_lean_tag,
                forward_knee_lean_tag,
                nonstationary_tag
            ),
            # Type 3, Class 2
            "Forward Knee Lean End": (
                exercise_tag,
                border_tag,
                end_tag,
                forward_lean_tag,
                forward_knee_lean_tag,
                nonstationary_tag
            ),
            # Type 3, Class 3
            "Forward Table Lean Start": (
                exercise_tag,
                border_tag,
                start_tag,
                forward_lean_tag,
                forward_table_lean_tag,
                nonstationary_tag
            ),
            # Type 3, Class 4
            "Forward Table Lean End": (
                exercise_tag,
                border_tag,
                end_tag,
                forward_lean_tag,
                forward_table_lean_tag,
                nonstationary_tag
            ),
            # Type 3, Class 5
            "Left Push Lean Start": (
                exercise_tag,
                border_tag,
                start_tag,
                lateral_lean_tag,
                left_lean_tag,
                lateral_push_lean_tag,
                nonstationary_tag
            ),
            # Type 3, Class 6
            "Left Push Lean End": (
                exercise_tag,
                border_tag,
                end_tag,
                lateral_lean_tag,
                left_lean_tag,
                lateral_push_lean_tag,
                nonstationary_tag
            ),
            # Type 3, Class 7
            "Left Hold Lean Start": (
                exercise_tag,
                border_tag,
                start_tag,
                lateral_lean_tag,
                left_lean_tag,
                lateral_hold_lean_tag,
                nonstationary_tag
            ),
            # Type 3, Class 8
            "Left Hold Lean End": (
                exercise_tag,
                border_tag,
                end_tag,
                lateral_lean_tag,
                left_lean_tag,
                lateral_hold_lean_tag,
                nonstationary_tag
            ),
            # Type 3, Class 9
            "Right Push Lean Start": (
                exercise_tag,
                border_tag,
                start_tag,
                lateral_lean_tag,
                right_lean_tag,
                lateral_push_lean_tag,
                nonstationary_tag
            ),
            # Type 3, Class 10
            "Right Push Lean End": (
                exercise_tag,
                border_tag,
                end_tag,
                lateral_lean_tag,
                right_lean_tag,
                lateral_push_lean_tag,
                nonstationary_tag
            ),
            # Type 3, Class 11
            "Right Hold Lean Start": (
                exercise_tag,
                border_tag,
                start_tag,
                lateral_lean_tag,
                right_lean_tag,
                lateral_hold_lean_tag,
                nonstationary_tag
            ),
            # Type 3, Class 12
            "Right Hold Lean End": (
                exercise_tag,
                border_tag,
                end_tag,
                lateral_lean_tag,
                right_lean_tag,
                lateral_hold_lean_tag,
                nonstationary_tag
            ),
            # Type 3, Class 13
            "Pushup Start": (
                exercise_tag,
                border_tag,
                start_tag,
                pushup_tag,
                nonstationary_tag
            ),
            # Type 3, Class 14
            "Pushup End": (
                exercise_tag,
                border_tag,
                end_tag,
                pushup_tag,
                nonstationary_tag
            ),
            # Type 3, Class 15
            "Stationary": (
                ambiguous_exercise_tag,
                stationary_tag
            ),
            # Type 3, Class 16
            "Other": (
                nonexercise_tag,
                other_tag,
                nonstationary_tag
            )
        },
        4: {
            # Type 4, Class 1
            "Forward Lean": (
                exercise_tag,
                full_tag,
                forward_lean_tag,
                ambiguous_stationary_tag
            ),
            # Type 4, Class 2
            "Lateral Lean": (
                exercise_tag,
                full_tag,
                lateral_lean_tag,
                ambiguous_stationary_tag
            ),
            # Type 4, Class 3
            "Pushup": (
                exercise_tag,
                full_tag,
                pushup_tag,
                ambiguous_stationary_tag
            ),
            # Type 4, Class 4
            "Other": (
                nonexercise_tag,
                other_tag,
                ambiguous_stationary_tag
            )
        },
        5: {
            # Type 5, Class 1
            "Forward Lean Start": (
                exercise_tag,
                border_tag,
                start_tag,
                forward_lean_tag,
                nonstationary_tag
            ),
            # Type 5, Class 2
            "Forward Lean End": (
                exercise_tag,
                border_tag,
                end_tag,
                forward_lean_tag,
                nonstationary_tag
            ),
            # Type 5, Class 3
            "Lateral Lean Start": (
                exercise_tag,
                border_tag,
                start_tag,
                lateral_lean_tag,
                nonstationary_tag
            ),
            # Type 5, Class 4
            "Lateral Lean End": (
                exercise_tag,
                border_tag,
                end_tag,
                lateral_lean_tag,
                nonstationary_tag
            ),
            # Type 5, Class 5
            "Pushup Start": (
                exercise_tag,
                border_tag,
                start_tag,
                pushup_tag,
                nonstationary_tag
            ),
            # Type 5, Class 6
            "Pushup End": (
                exercise_tag,
                border_tag,
                end_tag,
                pushup_tag,
                nonstationary_tag
            ),
            # Type 5, Class 7
            "Stationary": (
                ambiguous_exercise_tag,
                stationary_tag
            ),
            # Type 5, Class 8
            "Other": (
                nonexercise_tag,
                other_tag,
                nonstationary_tag
            )
        }
    }

    NUM_OF_INPUTS_PER_TYPE = {
        # if any of these are ever not 3, switch code will need to be different
            # between two different numbers of inputs
        1: 3,
        2: 3,
        3: 3,
        4: 3,
        5: 3
    }
    NUM_OF_CLASSES_PER_TYPE = {
        1: 5,
        2: 10,
        3: 16,
        4: 4,
        5: 8
    }
    # if 1, the output is a one-hot vector
    NUM_OF_OUTPUTS_PER_TYPE = {
        1: 1,
        2: 1,
        3: 1,
        4: 1,
        5: 1
    }


    # Tuples and dictionaries describing BufferType and BufferNum
    NUM_OF_BUFFER_TYPES = 1
    # Currently these do nothing. If we later change how we want the buffers to function
        # (not the length of the buffers but which buffers overlap into other classes),
        # we will be able to do so using this
    VALID_BUFFER_TYPE_CONVERSIONS = ()
    OVERRIDE_BUFFER_TYPE_VALIDATION = (
        # This is here only for BufferType conversions that 
            # fail validate_buffer_type_conversion() but not for simple reasons
            # (simple reasons like accidentally choosing the wrong input/output types
            # or not listing the correct values in the above dictionaries)
        # If you add an entry here you may have to change logic of other parts of the code
    )

    # each buffer type has a dictionary;
        # the keys of each dictionary have precedence over the values;
        # rules (key-value pairs) listed sooner have priority over later rules
    PRECEDENCE_OF_TAGS_PER_BUFFER_TYPE = {
        1: {
            # exercises should have precedence over "Other" and "Stationary"
            exercise_tag: (nonexercise_tag, ambiguous_exercise_tag),
            # we actually don't want the following line because then in Type 3 and Type 5,
                # Stationary would have precedence over Other (which would be incorrect)
            #ambiguous_exercise_tag: (nonexercise_tag,) # the comma here is to make it a tuple #
            # "Other" should have precedence over "Stationary"
            nonstationary_tag: (stationary_tag, ambiguous_stationary_tag)
        }
    }



    def __init__(self, labeled_data_file):
        # if not all(n == Converter.NUM_OF_LABEL_TYPES for n in (len(Converter.NUM_OF_INPUTS_PER_TYPE), len(Converter.NUM_OF_CLASSES_PER_TYPE), len(Converter.NUM_OF_OUTPUTS_PER_TYPE))):
        #     print("Converter is not usable if defining dictionaries do not match corresponding dictionaries in size.")
        #     print("Fix and rerun the code to use the converter")
        #     return
        self.input_directory, self.input_beginning_descriptors, self.input_file_name, self.input_ending_descriptors, self.input_file_extension, self.input_specifiers = Converter.read_file_name(labeled_data_file)
        self.output_label_type = -1
        self.output_freq = -1
        self.output_buffer_type = -1
        self.output_buffer_num = -1

    

    def validate_label_type_conversion(self, input_label_type, output_label_type):
        # validating type values' and type dictionaries' consistency
        # all label type dictionaries have the same length
        can_use_dictionaries = all(n == Converter.NUM_OF_LABEL_TYPES for n in (len(Converter.NUM_OF_INPUTS_PER_TYPE), len(Converter.NUM_OF_CLASSES_PER_TYPE), len(Converter.NUM_OF_OUTPUTS_PER_TYPE)))
        # these commented lines were made obsolete by the line following
        # both label types are positive
        #has_positive_types = input_label_type > 0 and output_label_type > 0
        # both label types are greater than or equal to the number of different label types
        #has_bounded_types = input_label_type <= Converter.NUM_OF_LABEL_TYPES and output_label_type <= Converter.NUM_OF_LABEL_TYPES
        # both label types are valid types
        #has_valid_types = has_positive_types and has_bounded_types
        has_valid_types = input_label_type in np.arange(1, Converter.NUM_OF_LABEL_TYPES + 1) and output_label_type in np.arange(1, Converter.NUM_OF_LABEL_TYPES + 1)
        # input_label_type matches self.input_label_type
        matches_input_file = self.input_specifiers.get("Type", -1) == input_label_type
        # input_label_type corresponds to WithClassNum value stored in self
        consistent_with_class_num = self.input_specifiers.get("WithClassNum", -1) == Converter.NUM_OF_CLASSES_PER_TYPE[input_label_type]
        # has valid values (compared to type dictionaries and the conversion file)
        has_consistent_values = can_use_dictionaries and has_valid_types and matches_input_file and consistent_with_class_num

        # validating conversion logic
        # is a type conversion for which someone implemented the logic
        is_listed = (input_label_type, output_label_type) in Converter.VALID_TYPE_CONVERSIONS #.get((input_label_type, output_label_type), False)
        # is a logical input type conversion
        is_not_to_more_inputs = Converter.NUM_OF_INPUTS_PER_TYPE.get(input_label_type, 0) >= Converter.NUM_OF_INPUTS_PER_TYPE.get(output_label_type, 1)
        # is a logical output type conversion when only one class chosen at a time
        is_to_fewer_classes = Converter.NUM_OF_CLASSES_PER_TYPE.get(input_label_type, 0) > Converter.NUM_OF_CLASSES_PER_TYPE.get(output_label_type, 0)
        # may be logical output type conversion if we are converting from
            # an output with a "multi-hot" vector to an input with a one-hot vector
        input_is_not_one_hot_type = Converter.NUM_OF_OUTPUTS_PER_TYPE.get(input_label_type, 1) != 1
        # all conversion logic between types is sound
        has_valid_logic = is_listed and is_not_to_more_inputs and (is_to_fewer_classes or input_is_not_one_hot_type)

        # allowing override
        # the others are to keep someone from accidentally making a "bad conversion,"
            # but this one is to allow more-complex conversions that are possible,
            # given that someone manually listed the conversion in OVERRIDE_TYPE_VALIDATION
        # this does not override the conversion if the file is incorrect or if the types are invalid
        is_overridden = (input_label_type, output_label_type) in Converter.OVERRIDE_TYPE_VALIDATION #.get((input_label_type, output_label_type), False)

        return has_consistent_values and (has_valid_logic or is_overridden)
    

    def validate_buffer_num_conversion(self, input_buffer_num, output_buffer_num):
        # validate consistency
        # both buffer nums are non-negative
        has_nonnegative_buffer_nums = input_buffer_num >= 0 and output_buffer_num >= 0
        # input_buffer_num matches self.input_buffer_num
        matches_input_file = self.input_specifiers.get("BufferNum", -1) == input_buffer_num
        # BufferType is valid
        has_valid_buffer_type = self.input_specifiers.get("BufferType", 0) in np.arange(1, Converter.NUM_OF_BUFFER_TYPES + 1)
        # combining
        has_consistent_values = has_nonnegative_buffer_nums and matches_input_file and has_valid_buffer_type
        # returning
        return has_consistent_values
    

    def set_label_type_conversion(self, input_label_type, output_label_type):
        #NOTE that types have not yet been implemented as tuple labels
        if(not self.validate_label_type_conversion(input_label_type, output_label_type)):
            print(f"Current object/class definitions prohibit the conversion from Type {input_label_type} to Type {output_label_type}.")
            return
        #self.input_label_type = input_label_type
        self.output_label_type = output_label_type
    

    def set_buffer_num_conversion(self, input_buffer_num, output_buffer_num):
        if(not self.validate_buffer_num_conversion(input_buffer_num, output_buffer_num)):
            print(f"Current object/class definitions prohibit the conversion from BufferNum {input_buffer_num} to BufferNum {output_buffer_num}.")
            return
        #self.input_buffer_num = input_buffer_num
        self.output_buffer_num = output_buffer_num
        


    # this returns:
        # the input file's directory (where it is in the computer),
        # the beginning descriptors (the file descriptors that come before the file specifiers),
        # the file name (commented out code corrected it if it was missing any file specifiers,
            # but this is functionality that could be -- and has been -- replaced by build_file_name)
            # (the file name includes everything but the directory),
        # the ending descriptors (the file descriptors that come after the file specifiers),
        # the file's extension (the file type), and
        # the values for all file specifiers (as a dict)
            # (this does not yet include the "Labeled" or "Unlabeled" file specifiers)
    def read_file_name(file_path):
        specifier_values = {} #np.zeros((len(Converter.FILE_SPECIFIERS), 1))
        beginning_descriptors = []
        ending_descriptors = []
        # at the start of each iteration (except the first),
            # dash_index points to the dash just before the file specifier;
            # by the end of each iteration, it points to the next dash
        # rfind() finds the right-most instance;
            # using dash_index, we separate the directory and the file name
        separator_index = file_path.rfind('/')
        file_directory = file_path[ : (separator_index + 1)]
        print(file_directory)
        # we initialize output_file_name like this in case it has some
            # descriptors at the start of the file, before any file specifiers
        temp_index = file_path.find(Converter.FILE_SPECIFIERS[0])
        file_name = file_path[(separator_index + 1) : ] # used to be [(separator_index + 1) : temp_index]
        #print(file_name)


        # while we haven't reached the beginning of the file specifier list
        while (separator_index + 1) < temp_index:
            # separator_index_2 points to just before each descriptor,
                # and separator_index points to just after
            separator_index_2 = separator_index
            separator_index = file_path.find('_', separator_index)
            beginning_descriptors.append(file_path[(separator_index_2 + 1) : separator_index])


        for fs in Converter.FILE_SPECIFIERS:
            # ignore this comment
            # if(i != 0): (since we already do this for the first iteration beforehand)
            if fs != Converter.FILE_SPECIFIERS[0]:
                temp_index = file_path.find(fs, separator_index)

            # if temp_index is a substring of input_file_name
            if temp_index >= 0:
                # temp_index is the index of the start of the number value for the specifier
                    # (we treat this as unrelated to dash_index in case the value has more than one digit)
                temp_index += len(fs)
                # we are guaranteed to have a dash after each file specifier;
                    # this includes the last one since "Labeled" should be a final
                    # file specifier with no value for every labeled data set.
                    # "Unlabeled" may later be used, but code needs to change
                separator_index = file_path.find('-', temp_index)
                # this makes sure we get the full number
                specifier_values[fs] = (int)(file_path[temp_index : separator_index])
            else: #values[i] used to automatically stay 0
                specifier_values[fs] = 0

            #file_name += f'{fs}{specifier_values[fs]}-'

        # THIS IS ONLY HERE BECAUSE WE ARE ASSUMING THE FILE IS LABELED;
            # IF YOU ARE NOW PROCESSING UNLABELED FILES, ADJUST THIS
        #file_name += 'Labeled'


        # this line makes sure we keep track of the end of the file specifier section
        temp_index = separator_index + len('Labeled')
        # if there are no ending separators, separator_index_2 will be -1
            # (I don't know what separator_index will be)
        # if there is one ending separator, only separator_index will be -1.
            # the next step will not change anything
        # when we reach the last one normally, the separator_index will be -1.
            # the next step will not change anything
        # these two lines already prepare the first segment
        separator_index_2 = file_path.find('_', separator_index)
        separator_index = file_path.find('_', separator_index_2)
        # if it found an underscore indicating ending descriptors
        if(separator_index_2 > 0):
            # we purposefully decide to flip the order of operation versus incrementation
                # here as compared to the order of the beginning_descriptors reader
            # while we haven't reached the extension of file_path
            while file_path[separator_index_2] != '.':
                # here, if we have found the last ending_descriptor,
                    # the find function will not have found another
                    # underscore, and will return -1 for separator_index,
                    # but we do not want to include the extension
                    # (.csv or the like)
                if separator_index < 0:
                    separator_index = file_path.find('.', separator_index_2)

                ending_descriptors.append(file_path[(separator_index_2 + 1) : separator_index])
                # separator_index_2 points to just before each descriptor,
                    # and separator_index points to just after
                separator_index_2 = separator_index
                separator_index = file_path.find('_', separator_index)
        else:
            temp_index = file_path.find('.', temp_index)
        

        file_extension = file_path[separator_index_2 : ]


        #file_name += file_path[(temp_index + 1) : ]
        print(f'File name is "{file_name}".')
        print(f'File name read as "{Converter.build_file_name(specifier_values, beginning_descriptors, ending_descriptors, file_extension)}".')
        return file_directory, beginning_descriptors, file_name, ending_descriptors, file_extension, specifier_values
    


    # beginning_descriptors and ending_descriptors do not include the separating underscores;
        # ending_descriptors do not include the value-less file specifier "Labeled";
        # both should be numpy ARRAYS (though they can be empty)
    def build_file_name(file_specifier_values, beginning_descriptors = [], ending_descriptors = [], file_extension = '.csv'):
        # this ensures the file name is not empty so we can add to it
        file_name = ''

        for bd in beginning_descriptors:
            file_name += bd + '_'

        # this set of lines will need to be changed if we ever want to use this function
            # on Unlabeled files to convert their data
        for fs in file_specifier_values:
            file_name += f'{fs}{file_specifier_values[fs]}-'
        file_name += 'Labeled'

        for ed in ending_descriptors:
            file_name += '_' + ed

        file_name += file_extension
            
        return file_name



    # You will VERY LIKELY have to manually check the last column and fix any mismatches in the rows marked with a 1.
        # DO NOT delete the last column unless you know what it is there for:
        # it marks the rows the converter skipped, since otherwise there might be inconsistencies.
        # That way, you can manually relabel those rows the way you had in mind.
    def convert_buffer_num(self, output_folder_path='_', return_df=False, override_output_file_path=False):
        """Produce the BufferNum-converted version of the input spreadsheet.

        The input CSV is read without a header row and one extra, all-zero
        column is appended on the right; it is reserved for flagging rows
        that need a manual fix. The rows are then walked while tracking the
        previous and the current row's active labels.

        NOTE: the relabeling step itself has not been written yet, so the
        data columns of the result are an unmodified copy of the input. A
        notice is printed to make that visible.

        Args:
            output_folder_path: Folder the result is written to (should end
                with a '/'). The default '_' means "use the input file's
                directory". With ``override_output_file_path=True`` this is
                used as the complete output file path instead.
            return_df: When True, return the DataFrame and write nothing.
            override_output_file_path: See ``output_folder_path``.

        Returns:
            The output DataFrame when ``return_df`` is True, otherwise None
            (also None when the conversion is rejected by validation).

        Raises:
            FileExistsError: If the output file already exists (the CSV is
                written with exclusive-creation mode 'x').
        """
        if not self.validate_buffer_num_conversion(self.input_specifiers['BufferNum'], self.output_buffer_num):
            print(f"Current object/class definitions prohibit the conversion from BufferNum {self.input_specifiers['BufferNum']} to BufferNum {self.output_buffer_num}.")
            return
        if (not return_df) and output_folder_path == '_':
            output_folder_path = self.input_directory

        # Indexed directly on purpose: an unknown Type should fail loudly here.
        num_of_inputs = Converter.NUM_OF_INPUTS_PER_TYPE[self.input_specifiers['Type']]

        input_dataframe = pd.read_csv(self.input_directory + self.input_file_name, header=None)
        input_df_length = input_dataframe.shape[0]
        # The flag column is named with the next free integer so it lines up
            # with the integer column labels produced by header=None.
        manual_override_required_at = pd.DataFrame(
            np.zeros((input_df_length, 1)),
            columns=pd.Index([input_dataframe.shape[1]], dtype='int64'),
        )
        output_dataframe = input_dataframe.join(manual_override_required_at)

        # Everything to the right of the input columns is label data.
        label_values = input_dataframe.iloc[:, num_of_inputs:].to_numpy()

        next_class = None
        for i in range(input_df_length):
            prev_class = next_class
            # Positions (within the whole row) of the labels that are set in row i.
                # np.flatnonzero is used because np.where returns a tuple, which
                # cannot be shifted by num_of_inputs.
            active_label_columns = np.flatnonzero(label_values[i]) + num_of_inputs
            # Positional access needs .iloc; plain df[row, cols] is a column-label lookup.
            next_class = input_dataframe.iloc[i, active_label_columns]
            # TODO: use prev_class / next_class to rewrite the labels of
                # output_dataframe for the new BufferNum, and set the flag
                # column to 1 for every row that has to be skipped.

        print("BufferNum relabeling is not implemented yet; the output rows are an unmodified copy of the input.")

        if return_df:
            return output_dataframe
        # else create spreadsheet
        # if you want to override the output_file_path, please do so through the function arguments
        if override_output_file_path:
            output_file_path = output_folder_path
        else:
            # Work on a copy so the parsed input specifiers keep describing the input file.
            output_file_specifiers = dict(self.input_specifiers)
            output_file_specifiers['BufferNum'] = self.output_buffer_num
            output_file_path = output_folder_path + Converter.build_file_name(
                output_file_specifiers,
                self.input_beginning_descriptors,
                self.input_ending_descriptors,
                self.input_file_extension,
            )

        # mode='x' is exclusive creation: it fails if the file already exists.
        output_dataframe.to_csv(output_file_path, mode='x')



    # output_folder_path should end with a '/'
    # if you want to override the output_file_path, please do so through output_folder_path (with override = True)
    def convert_label_type(self, output_folder_path='_', return_df=False, override_output_file_path=False):
        if(not self.validate_label_type_conversion(self.input_specifiers['Type'], self.output_label_type)):
            print(f"Current object/class definitions prohibit the conversion from Type {self.input_specifiers['Type']} to Type {self.output_label_type}.")
            return
        if (not return_df) and output_folder_path == '_':
            output_folder_path = self.input_directory

        input_dataframe = pd.read_csv(self.input_directory + self.input_file_name, header=None)
        #input_input_columns = Converter.NUM_OF_INPUTS_PER_TYPE[self.input_label_type]
        # input_total_columns = Converter.NUM_OF_INPUTS_PER_TYPE[self.input_label_type] + Converter.NUM_OF_CLASSES_PER_TYPE[self.input_label_type]
        #output_input_columns = Converter.NUM_OF_INPUTS_PER_TYPE[self.output_label_type]
        # output_total_columns = Converter.NUM_OF_INPUTS_PER_TYPE[self.output_label_type] + Converter.NUM_OF_CLASSES_PER_TYPE[self.output_label_type]
        print(input_dataframe.columns)
        #row,
        print(input_dataframe.iat[1,1])
        print(input_dataframe.at[1,2])


        # main conversion logic
        match (self.input_specifiers['Type'], self.output_label_type):

            # We will mainly want to use stuff like df.iloc[[0, 2], [1, 3]] to access rows/columns

            case (3, 5):
                if(not self.validate_label_type_conversion(3, 5)):
                    print("Current class definitions prohibit the conversion from Type 3 to Type 5.")
                    return
                
                input_df_length = input_dataframe.shape[0]
                output_dataframe = input_dataframe.iloc[0:input_df_length,0:3]
                #df.set_index('key').join(other.set_index('key'))
                
                # We're mapping (with +3 columns for input):
                    # FKS (1) and FTS (3) to FS (1)
                    # FKE (2) and FTE (4) to FE (2)
                    # LPS (5), LHS (7), RPS (9), and RHS (11) to LS (3)
                    # LPE (6), LHE (8), RPE (10), and RHE (12) to LE (4)
                    # PS (13) to PS (5)
                    # PE (14) to PE (6)
                    # S (15) to S (7)
                    # O (16) to O (8)

                #add 2 to all indexes since 3 input columns
                temp_df = pd.DataFrame(input_dataframe.iloc[0:input_df_length,[3,5]].sum(axis=1))
                temp_df.columns = pd.Index([3], dtype='int64')
                # print(temp_df.columns)
                output_dataframe = output_dataframe.join(temp_df) # (1)

                temp_df = pd.DataFrame(input_dataframe.iloc[0:input_df_length,[4,6]].sum(axis=1))
                temp_df.columns = pd.Index([4], dtype='int64')
                output_dataframe = output_dataframe.join(temp_df) # (2)

                temp_df = pd.DataFrame(input_dataframe.iloc[0:input_df_length,[7,9,11,13]].sum(axis=1))
                temp_df.columns = pd.Index([5], dtype='int64')
                output_dataframe = output_dataframe.join(temp_df) # (3)

                temp_df = pd.DataFrame(input_dataframe.iloc[0:input_df_length,[8,10,12,14]].sum(axis=1))
                temp_df.columns = pd.Index([6], dtype='int64')
                output_dataframe = output_dataframe.join(temp_df) # (4)

                temp_df = pd.DataFrame(input_dataframe.iloc[0:input_df_length, 15:19])
                temp_df.columns = pd.Index([7,8,9,10], dtype='int64')
                output_dataframe = output_dataframe.join(temp_df) # (5:8)

                
            case _ :
                print(f"Logic not implemented for conversion from Type {self.input_specifiers['Type']} to Type {self.output_label_type}.")
                return
        
        if return_df:
            return output_dataframe
        # else create spreadsheet
        # if you want to override the output_file_path, please do so through the function arguments
        if(override_output_file_path):
            output_file_path = output_folder_path
        else:
            output_file_specifiers = self.input_specifiers
            output_file_specifiers['Type'] = self.output_label_type
            output_file_specifiers['WithClassNum'] = Converter.NUM_OF_CLASSES_PER_TYPE[self.output_label_type]
            print("If these are the same, you've modified the input file specifiers (which is not necessarily terrible):\n", self.input_specifiers['Type'], output_file_specifiers['Type'])
            output_file_path = output_folder_path + Converter.build_file_name(output_file_specifiers, self.input_beginning_descriptors, self.input_ending_descriptors, self.input_file_extension)
        # next two comments are old things
        # f"{output_folder_path}COMBINED_Type{self.output_label_type}-WithClassNum{Converter.NUM_OF_CLASSES_PER_TYPE[self.output_label_type]}-Freq10-Labeled_Motion-sessions_23-24_Fall.csv"
        # output_dataframe.drop(np.arange(output_total_columns, input_total_columns), axis=1)
        # file names should, starting now, include the number of classes the type has
        # "‘x’, exclusive creation, failing if the file already exists." (quote from https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html)
        output_dataframe.to_csv(output_file_path, mode='x')

In [6]:
# Root that the data paths below are built on.
WORKAREA_PATH = './'
# Spreadsheet to convert. An alternative single-session input is:
    # 'Data/Week 1/Left then Right/Processed/Type3-Freq10-Labeled_Motion-sessions_2023-08-26_17-25-54.csv'
INPUT_FILE_PATH = f'{WORKAREA_PATH}Data/COMBINED_Type3-Freq10-Labeled_Motion-sessions_23-24_Fall.csv'
# Folder handed to the converter for its output; note the trailing '/'.
OUTPUT_FOLDER_PATH = f'{WORKAREA_PATH}Data/'

# Set up a Type 3 -> Type 5 label conversion and run it on the input file.
converter = Converter()
converter.set_class_conversion_type(3, 5)
converter.convert(INPUT_FILE_PATH, OUTPUT_FOLDER_PATH)

Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], dtype='int64')
0.334197998
-0.948440552
