In [None]:
# Debug aid: show the module name this notebook is executing under.
print(__name__)
# Colab-only bootstrap. The checks run left to right and short-circuit, so
# get_ipython() is only called once the '__IPYTHON__' marker has been found:
#   1. the notebook is executed directly (its __name__ is "__main__"),
#   2. an IPython kernel is present,
#   3. that kernel's description mentions 'google.colab'.
if __name__ == "__main__" and hasattr(__builtins__,'__IPYTHON__') and ('google.colab' in str(get_ipython())):
    from google.colab import drive
    # Mount Google Drive at /content/drive.
    drive.mount('/content/drive')
    # Move the kernel's working directory to the project folder on the mounted drive.
    %cd /content/drive/MyDrive/PressureReliefWorkArea/SummerWork/
    # List the folder contents as a quick visual check that the mount worked.
    !ls

In [None]:
import pandas as pd
# from sklearn.model_selection import train_test_split
# from torch.utils.data import Dataset, DataLoader
# import torch
# import torch.nn as nn
# import torch.nn.functional as F
# import torch.optim as optim
import numpy as np

%run -n HelperFunctions.ipynb
# import ipynb
# from ipynb.fs.full.HelperFunctions import *

In [None]:
class SKConverter:
    """Converts a labeled-data CSV file between label types and buffer numbers.

    The class-level tuples below are whitelists consulted by the validation
    methods before a conversion is accepted.
    """

    # (input_label_type, output_label_type) pairs for which conversion logic has been
    # implemented; validate_label_type_conversion() requires membership here
    # (or in OVERRIDE_TYPE_VALIDATION).
    VALID_TYPE_CONVERSIONS = (
        (3, 5),
    )
    # (input_label_type, output_label_type) pairs that skip the logic checks of
    # validate_label_type_conversion(); the file-consistency checks still apply.
    OVERRIDE_TYPE_VALIDATION = (
        # This is here only for conversions that
            # fail validate_label_type_conversion() but not for simple reasons
            # (simple reasons like accidentally choosing the wrong input/output types
            # or not listing the correct values in the per-type dictionaries,
            # e.g. SKDescriptors.NUM_OF_CLASSES_PER_TYPE)
        # If you add an entry here you may have to change logic of other parts of the code
            # for instance if the number of columns of the output spreadsheet will be more
            # than the input spreadsheet, you may have to change the dataframe.drop line at the end
    )



    # Currently these do nothing. If we later change how we want the buffers to function
        # (not the length of the buffers but which buffers overlap into other classes),
        # we will be able to do so using this
    VALID_BUFFER_TYPE_CONVERSIONS = ()
    OVERRIDE_BUFFER_TYPE_VALIDATION = (
        # This is here only for BufferType conversions that
            # fail validate_buffer_type_conversion() but not for simple reasons
            # (simple reasons like accidentally choosing the wrong input/output types
            # or not listing the correct values in the above dictionaries)
            # NOTE(review): no validate_buffer_type_conversion() is defined in this class yet
        # If you add an entry here you may have to change logic of other parts of the code
    )



    def __init__(self, labeled_data_file):
        # if not all(n == Converter.NUM_OF_LABEL_TYPES for n in (len(Converter.NUM_OF_INPUTS_PER_TYPE), len(Converter.NUM_OF_CLASSES_PER_TYPE), len(Converter.NUM_OF_OUTPUTS_PER_TYPE))):
        #     print("Converter is not usable if defining dictionaries do not match corresponding dictionaries in size.")
        #     print("Fix and rerun the code to use the converter")
        #     return
        self.input_directory, self.input_beginning_descriptors, self.input_file_name, self.input_ending_descriptors, self.input_file_extension, self.input_specifiers = SKFileNameHandler.read_data_file_name(labeled_data_file)
        self.output_label_type = -1
        self.output_freq = -1
        self.output_buffer_type = -1
        self.output_buffer_num = -1



    def validate_label_type_conversion(self, input_label_type, output_label_type):
        # validating type values' consistency
        has_valid_types = SKDescriptors.validate_class_type(input_label_type) and SKDescriptors.validate_class_type(output_label_type)
        # input_label_type matches self.input_label_type
        matches_input_file = self.input_specifiers.get(SKDescriptors.CLASSIFICATION_TYPE_FS, -1) == input_label_type
        # input_label_type corresponds to WithClassNum value stored in self
        consistent_with_class_num = self.input_specifiers.get(SKDescriptors.WITH_CLASS_NUMBER_FS, -1) == SKDescriptors.NUM_OF_CLASSES_PER_TYPE[input_label_type]
        # has valid values (compared to type dictionaries and the conversion file)
        has_consistent_values = has_valid_types and matches_input_file and consistent_with_class_num

        # validating conversion logic
        # is a type conversion for which someone implemented the logic
        is_listed = (input_label_type, output_label_type) in SKConverter.VALID_TYPE_CONVERSIONS #.get((input_label_type, output_label_type), False)
        # is a logical input type conversion
        is_not_to_more_inputs = SKDescriptors.NUM_OF_INPUTS_PER_TYPE.get(input_label_type, 0) >= SKDescriptors.NUM_OF_INPUTS_PER_TYPE.get(output_label_type, 1)
        # is a logical output type conversion when only one class chosen at a time
        is_to_fewer_classes = SKDescriptors.NUM_OF_CLASSES_PER_TYPE.get(input_label_type, 0) > SKDescriptors.NUM_OF_CLASSES_PER_TYPE.get(output_label_type, 0)
        # may be logical output type conversion if we are converting from
            # an output with a "multi-hot" vector to an input with a one-hot vector
        input_is_not_one_hot_type = SKDescriptors.NUM_OF_OUTPUTS_PER_TYPE.get(input_label_type, 1) != 1
        # all conversion logic between types is sound
        has_valid_logic = is_listed and is_not_to_more_inputs and (is_to_fewer_classes or input_is_not_one_hot_type)

        # allowing override
        # the others are to keep someone from accidentally making a "bad conversion,"
            # but this one is to allow more-complex conversions that are possible,
            # given that someone manually listed the conversion in OVERRIDE_TYPE_VALIDATION
        # this does not override the conversion if the file is incorrect or if the types are invalid
        is_overridden = (input_label_type, output_label_type) in SKConverter.OVERRIDE_TYPE_VALIDATION #.get((input_label_type, output_label_type), False)

        return has_consistent_values and (has_valid_logic or is_overridden)


    def validate_buffer_num_conversion(self, input_buffer_num, output_buffer_num):
        # validate consistency
        # both buffer nums are non-negative
        has_nonnegative_buffer_nums = input_buffer_num >= 0 and output_buffer_num >= 0
        # input_buffer_num matches self.input_buffer_num
        matches_input_file = self.input_specifiers.get(SKDescriptors.BUFFER_NUMBER_FS, -1) == input_buffer_num
        # BufferType is valid
        has_valid_buffer_type = self.input_specifiers.get(SKDescriptors.BUFFER_TYPE_FS, 0) in np.arange(1, SKDescriptors.NUM_OF_BUFFER_TYPES + 1)
        # combining
        has_consistent_values = has_nonnegative_buffer_nums and matches_input_file and has_valid_buffer_type
        # returning
        return has_consistent_values


    def set_label_type_conversion(self, input_label_type, output_label_type):
        #NOTE that types have not yet been implemented as tuple labels
        if(not self.validate_label_type_conversion(input_label_type, output_label_type)):
            print(f"Current object/class definitions prohibit the conversion from Type {input_label_type} to Type {output_label_type}.")
            return
        #self.input_label_type = input_label_type
        self.output_label_type = output_label_type


    def set_buffer_num_conversion(self, input_buffer_num, output_buffer_num):
        if(not self.validate_buffer_num_conversion(input_buffer_num, output_buffer_num)):
            print(f"Current object/class definitions prohibit the conversion from BufferNum {input_buffer_num} to BufferNum {output_buffer_num}.")
            return
        #self.input_buffer_num = input_buffer_num
        self.output_buffer_num = output_buffer_num




    # You will VERY LIKELY have to manually check the last column and fix any mismatches in the rows marked with 1.
        # DO NOT delete the last column unless you know what it is there for:
        # it marks the rows the converter skipped because converting them automatically could introduce inconsistencies,
        # so that you can correct those rows by hand the way you had in mind.
    def convert_buffer_num(self, output_folder_path='_', return_df=False, override_output_file_path=False):
        if(not self.validate_buffer_num_conversion(self.input_specifiers['BufferNum'], self.output_buffer_num)):
            print(f"Current object/class definitions prohibit the conversion from BufferNum {self.input_specifiers['BufferNum']} to BufferNum {self.output_buffer_num}.")
            return
        if (not return_df) and output_folder_path == '_':
            output_folder_path = self.input_directory

        num_of_inputs = Converter.NUM_OF_INPUTS_PER_TYPE[self.input_specifiers['Type']]

        input_dataframe = pd.read_csv(self.input_directory + self.input_file_name, header=None)
        input_df_length = input_dataframe.shape[0]
        manual_override_required_at = pd.DataFrame(np.zeros((input_df_length,1)), columns = pd.Index([input_dataframe.shape[1]], dtype='int64'))
        output_dataframe = input_dataframe.join(manual_override_required_at)

        next_class = None
        i = 0
        while(i < input_df_length):
            prev_class = next_class
            # This line takes a row and focuses on the labels
                # I do want it to crash if the dictionary cannot pull the value at the given index
            next_class = input_dataframe[i, np.where(input_dataframe[i, num_of_inputs : input_dataframe.shape[1]]) + num_of_inputs]


        if(return_df):
            return output_dataframe
        # else create spreadsheet
        # if you want to override the output_file_path, please do so through the function arguments
        if(override_output_file_path):
            output_file_path = output_folder_path
        else:
            output_file_path = output_folder_path + Converter.build_file_name(output_file_specifiers, self.input_beginning_descriptors, self.input_ending_descriptors, self.input_file_extension) # You are too far to the right; go back to the left
            # outdated version:
            # f"{output_folder_path}COMBINED_Type{self.output_label_type}-WithClassNum{Converter.NUM_OF_CLASSES_PER_TYPE[self.output_label_type]}-Freq10-Buffer{output_buffer_number}-Labeled_Motion-sessions_23-24_Fall.csv"

        dataframe.to_csv(output_file_path, mode='x')



    # output_folder_path should end with a '/'
    # if you want to override the output_file_path, please do so through output_folder_path (with override_output_file_path=True)
    def convert_label_type(self, output_folder_path='_', return_df=False, override_output_file_path=False):
        if(not self.validate_label_type_conversion(self.input_specifiers['Type'], self.output_label_type)):
            print(f"Current object/class definitions prohibit the conversion from Type {self.input_specifiers['Type']} to Type {self.output_label_type}.")
            return
        if (not return_df) and output_folder_path == '_':
            output_folder_path = self.input_directory

        input_dataframe = pd.read_csv(self.input_directory + self.input_file_name, header=None)
        #input_input_columns = Converter.NUM_OF_INPUTS_PER_TYPE[self.input_label_type]
        # input_total_columns = Converter.NUM_OF_INPUTS_PER_TYPE[self.input_label_type] + Converter.NUM_OF_CLASSES_PER_TYPE[self.input_label_type]
        #output_input_columns = Converter.NUM_OF_INPUTS_PER_TYPE[self.output_label_type]
        # output_total_columns = Converter.NUM_OF_INPUTS_PER_TYPE[self.output_label_type] + Converter.NUM_OF_CLASSES_PER_TYPE[self.output_label_type]
        print(input_dataframe.columns)
        #row,
        print(input_dataframe.iat[1,1])
        print(input_dataframe.at[1,2])


        # main conversion logic
        match (self.input_specifiers['Type'], self.output_label_type):

            # We will mainly want to use stuff like df.iloc[[0, 2], [1, 3]] to access rows/columns

            case (3, 5):
                if(not self.validate_label_type_conversion(3, 5)):
                    print("Current class definitions prohibit the conversion from Type 3 to Type 5.")
                    return

                input_df_length = input_dataframe.shape[0]
                output_dataframe = input_dataframe.iloc[0:input_df_length,0:3]
                #df.set_index('key').join(other.set_index('key'))

                # We're mapping (with +3 columns for input):
                    # FKS (1) and FTS (3) to FS (1)
                    # FKE (2) and FTE (4) to FE (2)
                    # LPS (5), LHS (7), RPS (9), and RHS (11) to LS (3)
                    # LPE (6), LHE (8), RPE (10), and RHE (12) to LE (4)
                    # PS (13) to PS (5)
                    # PE (14) to PE (6)
                    # S (15) to S (7)
                    # O (16) to O (8)

                #add 2 to all indexes since 3 input columns
                temp_df = pd.DataFrame(input_dataframe.iloc[0:input_df_length,[3,5]].sum(axis=1))
                temp_df.columns = pd.Index([3], dtype='int64')
                # print(temp_df.columns)
                output_dataframe = output_dataframe.join(temp_df) # (1)

                temp_df = pd.DataFrame(input_dataframe.iloc[0:input_df_length,[4,6]].sum(axis=1))
                temp_df.columns = pd.Index([4], dtype='int64')
                output_dataframe = output_dataframe.join(temp_df) # (2)

                temp_df = pd.DataFrame(input_dataframe.iloc[0:input_df_length,[7,9,11,13]].sum(axis=1))
                temp_df.columns = pd.Index([5], dtype='int64')
                output_dataframe = output_dataframe.join(temp_df) # (3)

                temp_df = pd.DataFrame(input_dataframe.iloc[0:input_df_length,[8,10,12,14]].sum(axis=1))
                temp_df.columns = pd.Index([6], dtype='int64')
                output_dataframe = output_dataframe.join(temp_df) # (4)

                temp_df = pd.DataFrame(input_dataframe.iloc[0:input_df_length, 15:19])
                temp_df.columns = pd.Index([7,8,9,10], dtype='int64')
                output_dataframe = output_dataframe.join(temp_df) # (5:8)


            case _ :
                print(f"Logic not implemented for conversion from Type {self.input_specifiers['Type']} to Type {self.output_label_type}.")
                return

        if return_df:
            return output_dataframe
        # else create spreadsheet
        # if you want to override the output_file_path, please do so through the function arguments
        if(override_output_file_path):
            output_file_path = output_folder_path
        else:
            output_file_specifiers = self.input_specifiers
            output_file_specifiers['Type'] = self.output_label_type
            output_file_specifiers['WithClassNum'] = Converter.NUM_OF_CLASSES_PER_TYPE[self.output_label_type]
            print("If these are the same, you've modified the input file specifiers (which is not necessarily terrible):\n", self.input_specifiers['Type'], output_file_specifiers['Type'])
            output_file_path = output_folder_path + Converter.build_file_name(output_file_specifiers, self.input_beginning_descriptors, self.input_ending_descriptors, self.input_file_extension)
        # next two comments are old things
        # f"{output_folder_path}COMBINED_Type{self.output_label_type}-WithClassNum{Converter.NUM_OF_CLASSES_PER_TYPE[self.output_label_type]}-Freq10-Labeled_Motion-sessions_23-24_Fall.csv"
        # output_dataframe.drop(np.arange(output_total_columns, input_total_columns), axis=1)
        # file names should, starting now, include the number of classes the type has
        # "‘x’, exclusive creation, failing if the file already exists." (quote from https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html)
        output_dataframe.to_csv(output_file_path, mode='x')