# Kabirian-based Optinalysis: Feature Transformation Estimators

In [2]:
'''
                                # USER GUIDE
                            #******************#
# Introduction: 
    # Based on optinalysis, feature transformation of a given dataset is the isoreflectivity of every item 
        of that dataset to a defined magnitude (e.g., an estimate of location, scale, or another efficient parameter) of the dataset itself. 

# Input guide: feature_transformation([data, method, print_result, guide]) 
    # Input options: 
        # for data: list of numerical values from a set of real numbers. 
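        # for method: "std", "min_max", "maxbyabsmndiff", "mnbyabsmndiff", "scalocSMM", "scaleSMM", or "max" (each prefixed with "method:", e.g. "method:std"), or a numerical value to be used directly as the mirror. 
        # for print_result: "kc", "psim", "pdsim", "kcalt1", or "kcalt2" (each prefixed with "print:", e.g. "print:psim"); note that "kcalt" is not handled by this function. 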
        # for guide: "view", or "never". 

# Examples:
    # print("Transformed_data =", feature_transformation([data, "method:std", "print:psim", "guide:never"])) 
    # print("Transformed_data =", feature_transformation([data, "method:min_max", "print:psim", "guide:never"])) 
    # print("Transformed_data =", feature_transformation([data, "method:maxbyabsmndiff", "print:psim", "guide:never"]))
    # print("Transformed_data =", feature_transformation([data, "method:mnbyabsmndiff", "print:psim", "guide:never"]))
    # print("Transformed_data =", feature_transformation([data, "method:scalocSMM", "print:psim", "guide:never"]))  
    # print("Transformed_data =", feature_transformation([data, "method:scaleSMM", "print:psim", "guide:never"]))  
    # print("Transformed_data =", feature_transformation([data, "method:max", "print:psim", "guide:never"]))  

#******************************************************************************************************#
'''
import numpy
import numpy as np

def feature_transformation(instruction_list):
    """Transform every item of a dataset against a chosen "mirror" magnitude.

    Parameters
    ----------
    instruction_list : sequence
        ``[data, method, print_result, guide]`` where

        * ``data`` is a list of real numbers;
        * ``method`` is one of ``"method:std"``, ``"method:min_max"``,
          ``"method:maxbyabsmndiff"``, ``"method:mnbyabsmndiff"``,
          ``"method:max"``, ``"method:scalocSMM"``, ``"method:scaleSMM"``,
          or a real number that is used directly as the mirror;
        * ``print_result`` is one of ``"print:kc"``, ``"print:psim"``,
          ``"print:pdsim"``, ``"print:kcalt1"`` or ``"print:kcalt2"``;
        * ``guide`` is ``"guide:view"`` or ``"guide:never"``.

    Returns
    -------
    numpy.ndarray or list
        The Kabirian coefficients as a NumPy array for ``"print:kc"``;
        otherwise a list with one translated value per data item.

    Raises
    ------
    ValueError
        If ``method`` or ``print_result`` is not one of the supported
        commands (previously this surfaced as an unrelated ``TypeError`` or
        ``UnboundLocalError``).

    Notes
    -----
    A line is always printed for the ``guide`` command (a blank line for
    ``"guide:never"``), exactly as before.
    """
    # Local import: only the "principal_value:mode" mirror needs it, and the
    # file-level imports do not provide it.
    import statistics

    # defining the variables of the instruction list
    data = instruction_list[0]
    method = instruction_list[1]
    print_result = instruction_list[2]
    guide = instruction_list[3]

    supported_prints = ("print:kc", "print:psim", "print:pdsim", "print:kcalt1", "print:kcalt2")

    def is_real_number(value):
        # True for plain and NumPy real scalars; strings and containers are rejected.
        return isinstance(value, (int, float, np.integer, np.floating))

    # ************** FUNDAMENTAL TOOLS USED BY THE MAIN DEFINITION ************** #
    # 'kc_isomorphic_optinalysis' computes isomorphic optinalysis and returns the
    # result as a Kabirian coefficient (i.e, kc).
    def kc_isomorphic_optinalysis(instruction_list):
        data_x = list(instruction_list[0])
        data_y = list(instruction_list[1])
        pairing = instruction_list[2]

        # optiscale is 0.01, 0.02, ... with one slot per item of both datasets
        # plus the central 0 that separates them.
        optiscale = [p/100 for p in range(1, (2*len(data_x) + 2))]
        mid_optiscale = (optiscale[0]*len(data_x)) + optiscale[0]

        if pairing == "pairing:H_H":
            isoreflective_list = data_x + [0] + (data_y[::-1])
        elif pairing == "pairing:T_T":
            isoreflective_list = (data_x[::-1]) + [0] + data_y
        else:
            raise ValueError('please, use "pairing:H_H", or "pairing:T_T" to command Head-to-head, or Tail-to-tail pairing respectivelly')

        sum_of_scalements = np.dot(isoreflective_list, optiscale)
        return (mid_optiscale*sum(isoreflective_list)) / sum_of_scalements

    # 'kc_mirroring' is an optinalytic tool that compares a dataset with its established mirror.
    def kc_mirroring(instruction_list):
        data_x = instruction_list[0]
        mirror_principal_value = instruction_list[1]
        centering = instruction_list[2]
        ordering = instruction_list[3]
        pairing = instruction_list[4]

        # centering of data of the variable
        if centering == "centering:allow":
            data_x_centered = list(abs(np.array(data_x) - np.mean(data_x)))
        elif centering == "centering:never":
            data_x_centered = data_x
        else:
            raise ValueError('please, use "centering:allow" or "centering:never" to command centering')

        # ordering of data of the variable
        if ordering == "ordering:ascend":
            data_1 = sorted(data_x_centered)
        elif ordering == "ordering:descend":
            data_1 = sorted(data_x_centered)[::-1]
        elif ordering == "ordering:never":
            data_1 = data_x_centered
        else:
            raise ValueError('please, use "ordering:ascend", "ordering:descend" or "ordering:never" to command ordering')

        # establishing the mirror: one principal value repeated for every item
        if isinstance(mirror_principal_value, str):
            if mirror_principal_value == "principal_value:mean":
                principal = np.mean(data_1)
            elif mirror_principal_value == "principal_value:median":
                principal = np.median(data_1)
            elif mirror_principal_value == "principal_value:mode":
                principal = statistics.mode(data_1)
            elif mirror_principal_value == "principal_value:max":
                principal = np.max(data_1)
            elif mirror_principal_value == "principal_value:min":
                principal = np.min(data_1)
            elif mirror_principal_value == "principal_value:range":
                principal = np.max(data_1) - np.min(data_1)
            else:
                raise ValueError('please, type any of "principal_value:mean", "principal_value:median", "principal_value:mode", "principal_value:min", "principal_value:max", "principal_value:range", or type the refrence numerical value (e.g, 0.95, 12, etc) of your choice as the principal value for mirror establishment')
        elif is_real_number(mirror_principal_value):
            principal = mirror_principal_value
        else:
            raise ValueError('please, type any of "principal_value:mean", "principal_value:median", "principal_value:mode", "principal_value:min", "principal_value:max", "principal_value:range", or type the refrence numerical value (e.g, 0.95, 12, etc) of your choice as the principal value for mirror establishment')
        data_2 = [principal]*len(data_x)

        # optinalyzing the centered and/or ordered data against its mirror
        return kc_isomorphic_optinalysis([data_1, data_2, pairing])

    # 'scaler_psim' translates Kabirian coefficients (kc) to percentage similarity (psim).
    def scaler_psim(kcs):
        res = []
        for kc in kcs:
            if 0 <= kc <= 1:
                res.append((2-3*kc) / (kc-2))
            else:
                res.append((2-kc) / ((3*kc-2)))
        return res

    # 'scaler_pdsim' translates percentage similarity (psim) to percentage dissimilarity (pdsim).
    def scaler_pdsim(psims):
        res = []
        for psim in psims:
            if 0 <= psim <= 1:
                res.append(1 - psim)
            else:
                res.append(-1 - psim)
        return res

    # 'scaler_kc_alt1' translates psim back to the first of its two possible Kabirian coefficients.
    def scaler_kc_alt1(psims):
        return [((1+1) * (psim+1)) / (psim + ((2*1)+1)) for psim in psims]

    # 'scaler_kc_alt2' translates psim back to the second of its two possible Kabirian coefficients.
    def scaler_kc_alt2(psims):
        return [((1+1) * (psim+1)) / (((2*1)+1) * psim+1) for psim in psims]

    # 'absmndiff' subtracts the mean from every data point and returns the absolute values.
    def absmndiff(data):
        return abs(np.array(data) - np.mean(data))

    # 'mndiff' subtracts the mean from every data point and returns the signed values.
    def mndiff(data):
        return np.array(data) - np.mean(data)

    # ************* MAIN DEFINITION ************* #
    def kc_scaling(data, mirror):
        return ((np.array(data) + mirror)*2) / (np.array(data) + 3*mirror)

    # Reject unsupported output commands up front instead of failing at `return`.
    if print_result not in supported_prints:
        raise ValueError(
            'unsupported print_result command %r; use "print:kc", "print:psim", "print:pdsim", "print:kcalt1", or "print:kcalt2"'
            % (print_result,)
        )

    # Select the values to be transformed and the mirror they are scaled against.
    # The SMM mirrors are only computed when their method is actually requested.
    if isinstance(method, str):
        if method == "method:std":
            values, mirror = mndiff(data), np.std(data)
        elif method == "method:min_max":
            values, mirror = mndiff(data), np.max(data) - np.min(data)
        elif method == "method:maxbyabsmndiff":
            values, mirror = mndiff(data), np.max(absmndiff(data))
        elif method == "method:mnbyabsmndiff":
            values, mirror = mndiff(data), np.mean(absmndiff(data))
        elif method == "method:max":
            values, mirror = data, np.max(data)
        elif method == "method:scalocSMM":
            values = mndiff(data)
            mirror = kc_mirroring([data, "principal_value:mean", "centering:allow", "ordering:ascend", "pairing:H_H"])
        elif method == "method:scaleSMM":
            values = mndiff(data)
            mirror = kc_mirroring([data, "principal_value:mean", "centering:never", "ordering:ascend", "pairing:H_H"])
        else:
            raise ValueError(
                'unsupported method command %r; use "method:std", "method:min_max", "method:maxbyabsmndiff", "method:mnbyabsmndiff", "method:max", "method:scalocSMM", "method:scaleSMM", or a numerical value'
                % (method,)
            )
    elif is_real_number(method):
        # A numerical method is used directly as the mirror on the raw data.
        values, mirror = data, method
    else:
        raise ValueError(
            'unsupported method command %r; use "method:std", "method:min_max", "method:maxbyabsmndiff", "method:mnbyabsmndiff", "method:max", "method:scalocSMM", "method:scaleSMM", or a numerical value'
            % (method,)
        )

    # Every translation is derived from kc; only the requested one is produced.
    kc = kc_scaling(values, mirror)
    if print_result == "print:kc":
        outcome = kc
    else:
        psim = scaler_psim(kc)
        if print_result == "print:psim":
            outcome = psim
        elif print_result == "print:pdsim":
            outcome = scaler_pdsim(psim)
        elif print_result == "print:kcalt1":
            outcome = scaler_kc_alt1(psim)
        else:  # "print:kcalt2" (validated above)
            outcome = scaler_kc_alt2(psim)

    if guide == "guide:view":
        print('Please, IF you do not get it correct, the command input follows as this order: feature_transformation([dataset, "method", "print_result", "guide"]). Please, use "method:std", "method:min_max", "method:maxbyabsmndiff", "method:mnbyabsmndiff", "method:max", "method:scalocSMM", "method:scaleSMM" to command method; use "print:kc", "print:psim", "print:pdsim", "print:kcalt1", or "print:kcalt2" to command print_result, and use "guide:view" to see a guide. For example: feature_transformation([data, "method:max", "print:psim", "guide:view"])')
    elif guide == "guide:never":
        print("  ")
    else:
        print('please, see a guide by typing "guide:view" on the guide command')

    return outcome


# Examples

In [5]:
# Sample dataset used to demonstrate each built-in transformation method.
data = [12, -4, 6.12, 8, 4, 0.2, 4, 5, 6, 24, 12, -2, 0, -3, 4, -1.05]

# Run the methods in a fixed order, reporting the percentage similarity (psim)
# of every item for each one.
for method_name in (
    "std",
    "min_max",
    "maxbyabsmndiff",
    "mnbyabsmndiff",
    "scalocSMM",
    "scaleSMM",
    "max",
):
    transformed = feature_transformation(
        [data, "method:" + method_name, "print:psim", "guide:never"]
    )
    print("Transformed_data =", transformed)

  
Transformed_data = [0.9387553978463953, -0.7868235627960777, 0.20669657179381867, 0.4811969196771776, -0.10284637369167071, -0.6576875023920761, -0.10284637369167071, 0.04316444965054126, 0.18917527299275372, 0.35494094383639324, 0.9387553978463953, -0.9789113137449421, -0.6868896670605185, -0.8889504144610183, -0.10284637369167071, -0.8402010315698408]
  
Transformed_data = [0.26055803571428593, -0.3108705357142857, 0.05055803571428547, 0.11770089285714297, -0.025156249999999998, -0.16087053571428583, -0.025156249999999998, 0.01055803571428586, 0.04627232142857153, 0.6891294642857144, 0.26055803571428593, -0.23944196428571426, -0.16801339285714287, -0.27515625, -0.025156249999999998, -0.20551339285714282]
  
Transformed_data = [0.37809736663103694, -0.4511061445275809, 0.07336507628024456, 0.17079648884138246, -0.03650438894827181, -0.2334402228484435, -0.03650438894827181, 0.015320830499141654, 0.06714604994655532, 1.0, 0.37809736663103694, -0.3474557056327537, -0.2438052667379263