# Kabirian-based Optinalysis: Pairwise Similarity Estimator

In [7]:
'''                            
                                # USER GUIDE
                            #*******************#
# Introduction: 
    # A statistical pairwise comparison between two variables, under optinalysis, is their isoreflectivity 
        in a statistical order. Statistical pairwise comparison refers to the theoretical ordering, with or without 
        centering the data, and optinalysing the established isoreflective pair for the given variables. 
    # A geometrical pairwise comparison between two variables (two sequences), under optinalysis, is their isoreflectivity
        in a geometrical order. Geometrical pairwise comparison refers to the conceptual ordering, with or without 
        centering the data, and optinalysing the established isoreflective pair for the given variables. 

# Input guide: pairwise_similarity([data_x, data_y, centering, descaling, ordering, pairing, print_result]) 
    # Input options:  
        # for data: list of numerical values from a set of real numbers. 
        # for centering: "centering:allow", or "centering:never".
        # for descaling: "descaling:allow", or "descaling:never".
        # for ordering: "ordering:ascend", "ordering:descend", or "ordering:never". 
        # for pairing: "pairing:H_H", "pairing:T_T". 
        # for print_result: "print:kc", "print:psim", "print:pdsim", "print:kcalt1", "print:kcalt2", or "print:kcalt".
    # Note:
        # centering input determines whether the estimation is scale-invariant 
            (i.e., if the "centering:never" option is used) or scaloc-invariant (i.e., if the "centering:allow" option is used). 
        # descaling input makes the variables comparable even if they are on different scales 
            (i.e., if the "descaling:allow" option is used); otherwise the variables are not comparable when they are on different scales (i.e., if the "descaling:never" option is used). 
        # ordering input is what determines whether the estimation is statistical 
            (i.e, if "ordering:ascend", or "ordering:descend" option is used), or geometrical (i.e, if "ordering:never" option is used). 

# Example (of statistical pairwise_similarity) 1:
    # print("Kabirian coefficient =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:descend", "pairing:T_T", "print:kc"])) 
    # print("Probability of similarity =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:descend", "pairing:T_T", "print:psim"])) 
    # print("Probability of dissimilarity =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:descend", "pairing:T_T", "print:pdsim"])) 
    # print("Alt1. Kabirian coefficient =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:descend", "pairing:T_T", "print:kcalt1"]))  
    
    # print("Kabirian coefficient =", pairwise_similarity([data_x, data_y, "centering:allow", "descaling:allow", "ordering:descend", "pairing:H_H", "print:kc"])) 
    # print("Probability of similarity =", pairwise_similarity([data_x, data_y, "centering:allow", "descaling:allow", "ordering:descend", "pairing:H_H", "print:psim"]))
    # print("Probability of dissimilarity =", pairwise_similarity([data_x, data_y, "centering:allow", "descaling:allow", "ordering:descend", "pairing:H_H", "print:pdsim"])) 
    # print("Alt2. Kabirian coefficient =", pairwise_similarity([data_x, data_y, "centering:allow", "descaling:allow", "ordering:descend", "pairing:H_H", "print:kcalt2"])) 
    
    # print("Kabirian coefficient =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:ascend", "pairing:H_H", "print:kc"])) 
    # print("Kabirian coefficient =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:ascend", "pairing:H_H", "print:kc"])) 
    # print("Probability of similarity =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:ascend", "pairing:H_H", "print:psim"]))
    # print("Probability of dissimilarity =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:ascend", "pairing:H_H", "print:pdsim"])) 
    # print("Alt. Kabirian coefficient =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:ascend", "pairing:H_H", "print:kcalt"])) 
    
# Example (of geometrical pairwise_similarity) 2:
    # print("Kabirian coefficient =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:never", "pairing:T_T", "print:kc"])) 
    # print("Probability of similarity =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:never", "pairing:T_T", "print:psim"])) 
    # print("Probability of dissimilarity =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:never", "pairing:T_T", "print:pdsim"])) 
    # print("Alt. Kabirian coefficient =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:never", "pairing:T_T", "print:kcalt"]))  
    
    # print("Kabirian coefficient =", pairwise_similarity([data_x, data_y, "centering:allow", "descaling:allow", "ordering:never", "pairing:H_H", "print:kc"])) 
    # print("Probability of similarity =", pairwise_similarity([data_x, data_y, "centering:allow", "descaling:allow", "ordering:never", "pairing:H_H", "print:psim"]))
    # print("Probability of dissimilarity =", pairwise_similarity([data_x, data_y, "centering:allow", "descaling:allow", "ordering:never", "pairing:H_H", "print:pdsim"])) 
    # print("Alt2. Kabirian coefficient =", pairwise_similarity([data_x, data_y, "centering:allow", "descaling:allow", "ordering:never", "pairing:H_H", "print:kcalt2"])) 
    
    # print("Kabirian coefficient =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:never", "pairing:H_H", "print:kc"])) 
    # print("Probability of similarity =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:never", "pairing:H_H", "print:psim"]))
    # print("Probability of dissimilarity =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:never", "pairing:H_H", "print:pdsim"])) 
    # print("Alt1. Kabirian coefficient =", pairwise_similarity([data_x, data_y, "centering:never", "descaling:allow", "ordering:never", "pairing:H_H", "print:kcalt1"])) 

#******************************************************************************************************#
'''
import numpy
import numpy as np

def pairwise_similarity(instruction_list):
    """Estimate the Kabirian-based pairwise similarity of two numeric sequences.

    Parameters
    ----------
    instruction_list : sequence
        The seven settings, read by position:

        0. data_x -- sequence of real numbers (list, tuple or 1-D array).
        1. data_y -- sequence of real numbers with as many values as data_x.
        2. centering -- "centering:allow" replaces every value by its absolute
           deviation from the mean of its sequence; "centering:never" skips
           that step.
        3. descaling -- "descaling:allow" divides every value by the maximum
           of its sequence; "descaling:never" skips that step.
        4. ordering -- "ordering:ascend", "ordering:descend" or
           "ordering:never".
        5. pairing -- "pairing:H_H" (head-to-head) or "pairing:T_T"
           (tail-to-tail).
        6. print_result -- the estimate to return: "print:kc", "print:psim",
           "print:pdsim", "print:kcalt1", "print:kcalt2" or "print:kcalt".

    Returns
    -------
    float
        The requested estimate: the Kabirian coefficient (kc), the
        probability of similarity (psim), the probability of dissimilarity
        (pdsim), or one of the alternative Kabirian coefficients.

    Raises
    ------
    ValueError
        If an option string is not one of the values listed above, if either
        data sequence is empty, or if the two sequences differ in length.

    Notes
    -----
    Descaling divides by the sequence maximum, so a sequence whose maximum is
    zero produces non-finite values.
    """
    # Copy to plain lists so that "+" below always concatenates; with array
    # inputs it would add elementwise instead.
    data_x = list(instruction_list[0])
    data_y = list(instruction_list[1])
    centering = instruction_list[2]
    descaling = instruction_list[3]
    ordering = instruction_list[4]
    pairing = instruction_list[5]
    print_result = instruction_list[6]

    def require(option, allowed, message):
        # Reject an unknown option before any computation starts.
        if option not in allowed:
            raise ValueError(message)

    require(
        centering,
        ("centering:allow", "centering:never"),
        'please, use "centering:allow" or "centering:never" to command centering',
    )
    require(
        descaling,
        ("descaling:allow", "descaling:never"),
        'please, use "descaling:allow" or "descaling:never" to command descaling',
    )
    require(
        ordering,
        ("ordering:ascend", "ordering:descend", "ordering:never"),
        'please, use "ordering:ascend", "ordering:descend" or "ordering:never" to command ordering',
    )
    require(
        pairing,
        ("pairing:H_H", "pairing:T_T"),
        'please, use "pairing:H_H", or "pairing:T_T" to command Head-to-head, or Tail-to-tail pairing respectivelly',
    )
    require(
        print_result,
        ("print:kc", "print:psim", "print:pdsim", "print:kcalt1", "print:kcalt2", "print:kcalt"),
        'please, use "print:kc", "print:psim", "print:pdsim", "print:kcalt1", "print:kcalt2", or "print:kcalt" to command print_result',
    )

    if not data_x or not data_y:
        raise ValueError("data_x and data_y must each contain at least one value")
    if len(data_x) != len(data_y):
        raise ValueError("data_x and data_y must contain the same number of values")

    def prepare(values):
        # Apply the optional centering, descaling and ordering steps, in that order.
        if centering == "centering:allow":
            values = list(abs(np.array(values) - np.mean(values)))
        if descaling == "descaling:allow":
            values = list(np.array(values) / np.max(values))
        if ordering == "ordering:ascend":
            values = sorted(values)
        elif ordering == "ordering:descend":
            values = sorted(values)[::-1]
        return values

    prepared_x = prepare(data_x)
    prepared_y = prepare(data_y)

    # Build the isoreflective list: the two sequences joined around a central
    # zero, with one of them reversed according to the pairing mode.
    if pairing == "pairing:H_H":
        isoreflective_list = prepared_x + [0] + prepared_y[::-1]
    else:
        isoreflective_list = prepared_x[::-1] + [0] + prepared_y

    # Optiscale: 0.01, 0.02, ... with one weight per isoreflective element;
    # mid_optiscale is the weight that falls on the central zero.
    num_of_dimensions = len(data_x)
    optiscale = [p / 100 for p in range(1, 2 * num_of_dimensions + 2)]
    mid_optiscale = (optiscale[0] * num_of_dimensions) + optiscale[0]

    sum_of_scalements = np.dot(isoreflective_list, optiscale)
    kc = (mid_optiscale * sum(isoreflective_list)) / sum_of_scalements
    if print_result == "print:kc":
        return kc

    upper = num_of_dimensions + 1
    span = (2 * num_of_dimensions) + 1
    kc_in_unit_range = 0 <= kc <= 1

    # Translate kc to the probability of similarity; the formula depends on
    # whether kc lies inside [0, 1].
    if kc_in_unit_range:
        similarity = (upper - kc * span) / (kc - upper)
    else:
        similarity = (upper - kc) / (kc * span - upper)
    if print_result == "print:psim":
        return similarity

    if print_result == "print:pdsim":
        if 0 <= similarity <= 1:
            return 1 - similarity
        return -1 - similarity

    # Back-translations of psim: alt1 inverts the 0 <= kc <= 1 branch above
    # and alt2 inverts the other branch.
    alt1 = (upper * (similarity + 1)) / (similarity + span)
    alt2 = (upper * (similarity + 1)) / (span * similarity + 1)
    if print_result == "print:kcalt1":
        return alt1
    if print_result == "print:kcalt2":
        return alt2
    # "print:kcalt": the back-translation from the branch kc did not come from.
    return alt2 if kc_in_unit_range else alt1


# Examples

In [5]:
# Sample data: data_y is a rotation of data_x (the last seven values of
# data_x moved to the front).
data_x = [2, -4, 6.12, 8, 4, 0.2, 4, 5, 6, 24, 12, -2, 0, -3, 4, -1.05, 13.33]
data_y = [12, -2, 0, -3, 4, -1.05, 13.33, 2, -4, 6.12, 8, 4, 0.2, 4, 5, 6, 24]

# (label, print_result option) pairs shared by every configuration.
_KC = ("Kabirian coefficient =", "print:kc")
_PSIM = ("Probability of similarity =", "print:psim")
_PDSIM = ("Probability of dissimilarity =", "print:pdsim")

# Each section is (title, configurations); each configuration is
# (centering, ordering, pairing, reports). Descaling is always allowed.
_sections = [
    (
        "Example (of statistical pairwise_similarity) 1:",
        [
            ("centering:never", "ordering:descend", "pairing:T_T",
             [_KC, _PSIM, _PDSIM, ("Alt1. Kabirian coefficient =", "print:kcalt1")]),
            ("centering:allow", "ordering:descend", "pairing:H_H",
             [_KC, _PSIM, _PDSIM, ("Alt2. Kabirian coefficient =", "print:kcalt2")]),
            # The coefficient is reported twice for this configuration.
            ("centering:never", "ordering:ascend", "pairing:H_H",
             [_KC, _KC, _PSIM, _PDSIM, ("Alt. Kabirian coefficient =", "print:kcalt")]),
        ],
    ),
    (
        "Example (of geometrical pairwise_similarity) 2:",
        [
            ("centering:never", "ordering:never", "pairing:T_T",
             [_KC, _PSIM, _PDSIM, ("Alt. Kabirian coefficient =", "print:kcalt")]),
            ("centering:allow", "ordering:never", "pairing:H_H",
             [_KC, _PSIM, _PDSIM, ("Alt2. Kabirian coefficient =", "print:kcalt2")]),
            ("centering:never", "ordering:never", "pairing:H_H",
             [_KC, _PSIM, _PDSIM, ("Alt1. Kabirian coefficient =", "print:kcalt1")]),
        ],
    ),
]

for _index, (_title, _configurations) in enumerate(_sections):
    # A blank line separates the second section from the first.
    if _index:
        print()
    print(_title)
    for _centering, _ordering, _pairing, _reports in _configurations:
        for _label, _selector in _reports:
            _settings = [data_x, data_y, _centering, "descaling:allow", _ordering, _pairing, _selector]
            print(_label, pairwise_similarity(_settings))


Example (of statistical pairwise_similarity) 1:
Kabirian coefficient = 1.0000000000000002
Probability of similarity = 0.9999999999999996
Probability of dissimilarity = 4.440892098500626e-16
Alt1. Kabirian coefficient = 0.9999999999999998
Kabirian coefficient = 1.0000000000000002
Probability of similarity = 0.9999999999999996
Probability of dissimilarity = 4.440892098500626e-16
Alt2. Kabirian coefficient = 1.0000000000000002
Kabirian coefficient = 1.0000000000000002
Kabirian coefficient = 1.0000000000000002
Probability of similarity = 0.9999999999999996
Probability of dissimilarity = 4.440892098500626e-16
Alt. Kabirian coefficient = 0.9999999999999998

Example (of geometrical pairwise_similarity) 2:
Kabirian coefficient = 0.9482446615997105
Probability of similarity = 0.8907331154221676
Probability of dissimilarity = 0.1092668845778324
Alt. Kabirian coefficient = 1.0577311263625353
Kabirian coefficient = 0.9643352967341056
Probability of similarity = 0.9246328605348718
Probability of di