# Geometrical Pairwise Analysis of Biological Sequence Following Kabirian-based Optinalysis

In [1]:
'''                            
                                # USER GUIDE
                            #*******************#
# Introduction: 
    Geometrical pairwise analysis (GPA) of biological sequences following Kabirian-based optinalysis treats two biological 
    sequences (i.e., DNA, RNA, or amino acid sequences) as isoreflective, that is, as mirror-like reflections of each other 
    about a central point. It estimates the percentage similarity between the two sequences. 

# Input guide: pairwise_analysis([sequence_x, sequence_y, encoding_scheme, pairing, print_result]) 
    # Input options:  
        # for sequences: aligned strings of biological sequences (i.e., DNA, RNA, or protein). 
        # for encoding_scheme: "seq_type:DNA", "seq_type:RNA", or "seq_type:protein".
        # for pairing: "pairing:H_H", or "pairing:T_T". 
        # for print_result: "print:kc", "print:psim", "print:pdsim", "print:kcalt1", "print:kcalt2", "print:kcalt", or "print:all".
    # Note:
        The encoding_scheme input is what converts each biological sequence into a numerical representation using 
        a specific encoding scheme. This encoding captures the features of the sequences, such as base or 
        amino acid composition and the aligned gaps of the sequences. 
    
# Example 1: geometrical pairwise_analysis of DNA sequences 
    # print("Kabirian coefficient =", pairwise_analysis([sequence_1, sequence_2, "seq_type:DNA", "pairing:H_H", "print:kc"])) 
    # print("Probability of similarity =", pairwise_analysis([sequence_1, sequence_2, "seq_type:DNA", "pairing:H_H", "print:psim"])) 
    # print("Probability of dissimilarity =", pairwise_analysis([sequence_1, sequence_2, "seq_type:DNA", "pairing:H_H", "print:pdsim"])) 
    # print("Alt1. Kabirian coefficient =", pairwise_analysis([sequence_1, sequence_2, "seq_type:DNA", "pairing:H_H", "print:kcalt1"])) 
    # print("Alt2. Kabirian coefficient =", pairwise_analysis([sequence_1, sequence_2, "seq_type:DNA", "pairing:H_H", "print:kcalt2"])) 
    # print("Alt. Kabirian coefficient =", pairwise_analysis([sequence_1, sequence_2, "seq_type:DNA", "pairing:H_H", "print:kcalt"])) 
    # print("All the estimates =", pairwise_analysis([sequence_1, sequence_2, "seq_type:DNA", "pairing:H_H", "print:all"])) 

# Example 2: geometrical pairwise_analysis of RNA sequences 
    # print("Kabirian coefficient =", pairwise_analysis([sequence_4, sequence_3, "seq_type:RNA", "pairing:H_H", "print:kc"])) 
    # print("Probability of similarity =", pairwise_analysis([sequence_4, sequence_3, "seq_type:RNA", "pairing:H_H", "print:psim"])) 
    # print("Probability of dissimilarity =", pairwise_analysis([sequence_4, sequence_3, "seq_type:RNA", "pairing:H_H", "print:pdsim"])) 
    # print("Alt1. Kabirian coefficient =", pairwise_analysis([sequence_4, sequence_3, "seq_type:RNA", "pairing:H_H", "print:kcalt1"])) 
    # print("Alt2. Kabirian coefficient =", pairwise_analysis([sequence_4, sequence_3, "seq_type:RNA", "pairing:H_H", "print:kcalt2"])) 
    # print("Alt. Kabirian coefficient =", pairwise_analysis([sequence_4, sequence_3, "seq_type:RNA", "pairing:H_H", "print:kcalt"])) 
    # print("All the estimates =", pairwise_analysis([sequence_4, sequence_3, "seq_type:RNA", "pairing:H_H", "print:all"])) 

# Example 3: geometrical pairwise_analysis of protein sequences 
    # print("Kabirian coefficient =", pairwise_analysis([sequence_5, sequence_6, "seq_type:protein", "pairing:H_H", "print:kc"])) 
    # print("Probability of similarity =", pairwise_analysis([sequence_5, sequence_6, "seq_type:protein", "pairing:H_H", "print:psim"])) 
    # print("Probability of dissimilarity =", pairwise_analysis([sequence_5, sequence_6, "seq_type:protein", "pairing:H_H", "print:pdsim"])) 
    # print("Alt1. Kabirian coefficient =", pairwise_analysis([sequence_5, sequence_6, "seq_type:protein", "pairing:H_H", "print:kcalt1"])) 
    # print("Alt2. Kabirian coefficient =", pairwise_analysis([sequence_5, sequence_6, "seq_type:protein", "pairing:H_H", "print:kcalt2"])) 
    # print("Alt. Kabirian coefficient =", pairwise_analysis([sequence_5, sequence_6, "seq_type:protein", "pairing:H_H", "print:kcalt"])) 
    # print("All the estimates =", pairwise_analysis([sequence_5, sequence_6, "seq_type:protein", "pairing:H_H", "print:all"])) 

#******************************************************************************************************#
'''
# Importing necessary libraries
import numpy
import numpy as np

# Function for performing isomorphic optinalysis
def pairwise_analysis(instruction_list):
    """Run geometrical pairwise analysis on two aligned biological sequences.

    Both sequences are converted to lists of integer codes, joined into one
    isoreflective list around a central 0, and summarised by the Kabirian
    coefficient (kc). The remaining estimates are derived from kc.

    Parameters
    ----------
    instruction_list : sequence
        Positional instructions, read by index:
        [0] sequence_x      -- first aligned sequence (string).
        [1] sequence_y      -- second aligned sequence, same length as [0].
        [2] encoding_scheme -- "seq_type:DNA", "seq_type:RNA" or
                               "seq_type:protein" (matched case-insensitively).
        [3] pairing         -- "pairing:H_H" or "pairing:T_T".
        [4] print_result    -- "print:kc", "print:psim", "print:pdsim",
                               "print:kcalt1", "print:kcalt2", "print:kcalt"
                               or "print:all".

    Returns
    -------
    The selected estimate; for "print:all" a dict with the keys "kc", "psim",
    "pdsim", "kc_alt1" and "kc_alt2" (kc_alt is not part of that dict). An
    unrecognised print_result returns an explanatory message string instead
    of a number.

    Raises
    ------
    ValueError
        If encoding_scheme or pairing is not one of the supported selectors,
        or if the two sequences differ in length.
    """
    # Extracting data from the instruction_list
    sequence_x = instruction_list[0]
    sequence_y = instruction_list[1]
    encoding_scheme = instruction_list[2]
    pairing = instruction_list[3]
    print_result = instruction_list[4]

    # Validate the selectors up front so that a bad command fails with a clear
    # error instead of an unbound-variable crash further down.
    if isinstance(encoding_scheme, str):
        code_table = _ENCODING_TABLES.get(encoding_scheme.lower())
    else:
        code_table = None
    if code_table is None:
        raise ValueError('Invalid command. Please use "seq_type:DNA", "seq_type:RNA", or "seq_type:protein" to command encoding_scheme.')
    if pairing not in ("pairing:H_H", "pairing:T_T"):
        raise ValueError('Invalid command. Please, use "pairing:H_H", or "pairing:T_T" to command Head-to-head, or Tail-to-tail pairing respectively')
    if len(sequence_x) != len(sequence_y):
        raise ValueError(
            "sequence_x and sequence_y must be aligned to the same length "
            "(got {} and {}).".format(len(sequence_x), len(sequence_y))
        )

    # Encoding sequences based on the selected encoding scheme
    seq_1 = _encode_sequence(sequence_x, code_table)
    seq_2 = _encode_sequence(sequence_y, code_table)

    # Perform isomorphic optinalysis
    kc_result = _kabirian_coefficient(seq_1, seq_2, pairing)
    num_of_dimensions = len(sequence_x)

    # Calculate all other estimates
    psim_result = _psim_from_kc(kc_result, num_of_dimensions)
    pdsim_result = _pdsim_from_psim(psim_result)
    kc_alt1_result = _kc_alt1(psim_result, num_of_dimensions)
    kc_alt2_result = _kc_alt2(psim_result, num_of_dimensions)
    kc_alt_result = _kc_alt(kc_result, psim_result, num_of_dimensions)

    all_estimates = {
        "kc": kc_result,
        "psim": psim_result,
        "pdsim": pdsim_result,
        "kc_alt1": kc_alt1_result,
        "kc_alt2": kc_alt2_result
    }

    # Select the result to be returned based on the print_result input
    selectable = {
        "print:kc": kc_result,
        "print:psim": psim_result,
        "print:pdsim": pdsim_result,
        "print:kcalt1": kc_alt1_result,
        "print:kcalt2": kc_alt2_result,
        "print:kcalt": kc_alt_result,
        "print:all": all_estimates,
    }
    try:
        return selectable[print_result]
    except (KeyError, TypeError):
        # Unknown selectors keep returning the message rather than raising.
        return 'Invalid command. Please use "print:kc", "print:psim", "print:pdsim", "print:kcalt1", "print:kcalt2", "print:kcalt", or "print:all" to command print_result'


def _both_cases(upper_case_codes):
    """Return a code table that accepts upper- and lower-case letters alike."""
    table = dict(upper_case_codes)
    table.update({letter.lower(): code for letter, code in upper_case_codes.items()})
    return table


# Integer code assigned to each residue; '-' (alignment gap) is 0.
_DNA_CODES = _both_cases({'C': 111, 'T': 126, 'A': 135, 'G': 151, '-': 0})
_RNA_CODES = _both_cases({'C': 111, 'U': 112, 'A': 135, 'G': 151, '-': 0})
_PROTEIN_CODES = _both_cases({
    'G': 75, 'A': 89, 'S': 105, 'P': 115, 'V': 117, 'T': 119, 'C': 121,
    'I': 131, 'L': 131, 'N': 132, 'D': 133, 'Q': 146, 'K': 146, 'E': 147,
    'M': 149, 'H': 155, 'F': 165, 'R': 174, 'Y': 181, 'W': 204, '-': 0,
})

# Encoding tables keyed by the lower-cased encoding_scheme selector.
_ENCODING_TABLES = {
    "seq_type:dna": _DNA_CODES,
    "seq_type:rna": _RNA_CODES,
    "seq_type:protein": _PROTEIN_CODES,
}


def _encode_sequence(seq, code_table):
    """Map each character of seq to its code; unknown characters become 0."""
    return [code_table.get(symbol, 0) for symbol in seq]


def _kabirian_coefficient(data_x, data_y, pairing):
    """Compute the Kabirian coefficient (kc) of two equal-length code lists."""
    # Optiscale values 0.01, 0.02, ... with one entry per element of the
    # isoreflective list (2 * len(data_x) + 1 entries).
    optiscale = [p / 100 for p in range(1, (2 * len(data_x) + 2))]

    # Mid-point of the optiscale, i.e. the scale value at the central 0.
    mid_optiscale = (optiscale[0] * len(data_x)) + optiscale[0]

    # Join the two lists around a central 0, reversing one of them.
    if pairing == "pairing:H_H":
        isoreflective_list = data_x + [0] + (data_y[::-1])
    else:  # "pairing:T_T" (validated by the caller)
        isoreflective_list = (data_x[::-1]) + [0] + data_y

    sum_of_scalements = np.dot(isoreflective_list, optiscale)
    return (mid_optiscale * sum(isoreflective_list)) / sum_of_scalements


def _psim_from_kc(kc, num_of_dimensions):
    """Translate a Kabirian coefficient into the similarity estimate (psim)."""
    if 0 <= kc <= 1:
        return ((num_of_dimensions + 1) - kc * ((2 * num_of_dimensions) + 1)) / (kc - (num_of_dimensions + 1))
    return ((num_of_dimensions + 1) - kc) / (kc * ((2 * num_of_dimensions) + 1) - (num_of_dimensions + 1))


def _pdsim_from_psim(psim):
    """Translate the similarity estimate into the dissimilarity estimate."""
    if 0 <= psim <= 1:
        return 1 - psim
    return -1 - psim


def _kc_alt1(psim, num_of_dimensions):
    """Translate psim into the first alternative Kabirian coefficient."""
    return ((num_of_dimensions + 1) * (psim + 1)) / (psim + ((2 * num_of_dimensions) + 1))


def _kc_alt2(psim, num_of_dimensions):
    """Translate psim into the second alternative Kabirian coefficient."""
    return ((num_of_dimensions + 1) * (psim + 1)) / (((2 * num_of_dimensions) + 1) * psim + 1)


def _kc_alt(kc, psim, num_of_dimensions):
    """Return kc_alt2 when kc lies in [0, 1], otherwise kc_alt1."""
    if 0 <= kc <= 1:
        return _kc_alt2(psim, num_of_dimensions)
    return _kc_alt1(psim, num_of_dimensions)


# Practical examples

In [31]:

# Aligned pair of DNA sequences
sequence_1 = 'AGTAAGCAAACCGAACAGGTGTTCCTAGCATAATACACG------'
sequence_2 = 'AGTAAGCAAAAGGTGCAGGTGTTCC---GAAAATACACGTAGTGC'

# Aligned pair of RNA sequences (the DNA pair above with T written as U)
sequence_3 = 'AGUAAGCAAACCGAACAGGUGUUCCUAGCAUAAUACACG------'
sequence_4 = 'AGUAAGCAAAAGGUGCAGGUGUUCC---GAAAAUACACGUAGUGC'

# Aligned pair of protein sequences
sequence_5 = 'GDQ-----SHFRYWKEMNDQSHFRHFRYKEMWILNDQK-----MEDNDQE'
sequence_6 = 'GDQFRYNYMHFRY----NDQKEM-YWKEKEMWILNDQKDNDQEMEDNDQE'

# Label printed in front of each estimate, paired with its print_result selector.
report_rows = (
    ("Kabirian coefficient =", "print:kc"),
    ("Probability of similarity =", "print:psim"),
    ("Probability of dissimilarity =", "print:pdsim"),
    ("Alt1. Kabirian coefficient =", "print:kcalt1"),
    ("Alt2. Kabirian coefficient =", "print:kcalt2"),
    ("Alt. Kabirian coefficient =", "print:kcalt"),
    ("All the estimates =", "print:all"),
)

# One entry per example: heading, first sequence, second sequence, encoding scheme.
example_cases = (
    ("Example 1: geometrical pairwise_analysis of DNA sequences", sequence_1, sequence_2, "seq_type:DNA"),
    ("Example 2: geometrical pairwise_analysis of RNA sequences", sequence_4, sequence_3, "seq_type:RNA"),
    ("Example 3: geometrical pairwise_analysis of protein sequences", sequence_5, sequence_6, "seq_type:protein"),
)

for case_number, (heading, first_seq, second_seq, scheme) in enumerate(example_cases):
    # A separator line goes between consecutive examples only.
    if case_number > 0:
        print("******************************************************************")
    print(heading)
    for label, selector in report_rows:
        print(label, pairwise_analysis([first_seq, second_seq, scheme, "pairing:H_H", selector]))


Example 1: geometrical pairwise_analysis of DNA sequences
Kabirian coefficient = 1.0035643017098081
Probability of similarity = 0.9927651307349952
Probability of dissimilarity = 0.0072348692650048285
Alt1. Kabirian coefficient = 0.9964609269385202
Alt2. Kabirian coefficient = 1.003564301709808
Alt. Kabirian coefficient = 0.9964609269385202
All the estimates = {'kc': 1.0035643017098081, 'psim': 0.9927651307349952, 'pdsim': 0.0072348692650048285, 'kc_alt1': 0.9964609269385202, 'kc_alt2': 1.003564301709808}
******************************************************************
Example 2: geometrical pairwise_analysis of RNA sequences
Kabirian coefficient = 0.9962183715542534
Probability of similarity = 0.9922693203899832
Probability of dissimilarity = 0.007730679610016833
Alt1. Kabirian coefficient = 0.9962183715542537
Alt2. Kabirian coefficient = 1.0038104478416445
Alt. Kabirian coefficient = 1.0038104478416445
All the estimates = {'kc': 0.9962183715542534, 'psim': 0.9922693203899832, 'pdsim

# Well-structured Datasets (Sequences) Used in the Article Publication

In [32]:
# Well-structured DNA sequences: S0 is the reference, S1..S60 are variants of it.

# Reference sequence
S0 = 'CTAGCTAGCTAG'
# S1..S12: a single substituted base, moving from position 1 to position 12
S1 = 'GTAGCTAGCTAG'
S2 = 'CAAGCTAGCTAG'
S3 = 'CTTGCTAGCTAG'
S4 = 'CTACCTAGCTAG'
S5 = 'CTAGGTAGCTAG'
S6 = 'CTAGCAAGCTAG'
S7 = 'CTAGCTTGCTAG'
S8 = 'CTAGCTACCTAG'
S9 = 'CTAGCTAGGTAG'
S10 = 'CTAGCTAGCAAG'
S11 = 'CTAGCTAGCTTG'
S12 = 'CTAGCTAGCTAC'
# S13..S15: position 1 replaced by each other base (S13 is the same string as S1)
S13 = 'GTAGCTAGCTAG'
S14 = 'ATAGCTAGCTAG'
S15 = 'TTAGCTAGCTAG'
# S16..S18: position 6 replaced by each other base (S17 is the same string as S6)
S16 = 'CTAGCGAGCTAG'
S17 = 'CTAGCAAGCTAG'
S18 = 'CTAGCCAGCTAG'
# S19..S21: position 11 replaced by each other base (S20 is the same string as S11)
S19 = 'CTAGCTAGCTGG'
S20 = 'CTAGCTAGCTTG'
S21 = 'CTAGCTAGCTCG'
# S22..S33: a single gap, moving from position 1 to position 12
S22 = '-TAGCTAGCTAG'
S23 = 'C-AGCTAGCTAG'
S24 = 'CT-GCTAGCTAG'
S25 = 'CTA-CTAGCTAG'
S26 = 'CTAG-TAGCTAG'
S27 = 'CTAGC-AGCTAG'
S28 = 'CTAGCT-GCTAG'
S29 = 'CTAGCTA-CTAG'
S30 = 'CTAGCTAG-TAG'
S31 = 'CTAGCTAGC-AG'
S32 = 'CTAGCTAGCT-G'
S33 = 'CTAGCTAGCTA-'
# S34..S40: a growing run of trailing gaps (2 up to 8)
S34 = 'CTAGCTAGCT--'
S35 = 'CTAGCTAGC---'
S36 = 'CTAGCTAG----'
S37 = 'CTAGCTA-----'
S38 = 'CTAGCT------'
S39 = 'CTAGC-------'
S40 = 'CTAG--------'
# S41..S47: a shrinking run of leading gaps (8 down to 2)
S41 = '--------CTAG'
S42 = '-------GCTAG'
S43 = '------AGCTAG'
S44 = '-----TAGCTAG'
S45 = '----CTAGCTAG'
S46 = '---GCTAGCTAG'
S47 = '--AGCTAGCTAG'
# S48..S60: assorted variants combining substitutions and gaps
S48 = 'GTA-CTGCCT-G'
S49 = 'C-GG-TAGATAT'
S50 = '-TAGGTAACCA-'
S51 = 'C--GCTAAGTAA'
S52 = 'TCAGCGAGCT--'
S53 = 'CCAGCGC--TAG'
S54 = 'CCAGCCG--TAG'
S55 = 'CAAGCCG--TAG'
S56 = 'CGAGCCG--TAG'
S57 = 'C--ACTAGGGAG'
S58 = 'C--ACTAGCGAT'
S59 = 'C--TCTAGCATG'
S60 = 'C--CCTAGCAGT'


In [33]:
import pandas as pd

# Analysis (optinalysis) of the well-structured DNA sequences: every variant
# S1..S60 is compared against the reference S0 with head-to-head pairing.
lab = "S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39, S40, S41, S42, S43, S44, S45, S46, S47, S48, S49, S50, S51, S52, S53, S54, S55, S56, S57, S58, S59, S60"
DNA_sequences = [S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39, S40, S41, S42, S43, S44, S45, S46, S47, S48, S49, S50, S51, S52, S53, S54, S55, S56, S57, S58, S59, S60]

# A single "print:all" call per sequence returns every estimate needed below,
# so the analysis is not re-run once per output column.
DNA_estimates = [pairwise_analysis([S0, seq, "seq_type:DNA", "pairing:H_H", "print:all"]) for seq in DNA_sequences]
kc_results = [estimate["kc"] for estimate in DNA_estimates]
psim_results = [estimate["psim"] for estimate in DNA_estimates]
pdsim_results = [estimate["pdsim"] for estimate in DNA_estimates]
kcalt1_results = [estimate["kc_alt1"] for estimate in DNA_estimates]
kcalt2_results = [estimate["kc_alt2"] for estimate in DNA_estimates]

# Tabulate one row per variant sequence.
seq_ID = ['S{}'.format(i) for i in range(1, len(DNA_sequences) + 1)]
DNA_seq = [str(i) for i in DNA_sequences]
df = pd.DataFrame({"SEQ ID":seq_ID, "SEQ":DNA_seq, "KC":kc_results, "PSIM":psim_results, "PDSIM":pdsim_results, "KCALT1":kcalt1_results, "KCALT2":kcalt2_results})
print(df)

   SEQ ID           SEQ        KC      PSIM     PDSIM    KCALT1    KCALT2
0      S1  GTAGCTAGCTAG  0.988515  0.975140  0.024860  0.988515  1.011755
1      S2  CAAGCTAGCTAG  0.997586  0.994771  0.005229  0.997586  1.002426
2      S3  CTTGCTAGCTAG  1.002217  0.995218  0.004782  0.997792  1.002217
3      S4  CTACCTAGCTAG  1.009019  0.980818  0.019182  0.991140  1.009019
4      S5  CTAGGTAGCTAG  0.992314  0.983358  0.016642  0.992314  1.007806
5      S6  CTAGCAAGCTAG  0.998462  0.996669  0.003331  0.998462  1.001542
6      S7  CTAGCTTGCTAG  1.001329  0.997128  0.002872  0.998674  1.001329
7      S8  CTAGCTACCTAG  1.004991  0.989298  0.010702  0.995059  1.004991
8      S9  CTAGCTAGGTAG  0.996142  0.991644  0.008356  0.996142  1.003888
9     S10  CTAGCTAGCAAG  0.999340  0.998571  0.001429  0.999340  1.000660
10    S11  CTAGCTAGCTTG  1.000443  0.999042  0.000958  0.999558  1.000443
11    S12  CTAGCTAGCTAC  1.000994  0.997850  0.002150  0.999008  1.000994
12    S13  GTAGCTAGCTAG  0.988515  0.9

In [34]:
# well-structured protein sequences

# NOTE: these assignments rebind the names S0..S60 used for the DNA dataset.
# Reference sequence
S0 = 'GASPCLDQMFRY'
# S1..S12: a single residue replaced by W, moving from position 1 to position 12
S1 = 'WASPCLDQMFRY'
S2 = 'GWSPCLDQMFRY'
S3 = 'GAWPCLDQMFRY'
S4 = 'GASWCLDQMFRY'
S5 = 'GASPWLDQMFRY'
S6 = 'GASPCWDQMFRY'
S7 = 'GASPCLWQMFRY'
S8 = 'GASPCLDWMFRY'
S9 = 'GASPCLDQWFRY'
S10 = 'GASPCLDQMWRY'
S11 = 'GASPCLDQMFWY'
S12 = 'GASPCLDQMFRW'
# S13..S15: position 1 replaced by A, S and P
S13 = 'AASPCLDQMFRY'
S14 = 'SASPCLDQMFRY'
S15 = 'PASPCLDQMFRY'
# S16..S18: position 6 replaced by A, S and P
S16 = 'GASPCADQMFRY'
S17 = 'GASPCSDQMFRY'
S18 = 'GASPCPDQMFRY'
# S19..S21: position 11 replaced by A, S and P
S19 = 'GASPCLDQMFAY'
S20 = 'GASPCLDQMFSY'
S21 = 'GASPCLDQMFPY'
# S22..S33: a single gap, moving from position 1 to position 12
S22 = '-ASPCLDQMFRY'
S23 = 'G-SPCLDQMFRY'
S24 = 'GA-PCLDQMFRY'
S25 = 'GAS-CLDQMFRY'
S26 = 'GASP-LDQMFRY'
S27 = 'GASPC-DQMFRY'
S28 = 'GASPCL-QMFRY'
S29 = 'GASPCLD-MFRY'
S30 = 'GASPCLDQ-FRY'
S31 = 'GASPCLDQM-RY'
S32 = 'GASPCLDQMF-Y'
S33 = 'GASPCLDQMFR-'
# S34..S40: a growing run of trailing gaps (2 up to 8)
S34 = 'GASPCLDQMF--'
S35 = 'GASPCLDQM---'
S36 = 'GASPCLDQ----'
S37 = 'GASPCLD-----'
S38 = 'GASPCL------'
S39 = 'GASPC-------'
S40 = 'GASP--------'
# S41..S47: a shrinking run of leading gaps (8 down to 2)
S41 = '--------MFRY'
S42 = '-------QMFRY'
S43 = '------DQMFRY'
S44 = '-----LDQMFRY'
S45 = '----CLDQMFRY'
S46 = '---PCLDQMFRY'
S47 = '--SPCLDQMFRY'
# S48..S60: assorted variants combining substitutions and gaps
S48 = 'YAS-CLWSMF-Y'
S49 = 'G-WP-LDQGFRW'
S50 = '-ASPWLDAMAR-'
S51 = 'G--PCLDPAFRS'
S52 = 'WASPCYDQMF--'
S53 = 'GYSPCDL--FRY'
S54 = 'GYSPCLD--FRY'
S55 = 'GRSPCLD--FRY'
S56 = 'GFSPCLD--FRY'
S57 = '--WPCLDQPPRY'
S58 = '--FPCLDQMYRF'
S59 = '--RPCLDQMRFY'
S60 = '--YPCLDQMRYF'


In [35]:
import pandas as pd

# Analysis (optinalysis) of the well-structured protein sequences: every
# variant S1..S60 is compared against the reference S0 with head-to-head pairing.
protein_sequences = [S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, S32, S33, S34, S35, S36, S37, S38, S39, S40, S41, S42, S43, S44, S45, S46, S47, S48, S49, S50, S51, S52, S53, S54, S55, S56, S57, S58, S59, S60]

# A single "print:all" call per sequence returns every estimate needed below,
# so the analysis is not re-run once per output column.
protein_estimates = [pairwise_analysis([S0, seq, "seq_type:protein", "pairing:H_H", "print:all"]) for seq in protein_sequences]
kc_results = [estimate["kc"] for estimate in protein_estimates]
psim_results = [estimate["psim"] for estimate in protein_estimates]
pdsim_results = [estimate["pdsim"] for estimate in protein_estimates]
kcalt1_results = [estimate["kc_alt1"] for estimate in protein_estimates]
kcalt2_results = [estimate["kc_alt2"] for estimate in protein_estimates]

# Tabulate one row per variant sequence.
seq_ID = ['S{}'.format(i) for i in range(1, len(protein_sequences) + 1)]
protein_seq = [str(i) for i in protein_sequences]
df = pd.DataFrame({"SEQ ID":seq_ID, "SEQ":protein_seq, "KC":kc_results, "PSIM":psim_results, "PDSIM":pdsim_results, "KCALT1":kcalt1_results, "KCALT2":kcalt2_results})
print(df)

   SEQ ID           SEQ        KC      PSIM     PDSIM    KCALT1    KCALT2
0      S1  WASPCLDQMFRY  0.965142  0.924694  0.075306  0.965142  1.037470
1      S2  GWSPCLDQMFRY  0.971213  0.937778  0.062222  0.971213  1.030545
2      S3  GAWPCLDQMFRY  0.977221  0.950739  0.049261  0.977221  1.023866
3      S4  GASWCLDQMFRY  0.981433  0.959835  0.040165  0.981433  1.019283
4      S5  GASPWLDQMFRY  0.984532  0.966529  0.033471  0.984532  1.015962
5      S6  GASPCWDQMFRY  0.988017  0.974063  0.025937  0.988017  1.012277
6      S7  GASPCLWQMFRY  0.989984  0.978317  0.021683  0.989984  1.010220
7      S8  GASPCLDWMFRY  0.993133  0.985129  0.014871  0.993133  1.006963
8      S9  GASPCLDQWFRY  0.994777  0.988688  0.011312  0.994777  1.005278
9     S10  GASPCLDQMWRY  0.997201  0.993938  0.006062  0.997201  1.002814
10    S11  GASPCLDQMFWY  0.998559  0.996878  0.003122  0.998559  1.001445
11    S12  GASPCLDQMFRW  0.999446  0.998799  0.001201  0.999446  1.000555
12    S13  AASPCLDQMFRY  0.995955  0.9