## Function 1: AACodon
## Goal: Calculate the amino acids coded for by a set of three nucleotides

In [57]:
def AACodon(codon):
    """Determines the amino acid coded for by a string of three nucleotides (T/U, A, C, G).
    
    Input: A string comprised of T/U, A, C, G, and preferably of length 3 (other sizes will
            be accepted, but only the first three characters will be read). Strings smaller than
            3 characters or containing gap characters etc. will be disregarded.
            
    Output: The name of the corresponding amino acid in string format, such as “Glycene”
    """
    ## error handling/check that the input is valid
    if (isinstance(codon, str) == False):
        print("AACodon Error: Input was not a string.")
        return "Error"
    
    if (len(codon) < 3): ## need at least three characters to be a codon!
        print("AACodon Error: Input was less than three characters in length.")
        return "Error"
    
    ## Branch using first nucleotide/character in the codon, then second, then third
    
    if (codon[0] == 'T' or codon[0] == 'U'):
        
        if (codon[1] == 'T' or codon[1] == 'U'): ## TT or UU
            if (codon[2] == 'T' or codon[2] == 'U'):
                return "Phenylalanine"
            if (codon[2] == 'A'):
                return "Leucine"
            if (codon[2] == 'C'):
                return "Leucine"
            if (codon[2] == 'G'):
                return "Phenylalanine"
            
        if (codon[1] == 'A'): ## TA or UA
            if (codon[2] == 'T' or codon[2] == 'U'):
                return "Tyrosine"
            if (codon[2] == 'A'):
                return "Translation STOP"
            if (codon[2] == 'C'):
                return "Tyrosine"
            if (codon[2] == 'G'):
                return "Translation STOP"
            
        if (codon[1] == 'C'): ## TC or UC
                return "Serine" ## Third nucleotide is irrelevant in this case!
        
        if (codon[1] == 'G'): ## TG or UG
            if (codon[2] == 'T' or codon[2] == 'U'):
                return "Cysteine"
            if (codon[2] == 'A'):
                return "Translation STOP"
            if (codon[2] == 'C'):
                return "Cysteine"
            if (codon[2] == 'G'):
                return "Tryptophan"
            
            
    if (codon[0] == 'A'):
        
        if (codon[1] == 'T' or codon[1] == 'U'): ## AT or AU
            if (codon[2] == 'T' or codon[2] == 'U'):
                return "Isoleucine"
            if (codon[2] == 'A'):
                return "Isoleucine"
            if (codon[2] == 'C'):
                return "Isoleucine"
            if (codon[2] == 'G'):
                return "Methionine"
            
        if (codon[1] == 'A'): ## AA
            if (codon[2] == 'T' or codon[2] == 'U'):
                return "Asparagine"
            if (codon[2] == 'A'):
                return "Lysine"
            if (codon[2] == 'C'):
                return "Asparagine"
            if (codon[2] == 'G'):
                return "Lysine"
        
        if (codon[1] == 'C'): ## AC
            return "Threonine" ## Third nucleotide is irrelevant in this case!
        
        if (codon[1] == 'G'): ## AG
            if (codon[2] == 'T' or codon[2] == 'U'):
                return "Serine"
            if (codon[2] == 'A'):
                return "Arginine"
            if (codon[2] == 'C'):
                return "Serine"
            if (codon[2] == 'G'):
                return "Arginine"
            
            
    if (codon[0] == 'C'):
        
        if (codon[1] == 'T' or codon[1] == 'U'): ## CT or CU
            return "Leucine" ## Third nucleotide is irrelevant in this case!
        
        if (codon[1] == 'A'): ## CA
            if (codon[2] == 'T' or codon[2] == 'U'):
                return "Histidine"
            if (codon[2] == 'A'):
                return "Glutamine"
            if (codon[2] == 'C'):
                return "Histidine"
            if (codon[2] == 'G'):
                return "Glutamine"
            
        if (codon[1] == 'C'): ## CC
            return "Proline" ## Third nucleotide is irrelevant in this case!
        
        if (codon[1] == 'G'): ## CG
            return "Arginine" ## Third nucleotide is irrelevant in this case!
        
        
    if (codon[0] == 'G'):
        
        if (codon[1] == 'T' or codon[1] == 'U'): ## GT or GU
            return "Valine" ## Third nucleotide is irrelevant in this case!
        
        if (codon[1] == 'A'): ## GA
            if (codon[2] == 'T' or codon[2] == 'U'):
                return "Aspartic acid"
            if (codon[2] == 'A'):
                return "Glutamic acid"
            if (codon[2] == 'C'):
                return "Aspartic acid"
            if (codon[2] == 'G'):
                return "Glutamic acid"
            
        if (codon[1] == 'C'): ## GC
            return "Alanine" ## Third nucleotide is irrelevant in this case!
        
        if (codon[1] == 'G'): ## GG
            return "Glycine" ## Third nucleotide is irrelevant in this case!
        
        
    ## if we get here, that means that something went wrong. 
    ## A gap character or some other irrelevant character could be present.
    print("AACodon Error: No amino acid could be determined.", "Input =", codon[:3])
    return "Error";

In [58]:
## test AACodon with a few different codons for accuracy
print(AACodon("TTA"))
print(AACodon("GCG"))
print(AACodon("GCT"))
print(AACodon("UAU"))
print(AACodon("CCC"))
print(AACodon("CUG"))
print()

## should produce errors
print(AACodon("TA"))
print(AACodon("T-A"))
print(AACodon("TTH"))

Leucine
Alanine
Alanine
Tyrosine
Proline
Leucine

AACodon Error: Input was less than three characters in length.
Error
AACodon Error: No amino acid could be determined. Input = T-A
Error
AACodon Error: No amino acid could be determined. Input = TTH
Error


## Function 2: CalcSubstitutions
## Goal: Calculate possible synonymous or nonsynonymous substitutions for a codon

In [59]:
def CalcSubstitutions(codon, printNonsynonymous=False):
    """Calculates the number of synonymous and nonsynonymous substitutions given a valid amino
        acid codon.
    
    Input: A string comprised of T/U, A, C, G, and preferably of length 3 (other sizes will
            be accepted, but only the first three characters will be read). Strings smaller than
            3 characters or containing gap characters etc. will be disregarded.
            
            Optional: A boolean value that determines whether nonsynonymous substitution data will be printed.
            Could be useful if you want to know exactly what the given codon could become after all possible
            point mutations.
            
    Output: Two integers, the first representing the count of possible synonymous
            substitutions for all codons in the sequence, and the second representing the
            count of nonsynonymous substitutions for all codons in the sequence.
    """
    ## error handling/check that the input is valid
    aminoAcid = AACodon(codon)
    if (aminoAcid == "Error"):
        print("CalcSubstitutions Error: Given string is not a valid codon")
        return -1, -1
    
    ## wherever relevant, check to see if the user wants additional data printed
    if (printNonsynonymous == True):
        print("Calculating Synonymous and Nonsynonymous Substitutions for:", aminoAcid, codon[0:3])
    
    ## build a basic list of nucleotides that can be used in the for loops below
    nucleotideList = ['T', 'A', 'C', 'G']
    if (codon.find('U') != -1): ## if dealing with RNA, account for this by replaing Thymine with Uracil
        nucleotideList[0] = 'U'
        
    ## temporary variable that will be modified and reset repeatedly to simulate point mutations
    mutation = codon
    
    ## two integer variables to track the number of synonymous and nonsynonymous mutations, will be returned later
    numSyn = 0
    numNon = 0
    
    ## A nested for loop is used to test all possible point mutations in all three places they could occur in.
    ## The outer for loop includes the 1st, 2nd, and 3rd positions in the codon.
    ## The inner for loop includes all possible substitutions: T/U, A, C, and G.
    for i in range(3):
        for nucleotide in nucleotideList:
            ## replace the nucleotide at index i with another one from the list
            mutation = mutation[:i] + nucleotide + mutation[i+1:]
            
            if (AACodon(mutation) == aminoAcid): ## check: has the amino acid changed?
                ## synonymous mutation? check to make sure a mutation is actually happening before it is counted
                if (codon[i] != nucleotide):
                    ## synonymous mutation
                    numSyn += 1
            else:
                ## nonsynonymous mutation
                numNon += 1
                if (printNonsynonymous == True):
                    print("Nonsynonymous Mutation found:", AACodon(mutation), "-", nucleotide, "substitution at position", i)
                    
            ## reset mutation variable to original contents
            mutation = codon
    
    return numSyn, numNon

In [60]:
## test CalcSubstitutions with a few select codons and extra output enabled/disabled
s, n = CalcSubstitutions("AAA")
print("Synonymous:", s)
print("Nonsynonymous", n)
print()

s, n = CalcSubstitutions("TCG")
print("Synonymous:", s)
print("Nonsynonymous", n)
print()

s, n = CalcSubstitutions("CUG")
print("Synonymous:", s)
print("Nonsynonymous", n)
print()

s, n = CalcSubstitutions("CUG", True)
print("Synonymous:", s)
print("Nonsynonymous", n)
print()

s, n = CalcSubstitutions("CTG", True)
print("Synonymous:", s)
print("Nonsynonymous", n)
print()

Synonymous: 1
Nonsynonymous 8

Synonymous: 3
Nonsynonymous 6

Synonymous: 3
Nonsynonymous 6

Calculating Synonymous and Nonsynonymous Substitutions for: Leucine CUG
Nonsynonymous Mutation found: Phenylalanine - U substitution at position 0
Nonsynonymous Mutation found: Methionine - A substitution at position 0
Nonsynonymous Mutation found: Valine - G substitution at position 0
Nonsynonymous Mutation found: Glutamine - A substitution at position 1
Nonsynonymous Mutation found: Proline - C substitution at position 1
Nonsynonymous Mutation found: Arginine - G substitution at position 1
Synonymous: 3
Nonsynonymous 6

Calculating Synonymous and Nonsynonymous Substitutions for: Leucine CTG
Nonsynonymous Mutation found: Phenylalanine - T substitution at position 0
Nonsynonymous Mutation found: Methionine - A substitution at position 0
Nonsynonymous Mutation found: Valine - G substitution at position 0
Nonsynonymous Mutation found: Glutamine - A substitution at position 1
Nonsynonymous Mutatio

## Function 3: AACount
## Goal: Determine a raw count of each amino acid codon in an entire sequence

In [61]:
def AACount(sequence, calcSubstitutions=False):
    """Determines the amino acid coded for by each three nucleotides (T/U, A, C, G) in a large
        string and returns a final tally of all amino acids. Optionally, can also calculate a final
        tally of possible synonymous and nonsynonymous substitutions over the entire sequence.
    
    Input: A string comprised of T/U, A, C, G, and of any length. 
    
            Optional: A boolean value that determines whether nonsynonymous substitution data will be printed.
            Could be useful if you want to know exactly what the given codon could become after all possible
            point mutations.
            
    Output: A Python Dictionary containing all amino acid types and their associated
            counts such as “Glycene : 2, Lysine: 1” etc. If calcSubstitutions is true,
            also returns the total of synonymous and nonsynonymous substitutions as two
            integer values.
    """
    firstAmino = AACodon(sequence) ## read the first three nucleotides from the sequence
    
    ## declare a dictionary to house amino acids and their frequency (to be returned at the end)
    aminoDict = {firstAmino : 1}
    
    ## declare an integer to track errors
    errorCount = 0
    
    if (firstAmino == "Error"): ## don't go any further if the input is not valid
        print("AACount Error: Invalid codon at the start of sequence.")
        return aminoDict; ## should contain just one error
    
    ## optionally declare variables to track the total number of synonymous and nonsynonymous substitutions for the sequence
    if (calcSubstitutions == True):
        totalSyn, totalNon = CalcSubstitutions(sequence)
        
    sequence = sequence[3:] ## remove the first three nucleotides from the sequence (processed already)
    
    while (len(sequence) >= 3):
        currentAmino = AACodon(sequence)
        if (currentAmino != "Error"):
            if (aminoDict.get(currentAmino) == None):
                aminoDict.update({currentAmino : 1})
            else:
                aminoDict.update({currentAmino : aminoDict[currentAmino] + 1})
        else:
            ## track no. of errors, since sequences can be long this number will be printed once at the end
            errorCount += 1
            
        if (calcSubstitutions == True):
            numSyn, numNon = CalcSubstitutions(sequence)
            if (numSyn != -1): ## -1 means something went wrong, do not add -1 to the total
                totalSyn += numSyn
            if (numNon != -1):
                totalNon += numNon
            
        sequence = sequence[3:]
    
    ## check to see if there are any characters left in the sequence
    ## i.e. if the sequence length was divisible by 3
    if (len(sequence) != 0):
        print("AACount Error: Remaining nucleotides at end of string -", sequence)
    
    if (errorCount > 0):
        print("AACount for sequence beginning with", firstAmino, "finished with", errorCount, "errors")
    
    print() ## for spacing in the output
    
    if (calcSubstitutions == True):
        return aminoDict, totalSyn, totalNon
    
    return aminoDict

In [62]:
## test AACount with a variety of planned and pseudorandom sequences
print(AACount("AAAAAAAAA"))
print(AACount("AAAAAATTTTTTTTT"))
print(AACount("AAATTTCCCGGG"))
print(AACount("AGCTAGGAGTTCGGGGATGCT"))
print(AACount("GCTGCAGTGCTGTCGAGATCATCGCTCGACTCATCTCTATCTCATCGTCATGCTGCGACTATCGCGCATGCTGCGCAGCCAACACACGTACGAACA"))
print()

## should produce errors
print(AACount("AA"))
print()
print(AACount(""))
print()
print(AACount("AAAAAATG"))
print()


{'Lysine': 3}

{'Lysine': 2, 'Phenylalanine': 3}

{'Lysine': 1, 'Phenylalanine': 1, 'Proline': 1, 'Glycine': 1}

{'Serine': 1, 'Translation STOP': 1, 'Glutamic acid': 1, 'Leucine': 1, 'Glycine': 1, 'Aspartic acid': 1, 'Alanine': 1}

{'Alanine': 7, 'Valine': 2, 'Leucine': 3, 'Serine': 6, 'Arginine': 3, 'Aspartic acid': 1, 'Histidine': 4, 'Threonine': 3, 'Isoleucine': 1, 'Glutamine': 1, 'Proline': 1}

AACodon Error: Input was less than three characters in length.
AACount Error: Invalid codon at the start of sequence.
{'Error': 1}

AACodon Error: Input was less than three characters in length.
AACount Error: Invalid codon at the start of sequence.
{'Error': 1}

AACount Error: Remaining nucleotides at end of string - TG

{'Lysine': 2}



## Testing done with Butterfly Genome Data (Collagen IV-alpha 1 genes for four species)

In [63]:
## import data for each gene as raw strings
COLIVA1_danaus_plexippus = "ATGAAGTTAGCAGGATTCGTTATTCGATCGTTTGTTCAAGTTCAAGATCTCGTTGAAGTCTTTTCAATGATCAATTCTTGGTTAGTTCTTAGAGTAGAAGTTACAGAGGCACTTACAGCAGATACAGGAGCCGAAATCCTGGAGAGAAGGAGGAGAACGGGCACAAGGTGTACAGCGAGGAGGCGCGCACGCGCCGGGCCCGACATGGCTGCGCCGCACTACTGGTCGCGCGCCCTCACACACAGGGACGAGCGCAGCGCCTCCCGTACAGTCAGGCGCGGGGCGGGGGCTCAGAGACCGCCGCCCGAGCGCTCCTCTTTCCTTAGTATCCCGGCTCAAACGGCGCGCGCGCACGGGACACTGGCGCAGCGCGCTTTGGGCCATGGCGGTCCACGTACTTTGGTAAGATTCTATTTCAAATTTATTTATTTTCTTAGTTAAATTATAACTATTTTTTTTTCTTGTAAATGCGATGAGTCTTTTGATTAAATAATATTTTTTTATGTCTATATTCTTGGAAGACAATTATTAAAAAAGTATTGATTTAGGTTGCTGGTTTTGGTGACATTTGGTGGAGTTAATACGGTGAGTTAATTTTATTTTATAAAAGTGTCTTTAATGAGCAATTATTATAATAAGCTATAATACATACATTTTAATATAAATCATGAACGCTGTTTGGTTTAGGGAATGTTAGGACAACGAAAGGGCGCTCTGACTGGTCTGTCAGCTTATTTTGTCTTTTTGCAATTTCTTTTAAAGCGCGAACTTTGGCAGATAGTGTTTGAAAGTGAGGCAGTTGCGCTCTAAATAAAAGATTTTTAAAAAATAAAAAAATATTAGGTATTTACTGAAATATGTTTCTGTAACATCAAATATATATTTCATTATAAAACGTAGGACCGACAAAGTTTATCATTCGGATGTATAGTGATATAGAGCAGAACATTTCAATAAATTAATGAAGGAATAAGAACAAGTGTGATAAAAATAAATAGTCCACGTCGTGACTTACTCATGTATAATATTAAATATTGCGCTAAATAATTCATATTGAAAATATGAAACATAATTTACATATTTTTTTATCTTTGTAACCTTTGCAACTTTTTTAATTGAAAATATTTGCTGAGGGGATAATTTTTTTTATTTTTTACAATAATAGTCAAAGTTGATTTTTTTTTTATTAAAAATGAAATGACAATCACATCTCGTATGCCCGTGATCACGGTCGCTGAAAAGTAACCGAAACGTCGGGAGTATGTAGTTTTTTACAAAAATAAATCACGCATAGTTTATCCGAAAAATACTAGTTTCATTTAAATTAAATGACAAATGTTAATAATAAAAGAAAAGGATTTTTTTAATTACAAAAGTGTACTTTGGAAAATGAAATATAAAGAAAATGTTGTGCAATTGATTGTAACAATTTTAACAAATCAAACTCATCTCATGAGCGAACTAAAAGTATAAAGACAAGAAAATAATAGTATAAATCAACCTTTATATTAAGTAAGTGTACATACATATGAAATTAACTATTTCCATTTAATGGCGTATTCTCATAATTATAATATTATATATTATTGAATATTATCTTAAAATAAAAATAATAATTTCGGCAAATTTCAATAATCAGAAAAACAATATTTAAAATATATGTAAATTTCAAGTTTGTTAGGAAACTGAATCGTGATCACCGAGTTTAGAATCACGATGATTTTCGTTTGAGTTTGGGATTACTAAACGAAGTAATAATTTGGTATCAGCTTACATAGCTAACGCTTTGCCAGCAAAATATCGCAAAACAGTGACGAAAATGAGCAGTCACTATATTATACCTATTATAATACATTCATACGATGCGAAAATCACTAAATATATATAATTTTCATTTCATTGTTGCGATAAGAAAATTCATCGGTTTTGTTTCAACTTTAAACCGATAAATTAAACTTCGTTTTAACATGAATAAATTTAATATAACTGACAATTACATAGTTCTATATTAGTTGCGCAATTTATTTTTTGGAATCTTTTAACTTCGGCTTCATAATTTCGAAACCAGAATATGTAAGATTTTTTTGATAATTTCTGTAAATTAAGGAACCACTGATACAATAGAAATTACAAACTTTGTAAGTTAACATAGCCTGATTTCTAACACGAAGCAGAGGCAATGTTAAATAAAAATTTAACTTGTACAAAAATTTATCCTTAACAAAAAGCCATATTAACGCAACAGTATTGTTTGTAAAAAAATACGTCAGTCAAAACTTGTTATATAAAGTTGACCCGATTGATTTATATGTGAAATAGTTGTGATATTAAGACTTTAACATTGTAAGGTATGTTAAAAAAGGACTCGTGTTTCTTACTGACTCAACACAACAGGACTGTGTTGAGTGCTTGCTGCATCTTTCTTGCAGACCACCTGTAAACTATCTTATTAATTTTATTTGAAGATTTAATATGTTATGATTTTATTGATTATAATGCTTACTAGATTAAGTATATTTTATTTTGATTTCTTATATTTCAGCAACGAAACGAATATGAAAGAGGAGACGTCGAAGAAAATAATATTTATGATAATCAAGACTGGATGTATCAGAATAGTTACAATCCGCAACCAACAATAAATTCATACTATGGTTTATCTAGAAGGCAGGACTTACCGACACCCTCTGCCCCCCCTTCTCCTCCACCGGAAAGAGTACAGCCCTCAAGAAGTTTTGGACAAAACTTTGCTGTGTATGACCCAGTAACTCGTCAGCGGACAAATGCATTTGATCGTAATTGTACGGCCCCTGGCTGCTGTGTACCAAAATGTTTTGCAGAAAAGGGTAGTAGGGTGCGTAATCCATTGTTATATCATGTTTAATATTCTATATTATGAATACTTTTGTTACTTAAACATTAGAAAGCTCTTTTAATACTCGTAAGTAATCTGATTAAACAAGATAAGAAATTAATATTAAATATTAATAATAGGCATTTTTTACCTAGGGTTTCCCAGGAATGCGGGGACCACCTGGAATCACAGGTCTACCAGGACACGTTGGCGCTGAAGGTCCACAGGGACTTAAGGGTCAAAAAGGTCAAGATGGACCACAAGGTCCTCGGGGTCCGCGAGGAGAAAAGGGTAAACCTGGAGCTCAAGGATTTATAGGCTTAGCAGGACCACCAGGTCCTCAAGGAGAACCTGGTATGCCAGGGATTCCTGGACGTGATGGTTGTAACGGAACTGATGTACGTTATAAAGCAAAATGATCTACAAAAAAGATCAATAACATACAAAGGTGATGTTTTAAATATGAATAAACTTTTAGGGAGAACCTGGAATGGTGGGGATCAAAGGTTCACAGGGTCCACGTGGATTTGCTGGGCCTAAAGGTAACAAGGGTGATAAAGGAGAGCCGGCTTATATGGGTCGATACCCAAAAGGTGAAAAAGGAGAACCTGGAGCTGATGGTTTACAAGGCCAATCTGGCCCAGCTGGACCAACAGGTCCTCCAGGTTTGGCTGGTCCCAAAGGAATGACTGGACCTATGGTAAGTAATGCTAATAAAATATAACATTCTATAAATTGTTTGTTTAACCATTATAAACTAATTTTAGGGACCACCTGGATATAAAGGTGATAAAGGTCCTAAAGGATCTAAAGGACAATCTATTCAAGGTGATAAAGGAGACCGAGGTGACAAAGGTGACAGAGGACCCGGTTGTCCATCAACAACGTTACCTTCATTGGATAATAAAGGAGCAATAAAAGGTGTCAAAGGTGATATGGGATCAAAAGGTGAAAAGGGAGAACCTGGGAGAATGGGTGAAAAGGGAGAAACAGGTCCAATGGGGGAACCTGGCTTGCCTGGATTAATGGGCATTAAAGGAGAAAAGGGCTTAAGAGGAAATCCTGGGGAACGGGTTAGTATTTTCGTAATAATTTATAGAATTGAAATAATAAAAATTAAATTAATTTTAAATAATTAATCCATTCTTTTTGTTTTTCAAACTTATAATACAAAATATATTTAAATAGGGTCGTGAAGGAATGTATGGTGAACCCGGACCTATGGGAAGAAAAGGTGATAGAGGCATTGATGGACTGAATGGTCTTCCCGGCCGACCGGGTTTGAAAGGAGAACCCGGCAGGGATGGAGCAACAGGTCTAATGGGCTTAAAAGGAGTGCCAGGTCCACCTGGTGGTCGAGCTGGAGCACGAGGTCCACCTGGGCCACCAGGTCCTCGGGGCTATATCGGCGTTGCTGGTGCACCAGGGTCTAGTGGTAGGCCCGGAGAAAATGGATTACCAGGACCTATGGGTCCAAGAGGTGGACAGGGAGAACCAGGTGACACAGGCATTGAAGGTCCAGCAGGTCAAAAAGGAGAAAAAGGAGAACCTGGTCTTGATGGCTTGCCTGGAGAAATAGGTCAACGAGGATATGATGGACCCATTGGTCCTCAAGGACCTAGGGGACTAAAAGGAGAAGAAGGTCAATCAATTCCTGTAAGTTTACGACATAAATATTTGTATAATAGAGATTGCAACAAAAAAATTATATCATAAAATTATTTTTTAATGACAGATAAATTTCCAAAAAATATAATAGAATTTGAAATAATTATAAGTTCATAAAATTAAAAAATGCCTATTTGATAGTGATAATCATATTTATATTTACCACCAAATTTATAATCAGGGTGACAAGGGAAACAGTGGCCAACCAGGAATTCCAGGAGATAAGGGAGCCAAGGGCGAAAGAGGTTATCCAGGATTACGAGGTACACCTGGAAACTCTACATTAGGTACACCAGGAAGTCCCGGAGAAATGGGTCCACCTGGTGAAAAAGGTGAAAAAGGAACTCCTGGGTACGATGGAATACCTGGTAATCCTGGACAAAAAGGTGACATTGGAGGACGGTGTAACGAATGTCGACCTGGAAGTATGGGCGAAAAGGGAGACCGCGGTGCTGATGGTCTACCTGGTGAACGGGGTGAACGAGGTCACATCGGACCCATCGGGATGACCGGGGAGCGTGGTGCTGACGGTATGAATGGAATGCCTGGAGCTGCTGGAGCACCGGTATGTTAAAAAATCATGTTGTATTGAATTTGATATGCATTTTTCCTTTATTTATTATTTTATTTATTAAGGGTGAACGTGGATTGGACGGACCAATAGGACCACCAGGAATGAGGGGAGCAGATGCAATGATACCGTCCAATTTAGTAAAAGGACCTCCCGGAGAAAGAGGTGAACCAGGAGAAAAAGGAAACATGGGACCTAAGGGTGAAAGAGGACCTGATGGAATAATGGGTGATCGTGGATTAAATGGCATGCCCGGACAGAAGGGTGACATGGGTAGAATGGGACCTTCTGGTATAGATGGCACACCTGGTAGTGATGGAATACCGGGACGGCCAGGAATGAAAGGCATGTCCATCAAAGGTGAAAAAGGAATATCTGGTGATCAGGGTGAAAAAGGTGACAAAGGATTTTCTGGAAGACCAGGACTTAAAGGTGAACCTGGTCAATGTCCCAATGAGTTAAAAATTCGCACAAAGGGAGAAAAAGGCAACCCTGGCGTTCCAGGACCACAAGGACCATTAGGTACTGATTTTATTTCATTAATCTTGTCTTTTAATTTTAACAAACCTAAATATACATTTAAAAAATAATAATTTTTATTTTTAAAGGTATGAAGGGTGAAAAAGGTAATCAAGGGCCATTCGGTTTTACTGGTCCAAAGGGAGAGATGGGTTTACCAGGACGAGCTGGACCGGTAGGTCCACGTGGTCTTCCGGGTTTCAAAGGCGATAAAGGTGAAATGGGTTCAATGGGATTTCCGGGAACACCAGGGGATTTAGGCCCTAGAGGTTTTCCAGGGTTACCAGGATTAAAAGGAGACAAAGGTGAGATTGGTCCTTCTATGCCTGGACCACCTGGACCTGCTGGATTAAAGGGAGATAAAGGAGAACAAGGTCCAAGAGGTCAACCTGGAATAGAAGGAAAGGATGGTCCTCCAGGATTAGCTGGCTTACAAGGTGAAAAAGGTGATATGGGATTAATAGGAAGGCAAGGTTATCCAGGACCTATTGGATTAAAGGGCGAACCGGGTCCTATAGGACCATCCGGAGTTCCGGGCATTCCTGGTACGCCAGGAAGAGATGGACCTAAAGGTCAACAAGGATTTCCCGGTCCACCTGGTAAACCTGGTGTAATTGGCTTACCTGGACAAAAAGGTGAACCAGGTATTCAAGGTCCAGATGGCCCGAAAGGTTTCCCAGGACCTCGTGGTCATGTTGGTATGCAAGGGCAAACTGGTCTTGATGGAAGTCCCGGTGAAAAAGGAGATAAGGGTGATATAGGATTCCCGGGTGAGCCTGGTAGACCTGGTCTTGATGGACCTAGAGGATTAGCTGGTGCACCTGGTGAGAAAGGTGATATAGGTTTCCCAGGAAACCCTGGGTTGAATGGATTTATTGGACCAGCTGGCCCAAGAGGTGATATAGGCTTCAAGGGTTCCAGGGGACCAAAAGGAGAACCTGGTTTAGCTTCAGAAAAGGGAGAAAAAGGAGATCAAGGTTTTCCAGGATTACCTGGTGTTGATGGAAGACCTGGGCAAGATGGAGAAAAAGGTGACAAAGGTTTCCCTGGCTATCCAGGTCAAGGCATTCCAGGAAGTCAAGGTGAAAAGGGAGATGCAGGTTTGCCTGGAAAAATGGGTTTTCCTGGTATTCCTGGCGATAAAGGCGACCGAGGCTTTCCAGGACTGGCAGGTTTAAAGGGAGAAAGAGGCCCTGCAGGCAAAGACGGTTTGCCAGGAATGCCGGGAAGAGATGGCAGTCCTGGTGCTCCAGGCCAAGATGGTTTACCAGGAATGGATGGCGAAAAGGGTGAAAGAGGTGATCGAGGATTACCAGGTCGTGATGGTCTTGATGGATTGAAAGGTGACCAGGGTATTGCTGGACCACCAGGGCCAATAGGACCAATGGGTTTTCCGGGTCCTAAAGGAGACATTGGTTTACCTGGGCCATCTATAAATATCAAAGGTGAAAAGGGAGATATAGGTTTTCCCGGTATTACTGGACTTCAAGGAGATAAGGGTGATCGAGGTAGAGATGGCTTCCAAGGTCTACAAGGGGAAAAGGGTGATCAAGGATTCACTGGACAAAAGGGTGAAATGGGTAGAATGGGCGCCATGGGTGAAAGAGGTATTTTATTTTTCATTTATTTAATTAAATATCTACAACACTTTTGCAAGAATAATTTCAAAGTACAATTCAAATGTTTTTTCGAATCCAATATCAACAAGTGATAATTTAATTATAAAAAAATTAAGACAATTTTAACAGTAATTATGTTTATCTAGGTGAAAGAGGTCCAATTGGACCGACTGGTATTCCTGGACTCACAGTAAAAGGTGAAAAAGGTTTACCTGGAAATAACGGAAAACACGGCAGACCTGGCATGCGCGGTGCTACTGGAGAAAAAGGAGAACAAGGATTACCTGGACTTCCAGGTCCAATTGGGCGCTCTGGCATGCCAGGAACACCGGGACCTAGAGGTGAACCCGGTGAACCAGGAAGTGAAGGAGTCGCAGGACCCCCTGGGTTTGACGGTCCTCCGGGGCTACAAGGTCGTCCTGGCGAATATGGTGAAAAAGGTAACAAGGGTGATAAGGGTGCTGTTGGTTTTGGTTTACCTGGCCCGAAAGGAGACACTGGCTTGCCAGGATTACCGGGTTTAAATGGTGAAAAAGGTGATAAAGGAGATCAGGGTTTCGATGGATTAGTTGGAGAGATGGGTGAGAAAGGTAACCAAGGAGAAAAAGGTGACAGAGGCTATCCTGGTCGGCCTGGAATTCCTGGCCTTGATGGTGTAAAAGGAGATAAGGGAGAAGCGGCTGCTATAGTTTATGGAAGTAAGGGAGAACCAGGACCAAGAGGTCCTCCTGGATTGAATGGTCCACCTGGACTTGACGGATTACCTGGTCCTAAAGGCTGGGATGGTGCTCCAGGCATGAAAGGAGATAAAGGTTTCCAAGGACCTATGGGCCCACCAGGCTTACCAGGACCTCAAGGAATAATGGGTATTCAAGGTGAACGTGGTGAAACAGGTCGTATGGGATTACAAGGTGTACCTGGAATACCTGGTGCTCCTTGTGCTACTACAGACTATCTTACTGGCATCCTTTTAGTGCGTCATAGTCAAACAAACATAGTACCCCAATGTGAACCCGGACATATTAAATTGTGGGATGGCTATTCCTTACTTTACATTGATGGAAATGAAAAGGCTCATAATCAAGATCTGGGATATGCTGGATCTTGTGTAAGAAAGTTCAGTACCATGCCATTCCTTTTCTGTGATCTTAATGATGTATGCAATTACGCAAGTCGAAATGATCGCAGTTATTGGCTTTCTACAAATTTGCCGATACCCATGATGCCAGTAAACAACAATGAAATTTCACGATATATTTCAAGATGTGTTGTTTGTGAGGTTCCAGCCAATGTCATAGCTGTTCACAGTCAAACTCTTGATATACCTAGTTGTCCAGTGGGTTGGAACTCATTATGGATTGGATACAGTTTTGTTATGGTAAGACCTATCGCAATTTTATTACATTGAATAATCAATACATAAGTTCGTACATATTATTAAACAAAAAGATTAAATCATTAAATTGTGGTCTAAACATGAATTATTCATTGTATCTCTGATTTTAGCACACTGGAGCTGGTGGACAAGGCGGTGGTCAAGCCCTTGCTAGTCCGGGATCTTGTCTTGAAGACTTCCGAGCGACACCATTTATTGAATGTAACGGTGAAGGTGGTACTTGCCATCATTTCGCCAATAAACTTAGTTTTTGGCTAACAACTATAGATGATAAGAAGCAATTCGCAAAACCAGAGCGTGAAACTCTTAAATCTGGACGACTATTGCAGCGAGTGTCTAGATGCGCTGTTTGCATTAAGAATACCACATAG"
COLIVA1_papilio_xuthus = "ATGACGGTGACAAAGAGGTGGAGCACTTTGGTGCGCGCACGCGCCGGACCCGACATGGCTGCGCCGCGCTACTGGTGGCGGCGAGCGCTGCTGGCCGCCGTTAGGGCGGCGCGGGGGCAGCGCGGGGTAGTGCGGGGTGGCGCGCGGCAGGCGGGGCGGGGCGCGCAGGCCACCGCCAGCCGCTTCTCGCTCCTTAGTATCCCGGCACGAACGGCGCGCGCGCACGGGACAAACGCGCAGCGCGCTCCGTGCCATGGCGCTCCACGTACTTTGGTAACTTTTTAAACTTTAAATATTTTATTTCCTTCTTTTAAAACTTATAATTTTTTTAAATCATTTTTTATAAACGGATCATTAGAATATATTTATTTATAGCTATTTATTCTATGGTATTTCAATATTATATTTATATAATATTTTAATAATTAACTGAAAATTCACAACTGAGTGGCCTGATTTTCATATCTTTTAATTAGGTTGGTGGCTGTATTGGCGCCTTTAACAGGAGTAATAACGGTAAGTAAGATTTTATTTTTTCTATGCTTAACTAAAACTATTTTGTTCATTCTGAAATTTAAAGTCAATATTAAGCAAAAACTGTGCGGAATCGGGAATGACGGAGGAGGCAGGCGCAGGCGCGTCCCGACTCGCCGGTTCGCTTGTTTTATTCCTTTTGCAATTTCTTCAAAACGCGAACTATATCAAGGCTTTGACGGACACTCAAATTAAACGAGTTAGAAATAAGTAAAATTTTGAATGATAGAGCTTTTTCAAGTAAACTTTTGATTTAACAAAATGTAACATTTGATTTACTTATATACATAGTATTTATGTTGTTTACATATATAGTATGTAAATCATTTGTAAGCGCGATGTGAACACCGACAAAGTTTATCATCGGATGCAGTGCCCCCACATTGCGCAAACTGCGCCTCGGTCGCTACAATTATTTTTAATTTAATTACTAATATAGAATGTGTATCGAATTTTTTGTTAAGAAGATTATAAAAGTATAATTTTTTTCATTAAAATTAAATTATGTTCTAAGTGGTAACAAACAATATTTGCTATATAAAACTTTCGTTTTTTGGCTGCTAAATGTTTCTGTAAGTTTATTAATTTAGTTCGATATTCACCTAATACTACTCCAAACTATGGACAAGCCAAAAGTCAAAGGAGTCATTTTTTTGGGCATAAATTTTATGGCGGCGTAAAATTATAAAAATAAATATTAATAAAGATATTATTGCAGTCAATTTATTTTTGTGAAGGCTAAATACAATCATAACATTTCAAATAATAGAAGATGTCTCGTTGCGGTTCAATACATTTAGATATGGCCGATAACCTCAAAATTAATGATAATTTTGTTCAATATTCAGTATATTCCGCTAAAATACGTTAAAACTTTATTGTTACGAATTCAGTGATTTTACTGTTCTTCACTAAGACTGGTGAGCCTAAATCCCAAGAAAGAATTTTTTTCCTACAGGATTTTTTCCATTCTTAAAATAAAAAAAAATATGCAGGAAACACGCTTTATAATTTATAGTCAGTGTCAAATTTATCATCTCGCTGGCCTTTGTACCAATGATTTTAATTAGTAGGTACTAAGCGACTGAAAGACATAAAGTGAAGCATAAGTTCTATAAAAATATGTTACGAAACTACGGTGTCATCTTACAAGCAAATGCAAACGTAGAGAAATGTGGCATATCAAATTATGCTAGTAATACCTAACGTTGTTCATTAGATTTCTGTGAATTGCGCATTGTACTTATTCAATTGTATTAGATACACATAAAATATCCTTCATCTTTGATGCCACACCATGAAGATAGGCAATCACGGCAGCTCGAACTTTCAACGCGGTTATTTTTACCAAATTGTCAGTACCTTTTCGCCGAATCGTCTGTGAAGAGCAGCCGCTGTGAGAGTTCGAAATGTCAGAACAGGGTGGATAATAATGTAATAATAAATATAAAATAGTTATTATTACATTATTATGTACCCTCATAACGTTATGACAAATATAGACAGTAAATATTTTATTACAAGTATTTTGGTAATGGAATTTTACAATTGACTTAACATTTTATGACATAAAATTATGTTTTATATTGAGTATAGATATTTCTTATAAGAGACTTAGCCATTTTAATTAAATGTTTTAAGATAAAAACATGCTGTAATTAACTTTTTTTAATGTTTATAAAATATATGTTTCAGACAATAATCATTTATGTTTAATAATTAAAATGATCAAAAACATTGATTATTTTTTGTATTACTTGTTTTGTAAGGGCATGTTTTTGCTCTTGTATTGAAGTTGACGGCTCAGAAGCTTTCTTCTCTTATCTTTTATACTATGACTACGGCGGCAAAAAAGTTCAGGTATATATTTTTCTTGCAGCAAGATGATGGTTGGATGGACGTTAACGACATAAATGCAAGCCGTTGGCCGCCGACCGATTGGTATGGTACCCGGAACCAACCATCGCCGCCCTCGCCGACGGATTGGTACGGTGCGCGTAATGATCCGCGCAATGATCCGCGCAGTGATCCGCGCAGTGATCCGCGCAATGACCCTCGCCCAGAACCTCCTGCGCCTACAAGATATTACGGGCTGTCAGGGCGGCCAGACGTTCCCGCGCCACGACCGCCGCCCAGCCCAGGCCTTAACTTCGCTGTATACGACCCTGTAACGCGGCAGCGCACCACCGCTGTGGACCGGAACTGTACGGCGCCCGGTTGCTGTGTACCAAAATGTTTTGCTGAAAAGGGATCAAGAGTAAGTCACATTATAGATACACCTTAAGCTTAAATTTCTTTCATATATCAACAAAAGCTAAACGTTACCTCGGATTTAGAAAAATCCTATGTAGTGATGATACCGTCTAGGTTTGGTCTAATTTTCAGGACACAAATCTAAATGTATTCATCTATCATCCTCTAATTTTTTTATTCGTGTTAGCATGGGAGGTAATGTATTTTTGCGCCTTCATGGTGTGTTTGTATGTACTAATGTATTAGCTAATTTATTAGTTTGTATGTGTAATGGTTTTAATATTTCTTCGCAGGGTTTTCCAGGTATTGCAGGTCCACCGGGAATGAGAGGTCTCCCTGGTCACGAAGGTGCTGAAGGCCCGCAAGGACCAAAGGGACAAAAAGGACAGTTGGGTCCACAGGGTCCTCGAGGTCCAAAGGGCGATAGAGGTAAACCTGGAGCTCAAGGTTTTATTGGTCTCACCGGACCGCCGGGACCACAAGGAGAAACTGGAATTCCCGGTATACCAGGAAGAGATGGCTGTAATGGAACAGATGTAAGTATTTAACTTTTTTTCTAACAACTACAAACTACGCCACCAGAACATCTGCTTACCTTTGATTTAAAATTAGGGTGAACCGGGAGAATCAGGTCCCAAGGGTTCGCAAGGGCCCCGGGGTATCGGGGGACCAAAGGGAGATAAAGGAGACAAGGGAGAAGCAGCTCATATGGGCCGATATCCCAAAGGACAGAAAGGAGAACCTGGAGCTGATGGTATGCAAGGTAGTCAAGGACCTGTCGGACCACCAGGACCTTCTGGATTACAAGGACCAAAAGGAAATACGGGTCCAATGGTAAGCCATAAATTTACCAATTAAAATATTTTTATTTTTAAATTGATTTTTATATTTCATTTCTTTATTTTAAAAAATGTGTTATATATTTTTCTTCCTTTAGGGACAACAAGGATTTAAAGGAGATAAGGGCACTAAAGGTTCAAGGGGTCAATCGATTCAAGGAGACAAGGGAGATAGGGGGGATCGAGGTGACAAAGGGCCAGGTTGTCCTCCGATTATATTGCCTTGGGAGGATAATAAAAGCGCGATCAGAGGAGTTCCAGGTGATATGGGACCAAAAGGAGAGAAAGGAGAGCCTGGAAAATTTGGCGAGAAGGGGGATACTGGTCCTGTAGGTGAACCAGGTTTATCAGGACAAATGGGAATTAAAGGAGAGAAAGGCATTAGAGGAAATCCCGGAGCAATAGTAAGTTATAATTTTTGGTGTGCCATACAAACTAAGGTCAATCGTAGTCACACAACATTATATATGACACACATTGCTATATGAAAATAGGTAATTACAATAATATCAACATTAGTATCAAATCATGCCATTCAGACTATATCGGTGGAATGATAGGTTTTTATGGGGAAATATACTTTATTCTTTATTTTTATAAGGATTCTATGAACATTACATACTCATAGCAAACTACTTACAAAATAATTATAACTAAAAGAGCCAGTAAAAACAAAAATTCTCATAATAATAATAATAATAACAAGAAATGTATTTACTAACTTATAAGGCTTTTAGCACTATGCTGAGAACTAAGGTCCGACAATGGTTGGGCCAAGGCAGACAGGGTAGACAATATTGAACGAAAATATTATTCGTTTAGCAGAAATATAATACTTGAAAATTACATTATTTTTATTAAATAATACAAAATTTCAAATCCAAATTATAGAATAGTTATTCAAGTTTAGTAATTACATTCATACTCTATATGAACAAAGTCGCGGGAGTCACCAAGTTAGAAAATGTATATGACATAAAAAATACTTGTAAAACTTTCAGGGAAGAGATGGTATGTATGGCGCTCCAGGACCAATGGGACAAAAGGGAGACAGAGGTAATGATGGACTTGCAGGATTACCAGGACGCCCAGGTGGAAAGGGAGAACCTGGAAGGGATGGGTTACCAGGACAAAGAGGTCTTAAAGGTGTGCAAGGATTACCCGGTGGGCGCACTGGATCTCGCGGCCCGCCAGGTCCTGCAGGACCCAGAGGTTATATGGGCCCAGCGGGACCGAAAGGTACAGACGGAAGACCTGGGGAACGGGGAGAATCAGGTCCAATTGGATCTCCGGGTGGTCAAGGAGAACCAGGAACACCAGGCATAGAAGGACCTGCTGGTCATAAGGGAGAAAAGGGAGAGCCTGGCTTGGATGGGCCTCAAGGAGAAGTTGGTGCTAGAGGTTATGATGGACCAGTTGGCCCACAAGGACCAAGAGGATTTAAAGGCGAAGACGGTTATTCGATCCCCGTAAGTGACTTTTTTACATGTATATTTTTAATTTTTTAGTGTGCAAGAACAAATACACTTATTTATGCGACTACCAAACATTTTTCCTGATACACAAGTTGAGCCGAGCTGTAGTCAATTTGAGGATGCTCAAAAGTGGTTAAATGCTTCTCAGTCGTAGAATGTTTTTGGTTTCAGTTCAGGTAACAACTGTGAGCAATGGATACTTTTGGAAAGTTCCTAGGTGTTACATTAGTAGTAAGTAAATAGCATAAGCATATTTAAATTTTGCCATTTACATTATTTATCAGGGTGATAAAGGCATGAGTGGTCGTCCCGGAATACCTGGAGAAAGAGGACAAAAAGGTGAACGAGGTTATCCCGGATTACGTGGTGTACCCGGAAATTCCACATTGGGAACTCCTGGAAGCCCGGGAGAAATGGGTGCTCCGGGAGAGAAGGGAGATAAAGGTACTCCTGGTACTGATGGCACACCTGGAACTTCTGGACCTAAAGGTGACATAGGTGGCCGTTGTAATGAATGTTGGCCTGGCGGTCCGGGTGAGAAGGGTGATCGTGGTGAAGACGGATTCCCGGGCGAAAGGGGGGAAAGAGGACCTTCAGGTCCTGTAGGCCTACCAGGTGAACGTGGCGCGGATGGGTTAAATGGATTACCCGGTTCAGCAGGCGCTCCCGTACGTATTACTTTATTTCTATTCATATCAGTAGTAAACTTTTAAACAAGACCGTGCTCTTTCCACGACTGAGTTTTCGGTTATTCTTATTGTTTGTGATGATGTTCTACCATATTTGTCAGATATATCTTAATACTATTATCAATGCTGGATGGATGGATGAATGTTTGAAGGTGTCTCATGAACGGTTTGAAATCTGTAAAAACATAGTCTAGAAGAGCATATAGACTACTAATAGTAGTAGGCTTTTTTTATTCCGCGGGGAAGTAGTCGCGGGCGATAAGCTAGTATACCCTAAAACACAATTTAAATCAATATTAAATTCTTCTGATAACAAACACATTTATTTGCATAGGAGTCAATATTAAAACGAAAAAAAAAGCCCTAAATAACATGTAAGTTTTAAATGCTTTTTCAAATTGTGGTTATTGTGAGAAAACCAAATAAGACAGATGAGAGATGCAATTCCTGACTTTTTTGCGGCACTTTTTAAGATCAATATAATTTTAATATGGCATAAAAAGTCAAATTTTGATGGGTTCACAAATATCTACGTTGTTTATTCTATTACTCAATGATATGTTAAGTAATTTATTATTGGATATCTACAAAGGTAGAAGGAGACCAATGGAAGTATGGATGGATTGTGTGAAAGACGCTGTACGTAAGAATCGAGTGATTTCAGGAATGACGGCTTATAAGATATATGAAAGAGTAGTACATGATGTGTTGACCCTCCGTAGGCATTAATAGCAGGATGGTGATGTGGTTTTACAAGACTGGATGAATCATTTAACATTTTTTAAATATTGTGTTTATAGGGCGAACGCGGTGACGAGGGCCCTATGGGTCCACCTGGAGATAAAGGCGCCGATGCTATTATACCATCAAGCTTGATAAAGGGACCTCCTGGAGAAAAGGGACTACAAGGGCCGAGAGGCCCACCAGGCCCAAAAGGTGAAAGAGGAAGCGATGGCTTTAAAGGCGATCGTGGTCAGATTGGTATGCCTGGACATAAGGGAGATCAAGGCTTTTTAGGACCGCCTGGCGCGGATGGTATTCCTGGAATTCCTGGGACTCCTGGAGTACCTGGTGCCAAAGGTGCTTCAGTTAAAGGAGAAAAGGGAACTCCTGGTGATGTAGGTCAAAAGGGTGATAAAGGATTCCCAGGAATCTTAGGAAGGAAAGGAGAACCTGGCATGTGTCCGGCAAATCTACTAGAGTTAACCAAGGGTGATAGGGGAGCTCAAGGACCACCAGGACCTCAAGGATTACCAGGTAACTAAACGGACAATAAAATATTCATTAAGTTATTGATAAAACATGTATGTTTGTTATGTATGTTACAATGAACATGAATTCGAAATAAAAATGTTCGGTAATCTTATAATTCGACACTTAAATGGAAACACGTACACATTGAGTTCGTACTCACACATATACACATCGAGTTATAAAGTTAAATCGCGACTTCGGTCGCGCGATATTAGAAAAAGATCAATAGTTTTTGGACAAAATTATAGGTGCTTACAAAATTTGATAAGAATTGGTTCAGTACTTTTGGAGTTTATCGCGAAAACGCGTCGTGACATGAAATTAATTTATATCTCCAGATGAATTACGATATGGCAAATGCACGGTGTAATCAAGTTATTCTGACATCGTGAGCTAAGCTAACCCCATATTTCAATTTTGTTTTGCATTTTTTTCTTCTTATATTTGATCAATATCGTCAACAGGAAACGCAATTAATTTTTTGCGATTATTGTTATAAGTTTCTACTTACATTAATGAATTTCTTTGAGTGTTATTTGTATCTAAGAAATCGTGTTATTTATTTTACAGGTGACATAGGTGAAAAGGGAGATAAAGGTTTCTCAGGTCGCCCTGGAGATAAGGGTGATATGGGGTTAGTTGGAAGACCGGGTCCAGTTGGACCTCGTGGACTTCCTGGACCAAGAGGTGAAAAAGGTGATATGGGTAGTATGGGCTTCCCCGGAACACCTGGCGAATTCGGCCCTAGAGGATTCCCCGGAATTCCTGGGCTAAAAGGAGATAAGGGTGAAATTGGTCCTTCCATGCCTGGTCCTCCTGGACCTGCAGGTCTAACTGGAGAAAAAGGAGACCAGGGACCAAGAGGAATATCTGGACTACCTGGAAATGACGGACCACCGGGAGCAATGGGCTTACCAGGTGAAAAGGGAGAAATGGGATTATTTGGAAGGCAGGGAAGTCCAGGACTCCCAGGACAGAAAGGTGATGATGGCCCTATTGGTCCTCCTGGCGTACCAGGTCTTCCAGGTACACCTGGGAGGGAGGGTCCTAAAGGACAGCAAGGATATCCCGGCATTCCAGGTAAATCAGGAGTTATAGGCTTGCCTGGCAAAAAAGGAGAACCTGGCCTGCAGGGGCCTGATGGCCCTAAAGGTTTCCCTGGAGCTCGTGGTCGTCCAGGTCCCAGAGGCATAACGGGGGTAGATGGCATGCCAGGGGATAAAGGAGATAAGGGAGAATTAGGTTTTCCGGGCATACCTGGAGTAGCAGGCCCTGCCGGACCCATTGGCCCCGTAGGTGTCCCTGGTCTCAAGGGCGATCAAGGTTTTGACGGACCAGCTGGAACGCCAGGAAGAACAGGTTTATCTGGAGATAAGGGTGATAGAGGTTTTCCAGGTATTCCTGGCCCCAAAGGTGAACCAGGTTTAGCAGCAGACAAAGGTGACAAAGGCGACCAAGGTTTACCTGGTTTGAGAGGTTTGGATGGAGCTCCAGGAATGACAGGCGACAAAGGAGATAAAGGGAATGCTGGTTTACCGGGTTACGGTCTACAGGGAACGCCCGGACAAAAAGGTGACACAGGTCCTCAAGGATTCAATGGCTTACCTGGATTACCAGGTGTTAAAGGGGATCGGGGATACCCTGGTGTACGAGGCCAGAAAGGCGAAAGAGGCATTGCAGGGGAACCTGGCAGACCAGGCACACCAGGTATTGATGGAATGCCTGGGTCAGTTGGTGAGCCGGGATTCCCCGGGCTGCAAGGTGAAAAAGGTGATAAAGGAGACAGGGGCTTCCCTGGACCTGATGGATTAGATGGTCTTAAAGGAGAACGAGGTCCTGTTGGCCCTGTTGGTCTACCAGGACTAAGAGGAGACAAAGGATTGAAAGGCGATCTGGGTCTACCTGGAATATCAATTGACATTAAGGGAGATAAAGGAGATACAGGCCCTCAAGGAAATCCTGGTTTTAGTGGAGAGAAAGGAGATGCCGGTAGAGATGGAGAGCGAGGCTTACAAGGAGAAAAGGGAGATCAAGGATTTACGGGACAAAAGGGAGAGCGGGGAAGAACTGGATTTACAGGAGAAAGAGGTAAAATTTATATAAGTATCATATTCTATCAACCCTTCGTCGTAGTATAAAAAAAATTTTTTGATAGTTGCGAATGCGGTATCATTTCTTATTAATTTTTTAAAGATATTTCTCTATTTTAATTTTATTAGATAGCCGGAGTGAGTCCTTTTTTTAAATACCACTTCTAATTCTTTTGCTTTTTCTCATTCATTTTAATATTAAGCTTTCAAAGCCTACTGGAAACAAGAAGAAAACCTTTAACTACCTATGTTTTATTGTATTATAACAAACCTTTTATTGCAGGAGAAAGAGGTCCTATTGGTCCATCTGGCATCCCTGGACTTACAATCAAAGGAGAAAAAGGATTACCTGGAACACCTGGCAAACATGGACGTCCTGGCCAACAAGGTGCTCATGGTGAGAAAGGAGAAATAGGATTCCCTGGTCTTCCAGGTCCAATGGGTCTACCGGGGCTTTCTGCTCCACCAGGGCAACGGGGAGAGAAAGGAGAACAAGGACTAGAAGGAGCTGCTGGACCACCCGGCGTTGACGGTGTACCAGGTGTACCAGGATTACCTGGAATTGAAGGACCAAAAGGTGATAAAGGAGACAAAGGAGCAACTGGATTTGGCTTACCGGGCCAAAAGGGTGATCAAGGAGCACCTGGTATACCTGGAATTATAGGCGAAAAAGGAGCTAAAGGGGAGCGTGGTTTTGATGGACTACACGGTCAAACAGGCGCAATTGGTTTACAAGGGGAGAAAGGTGATAGAGGATATCCTGGTGCGCCAGGTTTGATTGGTATGGCTGGTGAAAAGGGCGACAAGGGCGAAGATGCAGAGTTTATTTTAGGAGAAAAGGGGGTAACAGGACCTCGTGGTCCTCCTGGGTTCGACGGTCTACCAGGCAATCCTGGACCACCAGGTGTGCCAGGCATAGACGGCACTCCAGGAATGAAAGGCGATCGTGGTTATCCAGGTCCTCAAGGTCCTCCAGGCATACCCGGACCGCAAGGTATTCAAGGTTTACAAGGTGAAAGAGGTGAAACTGGTCGTACCGGTGCCACTGGGATTCCCGGAACTCCAGGGGCTCCATGTGTTACAACTGATTATTTGACGGGTATTCTCTTAGTTAGGCACAGCCAGAGTGACGCAGTACCACAATGTGAAGCTGGCCACGTCAAGTTATGGGACGGATACTCCTTACTTTACATCGACGGCAATGAAAAGGCCCACAACCAAGATTTGGGTTACGCCGGCTCATGTGTCCGTAAATTCAGTACGATGCCTTTCCTATTTTGCGACCTTAATGATGTTTGTAATTATGCTAGTCGCAACGACCGAAGCTACTGGCTGTCCACTGGCCAACCAATTCCAATGATGCCAGTTGAAGGCAATGAAATAATGCGATACATATCAAGATGCGTGGTTTGCGAAGTGCCAGCAAACGTTATAGCAGTACACAGTCAAACACTGGACATTCCGAGCTGCCCGGCGGGTTGGCGAGAACTATGGATCGGTTACAGCTTTGTCATGGTGAGTTATCTCTTTAATTATTTTGTACAATGTATACAACTATTTAATATTTCAAAAACCAAGCAGCCAAACTGATAATAGCATTACATCTATTTCCTATATGTTTGGTAAACATTTTATTTACCTAATTTAATTCCGTTTTATCAATTTCGATGGTACAATTATAATACAAATCGAATGACGGGATAAATTAAAAATAAACGTACATACATGTGATAGAATAATTTATAATACGTTTTTTTAGCACACGGGAGCGGGTGGTCAAGGTGGCGGCCAAGCATTGGCTAGTCCTGGGTCATGTCTCGAAGATTTCCGCTCTACGCCGTTTATTGAATGTAACGGAGAGGGCGGCACTTGTCATCACTTTGCCAACAAACTTAGCTTCTGGCTCACGATTATTGAAGACAATAAACAATTCGCAAAACCTGAACGTGACACACTTAAATCTGGACGATTATTGGAGCGTGTCTCGCGGTGCTCCGTTTGCATAAAAAATACCGTTTAA"
COLIVA1_papilio_glaucus = "ATGGTAATTGACGATTCATTTTTATGTAAAGCGATTTTATTACGATTACGAATAATTAAAGCACAATTTTTGTCCATAGGTCGTGTGCAATCAGACAGTATGTGACTGTTCCGGTTTAAAGGGAGATCGTGGTGACCCAGGTCCTCCTGGCATTCCTGGACTTCAAGGAGACTACGGTGATGATGGACCCGAAGGCCAGCTTGGCATAAACGGCGAACCAGGTGCCTGGGGCGAGAAAGGAGATGTCGGTGACAAAGGCGAACGGGTAAAATCAAATCTTTATTAATTTAAATTATATTGCATGTAAAGCTTGTTATGATTTATATTTTGATGGACTTATTATTCTTGTTTTTACAGGGAGCGGACGGCGCCTACGGGAGTAGGGGATACACAGGACCACAGGTATATTAGTATTACCTTATTTATTAATTATAATAAACGCGTACTTATTACACTCCTCTGTTTTATAAATACATGCATTATTTACCATTCCAATTTAATCTATATTAAACAATAAGTGTCATATATAAAGAATCATAAAGAGAGCCCGCGTTGACATTTCACTCCATCTTGAATAGGTATATCGGGCATTTAGTAGGGCGAGAAGAACGATATGTCACAAAATACGTTACAATAAGGATGTTACAAAATATATTTAAGGATTAATGGTATAGTCATTAGGAACACTGAATAAAGAAGCACTCCACCAAATACGAAACGTGTAGTTGCGATGCACCCTTATTGCCATGTATTTTGGTGTTGCGTCAGAGTGAATTACAACACACAGCCCTCGCGTTGACAAATCGGCGTCATTCACTTTCTTCCAGAAGCCAGAAGTGCCATCATGTACTACTTGTATATGTATATTTTATACAGTAATTTAATGTTTCCCTTCAGGGGCCACACGGATTGGAAGGCGTTCGAGGTGTGGCTGGATTCGACGGCTGTAGCGGTGTCGATGGTGTAGTTGGACCACCTGGAAGGCAAGGTCCTCCCGGTGACAGAGGTCCCCCCGGACCATATGGAGAAAAGGGGTCACAAGGTTTAGCCGGTGAAGGAGGAGTCAATTCAAGGGGCGCAAAAGGAGTTCAGGGTGACTCTGGTAGTCATGGCATACCTGGACCGCCTGGCCCAATGGGTTGGAGAGGAGACAGTGGTCTCCGTGGTGAAGAAGGTGACGAGGTAAATTTTATCTAAACCTAAACATCTCGAAATTTTTGTTTTTTACGTACAAAAAAAAAATATTATTTTTGTTTCTCAGGGTATTATGGGTATCAAAGGAGAACCAGGTTACAGAGGCGATCCAGGTGATATTGTGGTGGGGCCACCAGGAATAAAAGGAGATCAAGGAGAAACCGGTGAACCTGGAAGACCAGCCACCATAGTCAACATAGATACCTTTAAACATAATTTAAGTATTATAGCCAGAGGAGACAAAGGCGAGAAAGGACTTAAAGGAATGCAAGGTGTAAAAGGATTAAAAGGAGAAATAGGTTCAAGAGGTCAAATGGTAAGTAAAATATATAATCTGAGAAATTCTAAGGAGCATATTTAAGGAAAAGAGAAATTTCACATTAAAATGTATAATATATAGATAGATAGATAGATAGATAGATAAACACTTTATTGCTCACATTACATTAAATACAAAGTTTTGAAGTTGTGTCTTTTAACAAAAAATACGATTTTTTCCTACTTAAATACATAGAATGTCGTAATTTACGCCCGTTATAAAATATCGTAATACCTAATATAAACAAAATCTAACGTTTTATAAAGCGCTAATGTCACTACAATAAGAGTATACCACTATTTACCTCTCAATATAAAGGCACTGAACACTAAAATCTTTAAAAAGCTGAAGAACTGGTAGCTGGAACGTTGATGATGATGATTTATTCGATTATGTTTTTTTATTTATTTATTAAAAACACTTAAAATGTATTAAGTTTGGAATTTTAACTTGTTTGCGTTACAGTTTATACCTAGCCACTGTAATATTGCAGGATGGGGTCACTGTTGATACTGTTATACCTATATCACCTATATGTATGTAATTCATAACTTAACCATTGTACACAGTTTTGCAATAAAATAATTAATTGATATTACGTGGTTTTGTGAACATTTTTTTTTCGTCTGGTTGGGTAGAATTATATACAATATATAAAATAAATACATATTTCAAAATATAAACCATAAAATTAATACCATCTAAGTATTTCATCAACTTTTCTTTGATATCGACTGACATTAGACGACTGACGACAGATCGTTATTTTATTATTTATTCAATGTAATGCAATTTGACGCTTTTATGCTTTTAGGGTCCATATGGTCAAAACGGTCCACAAGGTTACAAAGGAGATCAAGGTGAAGAGGGCTCTAGAGGAAAACCTGGTCGCCGAGGAAAAGACGGTCCTCCTGGTCCTAAGGGCGACAAAGGGGCACCTGGGTTCCTAGGCATCGATGGAGAAGATGGGATACCTGGGGGACCTGGAGAAGATGGTAGACCAGGGATTCCCGGCGAGCAAGGACCTATAGGAGAACCAGGAATTTTCGATGAAAGATTGAATGAACCTTTACTACCAGGTGCTCAAGGGCCGCAGGGACCGGTAGGTTTTATGGGACCAATAGGAGCCCCAGGAATTGATGGATTCAAAGGAATACCCGGAGTTATGGGACCACCTGGTTATCCAGGTGTCAAAGGATTTCCTGGAAGAGTAGGAAACCCAGGAAAATCTCCCAAGGGAGAACCGGGGAGTGACGGACGCAAAGGTCTACCTGGGCCCCGAGGACCCCCAGGTTTACAAGGATTGCCTGGTACATTAGGTCCAAAAGGTTTTAAAGGTGCAACAGTCAGAGGTCCTGATGGTGAAGATGGCATGCCCGGCATAGACGGTGTCCCTGGACCAAGAGGAGATAGAGGTGAACCTGGGTTTATGGGTCCACCGGGATATCCTGGCCGTGGGGTACATGGAATGGGACCCCAAGGAGAAGACGGGCCACCAGGGCCGCCTGGTATGATTGGTGACCTAGGCACCCCAGGCCGACCAGGTTATCCGGGGTCGAGAGGACCGAGAGGTGATGACTGCCCATTCTGTAAATCAGGTAAGTTGTTTGAATCTCTGTTAGTATTTTTCAGCCATCGTTTAAAATGTGCCGTTCGTCTAGAATCGTAATTTAGCATTATAGACAATAATTTACTTGTTTAAGGTAATATTAATGTCATTTCTGTGATATTGTATATAATTTACATTAAACACAGGTTCACCTGGACCTCAGGGCCAAAGGGGCGATGACGGATTCCCAGGACAAAAAGGATATCCAGGATATGAAGGTTTACCGGGACCACGAGGTCAAGTTGGGTTACCCGGTACTTCTGGCATACCAGGTGCAAAGGGTTCAAAAGGAAATAAGGGACAATCAGGAATGGCTGGACCCCAAGGGCCGAAAGGGCAAAAGGGACGACTTAACCACCCACCGTTGGATTTAACCTTAGCAGAAAGAGGTCCCCCGGGAGATCAAGGTGAGGAAAATTATTTTACCAAAGATAAAACGCCTTCCATCTTTACATCATCGACCGTGGACGTATTGAAAGATAAAATAAGAAATTAAATCCTACGAACAATTTCTCTTATAGGTGGACGATGGTTGAAGAGGGATGAAATGTCCACCACAAAAACTTCTTTTAGCATACCAAAAAAAATAAGAAACTACCTTGACAGCAGTAGATGAATTAGATTAGACGCACATATTACATAGTAAAAGCTAATTTTGTAATGTTCACAGATTTTTTTCCTTAACAAATATGTAATTTATATAATCTTTCACTACATTGTTAACCATTCCTATTCTTGATAATTTTGCGCATGAACATATAAAACTATTTTTTTCCATATTTTTTTCCTTTAATAGGTTTCATAGGTCCACAAGGCCTACCCGGTGATGCAGGTTGGCCTGGACTTAGCGGTGACGTCGGCCCAATCGGTATTAAGGGAAGAATGGGAGATTTCGGCTATCCCGGGCTTCCTGGACGTAATGGATCAAGAGGAAGGGATGGTGCTCCTGGGTGGCCGGGTGCGCCTGCTGATATACCGATAGCATTTCTGCGTGGAGAGAAAGGGGATGCTGGGATAAAGTAAATCTAAAACATCACCGTCTAATATAACAATAATTTAAACGTTAAATACGATCCCTTGTACAGTCCATAAATGTTTAATTAAATATTTACTGTTGCCATTCCAAAAATCTTTTCGAATATGTTTAATATGTACATGTCACTGGCCGATTGTCACAAATAATTGGGCTAGACAAATTCTAAATTCAAAAAGGCACATTCTCAGTTAAACGAATCATTGACAATATGACGTTCCATTTCCAGTCACTTTCTAATGGGGCGAAGGCTAAATTTAAACCTGCTATTTATTTTGACAAGAGTGAGAATTGAAGACAATGAAAAATAATTATTAGTGAATTCAACGATATCTTTTTTGTTTAAATAATTTATAATCTTAAAAAAACCTGCGTAATATTGTTTGTGACGTCTATATCCAGTTGATATAGGTGAAAATTCTTTTGGTTATTTTTCACTGTCCTGACCTGGTAGAAAATAAATAACATATTTAATTAATTAAATAGTTATAGTCTAATGAGTGAGGTGCCGGAGGCCTAATTTCAGTCCTCTTCCCCTTCCCACCCTTTCTTCATAAGGATAGGATGGGCTCGGGAAGAGGATTGGACAGGGAGGGGGTTAACGGGGAAATGGAAATATTTTTTTTCCTGTCTATGCCTCAGTCCATTAATGGTAGGTAACGCGTCTGCAGCACATTTGCTTGTTGCCGATGTCTGTGGGTAGCGCTCACTTAACATCAGGTGATCCGCTTGCTCGTTTGCCACCTTCTCCCTTAAAAAAAAGTTACTAATGCTACGAAAAAAAAAATCACATTTAGTTGTTTACGTTTAGCGACTGGCAACGAATCAAAAAAAGCGGGTCTCAAGTTCCGACCAATCAGCGCTCAAAGTTCTTTTGTAGACAAAGACATTCATGTCTATTGGATACATATGCGTCTGCCTTACGTCGCCTCTTTGTAAAGTGCCTAAAAAAGAAACGGCGCATTCTCAAATTCGTATATTGGTATTTCCTTGACTGATGCCGACTCCAAAACTAAATATTTTTATTTGTTCAATTTTTTTAAAATAAGATCTCACAACTTTTTGTTGCGGAAAAACTGCGTTCCGACACTTGGCTTTTTTAGATTTTTTGTTTTTGATGGAATCTGCTCAACAGTTTGACATTCTTTAATACGCAAAATCTATATTATATATAAAAATCAATTGCTGTTCGTTAGTCTCGCTAAAACTCGAGAAGGCCGAACGTATTTATCTTATCTTGGTGTTGAAATGTTCGTGGAGGTCTAGGGAAGGTTTAAAAGGTGAGAAACAATCGAATAATTGCCGCGAAAACCCTAACAACAGCCCTTTTCGTTTTCCCATATAAACGTTTTCTAAATAAAATGTAGAGTCAATTTGAACTTTATTGATATTCAATAAAGTTCATATTAAATTATAATCACTCATGTTGTCTAAGAAATGTAGATGTGTTCCTAATTTAGAAGATTCAATTTGCAATTTATTGCCGCTGCACATAGTTCAAATTCAATCATATATATCAAAAAACTTTTAAACAAAACCGACTTCAAACAAACTGTATGCAATAACAAGAAATGCACTTAAAAGTATAAAATAATTTTACGCAATTAAACATAAGAATCGATTCTTAATCATATTTCTTTTTTGGAGTCGGTGTCAGCTGTGTTTGATAGTGTTCGGATAAACAAATTAAGTGGTTCCCAGGCGAAGCCGGGGCGGGTCACTAGTAAACATTAAAAACCCAGATTTGAAAACTCAAAACCTTTTTATTTTTATAGATTATTAAGTAATTTCTTTAATTTCTTACACAAATTATGAATTCCAGAGGCGAAGCTGGAGACAATGGAGAACCTGGTCTACCTGGCGAGGTTGGAGATTCCTTAGATTCTGACATTAATGTTAAAGGTGAAAGAGGGCCCCCAGGAGTCATTGGCAGAAAAGGTTTGTATGACATCAAAACTAACTATTGACTATAAATAAATAAATAAATATGCGCTCACTTCCGTAACCCAGAGGGGTAGGCAGAGATCATGGATTTCCATTTGGCGCGATCCTGGCACACCTCTATCGCTTCTTCTACATTCATACAGTACAGTACAGTAGTGAGTAGTCAGAATGTGTGTCAGGCTCTTCTGGCAATACACAATACTGTACTAGTCCCCACTTTAATGTATGGAAGTGAGAGCTGAGTTTGGCAGCATGTTAGTAAAGTTATTGCTGTTGAGACGCGATCCTTAAGAAGTATGTTAGGCGTGACATTGAATGATAGTATTAAAAATAGCATTGTAAGGGAAGTGTGGCTTAAAAGTGAATTTTGTGACTAGAATTGAGAAAGGTATGCTTCGGTGGTTCGGGCATGTTGAAAGGATGAATGAAGAGAGAATAATGAAACAAATTTATGTTGCAAAAGTGAGTGGCAGTGTTGAGAACTTATATAGATCAGATTATGCTTATTAAAAAAAGGGCGATATAAAGAGTACCCATAACCGACGAGCATGCATGCGTAGATGTATGAATGTAGAAGAAGCGAGAGAGGTGTGCTAGGATCGTGCCAAATGAAAATCCATGATCTCTGCCTACCCCTTAGGGTTACGGGCGTGAGCACATATTTATTTATTTTATATGGTATCTTGGATTTAAAATGATACTGTATATATTATATTGCATTGTCATCAGGGCAAAAAATTGGAAGGGTATAAAATTTAAATTATTAAATTTGACCCAAAGAAACATGTAATTTATACAGAGTGTTAGTGACGGATACACAATAAACGAAAACGCAACTTGTTGATGATTGATGCCATAAAATCTTTAAAAAAATATCACATTACTGAACACAAAAAAATAAAAATAATTCAAACAAAATAAATAATACATAAATCGTTCCGAACCTAAGCACCATTGCTAGCACGAGTTCAATAACCTATTCCGTCACCCACCATCGGTTAGACCACGCGCGGGGGTGCGCGTCATCTCTCATTTTTAAATTTGGACCGTTAATTGTTTTTTTTATAATGATAAATAAACAGTGTTTATTCTTTTTTATAAATCAGAAGAAAAATTGAGATTTAAACGATATTATAAAAGGTTACAATTATCGCAAATAGTCACCGCTATCACTCCTAACGGGAATACGAATGATTACCAGTCGAATTACCAATGATTTACATATAATGTTTCGACAAGCTACAGATTTTGTGAAAGTAATTAAAAATGTGGTCCTGTGACTTTACTGTTTAAGATATTTAGCTGTCTCGTAAATACGTAACTGTTACACGCATAGTAATGCGTGGTTGTTTTTTGTATATTAGATAATGTTTCGATAAACAATTATGTGTGATACACAATGTCTCGTAAGGGAATGGAAGCGCTCAAAAATACACAATTTTTTTCCCCTAAAAGTACCTTTTTTTACTTTACTGCACTTTCAGGTACACCACAAATGATGGAAATTTGTAACGTTTTTGGTATTATATAACCCTCACAGACTACACTATTATGACTTCAGCAAATAGTATGAGTACCTATTGTACATTTTCGAAAAAAAATAACTAAAAATTTTACCATGATATTTAACAGAATTGAAATTTGTAAAGCTTGTGAGGGTTTCATGAAAAAAAATTATGAAAATAAAAACCAAAGTGAAAAATTATTTAAAGGATGTGCACAGTTGAAACAGTTTACAAAAGTTTTTTTTTAATAATAGGTATTCATACTGTTTGCTGCATTTATAATAATGTAGTCTTATGAGGATTCAGATTACATCGAAAAAGTAACAAATTTCATTAATTTGCAACTGTAGTGTGGTGACTGTGCGGTGGTGGTGTGACTGACATTAATGTACAGTGAAGTAAAAGTGGTACATATTATATTAAATAGTGTCATATTGTGGATGCCAATACATCAATGTACCTTGTAGCAATATACATATATTTTTAAAGAGCTTCACATTTTAAAAAAAATGTGTATTTTTTCAGTTCTAGGATTATCCATACATTACAGCATACTCTGTATATAAAGCGGACTTACTACAACAATCTGACAAGATGAACACTTAATAATTTTTGTCTACAATTGTCACATAGGTTATCGCGGAAGAAAAGGAGAACGTGGGGACATGGGATATGACGGATTCAGAGGCACTCCCGGTGATCAGGGACTTCCGGGTAGATCTATACAAGGCCCTGGTGGAATAAAGGGATTTGCTGGTGAAAAGGGGGAAATAGGTCCTAGCGGAGAGGCAGGAACAGTCGGTCAACGTGGACTCTCTGGATTTAGTGGCAATAAAGGGCATAAAGGATCAAGGGGCGATGTTGGCCTTGCTACAATTTTTGGTGAAAGAGGTACAGATGGTTACCGCGGGGAGCGGGGTGATGTCGGTGAACCAGGGTACCCCGGCACACCGGGACGCGGCGGTGTCATCGGAATTAAAGGAGTTACTGGACAACCTGGCGACATTGGTCCAGAAGGACCACGGGGACCACCAGTAAGATATTTTATTTACAATGTACATTCATAAGATCATAAATTATTTAACGTTACTGATTTATTGGCTTAATTCTAAGTGTTGTACCAATAACATGATCAAAAAACAATCAAAGACAATTAAATTCAATTAACAAGAACAAACAGATGAACAGATAAAAGTTACACGCAATATTTACAAATATCCATATTACATTTTTAAACATCCATAAATAATAGAACTAAAAACTAAAGATCCAAAACATCAAAAAATTAAAGAACTTATTAAATTATATTTTACGGGCCTTCGGACTTATTCGTGGAAAAGGATCATGATTTTTGGATAATTGTATTTGATAGAAATTCTATAATAACAGAATCTTTACCTTTTGTGCCACACCAGGAACAGTATATAGAATAGGTTGTCGTTAATCAAGATCCAATGTGTGTATGCCTGCCTACCTACACGTACTTCGCGATTACTTGGAATTATAATATTAAATAAATTGATTTGGTTTGATTTTTTCGCTGTTCATTTGCTTTGTCTACTTAAATACCACTTTATTGCATTCTAGAATCGTCTTTAGCGTCCTTTTTAACATATACAGCCATTATCAAGTATAGTGCCACGTAAAAAGTACGTAAAAAGTACGTAAAAAGTACGTAAAAAGTCATCTTATAATGAGTTAAAACCCTATAAAATACATGTTAGTTCTTAGTTTTCTTAAACATTTAAAATTTACAGGGTCGAAAAGGATTAACTGGTATCATAGTGCAAGGTGCTCCTGGAATACCTGGACGACCAGGATTCTCTGGGCCATTCGGTCAAATTGGTGAACCTGGTTTGCAAGGATTAAACGGCTTGCAAGGTGATGTTGGACCTAAAGGCGTAAAAGGACAAGCAGGCCGTATTGGAATTCGTGGATGGACCGGTGATATGGGTGCAGTTGGAGTCCCCGGATTCCCAGGAGTTATGGGAACACCCGGATCTAACGGTGAAGTTGGAAGTCGTGGTGAGACTGGGGCTCCTGGATATCCAGGGGCACCTGGAAAAATGGGCTTTCCGGGATCTGAAGGAGTTAAGGGTAAACAAGGCGACACAGGTTTTGTGGGATTACCAGGGGTAGATGGTTTAAATGGTGTTAAAGGAGTCATAGGAGACCAAGGATTTCAAGGTGTAAAAGGTTTAAGAGGTGAAAATGCTTTAGTCGGAATGAGGGGTGATGTTGGAGAACCAGGGCTCGATGGGGTATCTGGCATACCTGGTACCTTTGGACTCAAAGGGTCAAAAGGAGACTTAGGAAATCCTGGCTTAGCAATTGGAGGATACGAAGGTATAAAAGGGGAACAAGGGGAGCCTGGATTACCAGGTCACCCTGGTTTCAACGGAACCAAGGGTGAATTCGGTACCTATGGAATTGAGGGTCCTCGAGGTGATATTGGTGATAAAGGATATCCAGGCCAACAGGGACGGCCCGGGAGAAATGGATTTAATGGTTTAAAAGGAAGAATGGGAGTGCCGGGTCTTGTTGGATTAACTGGAGAAAAGGGAGACTTAGGACAGGAAGGGGAGCCAGGTATTATGGGTAGACCCGGTTTTCCAGGAGACGAAGGTATGCAAGGTCCAATGGGCAGCAAAGGCGAAATGGGTTTAAAAGGAGAACGCGGAGACATGGGTATAACTACCCGTTTACCGGCTACAAAAGGTGACATGGGAGACATAGGAATCGATGGAATACCTGGAACAAAAGGACAGGTTGGAGACGACGGCTTTTTTGGAATTAAAGGAATAAAAGGTGAACGTGGTGACTTTGGTTTCCAAGGAGAAATGGGTGAAGATGGACTACCAGGAGAGAAAGGTCCTCTAGGAGATCGAGGTCCACCGGGATTACCTGGTTTAGATGGAATTGACGCTGAGAGAGGACAAAAAGGTAGAGCTGGAATTGATGGACTTTCTGGATGGCCTGGACCAATGGGACAAAAAGGTGCTCCTGGTGAATATGGTGTAACCGGTCCTATTGGTGAACCTGGACCTCCAGGATTAAGTTTCCAGGGTCCGAAAGGTTTTAGAGGAATGGACGGTTTTAACGGTAGGCAAGGAGCGCCTGGAAAACCTGGACCTAGAGGAATGGAAGGTGTTCCTGGATTACAAGGAAAACCAGGAGAAATAGGTGAACTAGGTTCATCATTTAGTGCTAAGGGTGCCAGAGGTGATCAAGGGTGGCAAGGCTTAAATGGCTTCCAAGGAAATAAAGGTGACATGGGTGAAGCTGGATTTGATGGCTATTCAGGCTTGCAAGGCCCTAGAGGTATTAAAGGAGAAAGAGGAGATGAAGGAATGCAAGGCATAACAGGTTTAAGAGGACTAAGAGGAATGAAAGGGGATAAAGGTAAAACTATTCACCCCTCGGAAATATTGCCCGGCCCAATTGGTGATATCGGACCCCCCGGATTTCCTGGACGACGCGGTGAAGATGGCCCTCCAGGTTTTATGGGACGTAATGGAATAGTCGGTTTTAAAGGTGAAAGAGGAGAAATTGGTTTTATGGGCGCTGTAGGACTTCCAGGACATCAAGGCGTTCAAGGAATAAAAGGAAATGTAGGGTCTGTAGGTTTTGAAGGTTTACACGGAGCCCCAGGAGCTCCGGGTAGGCCTGCACCACCACCGCCGATACCGAAATCAAGAGGTTTTTACTTTACAGTACATTCTCAAACCAGAATGATTCCAGAATGTCCATCAGGAACTTCACCTCTTTGGGAAGGATTCTCACTCATACATATAGTTGCAAATGCAAAAGCCCACGGCCAAGATTTAGGTTGGTATTTCTTTGGTTTTCCCTATTTTATGCAAAATATTGTGAATTCATATTTTTAGGTCGCTTATATATACATATTATAAAAATATAACTGATCGCTGTCTGTAAATTGTAGATATTTGAGAAAAACTATTAACTAAACGCAATAAAACCCAAAACAATGATTATTAGAATTTTTGTCTGTTTATCTGTGCGTTTGTTACCGGTAATCTCAGAAACGGCTTAACCAATTTGGGTGCGGTTTTCCCTAATGTATGTAAGCTTCACTAATCATTTAGTGTTTGTTTTATCTCAATCGGTTTATAAATAAAAAAGTTATACACATTTAACTGTGCTACGGCCATACTGTGTCAAACCCACATACGCGGGAAAAGGGCAGTGAGATGATAATGAGGTTACGGTAGTATTAAGAAGGTAATATCATTTCGTTTAAGTAAAATTGCAGGGCGGATGAAACCGCGGGGCACAGCTAGTTCTGAATAATTTAATAGTATTCGATGTTAGTACTACAACGATTAAAATCTGTATATTTTTTTTCGAATAAGAATGATAATTATTGTTTGACAGGTGCACCTGGCAGTTGTCTACGAAAATTCTCCACGATGCCCTACATGTTCTGCAATCTTAACAATGTTTGTGACTTTGCTCAACGAGAGGATTACAGTTTCTGGCTTTCTACGCCCGAGCCAATGCCAATAGCTATGAAGCCCATTCAAGCGAGAGATGTTGGCTCTTACATATCGAGGTTACAATTCTATAATTCCTATTGTTTTTATTAGAAAGCATTAAGAGCTAAAACTATAATCCGTATCAATTTAACCTTAAACTTATTAAATAAAAATATTTTAACACATATTTATCACGTCTTTAAATTAAATTATTCAAGTTTATAGTTATTGCATTATTTTTAATTAAATTCTACATTAAGAATAAAATACAAGAAATTCGACCTTTAAAAATCGATTTTTTTTAGATTTTTAGAATTTTTTCCTTGCCTATTGGTCGTATGATTTTTTTTTCCTTATTTGTCATCTATATTTATAAACCGATATAAAAAAAAAGTTTTTGTTGTAAGGGGGACATCTTTTCTTAAAGAATATATGGGACACTCTTGTGTCCCATATTATTTTGAATGGCAACTTTAGATTCCGAATATATATTGCGCATCGTGAGACTCGAGCTCAGTTTGTTAACGTCCACGTAAACTTGACCCCTAGGATCTCCAATGGCAAGTTACGCAAGGGTTGAGCCTAATTTGGTTAATCGAAAAAATTGCAAAGGGACATTTCGCTCCAGCATACTGTAATTCCAAGTCGGTTTTATTTTTTAATTAACGCAGTTTAATTGATTATTATTGTTTTAGATGTCAAGTGTGTGAATCATCGACACGAACAATCGCCATACATAGCCAAAGTAATGAAGTGCCAACGTGTCCGCAAGGTTGGGATGAATTATGGATTGGGTACAGCTTTTTAATGGTATGATTATCCTTGAAATTTAAGCTAATCAGAGTTAATATATTCTTACAACCTAAATATCTTTATCGGATATGCAGTTGGTCCCTGGCATGACATGGTCGTATATCCTTTGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTCATAACGCCCTTCTTACGTAACGGTAATGACGGTTTCATAGTATCACGTTTTCATTAAAAATGAAGAAATATTTTCTATAATATTCGACAAAAATTTTGACTCAAAGATCATCAAAATTTTGGTGTATAATACAGTTTAGAGTTAGAATAATCATCATTTGCCTGCAGCAGTCTAGTGCTAGGCAAGGGCCCTTCCTACCTTGTGCTGTTCGTGACTGATTACGTCATCACAAGAGTAACTGTTTCAATTTGTTTTAAGACTTCTTAGATATGTAAAGATGTAGTAATATGAACTCCTTCCTTTCGCAGCATACGGCAGGCGCAGACGCAGCTGGACAAAGTCTAATATCACCTGGATCTTGTCTCCGCGAGTTCCGTACACGTCCATTTATAGAATGTAACGGACTCGGTCGATGCAATTATTTCGCCACAGCAGTTTCCTATTGGTTGTCGACCATCGATGATAATAGAATGTTCTTACGACCCGAACAACAAACGCTCAAGGAAGATAAAGTTTCTAAAGTTAGCAGGTACGTCCAAACTTTACATCAGAAAATAATTTACAATTAATATAAAAAAAATACTTAAAGTCTCTTGCACTTCTTACAACCCATGTATCACAGCAGTGGTAAAGTTAGTGCAATAAGATAAACTGAATTAGGGTTAGGCGGCAACTTTAAAGTGAAACAACATTACACCTTCTATTATGGCAAAAAAAACTAGATATCGATCGATCGAAAGAATCGTTATTTACTAATTCATAGGCATTTTTTATAACATGGCAACATGTTCTTTTTTGCGTAGGTGGATGTCTATAGACAATACTAAAGTATAAATACAAACTTTAGCGGATAGAATTCAAAACGCTAAAATTATAATATTAACTAAAGTAAGCTGTAGTTAAAATATATTACTTAAAAAAAGGAAACTTTTTGCCTGCGGATAATAAGTGTGGTGTTGTTATCCGCAGGCAGAAAGTTTTCCTATGAATAACTCTCTCACTTTCGCCTACCAATGCAGGACTGGTAGATAAAAATTAGAGATATCACTAATCGACGATTTTCACCTGGCTCACCTGGCCAATACTATTATCAAAACAGTCGTAGGTACTTATGTAAGAACGATGTGGAGGTTAGTCGGCAAACTTTCTGCGCGCGGATAATATCTCCACAACGTTAATGCACAAATACCTTCTAATGTTCTAAGATAAAAAAAGACGCGGGATATCTACGAAATACGCGCGAGCGAGCAACACAAATCTGTGCGGTCGCGTTCGCGCGAACTGCTACGCGTACTAAAGCGTCGGTCATCGCACGAAAAATCATAGTAAGATCGAATCAACCACATATACGAGTTTCAGTAAACCCAGTAACTGGCAATAGATGACCATCACTACTAAATGTAAAAACGTGCCAATTAGCACGAATTATTAGGAATACGAACTTAAGTGTGCTTTCATTAATTTTAAAGCGGCTTTATAACTTCGTATTAATATCTACCTATGTTAACCATTTGGTTTTATACTAAGATTTGCAAATGACCTGCGGATTTGTTAGCCGTACCTCGATTCAAATAAGCTATAACAGTGCTAATAAGAGCCATAATGGTCGAAGAGCTATGTAAATATTTAAATTGTGTAATATTTGAATTTAAGATGTTCGGTGTGTATGAGACGAATACCGGGCCCGCCGGCTGGCACCTTCTACCAGACTGGCGACGTGAGGTCGGTACCCAACGAGGTGGTGCGGCGCCCTCCGTACTACCCTGTGCGTCGCAGACCCAACGTGCGGAGATACGGCATCCCGCGCGGCCGACGCCAGCCCCTCAGAACTTATAAGCGCGTCGCCACATAG"
COLIVA1_lerema_accius = "ATGGTAAGATTTTCTAACTCTATATAATTATAAAACAAATTCCGCAATTATTATTAACGATTTTTATGTGCAAAAAAAAATTATAAGGGTGTATTTATTAAAACCTTGATAATATAGTGTGCTATTTGTACATGACTAATATTTTTTTTTAATTAAAATTGCAAACTTTAAGTTGCATAATGAATAATGCATGAAATTATTTCTTTAGGTTTGTGGCCGCTTTGGTGCTACTGAGTGGAGTAAATTCGGTGAGTGAGAATTTTCTTATTGGAAACTAATATAAAAATATAATTTATTACAAGTTATCTGAATTAGAGAATGTAGCGCAGGAGTCGCGGCGGGCACGGGCGCGAGCGGACCGACCCGCTCGCTTGCTTGCAGTTGTTCTTTTGCAATTTCTTAAAAATTCGAGCTTTATCAAGGTTTTGTGCAGAAGCATAATGTGTTTAGAAAGAGATATGTTTTCCTCCGCCAAAGTCGAATAATTTTAATTTCAATTCACAGTTTTAACTGCTTAAGTACTTCTCAACAATAAATTATTTTAAGTACAGCGCAAATGTATATGAGGCCGACAAAGTTTATCATCACTCGGATGTTAAGTGTCGGCATTGCGCAAACCAATGTTTTTCTGTAATATCTATGAAGTTAATATTAATACTTCTAATATAGACATTGATCTAATTGTTTTATTGAGGATTATTTACTTATCCTTGGACTAATTATCTTAAATGACATACCTGACTATTTTAATTTTTAACACTTAATTTTAAAGTCAAACTTTTAAATTGAATACTGAAAAGAAAGTAGTTAAGTCAAATAAAGAGAAAATACCGTAAATACATATACCTACTTACAGTATCAAATTAAATTTTTTTTTTTTAAGAATACTTACGTACTTTAAACATTTTCAATATTTTCATAATTTGTAATTAAATAAATACTGCATGCCTTAAATGGTTTTATTACATAGTGCGATTTCCTAGGTGTTATTTGCGGCGACAGGGACCAACGCCCAGTAGGCGGCAAAATAGGTACCTTTCTAATGTCTACTATTCATGGGAACCTATTCCAGTACGTTAGGTACATAATGTATACTGAAATTGGTTTTCGGAGAGGTCTGTAAAGTTTTCTACAAAAAAAAAGAGTGAAGAGAGTGACACCTTGTTGGGACAGAATTCCTTTCGCAATAATATACGATTAATAACACAATGATTAATGTATAAAAGATGGCCGTGAAGAAAAAAAAATATCCTGACAGAATACATATTTTGTATAATGTGATATTAACACCTACCATAATTTTATATACAGAGTATGTACACTACATGACTGTTTTTTAAATAGTTTGTGAACTATTTCCCCCTCTATTGAATTTCATGTTATCCTGGAAGTGAAAATTCGACATATTGTTGGAAAATTTAAATAAGAGACAGAAAGCATTTCCGTTACTTTATTTTTACCGGTTACGTAACTCCTGCCGCAAACCTTATTAGGGATCTTTGTTCCAAAAACATGGCCTTAAAGTGTTATTAGTGATAACTGACAGAAAATATTTGTAGGTATATTTTGAGTAATAGATAATGCTAGTAGTGTTTTATATAAAAAAATAGAAGTGGTAAATTTTTTATGCTTTAAGATATCTTAAAAAACAAATTAAATAAGTACTATATCAACACCAAAAATGGGTTTGTAAATGATTTCAAAATAAACACAAATACCTATTCAAAATATGTTTTGATTTAGGCACTTTAACATTTTTTTATTTAAGTAAGTGTCTACTTCTTTTAAATTAAATTATAGGTACGTTAGATAAAGTACCTACCTGTGACAGTTCAACAAAACTTGAATTTTAATTATAATTTATTTAAAATATCTCAATATGCTTGTCCCTCAATATTATCATAATAAAATATTCCAACGACCAATTAAAATTAGGCATTGATAAAATTGTTATCGTTGGTAAAACTGACTAGATTATCAAATAAAATAATAAAATAACTACGGTACATAAAGATAAATAAAAAATACTTAATAAGCTAAACGTTGCACATACCGACAAGGGATCATTTTACGATAAAATTGCAACTACTGCAAAATATCACGTGTCATAACGGCTGCACTACCAATATTATAGTTACAATCATAGAATGCTTGAAGCTAATGACGTCTAGATGAAATAAACTTAATTTAGGATTCGTATGTAACTGTCATTTCAGTGACATAATTTTTTTAAAGCAATATATTTGGATATTACATTTTCTAAAGAATCATGGCAAGATCCATACATTGAATTGTACCAAAAAAAAAGGCGGTCTAAAATGTTTCCTTAAAATTTTGTCATATTTTTACGAACATAAATATACACTCAAAATATTTTTTAAACCCGGATTACGTGCAAAAATATTAATATAATTTTTGGGCAATCTTGCCATTCTAATAAATGAATACGCCATTTGAGGTATTCTGTCAGTGACAATTGAGTATTGGCAAAAATAGAAGATTGAGGTTACACAAAACCAATATTATTAAGTTTTTATTTTAACAATAATTATTTATCCTGTTATAATTTGGCATTACAATTTTTGTTTCGTAACTTATTTCAATTGAATGTATTTCTGAAGAACTTCACAACAAGAATAGAAGTCTACTGAGTACACTGAATGTGAATTGTTGAAGCCTACTGCATTTCTACGCTGATTCCTGGAGAGTTGTGTTCGTCATAACATTTACAACATTAGATATCGAAGAAACAGTGATATAATAAGCTGAATATAATATAATGTATACCTACATGCTGCACTGTTGCTATTGCTCATTTTAGTTTGGAAAATAGTTGCTTAGTACCTTATACGAATGAACAGCAGTGAATAGTGGTGTGATAAATATTGTGAATGTTTAAATAAAAGGAAACTAAAAGTATCAAGGGCGTCTATGATTTGAAACTACTTTTGCGGCAAATAAAACATCTTGTTGCTTATTATTATTGTATGTTCACATAGATTCTATAGAATTAATTAATTAATTAGAACAATGTAATTTGTTTACAAGATATGATTTATTAAATGAAATACCAATTTTTTACAATGTTTTATTAATCATTTTCGTTTTCTATAAAAACAGTATTTAAAATACTGAGCAGGGGTTACAATAGTACCTTTATATTTTGCCCAACCAATATATTAAACAAGTTAGTGTCATAACATAGGCACAGCAACCCAGCGTTCGCCAACCAACTATATACTACCCGTAATATATATGCATTATAATACGAGATAATTGCAGCAATACACTATAAATTATTATTCCATTTCGCCCTGATGAGTTATTTTGACACCAGAGATCATTTCTGATCGCTGTTTGACACTAATAATAATGTCATATAAATGACAATCTTGTTCTTACTTCCGCTGTAACGATTCGTGCAGAAATATTCTACTGATATTAATAAAAATAATAATTTTGATCCCGGAGTCTTAAAAATACGTTGATTGCTTTAAGTTGTTCCTTGTGGTCTAATAACTAATTGTGAACTTAATATTGCGGGCCTTTTTTTCTGGTACAAATTCTGCCATGATTCTTTACTGATGACATATGAGAAATATCCGACGATGTTGAAATTAACTATTACCTTACACAAGAACTAGAAAACCGGGCGGACGTTGTTTTGCCAGATATTTTATCTCTGAAAGGTTAGAAAGATCCCTTAGTCTATCAAGTTCAGTTCTTCAATTTCCATGTACCTTTAGTAAATGCTCATCGTGTCGGACAACGTTTGCTATGGCAGCGTTTCAATAACTAATAGTTTGGCTGGTCCCTTGGAGGGTTTTGACATCACCAACTTCTGCCGGGTCCAGCTGCAGCTCGAAACTATACTTAGTCCTTGTAAAATACTTTAATTTTTTTTTAATTATATAGTTTCTAGGTTTGTACTACAATAAAGTTTTCAAATAAATAAATAAATTATTGTATCTCTTTTTTCTGTCTTTAGGCTTGTATATTAATCACAGGTCCATAATACCTAATGTAACTGAGGTGGGGTTTACGACGCCGCCAGTAGCGATGAATTGTATTTAATTAAGGGTATTTATTTTGTGTCAAAAAATCGATTTAAGTATCACGGTTTTATTTGCAAAAAATTCGTTTTCGTGGAGAATTTATTAGAGTCATGAACATAATTGTGACGACGTCTGTTCTAATTGTTTAACATAAAAATGTGAACTAATTTGGACAATACGAAGGCTTTATCAGTAGACATCACCATTGACACTTGTAGTTTCAGTTAATGACAGCTGAGTTTGGTACTTATGTTGAGGGATACGTTGAGTTGAGGGTACCTCCTTTATTTTTATTATTCTAATATGGGTAGATTGACCTCCACCCGCTAGCAATCTCTCCGCTCTGCTTCATTTAGTGTAGGATAAAAGATATAAGTACATATTTCACGAAATAATATTCATCTGATATCTGAAAGTTTCGACTCCGTTACCCCCTTTATTAATAGTGCAATAAATTAAAGTAGGTACGATATTTTTGCTCAAATGTAATATTATAATTAGAGTTGTAACAAGTTATTGGAAATGAATAAAAAAAAATTTTTCGACTATATGGATATCGTATTTGATATTAATCCACGCGGCCGAAGTTCGTCCCAGCTTAAGCGCAATTTCTCTATGTGTCCCTTATCTGCTTAAGTTTATACATGTCTGTAACCTATAGTACAGTAACTATTGGAAAATCTTCAGATATAAAACTTAAATTTTAATCCCTACTAACATTTTTAACAGTAAATTTTTGAACAAGTCTATAAAATGAAACTATGTTTCAAATTTTAATTTTAATTTATTTTTGTTTTAGCAAGATGATTACGGAAGGGGCGATTTGCTGGAGAACAATTTATACCCTGACAATGAGTGGTACACAGCGACTTTACCTGAAGGACCTCCACCAGACAATACATATAATTATCCTGGATCAGAATATCCAGTGCCGCCGCAGTCAGTGAACACGTATGGTTTGTCTGGAAGACGTGATTATCCATTACCGCCATCATCAGCGAGCACCTACGGACAGTCTGGAAGGTCAGAGTATCAACCACCATCATCGTCATCGAATACTTACGGTTTGTCTAGAAGACCAGATTATCCGGGACCATCACCATCATCGAACCCCTACGATTTGTCTGGAAGAGTAGATTATCAATCGCCACCTCAGTCGGAATCCACATATGGCCAGTCTGGAAGATCCGACTATCCAGTGCCACCTCAATCAGTGAACACGTTTGGTGAAGGTGAAAGGCCAGATTATTCCGCGTCATCATCACGCGCACAATTAGGCTCCAATTTTGCTGTTTATGATCCTGTTACAAGACAGCGTACCGTAGCATTTGATCGCAACTGTACTGCAACAGGTTGCTGTGTTCCTAAATGTTTTGCGGAAAAGGGTAACAGAGTAAGTATTCTAAATGTTATAAAAATATAAATAACGTAAAAAGGTTTCACAAATTACTTATTATCCTAAACATCTTTCAGGGCTTTCCAGGAATGATTGGACTGCAGGGCCCTCCAGGATTACCTGGTCACGAAGGAGTCGAAGGGCCTCAAGGACCGAAGGGTCAAAAGGGACAAATGGGTCCCACAGGCCCTCGTGGACCAAGAGGTGATATAGGTAAACCAGGTACTCAAGGATTTACTGGCATAGCTGGACCTCCAGGACTACAAGGTGAACAAGGTCCGCCAGGTATTCCGGGCCGAGATGGTTGTAATGGAACTGATGTAAGTATATTTTTATCCATAACAAACAAAGTAAAATAAACTTAATATTATTAATATAGTACTTAGTTATAACCCTTCTGCGAAGCCCTAGCACATATGATTGAGTTTCCAACACCGAATTATTTCTACAAAAATATTTTCTACTCCGGTGTCCGGATTTAAAAATATATTATATAATTTAGGTTCAGCCATCACTGATTTAGTTTAGAGAGTTAATATTCATCCTTTTTGCATTACATAATAGTTCATAAATTTTTCTGTTTAACATAGTTTTAAGTAGGCTGGAATGTATTCCAATAATAAAAATGCTTTTATTATCGCAATAATTATATTGAATCAACACATTTTACAATATTATTTATATTCTTTTATAGGGCGAGCAGGGTCCACAAGGTTTACCAGGATTACAAGGCCCTCGGGGCTATCCTGGTCCGAAAGGAGAAAGAGGTGATAAGGGCGAAGCAGCTCAAACTGGGCGCTATCCCGACAAAGGTCAAAAGGGAGAAGCTGGAATTGACGGAATGCAAGGGCCACCCGGCTCTGTAGGACCCACAGGATTACCTGGTTTAAAAGGACCCAAAGGAAACACAGGCCCACTGGTAATTTGCTATCTTATAAGTTTCTATTTTATATTCGTTAAATTTCGAGTATTGCAAAATTGGGTTGCCTCGGTGTAAATTGGTAATAGCTATTTCGATGATAAATAGTTGATTTTATTTTCATTAATAAGTAGTTATTTTGTATCCTTTAATCAAATTAATTTGTCTAAAATCTTCACTAGGGCCCCCAAGGACCAAAAGGTGATAAAGGTTCTAAAGGGTCAAAAGGCGCGGCCATACCAGGTGACAAAGGAGATAGAGGAGATCGAGGAGACAGGGGTCCTGGTTGTCCTCCTGTAGTTTTACCTGGAGCATGGGAAAATAAAGGTGCAATTGAAGGGGCGAAAGGAGACATGGGACCAAAAGGAGATAAAGGAGAACCTGGAAGAGAAGGTGAAAAAGGTGATCTAGGAATTACTGGTGAACCGGGTTTACCAGGTCAAATGGGCATAAAGGGGGAAAAGGGCATCAGAGGAAATCCTGGAATAAGGGTATGTATACCTAAATATATTAATTAAAAACAAAACAATAATTATTAAATTAACCAAATAAAATATTGTTTATAGGGCAGAGAAGGTATGATTGGACCTGCTGGGCCGTTGGGACAAAAGGGTGACAGAGGAAATGATGGCTTATCAGGACTTCCTGGAAGACCAGGAAGCAAAGGAGAACCTGGAAAGGATGGAACTCCTGGCTTAAGAGGACTTAGAGGATTACCGGGGGCACCTGGAGGTAGGGCAGGGGCACGAGGGCCACCAGGGCCTATTGGCCCTCGAGGTTACACTGGACCTCCTGGAGTACCAGGTACAGAAGGAAAACCTGGTGAAAGGGGACAACCAGGACCAATGGGTGCTCCGGGTGGTTTAGGTGAACCGGGTACTCCTGGTAGAGAAGGCCCATCTGGTGTTAAGGGAGAAAAAGGCGAGCCTGGATTGGAGGGTCCTAAAGGTGAAATAGGCCCTAGAGGCTACGATGGACCAGTCGGTCCTCAAGGTCCTAGAGGACTTAAAGGTGAAGATGGACTTACAATACCGGTAAGCTTATTTTTCCCACAAGTTAAGATGGAGTATGTAGAGACATCTAAATTCTAAGTATACATTATAGATATTTCAAATAATTAATTTCAAATACTTTATTAATATTTATGAGATCCTTAATTTCTTAATTTACGGTATAATGTTTCAGGGTGAGACTGGTATTCCGGGTCTAGCTGGTCGACCAGGTGATAAAGGTCAAAAGGGAGAAAGGGGATATACAGGCTTACGAGGAACACCTGGTAATTCCACAAGAGGTACTCCTGGCATGCCAGGTGAAATGGGACCTAGAGGTGAAAAGGGTGACAAAGGTAGTCCAGGTAACGACGGCGTACCAGGCTCGCCAGGGTCAAAAGGTGACATTGGTGGACAATGTAATGAATGCAGGCCTGGCGGTCCAGGGCGGAAAGGAGAGAAAGGTGCTGATGGCAGAGATGGCGAACGTGGAGAACGAGGTCCAACGGGTCCAACTGGAGTTCCTGGTGAACGAGGTGCTGATGGACTAAACGGAATACCTGGATCACCCGGTGCTCCAGTAAGTACATACTTATAAATTTTGAAATCAAAAACAAAAGTACCGCGTGTCTATACATTGCTTATTTTGGGCAGTGAGAATGAGACGTAGAAGAATTATTTTACATAAAGATGTCTTTCTGTTTTTCTGTATGTTTCAACGCGCATCACGCCAAAACTACGGATTTGTCTGACCAATAAACAAAATATTATATGTATAGTATAATTGAATTTGGGTAAAACTTAGACTTAAACTTATTGCAATCGGTTCACTGTATCAAAAGTCATAAAAATTTGCGTGGAACTCGTCTAAGTAATAATATTGTAACACGTTGTCAGTGGTATTAAAAAAATGCACAAAGTCATTTGTTTTTATAGTTTTAGTAGTTCTCATTTATTTTATATGTTTGAGTATCACGCGGAAACCACTGGACCAAATTAGATGAAAATGGCTAATGGGGGATTTTTATAAGATACATTTTATCCCAAAAACTACTGGGGTCTCGCGTGATAATAGGTAAAAAGTTCACAAGCGAAGCGAAGCGAGAGCGGGCCTACTAATGTCAAATATATCACAAATAAAGGTGATTTATTTCAGACTAGGAAGATTTATTTCCTTATATCCAAAAAAATACTTGTACCTGCTTATAAAAAACAATAATATGTGTTTTATACTAAAATAGTCAGAAATAAATGATTTTCTGATCGGCAACGTGTCTATGGATCCCATTATGCAACAAAATTAATTACTGTTAATCATTAGATCAGATCATTTTCGCACATCGAGTGTAATAGTTTTGCATATACCCGTTATTACTTTAAACACCTATTGGCATACTGGACTACCTGGCATATTTGCGACGTCATATTCTTCAGTAAAATTACTTAATACCTATTGCAACAAAATCTAGTCATTGCGATAGCAAGAATACAATTTATATTTATGTATTAAATTAAAATCAAACCTTAACAAACCTCAAGTATTAAGTGAGTACTTACCTTAAGTATTTCTTTCCGTAGGGTGAGCGTGGCGATGACGGACCTATGGGTCCTCCGGGAGAACGTGGAAAAGATGCTATGATACCACATAATTTAACTCGAGGACCTCCGGGAGAAAAAGGGTCGGTTGGAGAGAAGGGTTCACCGGGTCCCAAAGGTGAAAAGGGCCGCGAGGGACCTAAAGGTGACCGTGGTTTAAATGGTATGCCGGGACAAAAAGGCGACCAAGGTAGAATGGGGCCTCCGGGTGTTGATGGAATTCCGGGTAGTGATGGAATACCTGGTGTACCAGGTCGAAAAGGTATTTCCTTAAAAGGAGAAAAGGGTACGCCAGGTGATATTGGACTAAAGGGTGACAAGGGATTCCCTGGAAGAGACGGTCTTAAGGGCGAGGCCGGTCAATGTCCAGCCAACCTTCAAGAATTGACTAAAGGTGATAAAGGGTTTACTGGTCCACCAGGGCCTCAAGGACCACAAGGTGAAGTATTTTTTGTAAACGGGCAAGCACAATGCCTTTCACGATTTAAACGGGCTTCATATCACGATACTGAATTTATAATAATATAAAAAATATTGCAGGTGATACAGGTGACAAAGGAGACACCGGTTTACCAGGTCCAAGAGGGGATAAAGGTGACATGGGATTACCAGGAAAACCAGGAACAGTAGGACCGCGTGGGCTTATGGGTCTAAAAGGAGACAAAGGTGATTTAGGAGCGATGGGTTTCCCCGGAACTCCTGGACAGAGTGGTGCTAGAGGTTTCCCAGGATTGCCAGGACGAAAAGGGGATAAAGGAGAAATTGGACCATCTATGCCTGGACCACCAGGGCAACCGGGATCAAGAGGACTGAAGGGAGAACCGGGTTTGAGAGGTATACCAGGCACTCCTGGTAATGATGGCCCCCCGGGGCCTATTGGGTTGCAAGGAGAAAAAGGAGATAGAGGATTGACAGGACGACAAGGACTTCCTGGTCCGCAAGGACAGAAAGGTGATTCTGGGCCTGTTGGTCCACCAGGGGTACCCGGTACTCCAGGTACACCAGGAAGGCCAGGACCAAAGGGGCAACAAGGATTTCAAGGTTTAATTGGAACACCAGGTCTTATAGGATTACCTGGAAATAAAGGTGAACCTGGAGTGCAAGGACCTGAAGGACCGAAAGGATTCCCGGGACCACGCGGTCGATCCGGTCTCATTGGCACTACGGGTATCGATGGTACTCCTGGCTTAAAAGGAGATAAAGGACAAACGGGTTTCCCAGGGCAACCAGGCCAAAACGGATTGATGGGTCCGCCTGGACCAATAGGGTTACCGGGATTTAAGGGCGACCAAGGATTCCAAGGATCTCCGGGACTTCCTGGAAGAATGGGTTTACCTGGCGATAAAGGTGATCGTGGTTTCCCGGGTGATTTTGGACTGAAAGGAGAACCAGGACAGTCATCAGACAAAGGACAAAAGGGTGAAAAAGGGGTGCCAGGAACAAGGGGATTAAACGGTGTACCAGGACGGGAGGGCCCCAAAGGCGATAAAGGAGATACAGGTGCTCCAGGTTTTGGGATTCCTGGAACACCAGGTGAGAAGGGTGACGCCGGATTTCCTGGTCTAATTGGTATACCAGGGGAACGCGGAGAAAAGGGTGATAGAGGCTTTCAAGGATTACCAGGGTTAAAGGGTGATATTGGAATGCCCGGTTTCCAAGGTCAACCAGGTGTTCCTGGTTTAGATGGAGCTCCAGGATTACCAGGCGAGGTAGGACTGCCTGGATTAGAAGGAATTAAGGGCGATAAAGGTGAAATTGGTTTCTCTGGTCGAGATGGATTTGATGGCGCTAAAGGAGAAACTGGACCAGTTGGGCCTATGGGACCCCCTGGTTTACGAGGTTATCCTGGACCGAAGGGAGATACTGGACTCCCAGGAATAGCTATAAATATCAAGGGACAAAAGGGAGAAATTGGGTTACCAGGTGTGCCTGGTACTCCTGGTCCTAAAGGTGACACGGGCTTAATGGGTTTGCAAGGTCAAAAAGGAGAGAGAGGTGAACGCGGCTTTACTGGAATCAAAGGAGAAATGGGTCTAATCGGTATTACCGGAAATAAAGGTAAATACAAATTTATACAAAGTATTACTATTAAACTCTTGTTTACTTATGTTAACCTCTTTTTTCTTTATAGGAGATACCGGACCTGCTGGACCAATGGGTATATCTGGACTTACAATCAAAGGAGAAAAAGGACTTCCTGGTTTACCCGGAAAACATGGAGCCCGTGGTTTCCAAGGGTCGCCTGGAGAAAAAGGTGATATAGGCTTACCAGGCCTTCCTGGACTAATTGGGCGACCTGGTACCCCTGGAAGTCCGGGACAAAGAGGTGAAAAGGGTGATCAAGGACGTGAAGGTCTTGCTGGTCCTCCCGGTTACCAAGGCCCACAGGGATTACCTGGTCTACCTGGACTTGATGGGCCCAGAGGTGAAAAGGGCAACAAAGGTGCTACTGGTTATGGTTTACCGGGAGAAAGAGGAAACCCAGGACCACAAGGTATTCCTGGAGAGAAGGGTGCAAAGGGAGATAAAGGCGACAGGGGATTCGATGGTCTTAATGGACAACCTGGTCCCATGGGTATTCAAGGCGAAAAGGGGGATACAGGCATTCAAGGCAGGCCAGGTATAGTCGGTATCAGAGGAGATAAAGGAGATAAAGGTGAACCGGCTTCGATTGTATTTGGTCCTAAAGGGGAAGCTGGACCACCTGGTCCTCCTGGCTTTGATGGTGCGCCAGGAAGAGATGGTATGCCGGGTGCACCAGGGCTAGACGGAGCTAAGGGAGACAAGGGAGATCGTGGTTATCCTGGCATACCTGGACCACCAGGTCCAATAGGTCTTCAAGGCATACAAGGCGAACGGGGAGATAGAGGTGAAATTGGACGTACTGGATCGTCTGGTTTGCCAGGTCCCCCTGGTACACCTTGCGCTCCAATTGATTACTTAACTGGTAATCTCTTAGTTAAACACAGTCAATCAGAAATTATACCTCAATGTGAACCAGGTCACATTAAACTATGGGACGGATATTCTTTACTTTACATCGATGGAAACGAAAAAGCCCATAATCAGGACTTAGGCTATGCAGGTTCTTGTATAAGAAAATTCAGTACCATGCCATTCCTTTTCTGTGATCTTAACGATGTATGTAATTATGCAAGCCGAAACGACAGAAGTTATTGGTTATCTACTAATGAACCTATACCAATGATGCCAGTTCAAGATAAAGAAATTTCTCGATATATATCAAGATGCGTCGTTTGCGAAGTTCCTGCCAATGTTATAGCTGTGCATAGTCAAACATTAGATACACCAAGTTGCCCTCCAAGTTGGAGTATCTTATGGATTGGATACAGTTTTGTAATGGTGAGTAATTACAATACACATAACTAGTTATAGCATAATAGTTATTTAATAAAACATTATTTATCTTGCCATCGACGCACATTGCATTGCAGTTTACAAAAATAATTTATCAGTAACGTAACAATTTGACGTCATTACACAACTACCTACATGTAAAAGGTCTTATCATCTCATCTTAAAGCTCTAAAACGATAAGTCTAAAAATTATTATATTTTTAGCACACTGGCGCTGGTGGACAAGGAGGTGGTCAAGCATTAGAGAGTCCTGGCTCTTGTCTTGAAGGATTCCGATCAACACCATTCATAGAATGCAATGGAGAAGCTGGCACGTGTCATCATTTCGCTAATAAACTCAGTTTTTGGCTGACAACCATAGATGACAACAAACAGTTTTCCGCCCCTGAACGGGAAACTTTAAAAGCAGGTTGGCTATTACAGCGCGTATCCAGGTGTTCAGTTTGCATAAAAAATACTACGTAGTCTAATAACAACTTTATAGCACTCGTTCGACTTTTCATCTACATAATATGCTTGTGATTTTGCCATTTCTTTTTACATAGGTACCCTAATCGCATCGAAATTATAAAATCAATCATAATTAATGTATACATTTGTACTTAGTAAGGTTTCCATATCTACATAAACAAAATTATACATAGGTACCTATTTACTTACAATAGCATTTATTCGAGTGTCTTGATGAACCAAAGTTATTTGCAAATAAGTTTTATAATTAAGTACATTGTAATATTAGAAAGATTTATTTAAAACCTACCTACATGCACTTCATCGGCAATTTTTGCATTTTAAGTATCCATGTGATTTTTGTGTTTTTGGAGTACCTACAATATAAGTTCAGTTATAGTGTTTCTTTAGTTGATTTATAGTAATTCTCATGGACATTATAATTATTATAAGTCATCGTGTATTGTCACCTAGTCACATTAAGAACGATAATAAAAATAATAAAATAATTTGTGACTTCAAAAATAAGAATTAAATTCATTTAGCTTATTTATCCTTAAGTATATTATTGTAAATATTATGCCTTTTGAAACTTTAACTGTGCAAATTTACTACATCAATCTGCATTTAATGTACCTACCTACCTATCTACCTACCTAAATTATAATATTATGTATATTGAAATAATTACTGTTAAATAAATATTGTAAAACTAATCACATTCTTATACCTAAATGTTAACATGATAACGTATTGAAATGATCGATATCTGTATACACTGCCAAATAGATAAAATAACCTTATACTTATAGGTAGAACTTTAATGAGAATTCAATAAATATAAAAAAATAATCCACAATGTTTCATTGGTATTTCCTTCCTAGCCGTATGTATGTATGTATTTGCAGGTGGAAACAAATTGAAAGTACGAAAGTGAAATGAGCTATTTCACCAGTCGCGATAGTAGTTGGTAGTAATATTAAAAGCGCCTACATTATGATCGTTTACCTGGAGTGAGTTACTAATGATTTTGGTCAATGACACCTATTAATGATTAAAATAGCCGGTGGCCCATAAATATCTATAATAATAATAATTAAACTGTAACATTAAATATTTTTTGGAAATTTATTCTGGTAGGCTTCGTTACATAATTTTTTTTTTGGATTTATTGTCCGTTTCTATACGAGCCGGACGACAGTTTTATACGAGATTTACATTTTTGTAACTTATACATTCAACTATTTGCGAGCTGTTGCCCGCAGCTTTGCTTGCGGAAATTATAGTTTTGTGATTATATTTACCCCCTTAAGGGTTGATTTAAATCAACAACACATTTATTTGGATAAAGATTAATATTATTATGTTGGTACAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTCGATCCGGAATTTTTATCATGATTGATTTTTTATTGATTTTTGAGGCGATACGAAGTTCGCCGGGTCAGCTAGTAATATTAATATAGATTTACATTTTTGTAACTTATACATTCAACTATTATAATTTTTTTAAGTAGATAAAGATAATACCATAAATAATACATATTAATATGGAGGATAATACTTCTTCAGGCACGTTGATAGGGTGCTGTAGTGTCGGTCGTACAAATGACGAACCGCGCATTCATTTTTAAAAAAAGTCAGTGAAACAAACCGCGGAAACTACACGCCACCGCACCGCGAGAAACATAAATTATTTTCTCACTGACCTCAAAATAAATGTTTTAAAACTAAAACTACTACTACTACTAGTTAATTTTTTTTTGTCGTGCATGTTGGCGTTGCCCATTTTTTTCGCGCCTAATGTCATTCATTCCCATACATTTATTAAAGATTGCGTTATTTGCGTTTGGCGTGAGCGTTCGAAGGAAAGGGCATTTGGCTAGAGGCTCAGACGCGAAATCGTAAAATGGCGAATCCTGCAACAATTCATATAATGATTCACGTGCAATTCATATTAGCAATTCTGTTGTTTGTTAAGGTATGCTTGCTAAGCTTAGAGTAATTACTATTCACAGTGTAGACATAAAAATAACAAGCAGACCTAACATAAGGGGGAAAAAAGCGTGCCACTTGCGCACACAATAGACAAAGTAGCTATGACGGGGCCGAACTCTGTCCGTGTGAATTATGATTTAAATACGAATATACGATATCCATAAAATCAAAAAATATTTTTAATCAATTTCCAATAACGCGGTAGGATTCTAAATTACAATATTACATTTGGGTAAAAAATACTTTAATTTAATTTATTGTACTTATAATGTTATAAAGGTGTAATGGAGTCGAAACTATACATTGAACACTATTTGGAAATTTTAATTAAATGCAGTCCTAAAAATACTTATTATTATGAAATATACTTTTCCTTTTCTACCCTAAACTAAATGAAGCAGAGCGGAGAGATGCGAGGTTACAAGTTATTAATGCTGATAATATCTTCGGATAAAGTCTTCATACAATTATGAAAGAACTTCACTTACAGGTAAGTTCGTTTAATCACTAATTAGTTCTCAAGCTAAATACGGCCCAGTATAATTTGAATCCATAAAATATGTTCGAAAAGGTGCTTGCTAAGAAATTCACCGTGTCTATTGCAATTGCAAGTAAGATCAATTCATCATGTACTTACAATCTAGCGTGTACTATATAGCGACGTTAGCCTACCCGCCAAAATTGATGGCACGCGCTCTTTTTTGTTTATGTGTGAGTGAATGTAGCAACACAAAGAAAAACTGCAATAGAATGATTCACGTTCAGTGGGTGAGAAAGAGACAAAAGTTTTGTTTATTGTTGTCATTGTGGTAGTAAATATTATGTTTGGTCTGCTTGTTATTTATGGCTGTGTTACTATTGTATATTATTACTCTGTGGTAATATCATACGTTAGTCTGTGGTTATATATATTTATTTTTTATGCTTACAATACTACATCATGCTAATGTCAAATGTGGCAATGGCAACACCGTGAAGCCTATTGTCACATCGCGGGAAATCAACAAAAACTCAATTTATTTACTGAAAATCGAAACAAAAATTATAAATTAAGTGCAAATTATAAATTTTTATTGTTTTATGTACAATATATATTTTTACTATGTTCGAATGGGCTTACAGTGGTGCAAATAAAGCCGTCCCTCGCAATGTGGGCCCTGAATGTGCATATTTTTTATCTCCTCAAAGGCAATTTATTGAAACTGTTATAGTAACAACGGTTTGCATTTATATATTAGTAAGTAAAGTAATTAAATAATATATTATACGAAAATTAATTTTGTTTTACCGATAAACCTTAAATAGTTTCCCACTTATTCATTATTAGTTCATTAATTAATCGAACGAAAGTCGGTTCGATAAAATGCTTATTTATAGAAAATAACGAAATAAAAGTTTTATTGACTTTATTTTACAGGTAAAAACATACCCGAAGTTACAAATACCGAAAGATATAGACTATGTCAAGAGTGATAGAGGTGGAAAAAGATTACTTGTTATCCTCTTAGCATTGCTGTGGGGGATGGAAATTGGATTTAAGTTTGCATCACGTACTGTTATATATTTGCTTAATCCCTGTCACGTAACTACTTTAATACAGGTCAGTGTTTTAGTAATTACTCTATGTTGTAGTCAAATCATATCTTACCAAGTATACAAACAAAACTCTTTCAATAAAAATAATGTAAATAACTCATAAGTAATCAAGATAATTATTACTAAAATTTAATTTATGTGCCAAAAATTACCAAACTGGAATCCTGTTGTATGTTGGACTTCCAACAGAAAAACAGTAATGATTAAAATGTGTTTTTTATACAGATATAATTATGCATATTGTACAACTAATTTGTTAAGTGAGCAAACTAATTGAATTCATTATTCAATTATTCACATAGGTACCAATACTTAAAGGAGCCTTAAAATAGAAATACCGTAAAAAGAAATACCAAAAATTTTACATTATCCTAGTTAATCTGTAAAGTAAGCCCTCAAGCAGTGTCTCCAACATCATTATGAACTCTTGTATCATTCTAATATTTTCTCAGTATTATGTTGGATCATAACACTGACCACTACAGGTCTAATAGTTCATTTCGGAGGCATAATACATCTACATATTAATGAATACATAAAGAGTTTAACTGGAGACCTGTTTAAGAATAATGCAGACCTTTATGGGTTTGAGATTTTGCATTGAAGTTTGCGTGGAGTTGTATTTTGAAGCTATGGGACTTCAAGTCAGCTCTACCTGATCATCATACAATTTGCTGACTTACAGTTTAACCACCATGGAATACTAGTTTTCTATATTATCCTGACCTTCATATTTTACCATGATCTCTTTAGTGGAACAGCAAAACATTTACGAATTGTCTCAGTTGGTAGTAAGCCAGTTATTGAATCTAAATTCCTTGATTTTACAGATGTTTAGCAATTTTCTGAATTGACATTTAACAATTTAATTTTAATTCTTTCTTTTCAGATATACTTATTATCAGCACCTCCAAGTAAAACTGTCACTGCATTGTTTAGAATACATTTAAATTTACTAAATGGGCCACTTTTAGCATTTCTGTTTCCAGAAACTGCGTCTAGAACGGTATGATAGTAGCATTAATGTTAATTTATTTTATCGTCTGTTTAAGTGTTTAATAATTTATTTATCAGATTTTAGCAGAAGCTGCTTTGTACTGGATACAACATGGAATGATGTTCGTAATACCTTATTACTTATTAAGAATTGGTGGTATGTTATTCTGTTTTGAATTTGGTATTTTATTTCATAATAATAATAACATTGTGTTACTTAGTTTGTTTAAATTTTCAGGTGTTTATAATGTAGAGCCATTCTGGGATTTCTCTTGGTCCATCTTTAGCTACAGTTTGAATTTGTTGTATCACTTCATAATCCTACAGCCAATTGCAATAGTAAGTAGCATTATTTAATTTAGAATCTAATGTTCTAATTACTACCGTTCACTTCTCTATCTGTCTTTACCCTGACCTTGCATCATTCGTGATACTTAGACATTACAAAAGCCAAGTGTCCACTTTTGGAGGCAATATATAGCTAGATAAAACCCTAAGGTTATAATATTAATTGTTAATCCACATTTTCGTGGCAAGCAATACTTCAATATTTTTTTTCTTAACTTTACCTTTTTATGTATAATTTTATTGACTTTTAGTTGGTTGGTGTCTTAAAAATAGCCTACTCACTATTGTGCATAGCACGGGTCAATTACCTACCGCACTCAATTATGCGTAAAATCTACATTTTAACTCTCGCGTAATGAGGCGCCCATTTTCGTCAAATTTAAAAATATTTGCAGTTATTGCACTATGATGTTTACGACCATTTAACATCAGACTAATAATAACGATTTTATTATTTGTTTCAGCCAGCCCAAGTTAATCTGAACCACATGCTCTGTCCTGCTATTTTGGATCCCTTCGATGGTCCTTGGTACCGTATAGCGGCCGTAACCCACCAAGCAATTCTTTGTCCGTTGCTAGGCAAACTATTTTGCCTCATTGCAGATTTTTGCCTTACAAAGTTTCCACCGACGAAAGTTAAACCACAAATGAAAGATCATTTAATCGATGATAGATTTCTTAAAGAATATAAACTTCATCACAATGAGTAG"

## build dictionaries for each gene containing total counts of amino acid codons
## calculate total synonymous and nonsynonymous substitutions for each gene
print("Begin AACount for Danaus Plexiuppus")
danaus_plexippus_dict, danaus_plexippus_Syn, danaus_plexippus_Non = AACount(COLIVA1_danaus_plexippus, True)
print("Begin AACount for Papilio Xuthus")
papilio_xuthus_dict, papilio_xuthus_Syn, papilio_xuthus_Non = AACount(COLIVA1_papilio_xuthus, True)
print("Begin AACount for Papilio Glaucus")
papilio_glaucus_dict, papilio_glaucus_Syn, papilio_glaucus_Non = AACount(COLIVA1_papilio_glaucus, True)
print("Begin AACount for Lerema Accius")
lerema_accius_dict, lerema_accius_Syn, lerema_accius_Non = AACount(COLIVA1_lerema_accius, True)

Begin AACount for Danaus Plexiuppus
AACount Error: Remaining nucleotides at end of string - G

Begin AACount for Papilio Xuthus

Begin AACount for Papilio Glaucus
AACodon Error: No amino acid could be determined. Input = NNN
AACodon Error: No amino acid could be determined. Input = NNN
CalcSubstitutions Error: Given string is not a valid codon
AACodon Error: No amino acid could be determined. Input = NNN
AACodon Error: No amino acid could be determined. Input = NNN
CalcSubstitutions Error: Given string is not a valid codon
AACodon Error: No amino acid could be determined. Input = NNN
AACodon Error: No amino acid could be determined. Input = NNN
CalcSubstitutions Error: Given string is not a valid codon
AACodon Error: No amino acid could be determined. Input = NNN
AACodon Error: No amino acid could be determined. Input = NNN
CalcSubstitutions Error: Given string is not a valid codon
AACodon Error: No amino acid could be determined. Input = NNN
AACodon Error: No amino acid could be deter

In [64]:
## examine results
print("Danaus Plexippus:", danaus_plexippus_dict)
print("Total synonymous substitutions:", danaus_plexippus_Syn, "Total nonsynonymous substitutions:", danaus_plexippus_Non)
print()
print("Papilio Xuthus:", papilio_xuthus_dict)
print("Total synonymous substitutions:", papilio_xuthus_Syn, "Total nonsynonymous substitutions:", papilio_xuthus_Non)
print()
print("Papilio Glaucus:", papilio_glaucus_dict)
print("Total synonymous substitutions:", papilio_glaucus_Syn, "Total nonsynonymous substitutions:", papilio_glaucus_Non)
print()
print("Lerema Accius:", lerema_accius_dict)
print("Total synonymous substitutions:", lerema_accius_Syn, "Total nonsynonymous substitutions:", lerema_accius_Non)

Danaus Plexippus: {'Methionine': 63, 'Lysine': 232, 'Leucine': 247, 'Alanine': 91, 'Glycine': 293, 'Valine': 178, 'Isoleucine': 213, 'Arginine': 197, 'Serine': 150, 'Phenylalanine': 162, 'Glutamine': 104, 'Aspartic acid': 106, 'Glutamic acid': 132, 'Asparagine': 116, 'Tryptophan': 58, 'Threonine': 153, 'Cysteine': 46, 'Proline': 142, 'Histidine': 49, 'Tyrosine': 109, 'Translation STOP': 158}
Total synonymous substitutions: 5808 Total nonsynonymous substitutions: 21183

Papilio Xuthus: {'Methionine': 77, 'Threonine': 166, 'Valine': 258, 'Lysine': 261, 'Arginine': 273, 'Tryptophan': 56, 'Serine': 216, 'Phenylalanine': 208, 'Alanine': 188, 'Glycine': 198, 'Proline': 106, 'Aspartic acid': 121, 'Tyrosine': 173, 'Leucine': 354, 'Glutamine': 141, 'Isoleucine': 254, 'Histidine': 80, 'Asparagine': 155, 'Cysteine': 93, 'Translation STOP': 174, 'Glutamic acid': 166}
Total synonymous substitutions: 7132 Total nonsynonymous substitutions: 26330

Papilio Glaucus: {'Methionine': 86, 'Valine': 245, 'I