In [136]:
import copy 
import re 
import csv

## 2) What 21 codon substitutions should be made at this site? The goal here is to maximize those with Hamming/edit distance >1 and to annotate those with Hamming/edit distance = 1 to ensure we also add a silent mutation elsewhere. Also, if these codon substitutions do not alter the seed or PAM, it should be noted that we will also want to introduce a silent mutation in the seed/PAM

In [137]:
def hamming_distance(s1, s2):
    #Return the Hamming distance between equal-length sequences
    if len(s1) != len(s2):
        raise ValueError("Undefined for sequences of unequal length")
    return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2))

def reverse_complement(dna):
    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'a' : 't', 't' : 'a', 'c' : 'g', 'g' : 'c'}
    return ''.join([complement[base] for base in dna[::-1]])


def find_str(s, char):
    index = 0
    if char in s:
        c = char[0]
        for ch in s:
            if ch == c:
                if s[index:index+len(char)] == char:
                    return index
            index += 1
    return -1

codon_to_aa = { 
    'ATA':'I', 'ATC':'I', 'ATT':'I', 'ATG':'M', 
    'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T', 
    'AAC':'N', 'AAT':'N', 'AAA':'K', 'AAG':'K', 
    'AGC':'S', 'AGT':'S', 'AGA':'R', 'AGG':'R',                  
    'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L', 
    'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P', 
    'CAC':'H', 'CAT':'H', 'CAA':'Q', 'CAG':'Q', 
    'CGA':'R', 'CGC':'R', 'CGG':'R', 'CGT':'R', 
    'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V', 
    'GCA':'A', 'GCC':'A', 'GCG':'A', 'GCT':'A', 
    'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E', 
    'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G', 
    'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S', 
    'TTC':'F', 'TTT':'F', 'TTA':'L', 'TTG':'L', 
    'TAC':'Y', 'TAT':'Y', 'TAA':'Z', 'TAG':'Z', 
    'TGC':'C', 'TGT':'C', 'TGA':'Z', 'TGG':'W', 
}

all_codone = list(codon_to_aa.keys())

aa_to_codons ={'I':['ATA', 'ATC', 'ATT'], 
               'M':['ATG'], 
               'T':['ACA', 'ACC', 'ACG', 'ACT'],
               'N':['AAC', 'AAT'],
               'K':['AAA', 'AAG'], 
               'S':[ 'AGC', 'AGT', 'TCA', 'TCC', 'TCG', 'TCT'],
               'R':['AGA', 'AGG','CGA', 'CGC', 'CGG', 'CGT'],
               'L':['CTA', 'CTC', 'CTG', 'CTT','TTA', 'TTG'],
               'P':['CCA', 'CCC', 'CCG', 'CCT'],
               'H':['CAC', 'CAT'],
               'V':['GTA', 'GTC', 'GTG', 'GTT'],
               'A':['GCA', 'GCC', 'GCG', 'GCT'],
               'D':['GAC', 'GAT'],
               'E':['GAA', 'GAG'],
               'G':['GGA', 'GGC', 'GGG', 'GGT'],
               'F':['TTC', 'TTT', 'TTA', 'TTG'],
               'L':['TTA', 'TTG'],
               'Y':['TAC', 'TAT'],
               'Z':['TAA', 'TAG','TGA'],
               'W':['TGG'],
               'Q' : ['CAA', 'CAG'],
               'C' : ['TGC', 'TGT']
              }

codon_freq = {'TTT': 17.08, 'TCT': 16.88, 'TAT': 12.06, 'TGT': 10.48, 'TTC': 17.53, 
              'TCC': 17.35, 'TAC': 13.46, 'TGC': 10.9, 'TTA': 8.78, 'TCA': 14.15, 
              'TAA': 0.43, 'TGA': 0.77, 'TTG': 13.42, 'TCG': 4.08, 'TAG': 0.34, 
              'TGG': 11.67, 'CTT': 14.11, 'CCT': 19.07, 'CAT': 11.87, 'CGT': 4.56, 
              'CTC': 17.85, 'CCC': 18.98, 'CAC': 14.65, 'CGC': 8.78, 'CTA': 7.49, 
              'CCA': 18.67, 'CAA': 14.15, 'CGA': 6.39, 'CTG': 36.24, 'CCG': 6.19, 
              'CAG': 35.42, 'CGG': 10.7, 'ATT': 16.5, 'ACT': 14.26, 'AAT': 18.4, 
              'AGT': 13.98, 'ATC': 18.67, 'ACC': 17.8, 'AAC': 18.22, 'AGC': 19.7, 
              'ATA': 8.14, 'ACA': 16.54, 'AAA': 27.65, 'AGA': 13.32, 'ATG': 21.39, 
              'ACG': 5.64, 'AAG': 31.85, 'AGG': 12.19, 'GTT': 11.72, 'GCT': 18.85, 
              'GAT': 23.99, 'GGT': 10.77, 'GTC': 13.44, 'GCC': 25.84, 'GAC': 24.29, 
              'GGC': 19.83, 'GTA': 7.67, 'GCA': 17.05, 'GAA': 33.72, 'GGA': 17.07, 
              'GTG': 25.9, 'GCG': 6.03, 'GAG': 39.75, 'GGG': 15.33}


def generate_codon_substitutions(wild_type):
    to_return = {}
    wt_amino = codon_to_aa[wild_type]
    for amino, codons in aa_to_codons.items():
        max_dist = -1
        arg_max = None
        for codon in codons:
            dist = hamming_distance(codon, wild_type)
            if dist > max_dist:
                max_dist = dist
                arg_max = codon
            elif dist == max_dist and codon_freq[codon] > codon_freq[arg_max]:
                arg_max = codon
        to_return.update({amino : arg_max})
    return to_return 


In [138]:
# sequence_pos = "gaacagctttgaggtgcgtgtttgtgccTGTCCTGGGAGAGACCGGCGCACAGAGGAAGAGAATCTCCGCAAGAAAGGGGAGCCTCACCACGAGCTGCCCCCAGGGAGCACTAAGCGAggtaagcaagcaggacaagaagcggtggag" 
# #sequence_neg = "ctgggggcagctcgtggtgaggctcccctttcttGCGGAGATTCTCTTCCTCTGTGCGCCGGTCTCTCCCAGGACAGGCACAAACACGCACCTCAAAGCTGTTCCGTCCCAGTAGATTACCACTactcagcccaggaaaagagaagcaagaggcagta"
# target_sequence_pos = "TGTCCTGGGAGAGACCGGCGCACAGAGGAAGAGAATCTCCGCAAGAAAGGGGAGCCTCACCACGAGCTGCCCCCAGGGAGCACTAAGCGA"

day = "Sat"
time = "205"

#TP53
# sequence_pos = "tatcctgagtagtggtaatctactgggacggaacagctttgaggtgCGTGTTTGTGCCTGTCCTGGGAGAGACCGGCGCACAGAGGAAGAGAATCTCCGCAAGAAAGGGGAGCCTCACCACGAGCTGCCCCCAGGGagcactaagcgaggtaagcaagcaggacaagaagcggtggaggagacc"
# target_sequence_pos = "CGTGTTTGTGCCTGTCCTGGGAGAGACCGGCGCACAGAGGAAGAGAATCTCCGCAAGAAAGGGGAGCCTCACCACGAGCTGCCCCCAGGG"
# tm_file = "tp53_tm_values.txt"
# output_file = "tp53_630_" + day + time + "_condensed"

sequence_pos = "tcctatcctgagtagtggtaatctactgggacggaacagctttgaggtgCGTGTTTGTGCCTGTCCTGGGAGAGACCGGCGCACAGAGGAAGAGAATCTCCGCAAGAAAGGGGAGCCTCACCACGAGCTGCCCCCAGGGAGCACTAAGcgaggtaagcaagcaggacaagaagcggtggaggagaccaagggtgca"
target_sequence_pos = "CGTGTTTGTGCCTGTCCTGGGAGAGACCGGCGCACAGAGGAAGAGAATCTCCGCAAGAAAGGGGAGCCTCACCACGAGCTGCCCCCAGGGAGCACTAAG"
tm_file = "tp53_tm_values.txt"
output_file = "tp53_" + day + time + "_condensed"

#LDLR
# sequence_pos = "cctcctcagtggccgcctctactgggttgactccaaacttcactccatcTCAAGCATCGATGTCAACGGGGGCAACCGGAAGACCATCTTGGAGGATGAAAAGAGGCTGGCCCACCCCTTCTCCTTGGCCGTCTTTGAGgtgtggcttacgtacgagatgcaagcacttaggtggcggatagacaca"
# target_sequence_pos = "TCAAGCATCGATGTCAACGGGGGCAACCGGAAGACCATCTTGGAGGATGAAAAGAGGCTGGCCCACCCCTTCTCCTTGGCCGTCTTTGAG"
# tm_file = "ldlr_tm_values.txt"
# output_file = "ldlr_630_" + day + time + "_condensed"

#seq as of 110819
#sequence_pos = "ctgggactggcatcagcacgtgacctctccttatccacttgtgtgtctaGATCTCCTCAGTGGCCGCCTCTACTGGGTTGACTCCAAACTTCACTCCATCTCAAGCATCGATGTCAACGGGGGCAACCGGAAGACCATCTTGGAGGATGAAAAGAGGCTGGCCCACCCCTTCTCCTTGGCCGTCTTTGAGgtgtggcttacgtacgagatgcaagcacttaggtggcggatagacaca"
#target_sequence_pos = "GATCTCCTCAGTGGCCGCCTCTACTGGGTTGACTCCAAACTTCACTCCATCTCAAGCATCGATGTCAACGGGGGCAACCGGAAGACCATCTTGGAGGATGAAAAGAGGCTGGCCCACCCCTTCTCCTTGGCCGTCTTTGAG"
#tm_file = "ldlr_tm_values.txt"
#output_file = "ldlr_" + day + time + "_condensed"


# #NF2_james out

#sequence_pos = "CCagaataaaaagggcacagagctgctgcttggagtggatgccctggggCTTCACATTTATGACCCTGAGAACAGACTGACCCCCAAGATCTCCTTCCCGTGGAATGAAATCCGAAACATCTCGTACAGTGACAAGGAGgtaggacatgtgtgtactgcagatgggtccagcagatctttccctgtc"
#target_sequence_pos = "CTTCACATTTATGACCCTGAGAACAGACTGACCCCCAAGATCTCCTTCCCGTGGAATGAAATCCGAAACATCTCGTACAGTGACAAGGAG"
#tm_file = "nf2_tm_values.txt"
#output_file = "nf2_630_" + day + time + "_condensed"

#seq as of 081119
#sequence_pos = "gctgttcttattggatccacagaataaaaagggcacagagctgctgcttGGAGTGGATGCCCTGGGGCTTCACATTTATGACCCTGAGAACAGACTGACCCCCAAGATCTCCTTCCCGTGGAATGAAATCCGAAACATCTCGTACAGTGACAAGGAGgtaggacatgtgtgtactgcagatgggtccagcagatctttccctgtc"
#target_sequence_pos = "GGAGTGGATGCCCTGGGGCTTCACATTTATGACCCTGAGAACAGACTGACCCCCAAGATCTCCTTCCCGTGGAATGAAATCCGAAACATCTCGTACAGTGACAAGGAG"
#tm_file = "nf2_tm_values.txt"
#output_file = "nf2_" + day + time + "_condensed"


#target_sequence_pos = "CTTCACATTTATGACCCTGAGAACAGACTGACCCCCAAGATCTCCTTCCCGTGGAATGAAATCCGAAACATCTCGTACAGTGACAAGGAG"
print(find_str(sequence_pos, target_sequence_pos))





49


## 1) What gRNA should be used to target a particular codon for saturation mutagenesis? This can be determined based on which gRNA puts the codon closest to the nick site in the correct direction.

In [139]:
#returns leftmost G index in GG pair
def find_highest_acceptable_GG(sequence, position):
    #position of the identifier
    highest_index = 0 
    for i in range(position + 5):
        #print(i, sequence[i] + sequence[i+1])
        if (sequence[i] + sequence[i+1]) in ["GG", "gg", "gG", "Gg"]:
            highest_index = i
    return highest_index

#returns position of the left C in a CC pair 
def find_lowest_acceptable_CC(sequence, position):
    lowest_index = float('inf')
    for i in range(len(sequence) - 1 , position - 3 , -1):
        if (sequence[i - 1] + sequence[i]) in ["CC", "cc", "Cc", "cC"]:
            lowest_index = i 
    return lowest_index -1


print(find_lowest_acceptable_CC(sequence_pos, 100))
# print(find_highest_acceptable_GG(sequence_pos, 40))
# print(sequence_pos[40], sequence_pos[41], sequence_pos[42], sequence_pos[43], sequence_pos[44], sequence_pos[45])


99


In [140]:
def shift_position_dictionary(shift_size, dictionary):
    new_dict = copy.deepcopy(dictionary)
    for k,v in new_dict.items():
        new_list = list(map(lambda x: x + shift_size, v))
        new_dict.update({k : new_list})
    return new_dict

In [141]:
codons_positions = {}

for i in range(0,len(target_sequence_pos),3):
    codon = target_sequence_pos[i:i+3]
    if codon in codons_positions.keys():
        codons_positions.update({codon : codons_positions[codon] + [i]})
    else:
        codons_positions.update({codon : [i]})
        
print(codons_positions)

{'CGT': [0], 'GTT': [3], 'TGT': [6, 12], 'GCC': [9], 'CCT': [15, 66], 'GGG': [18, 60, 87], 'AGA': [21], 'GAC': [24], 'CGG': [27], 'CGC': [30, 51], 'ACA': [33], 'GAG': [36, 42, 63, 75], 'GAA': [39], 'AAT': [45], 'CTC': [48], 'AAG': [54, 96], 'AAA': [57], 'CAC': [69, 72], 'CTG': [78], 'CCC': [81], 'CCA': [84], 'AGC': [90], 'ACT': [93]}


In [142]:
def generate_pam_sites_for_codons(positions_dict, sequence):
    data_table = [] 
    data_table.append(["Codon", "Codon Position", "Pam Site", "Pam Site Position"])
    for k, v in positions_dict.items():
        for value in v:
            highest_GG = find_highest_acceptable_GG(sequence, value)
            lowest_CC = find_lowest_acceptable_CC(sequence, value)
            if value == 40:
                print("in the function, values comparing ", highest_GG , lowest_CC , value, ((highest_GG - 1) - value), abs(lowest_CC + 1 - value))
            if abs((highest_GG - 4) - value) < abs(lowest_CC + 5 - value):
                #NGG is the pam site we want
                data_table.append([k, value, sequence_pos[highest_GG: highest_GG+2], highest_GG - 1])
            else:
                 data_table.append([k, value, sequence_pos[lowest_CC :lowest_CC +2], lowest_CC + 1])
    return data_table




pam_sites_and_codons = generate_pam_sites_for_codons(shift_position_dictionary(find_str(sequence_pos, target_sequence_pos), codons_positions), sequence_pos)
print(pam_sites_and_codons)

def generate_primers_from_pam_sites(pam_sites_and_codons, sequence):
    data_table = []
    data_table.append(["codon", "codon_position", "pam_site", "pam_position", "non_extended_sequence", "pre_corrected_sequence", "extended_sequence", "extended_seq_length", "extended_beginning_position", "extended_end_position"])
    for row in pam_sites_and_codons:
        codon = row[0]
        codon_pos = row[1]
        pam_site = row[2]
        pam_position = row[3]
        if pam_site in ["cc", "CC", "cC", "Cc"]:
            derived_codon = sequence[codon_pos : pam_position + 3]
            derived_pam_site = sequence[pam_position : pam_position + 3]
            if pam_position  <= codon_pos:
                #non_extended = sequence[pam_position - 1 : codon_pos + 3]
                non_extended = sequence[pam_position - 1 : pam_position + 5]
                pre_corrected = sequence[codon_pos - 7 : pam_position + 5]
                extended = copy.deepcopy(pre_corrected)
                print("case 1 ", codon, pam_position, codon_pos, codon, non_extended, extended )
                end_pos = codon_pos - 7
            else:
                non_extended = sequence[codon_pos: pam_position + 5 ]
                #add the seven nucleotides
                pre_corrected = sequence[codon_pos - 7: pam_position + 5 ]
                extended = copy.deepcopy(pre_corrected)
                end_pos = codon_pos - 7
                print("case 2", codon, non_extended, pre_corrected)
#             if codon == "CCA":
#                 print("CCA", pam_position - 4, end_pos, pre_corrected, extended, end_pos, codon_pos, pam_position)
#                 print(sequence[end_pos - 5], sequence[end_pos - 4], sequence[end_pos - 3], sequence[end_pos - 2], sequence[end_pos - 1], sequence[end_pos], sequence[end_pos + 1], sequence[end_pos + 2])
            while extended[0] == 'C' or extended[0] == 'c':
                end_pos = end_pos - 1
                extended = sequence[end_pos] + extended
            #print("case 2", codon, codon_pos, pam_site, pam_position, non_extended, pre_corrected, extended, len(extended), find_str(sequence_pos, extended), find_str(sequence_pos, extended) + len(extended) - 1)
                #print(end_pos, sequence[end_pos], extended )
            #print(extended)
#             if codon == "CCA":
#                 print("CCA", pre_corrected, extended, end_pos, codon_pos, pam_position)
            data_table.append([codon, codon_pos, pam_site, pam_position, non_extended, pre_corrected, extended, len(extended), find_str(sequence_pos, extended), find_str(sequence_pos, extended) + len(extended) - 1])
            #print(codon, codon_pos, pam_site, pam_position, non_extended, pre_corrected, extended, len(extended), find_str(sequence_pos, extended), find_str(sequence_pos, extended) + len(extended) - 1)
        elif pam_site in ["gg", "GG", "gG", "Gg"]:
            if pam_position >= codon_pos:
                non_extended = sequence[pam_position - 3  : pam_position + 3]
                pre_corrected = sequence[pam_position - 3  : codon_pos + 10]
                extended = copy.deepcopy(pre_corrected)
                print("case 3" , codon, codon_pos, non_extended, pre_corrected)
            else:
                non_extended = sequence[pam_position - 3: codon_pos + 3]
                #add the 7 nucleotides 
                pre_corrected = sequence[pam_position - 3: codon_pos + 10]
                extended = copy.deepcopy(pre_corrected)
                print("case 4",  codon, codon_pos, non_extended, pre_corrected)
            while extended[len(extended) - 1] == "G" or extended[len(extended) -1] == "g":
                extended = extended + sequence[pam_position - 3 + len(extended)]
            #print("case 2" , pam_position, codon_pos, codon, non_extended, pre_corrected, extended)
            #print(codon, codon_pos, pam_site, pam_position, non_extended, pre_corrected, extended, len(extended), find_str(sequence_pos, extended), find_str(sequence_pos, extended) + len(extended) - 1)
            data_table.append([codon, codon_pos, pam_site, pam_position, non_extended,pre_corrected, extended, len(extended), find_str(sequence_pos, extended), find_str(sequence_pos, extended) + len(extended) - 1])
    return data_table
            
x = generate_primers_from_pam_sites(pam_sites_and_codons, sequence_pos)
# for row in x:
#     print(row)


#sequence_pos = "gaacagctttgaggtgcgtgtttgtgccTGTCCTGGGAGAGACCGGCGCACAGAGGAAGAGAATCTCCGCAAGAAAGGGGAGCCTCACCACGAGCTGCCCCCAGGGAGCACTAAGCGAggtaagcaagcaggacaagaagcggtggag" 

old = [['TGT', 28, 'cc', 27], 
       ['CCT', 31, 'CC', 32], 
       ['CCT', 82, 'CC', 83], 
       ['GGG', 34, 'GG', 34], 
       ['GGG', 76, 'GG', 77], 
       ['GGG', 103, 'GG', 103], 
       ['AGA', 37, 'GG', 34], 
       ['GAC', 40, 'CC', 43], 
       ['CGG', 43, 'CC', 43], 
       ['CGC', 46, 'GG', 43], 
       ['CGC', 67, 'CC', 67], 
       ['ACA', 49, 'GG', 43], 
       ['GAG', 52, 'GG', 53], 
       ['GAG', 58, 'GG', 53], 
       ['GAG', 79, 'GG', 77], 
       ['GAG', 91, 'CC', 98], 
       ['GAA', 55, 'GG', 53], 
       ['AAT', 61, 'CC', 67], 
       ['CTC', 64, 'CC', 67], 
       ['AAG', 70, 'CC', 83], 
       ['AAG', 112, 'GG', 103], 
       ['AAA', 73, 'GG', 76], 
       ['CAC', 85, 'CC', 83], 
       ['CAC', 88, 'CC', 88], 
       ['CTG', 94, 'CC', 98], 
       ['CCC', 97, 'CC', 98], 
       ['CCA', 100, 'CC', 98], 
       ['AGC', 106, 'GG', 103], 
       ['ACT', 109, 'GG', 103], 
       ['CGA', 115, 'gg', 117]]


#sequence_pos = "gaacagctttgaggtgcgtgtttgtgccTGTCCTGGGAGAGACCGGCGCACAGAGGAAGAGAATCTCCGCAAGAAAGGGGAGCCTCACCACGAGCTGCCCCCAGGGAGCACTAAGCGAggtaagcaagcaggacaagaagcggtggag" 



[['Codon', 'Codon Position', 'Pam Site', 'Pam Site Position'], ['CGT', 49, 'gg', 44], ['GTT', 52, 'gg', 44], ['TGT', 55, 'CC', 60], ['TGT', 61, 'CC', 60], ['GCC', 58, 'CC', 60], ['CCT', 64, 'GG', 67], ['CCT', 115, 'CC', 116], ['GGG', 67, 'CC', 65], ['GGG', 109, 'GG', 110], ['GGG', 136, 'CC', 134], ['AGA', 70, 'GG', 67], ['GAC', 73, 'GG', 76], ['CGG', 76, 'GG', 76], ['CGC', 79, 'GG', 76], ['CGC', 100, 'CC', 100], ['ACA', 82, 'GG', 76], ['GAG', 85, 'GG', 86], ['GAG', 91, 'GG', 86], ['GAG', 112, 'GG', 110], ['GAG', 124, 'CC', 131], ['GAA', 88, 'GG', 86], ['AAT', 94, 'CC', 100], ['CTC', 97, 'CC', 100], ['AAG', 103, 'CC', 116], ['AAG', 145, 'GG', 136], ['AAA', 106, 'GG', 109], ['CAC', 118, 'CC', 116], ['CAC', 121, 'CC', 121], ['CTG', 127, 'CC', 131], ['CCC', 130, 'CC', 131], ['CCA', 133, 'GG', 136], ['AGC', 139, 'GG', 136], ['ACT', 142, 'GG', 136]]
case 4 CGT 49 ttgaggtgCGT ttgaggtgCGTGTTTGTG
case 4 GTT 52 ttgaggtgCGTGTT ttgaggtgCGTGTTTGTGCCT
case 2 TGT TGTGCCTGTC gCGTGTTTGTGCCTGTC
case 1  

## Task 3) Generating all possible subs with all possible 10-15 nt primers, later to be filtered as we get the TM values for the extended primers

In [143]:
def generate_all_sequences_from_primer_info(amino_swaps, primer_info, sequence):
    table = [] 
    table.append(primer_info[0] + ["targeted_wt_codon", "subbed_codon", "length of addition", "addition_sequence", "altered_codon_seq", "altered_codon_seq_with_extension"])
    for row in primer_info[1:]:
        codon, codon_pos, pam_identifier, pam_position, extended_seq, extended_length, extended_start, extended_end = row[0], row[1], row[2], row[3], row[6], row[7], row[8], row[9]
        subs_dict = generate_codon_substitutions(codon)
        if pam_identifier in ['GG', 'gg', 'Gg', 'Gg']:
            for i in [10, 11, 12, 13, 14, 15]:
                #print(extended_seq, sequence[extended_start - i : extended_start])
                additional_nucleotides =  sequence[extended_start - i : extended_start]
#                 if pam_position >= codon_pos:
#                     print(codon, pam_position, codon_pos, extended_seq, extended_length, extended_start, extended_end, additional_nucleotides)
                for k,v in subs_dict.items():
                    altered_codon_seq = extended_seq[:codon_pos - extended_start] + v + extended_seq[codon_pos - extended_start + 3:]
                    #total_new_seq = additional_nucleotides + altered_codon_seq 
                    total_new_seq = additional_nucleotides + altered_codon_seq
                    print("GG", codon, codon_pos, additional_nucleotides, altered_codon_seq, total_new_seq)
                    
                    table.append(row + [codon, v, i, additional_nucleotides, altered_codon_seq, total_new_seq])
        elif pam_identifier in ["cc", "CC", "Cc", "cC"]:
            for i in [10, 11, 12, 13, 14, 15]:
                additional_nucleotides = sequence[extended_end + 1: extended_end + i + 1]
                #print(codon, pam_position, codon_pos, extended_seq, additional_nucleotides)
#                 if codon_pos in [31, 40]:
#                     print(extended_seq, additional_nucleotides)
                for k,v in subs_dict.items():
                    altered_codon_seq = extended_seq[:codon_pos - extended_start] + v + extended_seq[codon_pos - extended_start + 3:]
                    total_new_seq =  altered_codon_seq 
                    print("CC", codon_pos, altered_codon_seq, additional_nucleotides, total_new_seq)
                    #total_new_seq =  additional_nucleotides + altered_codon_seq
                    #print(codon, v, i, additional_nucleotides, altered_codon_seq, total_new_seq)
                    table.append(row + [codon, v, i, additional_nucleotides, altered_codon_seq, total_new_seq])
    return table

#sequence_pos = "gaacagctttgaggtgcgtgtttgtgccTGTCCTGGGAGAGACCGGCGCACAGAGGAAGAGAATCTCCGCAAGAAAGGGGAGCCTCACCACGAGCTGCCCCCAGGGAGCACTAAGCGAggtaagcaagcaggacaagaagcggtggag" 


In [144]:
all_sequences = generate_all_sequences_from_primer_info(codons_positions, x, sequence_pos)

GG CGT 49 cggaacagct ttgaggtgATCGTTTGTGC cggaacagctttgaggtgATCGTTTGTGC
GG CGT 49 cggaacagct ttgaggtgATGGTTTGTGC cggaacagctttgaggtgATGGTTTGTGC
GG CGT 49 cggaacagct ttgaggtgACCGTTTGTGC cggaacagctttgaggtgACCGTTTGTGC
GG CGT 49 cggaacagct ttgaggtgAACGTTTGTGC cggaacagctttgaggtgAACGTTTGTGC
GG CGT 49 cggaacagct ttgaggtgAAGGTTTGTGC cggaacagctttgaggtgAAGGTTTGTGC
GG CGT 49 cggaacagct ttgaggtgTCCGTTTGTGC cggaacagctttgaggtgTCCGTTTGTGC
GG CGT 49 cggaacagct ttgaggtgAGAGTTTGTGC cggaacagctttgaggtgAGAGTTTGTGC
GG CGT 49 cggaacagct ttgaggtgTTGGTTTGTGC cggaacagctttgaggtgTTGGTTTGTGC
GG CGT 49 cggaacagct ttgaggtgCCCGTTTGTGC cggaacagctttgaggtgCCCGTTTGTGC
GG CGT 49 cggaacagct ttgaggtgCACGTTTGTGC cggaacagctttgaggtgCACGTTTGTGC
GG CGT 49 cggaacagct ttgaggtgGTGGTTTGTGC cggaacagctttgaggtgGTGGTTTGTGC
GG CGT 49 cggaacagct ttgaggtgGCCGTTTGTGC cggaacagctttgaggtgGCCGTTTGTGC
GG CGT 49 cggaacagct ttgaggtgGACGTTTGTGC cggaacagctttgaggtgGACGTTTGTGC
GG CGT 49 cggaacagct ttgaggtgGAGGTTTGTGC cggaacagctttgaggtgGAGGTTTGTGC
GG CGT

GG GTT 52 tgggacggaacagct ttgaggtgCGTAGCTGTGCCT tgggacggaacagctttgaggtgCGTAGCTGTGCCT
GG GTT 52 tgggacggaacagct ttgaggtgCGTAGATGTGCCT tgggacggaacagctttgaggtgCGTAGATGTGCCT
GG GTT 52 tgggacggaacagct ttgaggtgCGTTTGTGTGCCT tgggacggaacagctttgaggtgCGTTTGTGTGCCT
GG GTT 52 tgggacggaacagct ttgaggtgCGTCCCTGTGCCT tgggacggaacagctttgaggtgCGTCCCTGTGCCT
GG GTT 52 tgggacggaacagct ttgaggtgCGTCACTGTGCCT tgggacggaacagctttgaggtgCGTCACTGTGCCT
GG GTT 52 tgggacggaacagct ttgaggtgCGTGTGTGTGCCT tgggacggaacagctttgaggtgCGTGTGTGTGCCT
GG GTT 52 tgggacggaacagct ttgaggtgCGTGCCTGTGCCT tgggacggaacagctttgaggtgCGTGCCTGTGCCT
GG GTT 52 tgggacggaacagct ttgaggtgCGTGACTGTGCCT tgggacggaacagctttgaggtgCGTGACTGTGCCT
GG GTT 52 tgggacggaacagct ttgaggtgCGTGAGTGTGCCT tgggacggaacagctttgaggtgCGTGAGTGTGCCT
GG GTT 52 tgggacggaacagct ttgaggtgCGTGGCTGTGCCT tgggacggaacagctttgaggtgCGTGGCTGTGCCT
GG GTT 52 tgggacggaacagct ttgaggtgCGTTTCTGTGCCT tgggacggaacagctttgaggtgCGTTTCTGTGCCT
GG GTT 52 tgggacggaacagct ttgaggtgCGTTACTGTGCCT tgggacggaacagcttt

GG CCT 64 CGTGTTTGTGCCTGT ACCGGGAGAGA CGTGTTTGTGCCTGTACCGGGAGAGA
GG CCT 64 CGTGTTTGTGCCTGT AACGGGAGAGA CGTGTTTGTGCCTGTAACGGGAGAGA
GG CCT 64 CGTGTTTGTGCCTGT AAGGGGAGAGA CGTGTTTGTGCCTGTAAGGGGAGAGA
GG CCT 64 CGTGTTTGTGCCTGT AGCGGGAGAGA CGTGTTTGTGCCTGTAGCGGGAGAGA
GG CCT 64 CGTGTTTGTGCCTGT AGAGGGAGAGA CGTGTTTGTGCCTGTAGAGGGAGAGA
GG CCT 64 CGTGTTTGTGCCTGT TTGGGGAGAGA CGTGTTTGTGCCTGTTTGGGGAGAGA
GG CCT 64 CGTGTTTGTGCCTGT CCCGGGAGAGA CGTGTTTGTGCCTGTCCCGGGAGAGA
GG CCT 64 CGTGTTTGTGCCTGT CACGGGAGAGA CGTGTTTGTGCCTGTCACGGGAGAGA
GG CCT 64 CGTGTTTGTGCCTGT GTGGGGAGAGA CGTGTTTGTGCCTGTGTGGGGAGAGA
GG CCT 64 CGTGTTTGTGCCTGT GCCGGGAGAGA CGTGTTTGTGCCTGTGCCGGGAGAGA
GG CCT 64 CGTGTTTGTGCCTGT GACGGGAGAGA CGTGTTTGTGCCTGTGACGGGAGAGA
GG CCT 64 CGTGTTTGTGCCTGT GAGGGGAGAGA CGTGTTTGTGCCTGTGAGGGGAGAGA
GG CCT 64 CGTGTTTGTGCCTGT GGCGGGAGAGA CGTGTTTGTGCCTGTGGCGGGAGAGA
GG CCT 64 CGTGTTTGTGCCTGT TTCGGGAGAGA CGTGTTTGTGCCTGTTTCGGGAGAGA
GG CCT 64 CGTGTTTGTGCCTGT TACGGGAGAGA CGTGTTTGTGCCTGTTACGGGAGAGA
GG CCT 64 CGTGTTTGTGCCTGT

CC 136 GCCCCCAATG AGCACTAAGcgagg GCCCCCAATG
CC 136 GCCCCCAACC AGCACTAAGcgagg GCCCCCAACC
CC 136 GCCCCCAAAT AGCACTAAGcgagg GCCCCCAAAT
CC 136 GCCCCCAAAA AGCACTAAGcgagg GCCCCCAAAA
CC 136 GCCCCCATCC AGCACTAAGcgagg GCCCCCATCC
CC 136 GCCCCCAAGA AGCACTAAGcgagg GCCCCCAAGA
CC 136 GCCCCCATTA AGCACTAAGcgagg GCCCCCATTA
CC 136 GCCCCCACCT AGCACTAAGcgagg GCCCCCACCT
CC 136 GCCCCCACAC AGCACTAAGcgagg GCCCCCACAC
CC 136 GCCCCCAGTC AGCACTAAGcgagg GCCCCCAGTC
CC 136 GCCCCCAGCC AGCACTAAGcgagg GCCCCCAGCC
CC 136 GCCCCCAGAC AGCACTAAGcgagg GCCCCCAGAC
CC 136 GCCCCCAGAA AGCACTAAGcgagg GCCCCCAGAA
CC 136 GCCCCCAGGC AGCACTAAGcgagg GCCCCCAGGC
CC 136 GCCCCCATTC AGCACTAAGcgagg GCCCCCATTC
CC 136 GCCCCCATAC AGCACTAAGcgagg GCCCCCATAC
CC 136 GCCCCCATAA AGCACTAAGcgagg GCCCCCATAA
CC 136 GCCCCCATGG AGCACTAAGcgagg GCCCCCATGG
CC 136 GCCCCCACAA AGCACTAAGcgagg GCCCCCACAA
CC 136 GCCCCCATGC AGCACTAAGcgagg GCCCCCATGC
CC 136 GCCCCCAATC AGCACTAAGcgaggt GCCCCCAATC
CC 136 GCCCCCAATG AGCACTAAGcgaggt GCCCCCAATG
CC 136 GCCCCCAACC AGCACTAAGcga

GG CGC 79 TGTCCTGGGAGA GACCGGTTTACAGAGGA TGTCCTGGGAGAGACCGGTTTACAGAGGA
GG CGC 79 TGTCCTGGGAGA GACCGGTATACAGAGGA TGTCCTGGGAGAGACCGGTATACAGAGGA
GG CGC 79 TGTCCTGGGAGA GACCGGTAAACAGAGGA TGTCCTGGGAGAGACCGGTAAACAGAGGA
GG CGC 79 TGTCCTGGGAGA GACCGGTGGACAGAGGA TGTCCTGGGAGAGACCGGTGGACAGAGGA
GG CGC 79 TGTCCTGGGAGA GACCGGCAGACAGAGGA TGTCCTGGGAGAGACCGGCAGACAGAGGA
GG CGC 79 TGTCCTGGGAGA GACCGGTGTACAGAGGA TGTCCTGGGAGAGACCGGTGTACAGAGGA
GG CGC 79 CTGTCCTGGGAGA GACCGGATTACAGAGGA CTGTCCTGGGAGAGACCGGATTACAGAGGA
GG CGC 79 CTGTCCTGGGAGA GACCGGATGACAGAGGA CTGTCCTGGGAGAGACCGGATGACAGAGGA
GG CGC 79 CTGTCCTGGGAGA GACCGGACAACAGAGGA CTGTCCTGGGAGAGACCGGACAACAGAGGA
GG CGC 79 CTGTCCTGGGAGA GACCGGAATACAGAGGA CTGTCCTGGGAGAGACCGGAATACAGAGGA
GG CGC 79 CTGTCCTGGGAGA GACCGGAAGACAGAGGA CTGTCCTGGGAGAGACCGGAAGACAGAGGA
GG CGC 79 CTGTCCTGGGAGA GACCGGTCTACAGAGGA CTGTCCTGGGAGAGACCGGTCTACAGAGGA
GG CGC 79 CTGTCCTGGGAGA GACCGGAGAACAGAGGA CTGTCCTGGGAGAGACCGGAGAACAGAGGA
GG CGC 79 CTGTCCTGGGAGA GACCGGTTGACAGAGGA CTGTCCTGGGAGAGACCGGTT

GG GAG 85 GAGAGACCGGCGCA CAGAAGAAGAGA GAGAGACCGGCGCACAGAAGAAGAGA
GG GAG 85 GAGAGACCGGCGCA CAGGCGAAGAGA GAGAGACCGGCGCACAGGCGAAGAGA
GG GAG 85 GAGAGACCGGCGCA CATTCGAAGAGA GAGAGACCGGCGCACATTCGAAGAGA
GG GAG 85 GAGAGACCGGCGCA CATACGAAGAGA GAGAGACCGGCGCACATACGAAGAGA
GG GAG 85 GAGAGACCGGCGCA CATGAGAAGAGA GAGAGACCGGCGCACATGAGAAGAGA
GG GAG 85 GAGAGACCGGCGCA CATGGGAAGAGA GAGAGACCGGCGCACATGGGAAGAGA
GG GAG 85 GAGAGACCGGCGCA CACAAGAAGAGA GAGAGACCGGCGCACACAAGAAGAGA
GG GAG 85 GAGAGACCGGCGCA CATGCGAAGAGA GAGAGACCGGCGCACATGCGAAGAGA
GG GAG 85 GGAGAGACCGGCGCA CAATCGAAGAGA GGAGAGACCGGCGCACAATCGAAGAGA
GG GAG 85 GGAGAGACCGGCGCA CAATGGAAGAGA GGAGAGACCGGCGCACAATGGAAGAGA
GG GAG 85 GGAGAGACCGGCGCA CAACCGAAGAGA GGAGAGACCGGCGCACAACCGAAGAGA
GG GAG 85 GGAGAGACCGGCGCA CAAATGAAGAGA GGAGAGACCGGCGCACAAATGAAGAGA
GG GAG 85 GGAGAGACCGGCGCA CAAAAGAAGAGA GGAGAGACCGGCGCACAAAAGAAGAGA
GG GAG 85 GGAGAGACCGGCGCA CAAGCGAAGAGA GGAGAGACCGGCGCACAAGCGAAGAGA
GG GAG 85 GGAGAGACCGGCGCA CAAGAGAAGAGA GGAGAGACCGGCGCACAAGAGAAGAGA
GG GAG 85 G

CC 124 TCACCACTGGCTGCCCCCA GGGAGCACTAAGc TCACCACTGGCTGCCCCCA
CC 124 TCACCACCAACTGCCCCCA GGGAGCACTAAGc TCACCACCAACTGCCCCCA
CC 124 TCACCACTGCCTGCCCCCA GGGAGCACTAAGc TCACCACTGCCTGCCCCCA
CC 124 TCACCACATCCTGCCCCCA GGGAGCACTAAGcg TCACCACATCCTGCCCCCA
CC 124 TCACCACATGCTGCCCCCA GGGAGCACTAAGcg TCACCACATGCTGCCCCCA
CC 124 TCACCACACCCTGCCCCCA GGGAGCACTAAGcg TCACCACACCCTGCCCCCA
CC 124 TCACCACAATCTGCCCCCA GGGAGCACTAAGcg TCACCACAATCTGCCCCCA
CC 124 TCACCACAAACTGCCCCCA GGGAGCACTAAGcg TCACCACAAACTGCCCCCA
CC 124 TCACCACAGCCTGCCCCCA GGGAGCACTAAGcg TCACCACAGCCTGCCCCCA
CC 124 TCACCACAGACTGCCCCCA GGGAGCACTAAGcg TCACCACAGACTGCCCCCA
CC 124 TCACCACTTACTGCCCCCA GGGAGCACTAAGcg TCACCACTTACTGCCCCCA
CC 124 TCACCACCCTCTGCCCCCA GGGAGCACTAAGcg TCACCACCCTCTGCCCCCA
CC 124 TCACCACCACCTGCCCCCA GGGAGCACTAAGcg TCACCACCACCTGCCCCCA
CC 124 TCACCACGTCCTGCCCCCA GGGAGCACTAAGcg TCACCACGTCCTGCCCCCA
CC 124 TCACCACGCCCTGCCCCCA GGGAGCACTAAGcg TCACCACGCCCTGCCCCCA
CC 124 TCACCACGACCTGCCCCCA GGGAGCACTAAGcg TCACCACGACCTGCCCCCA
CC 124 TCAC

CC 97 AGAGAATTCTCGCAA GAAAGGGGAG AGAGAATTCTCGCAA
CC 97 AGAGAATAGACGCAA GAAAGGGGAG AGAGAATAGACGCAA
CC 97 AGAGAATTTGCGCAA GAAAGGGGAG AGAGAATTTGCGCAA
CC 97 AGAGAATCCTCGCAA GAAAGGGGAG AGAGAATCCTCGCAA
CC 97 AGAGAATCATCGCAA GAAAGGGGAG AGAGAATCATCGCAA
CC 97 AGAGAATGTGCGCAA GAAAGGGGAG AGAGAATGTGCGCAA
CC 97 AGAGAATGCTCGCAA GAAAGGGGAG AGAGAATGCTCGCAA
CC 97 AGAGAATGATCGCAA GAAAGGGGAG AGAGAATGATCGCAA
CC 97 AGAGAATGAGCGCAA GAAAGGGGAG AGAGAATGAGCGCAA
CC 97 AGAGAATGGACGCAA GAAAGGGGAG AGAGAATGGACGCAA
CC 97 AGAGAATTTTCGCAA GAAAGGGGAG AGAGAATTTTCGCAA
CC 97 AGAGAATTATCGCAA GAAAGGGGAG AGAGAATTATCGCAA
CC 97 AGAGAATTGACGCAA GAAAGGGGAG AGAGAATTGACGCAA
CC 97 AGAGAATTGGCGCAA GAAAGGGGAG AGAGAATTGGCGCAA
CC 97 AGAGAATCAGCGCAA GAAAGGGGAG AGAGAATCAGCGCAA
CC 97 AGAGAATTGTCGCAA GAAAGGGGAG AGAGAATTGTCGCAA
CC 97 AGAGAATATTCGCAA GAAAGGGGAGC AGAGAATATTCGCAA
CC 97 AGAGAATATGCGCAA GAAAGGGGAGC AGAGAATATGCGCAA
CC 97 AGAGAATACACGCAA GAAAGGGGAGC AGAGAATACACGCAA
CC 97 AGAGAATAATCGCAA GAAAGGGGAGC AGAGAATAATCGCAA
CC 97 AGAGAATAAG

GG AAA 106 GAATCTCCGCAAG CGGGGGGAGC GAATCTCCGCAAGCGGGGGGAGC
GG AAA 106 GAATCTCCGCAAG TTGGGGGAGC GAATCTCCGCAAGTTGGGGGAGC
GG AAA 106 GAATCTCCGCAAG CCTGGGGAGC GAATCTCCGCAAGCCTGGGGAGC
GG AAA 106 GAATCTCCGCAAG CACGGGGAGC GAATCTCCGCAAGCACGGGGAGC
GG AAA 106 GAATCTCCGCAAG GTGGGGGAGC GAATCTCCGCAAGGTGGGGGAGC
GG AAA 106 GAATCTCCGCAAG GCCGGGGAGC GAATCTCCGCAAGGCCGGGGAGC
GG AAA 106 GAATCTCCGCAAG GACGGGGAGC GAATCTCCGCAAGGACGGGGAGC
GG AAA 106 GAATCTCCGCAAG GAGGGGGAGC GAATCTCCGCAAGGAGGGGGAGC
GG AAA 106 GAATCTCCGCAAG GGCGGGGAGC GAATCTCCGCAAGGGCGGGGAGC
GG AAA 106 GAATCTCCGCAAG TTCGGGGAGC GAATCTCCGCAAGTTCGGGGAGC
GG AAA 106 GAATCTCCGCAAG TACGGGGAGC GAATCTCCGCAAGTACGGGGAGC
GG AAA 106 GAATCTCCGCAAG TGAGGGGAGC GAATCTCCGCAAGTGAGGGGAGC
GG AAA 106 GAATCTCCGCAAG TGGGGGGAGC GAATCTCCGCAAGTGGGGGGAGC
GG AAA 106 GAATCTCCGCAAG CAGGGGGAGC GAATCTCCGCAAGCAGGGGGAGC
GG AAA 106 GAATCTCCGCAAG TGCGGGGAGC GAATCTCCGCAAGTGCGGGGAGC
GG AAA 106 AGAATCTCCGCAAG ATCGGGGAGC AGAATCTCCGCAAGATCGGGGAGC
GG AAA 106 AGAATCTCCGCAAG ATGGGGGAGC A

CC 130 ACGAGCTGAGACCA GGGAGCACTAAG ACGAGCTGAGACCA
CC 130 ACGAGCTGTTGCCA GGGAGCACTAAG ACGAGCTGTTGCCA
CC 130 ACGAGCTGCCTCCA GGGAGCACTAAG ACGAGCTGCCTCCA
CC 130 ACGAGCTGCATCCA GGGAGCACTAAG ACGAGCTGCATCCA
CC 130 ACGAGCTGGTGCCA GGGAGCACTAAG ACGAGCTGGTGCCA
CC 130 ACGAGCTGGCTCCA GGGAGCACTAAG ACGAGCTGGCTCCA
CC 130 ACGAGCTGGATCCA GGGAGCACTAAG ACGAGCTGGATCCA
CC 130 ACGAGCTGGAGCCA GGGAGCACTAAG ACGAGCTGGAGCCA
CC 130 ACGAGCTGGGACCA GGGAGCACTAAG ACGAGCTGGGACCA
CC 130 ACGAGCTGTTTCCA GGGAGCACTAAG ACGAGCTGTTTCCA
CC 130 ACGAGCTGTATCCA GGGAGCACTAAG ACGAGCTGTATCCA
CC 130 ACGAGCTGTGACCA GGGAGCACTAAG ACGAGCTGTGACCA
CC 130 ACGAGCTGTGGCCA GGGAGCACTAAG ACGAGCTGTGGCCA
CC 130 ACGAGCTGCAGCCA GGGAGCACTAAG ACGAGCTGCAGCCA
CC 130 ACGAGCTGTGTCCA GGGAGCACTAAG ACGAGCTGTGTCCA
CC 130 ACGAGCTGATTCCA GGGAGCACTAAGc ACGAGCTGATTCCA
CC 130 ACGAGCTGATGCCA GGGAGCACTAAGc ACGAGCTGATGCCA
CC 130 ACGAGCTGACACCA GGGAGCACTAAGc ACGAGCTGACACCA
CC 130 ACGAGCTGAATCCA GGGAGCACTAAGc ACGAGCTGAATCCA
CC 130 ACGAGCTGAAGCCA GGGAGCACTAAGc ACGAGCTGAA

GG ACT 142 CGAGCTGCCC CCAGGGAGCGAGAAGcgaggt CGAGCTGCCCCCAGGGAGCGAGAAGcgaggt
GG ACT 142 CGAGCTGCCC CCAGGGAGCGGCAAGcgaggt CGAGCTGCCCCCAGGGAGCGGCAAGcgaggt
GG ACT 142 CGAGCTGCCC CCAGGGAGCTTCAAGcgaggt CGAGCTGCCCCCAGGGAGCTTCAAGcgaggt
GG ACT 142 CGAGCTGCCC CCAGGGAGCTACAAGcgaggt CGAGCTGCCCCCAGGGAGCTACAAGcgaggt
GG ACT 142 CGAGCTGCCC CCAGGGAGCTGAAAGcgaggt CGAGCTGCCCCCAGGGAGCTGAAAGcgaggt
GG ACT 142 CGAGCTGCCC CCAGGGAGCTGGAAGcgaggt CGAGCTGCCCCCAGGGAGCTGGAAGcgaggt
GG ACT 142 CGAGCTGCCC CCAGGGAGCCAGAAGcgaggt CGAGCTGCCCCCAGGGAGCCAGAAGcgaggt
GG ACT 142 CGAGCTGCCC CCAGGGAGCTGCAAGcgaggt CGAGCTGCCCCCAGGGAGCTGCAAGcgaggt
GG ACT 142 ACGAGCTGCCC CCAGGGAGCATCAAGcgaggt ACGAGCTGCCCCCAGGGAGCATCAAGcgaggt
GG ACT 142 ACGAGCTGCCC CCAGGGAGCATGAAGcgaggt ACGAGCTGCCCCCAGGGAGCATGAAGcgaggt
GG ACT 142 ACGAGCTGCCC CCAGGGAGCACCAAGcgaggt ACGAGCTGCCCCCAGGGAGCACCAAGcgaggt
GG ACT 142 ACGAGCTGCCC CCAGGGAGCAACAAGcgaggt ACGAGCTGCCCCCAGGGAGCAACAAGcgaggt
GG ACT 142 ACGAGCTGCCC CCAGGGAGCAAGAAGcgaggt ACGAGCTGCCCCCAGGGAGCAAGAAGcgaggt
GG

In [145]:
all_codon_positions = {}
all_codon_positions_list = []
for i in range(0, len(sequence_pos), 3):
    all_codon_positions.update({i + 1 : sequence_pos[i+1:i+4]})
    all_codon_positions_list.append(i + 1)
    
print(all_codon_positions)

{1: 'cct', 4: 'atc', 7: 'ctg', 10: 'agt', 13: 'agt', 16: 'ggt', 19: 'aat', 22: 'cta', 25: 'ctg', 28: 'gga', 31: 'cgg', 34: 'aac', 37: 'agc', 40: 'ttt', 43: 'gag', 46: 'gtg', 49: 'CGT', 52: 'GTT', 55: 'TGT', 58: 'GCC', 61: 'TGT', 64: 'CCT', 67: 'GGG', 70: 'AGA', 73: 'GAC', 76: 'CGG', 79: 'CGC', 82: 'ACA', 85: 'GAG', 88: 'GAA', 91: 'GAG', 94: 'AAT', 97: 'CTC', 100: 'CGC', 103: 'AAG', 106: 'AAA', 109: 'GGG', 112: 'GAG', 115: 'CCT', 118: 'CAC', 121: 'CAC', 124: 'GAG', 127: 'CTG', 130: 'CCC', 133: 'CCA', 136: 'GGG', 139: 'AGC', 142: 'ACT', 145: 'AAG', 148: 'cga', 151: 'ggt', 154: 'aag', 157: 'caa', 160: 'gca', 163: 'gga', 166: 'caa', 169: 'gaa', 172: 'gcg', 175: 'gtg', 178: 'gag', 181: 'gag', 184: 'acc', 187: 'aag', 190: 'ggt', 193: 'gca', 196: ''}


## 4) Identifying pam sites and codons to change either in the seed or in the Binding site

In [146]:
pam_g_codons = ["AGG", "TGG", "GGG", "CGG"]
pam_c_codons = ["CCA", "CCG", "CCC", "CCT"]



def identify_pam_codon_and_swap(primer_info, sequence):
    table = [] 
    table.append(primer_info[0] + ["acceptable_swap_found", "alters_splice_site", "codon_to_swap", "new_codon", "codon_position"])
    for row in primer_info:
        codon, codon_pos, pam_identifier, pam_position, extended_seq, extended_length, extended_start, extended_end = row[0], row[1], row[2], row[3], row[6], row[7], row[8], row[9]
        if codon_pos == 120:
            print(codon, pam_identifier, pam_position)
        if pam_identifier in ["GG", "gg", "Gg", "gG"]:
            #print(pam_position, codon_pos)
            #print(sequence[pam_position], sequence[pam_position + 1], sequence[pam_position + 2])
            if codon_pos == 121:
                print("GG in here ", codon_pos, codon)
            if pam_position + 1 >= codon_pos:
                table.append(row + ["N/A", "N/A", "", "", ""])
            else:
                if pam_position in all_codon_positions_list:
                    # entire pam site is in a codon ---> /XXX~NGG
                    pam_codon = all_codon_positions[pam_position]
                    next_codon = all_codon_positions[pam_position + 3]
                    seed_codon = all_codon_positions[pam_position - 3]
                    synonymous_pam_codons = aa_to_codons[codon_to_aa[str.upper(pam_codon)]]
                    synonymous_seed_codons = aa_to_codons[codon_to_aa[str.upper(seed_codon)]]
                    found = False
                    for c in synonymous_pam_codons:
                        if not found:
                            if c.upper() not in ["GGA", "CGA", "TGA", "AGA", "AGG", "TGG", "CGG", "GGG"] and c.upper() != pam_codon.upper() :
                                found = True
                                table.append(row + ["True", "True", pam_codon, c, pam_position])
                                break
                    if not found:
                        for s in synonymous_seed_codons:
                            if s.upper() not in ["GGA", "CGA", "TGA", "AGA"] + pam_g_codons and not found and s.upper() != seed_codon.upper() :
                                found = True
                                table.append(row + ["True", "False", seed_codon, s, pam_position - 3])
                                break
                    if not found:
                        table.append(row + ["False", "", "", "", ""])
                elif pam_position - 1 in all_codon_positions_list:
                    # GG nesecarilly falls in the next codon ---> X/XX~XNG~GXX 
                    pam_codon = all_codon_positions[pam_position - 1]
                    next_codon = all_codon_positions[pam_position + 2]
                    seed_codon = all_codon_positions[pam_position - 4]
                    synonymous_pam_codons = aa_to_codons[codon_to_aa[str.upper(pam_codon)]]
                    synonymous_seed_codons = aa_to_codons[codon_to_aa[str.upper(seed_codon)]]
                    synonymous_next_codons = aa_to_codons[codon_to_aa[str.upper(next_codon)]]
                    found = False
                    for p in synonymous_pam_codons:
                        if not found:
                            if p.upper() not in ["GGA", "CGA", "TGA", "AGA"] + pam_g_codons  and p[2].lower() != "g" and p.upper() != pam_codon.upper():
                                #print("case 1", pam_codon, p)
                                table.append(row + ["True", "True", pam_codon, p, pam_position - 1])
                                found = True
                    if not found:
                        for n in synonymous_next_codons:
                            if not found and n.upper() not in ["GGA", "CGA", "TGA", "AGA"] + pam_g_codons and n[0].lower() != 'g' and n.upper() != next_codon.upper():
                                found = True 
                                table.append(row + ["True", "True", next_codon, n, pam_position + 2])
                    if not found:
                        for s in synonymous_seed_codons:
                            if not found and s.upper() not in ["GGA", "CGA", "TGA", "AGA"] + pam_g_codons and s.upper() != seed_codon.upper():
                                found = True
                                table.append(row + ["True", "False", seed_codon, s, pam_position - 4])
                    if not found:
                        table.append(row + ["False", "", "", "", ""])
                                #print("case 2", next_codon, n)
                    #print(seed_codon, pam_codon, next_codon)
                elif pam_position - 2 in all_codon_positions_list:
                    # GG split across two codons ---> XX/X~XXN~GGX
                    pam_codon = all_codon_positions[pam_position - 2]
                    next_codon = all_codon_positions[pam_position + 1]
                    seed_codon = all_codon_positions[pam_position - 5]
                    synonymous_pam_codons = aa_to_codons[codon_to_aa[str.upper(pam_codon)]]
                    synonymous_seed_codons = aa_to_codons[codon_to_aa[str.upper(seed_codon)]]
                    synonymous_next_codons = aa_to_codons[codon_to_aa[str.upper(next_codon)]]
                    found = False
                    for n in synonymous_next_codons:
                        if not found and n.upper() not in ["GGA", "CGA", "TGA", "AGA"] + pam_g_codons  and n[:2].lower() != "gg" and n.upper() != next_codon.upper():
                            found = True
                            table.append(row + ["True", "True", next_codon, n, pam_position + 1])
                            break
                    if not found:
                        for p in synonymous_seed_codons:
                            print("changing seed ", codon)
                            if p.upper() not in ["GGA", "CGA", "TGA", "AGA"] + pam_g_codons and not found and p.upper() != seed_codon.upper():
                                table.append(row + ["True", "False", seed_codon, p, pam_position - 5])
                                found = True
                                break
                    if not found:
                        table.append(row + ["False", "", "", "", ""])

        elif pam_identifier in ["CC", "cc", "Cc", "cC"]:
            # pam identifier refers to the position of the rightmost "C"
            # print(pam_position, sequence[pam_position - 1])
            # print(codon, codon_pos, sequence[pam_position-1 : pam_position + 2])
            if pam_position - 3 <= codon_pos:
                table.append(row + ["N/A", "N/A", "", "", ""])
            else:
                if (pam_position - 1) in all_codon_positions_list:
                    # entire pam site is a codon CCN~XXX/
                    next_codon = all_codon_positions[pam_position - 1]
                    next_codon = all_codon_positions[pam_position + 2]
                    #print(pam_codon, next_codon)
                    synonymous_pam_codons = aa_to_codons[codon_to_aa[str.upper(pam_codon)]]
                    synonymous_next_codons = aa_to_codons[codon_to_aa[str.upper(next_codon)]]
                    found = False
                    for p in synonymous_pam_codons:
                        if not found:
                            #changes splice site, use this one!
                            if p.upper() not in ["GGA", "CGA", "TGA", "AGA", "CCA", "CCC", "CCT", "CCG"] and p.upper() != pam_codon.upper():
                                found = True 
                                table.append(row + ["True", "True", pam_codon, p, pam_position - 1])
                                break
                    if not found:
                        for n in synonymous_next_codons:
                            if n.upper() not in ["GGA", "CGA", "TGA", "AGA"] + pam_c_codons and not found and n.upper() != next_codon.upper():
                                found = True
                                table.append(row + ["True", "False", next_codon, n, pam_position + 2])
                                break
                    if not found:
                        table.append(row + ["False", "", "", "", ""])

                elif pam_position in all_codon_positions_list:
                    ## XXC~CNX~XX/X
                    seed_codon = all_codon_positions[pam_position + 3]
                    pam_codon = all_codon_positions[pam_position]
                    prev_codon = all_codon_positions[pam_position - 3]
                    #print(prev_codon, pam_codon, seed_codon)
                    synonymous_pam_codons = aa_to_codons[codon_to_aa[str.upper(pam_codon)]]
                    synonymous_seed_codons = aa_to_codons[codon_to_aa[str.upper(seed_codon)]]
                    synonymous_prev_codons = aa_to_codons[codon_to_aa[str.upper(prev_codon)]]
                    found = False 
                    for p in synonymous_prev_codons:
                        if not found and p.upper() not in ["GGA", "CGA", "TGA", "AGA"] + pam_c_codons and p[2].lower() != "c" and p.upper() != prev_codon.upper():
                            found = True
                            table.append(row + ["True", "True", prev_codon, p, pam_position - 3])
                            break
                    if not found:
                        for pm in synonymous_pam_codons:
                            if not found and pm.upper() not in ["GGA", "CGA", "TGA", "AGA"] + pam_c_codons and pm[0].lower() != "c" and pm.upper() != pam_codon.upper():
                                found = True 
                                table.append(row + ["True", "True", pam_codon, pm, pam_position])
                                break 
                    if not found:
                        for s in synonymous_seed_codons:
                            if not found and s.upper() not in ["GGA", "CGA", "TGA", "AGA"] + pam_c_codons and s.upper() != seed_codon.upper():
                                print("CC cnanging seed ", codon)
                                found = True 
                                table.append(row + ["True", "False", seed_codon, s, pam_position + 3])
                                break 
                    if not found:
                        table.append(row + ["False", "", "", "", ""])
                elif pam_position + 1 in all_codon_positions_list:
                    ## XCC~NXX~XXX
                    seed_codon = all_codon_positions[pam_position + 4]
                    pam_codon = all_codon_positions[pam_position +1]
                    prev_codon = all_codon_positions[pam_position - 2]
                    #print(prev_codon, pam_codon, seed_codon)
                    synonymous_pam_codons = aa_to_codons[codon_to_aa[str.upper(pam_codon)]]
                    synonymous_seed_codons = aa_to_codons[codon_to_aa[str.upper(seed_codon)]]
                    synonymous_prev_codons = aa_to_codons[codon_to_aa[str.upper(prev_codon)]]
                    found = False 
                    for p in synonymous_prev_codons:
                        if not found and p.upper() not in ["GGA", "CGA", "TGA", "AGA"] + pam_c_codons and p[:2].lower() != "cc" and p.upper() != prev_codon.upper():
                            table.append(row + ["True", "True", prev_codon, p, pam_position - 2])
                            found = True 
                            break
                    if not found:
                        for s in synonymous_seed_codons:
                            if  s.upper() not in ["GGA", "CGA", "TGA", "AGA"] + pam_c_codons and s.upper() != seed_codon.upper():
                                print("CC cnanging seed ", codon)
                                found = True
                                table.append(row + ["True", "False", seed_codon, s, pam_position + 4])
                                break
                    if not found:
                        table.append(row + ["False", "", "", "", ""])
    return table     
result = identify_pam_codon_and_swap(all_sequences, sequence_pos)    
print(len(result))
# for i in range(len(result[0])):
#     print(i, result[0][i])

4159


In [147]:
print("AAAAA"[:2])

AA


## Inserting the swapped codons if possible (task 4)

In [148]:
### testing to see if generated site is the same as the actual pam site 

def append_swapped_pam_codon_if_applicable(swaps_table):
    table2 = []
    table2.append(swaps_table[0] + ["altered_pam_site_sequence"])
    for row in swaps_table[1:]:
        codon, codon_pos = row[0], row[1]
        if row[16] == "True":
            addition_sequence = row[13]
            if row[2] in ["GG", "gg", "gG", "Gg"]:
                if row[20] - row[8] < 0:
                    non_addition_sequence = copy.deepcopy(row[15][row[12] + (row[20] - row[8]):])
                    altered_pam_site = row[19] + non_addition_sequence[3:]
                    altered_pam_site2 = altered_pam_site[abs((row[20] - row[8])):]
                    print(altered_pam_site2)
                    table2.append(row + [altered_pam_site2])
                else:
                    non_addition_sequence = copy.deepcopy(row[15][row[12]:])
                    #print("GG", codon, non_addition_sequence[:row[20] - row[8]], row[19], non_addition_sequence[row[20] - row[8] + 3 :])

                    #row 20 is new codon position
                    #row 8 is extended_beginning_position
                    #row 15 is altered_codon_seq_with_extension
                    #row 12 is length of addition
                    altered_pam_site = non_addition_sequence[:row[20] - row[8]] + row[19] + non_addition_sequence[row[20] - row[8] + 3 :] 
                    table2.append(row + [altered_pam_site])
            elif row[2] in ["CC", "cc", "Cc", "cC"]:
                non_addition_sequence = copy.deepcopy(row[15][:-1*row[12]])
                non_addition_sequence = copy.deepcopy(row[15])
                altered_pam_site = non_addition_sequence[:row[20] - row[8]] + row[19] + non_addition_sequence[row[20] - row[8] + 3 :]
#                 if codon_pos == 52:
#                     print(codon, row[15])
#                     print(altered_pam_site, row[18], row[19])
                table2.append(row + [altered_pam_site])
        else:
            if row[2] in ["GG", "gg", "gG", "Gg"]:
                non_addition_sequence = copy.deepcopy(row[15][row[12]:])
                table2.append(row + [non_addition_sequence])
            else:
                non_addition_sequence = copy.deepcopy(row[15])
                table2.append(row + [non_addition_sequence])
    return table2   



In [149]:
final_result = append_swapped_pam_codon_if_applicable(result)
print(len(final_result))

4159


In [150]:
with open("working_final.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(final_result)

In [152]:
with open("out5.fasta", "w", newline="") as f2:
    writer = csv.writer(f2)
    seen = set()
    for row in result[1:]: 
        seq_id = ">" + str(row[0]) + "_" + str(row[1]) + "_" + str(row[12]) + '\n'
        if seq_id not in seen:
            f2.write(seq_id)
            f2.write(str(row[13]) + '\n')
            seen.add(seq_id)

## Combining info with the table generated from the TM calculator 

In [153]:
## read the tp53 file

tp_file = open(tm_file, "r")

tp53_lines =  tp_file.readlines()
tp_53_best_temp_dict = {}
print(len(tp53_lines))
for line in tp53_lines[1:]:
    line = line.split("\t")
    position_info = line[0].split("_")
    codon, position, length = position_info[0], int(position_info[1]), position_info[2]
    temp = float(line[2])
    primer = line[1].strip()
    if position in tp_53_best_temp_dict.keys():
        if abs(temp - 47) < abs(tp_53_best_temp_dict[position][0] - 47):
            tp_53_best_temp_dict.update({position : [temp, codon, primer, length]})
    else:
        tp_53_best_temp_dict.update({position : [temp, codon, primer, length]})

print(tp_53_best_temp_dict)

199
{49: [49.9, 'CGT', 'ggacggaacagct', '13 '], 52: [49.9, 'GTT', 'ggacggaacagct', '13 '], 55: [50.4, 'TGT', 'ctgggagagaccg', '13 '], 61: [50.4, 'TGT', 'ctgggagagaccg', '13 '], 58: [50.4, 'GCC', 'ctgggagagaccg', '13 '], 64: [47.5, 'CCT', 'gtgtttgtgcctgt', '14 '], 115: [46.5, 'CCT', 'cacgagctgcc', '11'], 67: [48.5, 'GGG', 'agagaccggcg', '11 '], 109: [47.4, 'GGG', 'aatctccgcaaga', '13'], 136: [50.0, 'GGG', 'agcactaagcgagg', '14'], 70: [47.5, 'AGA', 'gtgtttgtgcctgt', '14 '], 73: [47.0, 'GAC', 'ctgtcctgggaga', '13 '], 76: [47.0, 'CGG', 'ctgtcctgggaga', '13 '], 79: [47.0, 'CGC', 'ctgtcctgggaga', '13 '], 100: [49.1, 'CGC', 'gaaaggggagcc', '12'], 82: [47.0, 'ACA', 'ctgtcctgggaga', '13 '], 85: [52.4, 'GAG', 'gaccggcgca', '10 '], 91: [52.4, 'GAG', 'gaccggcgca', '10 '], 112: [47.4, 'GAG', 'aatctccgcaaga', '13'], 124: [46.3, 'GAG', 'gggagcactaagc', '13'], 88: [52.4, 'GAA', 'gaccggcgca', '10 '], 94: [49.1, 'AAT', 'gaaaggggagcc', '12 '], 97: [49.1, 'CTC', 'gaaaggggagcc', '12 '], 103: [46.5, 'AAG', 

In [154]:
for i in range(len(final_result[0])):
    print(i, final_result[0][i])

0 codon
1 codon_position
2 pam_site
3 pam_position
4 non_extended_sequence
5 pre_corrected_sequence
6 extended_sequence
7 extended_seq_length
8 extended_beginning_position
9 extended_end_position
10 targeted_wt_codon
11 subbed_codon
12 length of addition
13 addition_sequence
14 altered_codon_seq
15 altered_codon_seq_with_extension
16 acceptable_swap_found
17 alters_splice_site
18 codon_to_swap
19 new_codon
20 codon_position
21 altered_pam_site_sequence


## Task 5) Filtering in conjunction with info identified regarding the best TM sequence to come up with the remaining 630 sequences, also including the RNA primers 

In [155]:
### generate library of 630 gRNAs
print(tp_53_best_temp_dict)
tp53_630 = []
counter = 0
codons_seen = {}
all_codons_seen = {}
tp53_630.append(final_result[0] + ["TM", "guide_rna"])
for row in final_result[1:]:
    codon = row[0]
    codon_position = row[1]
    optimal_temperature, optimal_primer_length, optimal_primer =  tp_53_best_temp_dict[codon_position][0],  tp_53_best_temp_dict[codon_position][1],  tp_53_best_temp_dict[codon_position][2]
    if row[2] in ["GG", "gg", "gG", "Gg"]:
        #print(sequence_pos[row[3]], sequence_pos[row[3] + 1], sequence_pos[row[3] + 2])
        #print(sequence_pos[row[3] - 20 : row[3]])
        guide_rna = sequence_pos[row[3] - 20 : row[3]]
        #print("GG", guide_rna, len(guide_rna))
        if row[13].lower() == optimal_primer.lower():
            if codon_position in all_codons_seen.keys():
                all_codons_seen.update({codon_position : all_codons_seen[codon_position] + 1})
            else:
                all_codons_seen.update({codon_position : 1})
            tp53_630.append(row + [optimal_temperature, guide_rna])
        else:
            if row[13] == "":
                print(codon, optimal_primer)
    elif row[2] in ["CC", "cc", "Cc", "cC"]:
#         print(sequence_pos[row[3] - 2 ], sequence_pos[row[3] - 1], sequence_pos[row[3]], sequence_pos[row[3] + 1], sequence_pos[row[3] + 2])
#         print(sequence_pos[row[3] + 1: row[3] + 21])
        guide_rna = sequence_pos[row[3] + 1: row[3] + 22]
        #print("CC", guide_rna, len(guide_rna))
        if row[13].lower() == optimal_primer.lower():
            if codon_position in all_codons_seen.keys():
                all_codons_seen.update({codon_position : all_codons_seen[codon_position] + 1})
            else:
                all_codons_seen.update({codon_position : 1})
            tp53_630.append(row + [optimal_temperature, reverse_complement(guide_rna[1:])])
        else:
            if row[13] == "":
                print(codon, optimal_primer)
            #print(row[13], optimal_primer)
        
print(all_codons_seen.keys())
print(len(tp53_630))
with open("nf2_1052_thur.csv", "w", newline="") as tp:
    writer = csv.writer(tp)
    writer.writerows(tp53_630)
    

    


{49: [49.9, 'CGT', 'ggacggaacagct', '13 '], 52: [49.9, 'GTT', 'ggacggaacagct', '13 '], 55: [50.4, 'TGT', 'ctgggagagaccg', '13 '], 61: [50.4, 'TGT', 'ctgggagagaccg', '13 '], 58: [50.4, 'GCC', 'ctgggagagaccg', '13 '], 64: [47.5, 'CCT', 'gtgtttgtgcctgt', '14 '], 115: [46.5, 'CCT', 'cacgagctgcc', '11'], 67: [48.5, 'GGG', 'agagaccggcg', '11 '], 109: [47.4, 'GGG', 'aatctccgcaaga', '13'], 136: [50.0, 'GGG', 'agcactaagcgagg', '14'], 70: [47.5, 'AGA', 'gtgtttgtgcctgt', '14 '], 73: [47.0, 'GAC', 'ctgtcctgggaga', '13 '], 76: [47.0, 'CGG', 'ctgtcctgggaga', '13 '], 79: [47.0, 'CGC', 'ctgtcctgggaga', '13 '], 100: [49.1, 'CGC', 'gaaaggggagcc', '12'], 82: [47.0, 'ACA', 'ctgtcctgggaga', '13 '], 85: [52.4, 'GAG', 'gaccggcgca', '10 '], 91: [52.4, 'GAG', 'gaccggcgca', '10 '], 112: [47.4, 'GAG', 'aatctccgcaaga', '13'], 124: [46.3, 'GAG', 'gggagcactaagc', '13'], 88: [52.4, 'GAA', 'gaccggcgca', '10 '], 94: [49.1, 'AAT', 'gaaaggggagcc', '12 '], 97: [49.1, 'CTC', 'gaaaggggagcc', '12 '], 103: [46.5, 'AAG', 'cac

In [156]:
counter = 0
for i in tp53_630[0]:
    print(counter, i)
    counter += 1
    
# for i in tp53_630:
#     if i[1] == 49:
#         print(i[21])

0 codon
1 codon_position
2 pam_site
3 pam_position
4 non_extended_sequence
5 pre_corrected_sequence
6 extended_sequence
7 extended_seq_length
8 extended_beginning_position
9 extended_end_position
10 targeted_wt_codon
11 subbed_codon
12 length of addition
13 addition_sequence
14 altered_codon_seq
15 altered_codon_seq_with_extension
16 acceptable_swap_found
17 alters_splice_site
18 codon_to_swap
19 new_codon
20 codon_position
21 altered_pam_site_sequence
22 TM
23 guide_rna


In [157]:
U6_primer = "CTTGTGGAAAGGACGAAACACCG"
gRNA_FE = "GTTTAAGAGCTATGCTGGAAACAGCATAGCAAGTTTAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGC"
seven_t = "TTTTTTT"
RV_primer_stub = "CCCGGGTGCAAAGATGGATAAAG"


new_file = [] 
new_file.append(["codon_wild_type", "codon_mutation", "guide_rna", "codon_postion", "pam_identifier",
                 "pam_position",  "PBS_10_to_15", "PBS_length", "cut_site_to_7nt_extension", "TM",  "pam/seed_swap_found", 
                 "alters_splice_site", "codon_to_swap", "new_codon","new_codon_position",
                 "U6", "gRNA_FE", "seven_t", "rV_Primer_stub" ])
for row in tp53_630[1:]:
    new_file.append([row[0], row[11], row[23], row[1], row[2], 
                     row[3], row[13], row[12], row[21], row[22], row[16], 
                     row[17], row[18], row[19], row[20], 
                     U6_primer, gRNA_FE, seven_t, RV_primer_stub])

    
print(new_file[:10])
with open(output_file, "w", newline="") as tp2:
    writer = csv.writer(tp2)
    writer.writerows(new_file)


[['codon_wild_type', 'codon_mutation', 'guide_rna', 'codon_postion', 'pam_identifier', 'pam_position', 'PBS_10_to_15', 'PBS_length', 'cut_site_to_7nt_extension', 'TM', 'pam/seed_swap_found', 'alters_splice_site', 'codon_to_swap', 'new_codon', 'new_codon_position', 'U6', 'gRNA_FE', 'seven_t', 'rV_Primer_stub'], ['CGT', 'ATC', 'actgggacggaacagctttg', 49, 'gg', 44, 'ggacggaacagct', 13, 'ttGAAgtgATCGTTTGTGC', 49.9, 'True', 'True', 'gag', 'GAA', 43, 'CTTGTGGAAAGGACGAAACACCG', 'GTTTAAGAGCTATGCTGGAAACAGCATAGCAAGTTTAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGC', 'TTTTTTT', 'CCCGGGTGCAAAGATGGATAAAG'], ['CGT', 'ATG', 'actgggacggaacagctttg', 49, 'gg', 44, 'ggacggaacagct', 13, 'ttGAAgtgATGGTTTGTGC', 49.9, 'True', 'True', 'gag', 'GAA', 43, 'CTTGTGGAAAGGACGAAACACCG', 'GTTTAAGAGCTATGCTGGAAACAGCATAGCAAGTTTAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGC', 'TTTTTTT', 'CCCGGGTGCAAAGATGGATAAAG'], ['CGT', 'ACC', 'actgggacggaacagctttg', 49, 'gg', 44, 'ggacggaacagct', 13, 'ttGAAgtgACCGTTTGTGC', 49.9,