In [1]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqUtils import MeltingTemp
from Bio.SeqUtils import gc_fraction
import pandas as pd
import re
import sys

In [2]:
def load_codon_usage_table(file_name):
    """Load the codon usage spreadsheet and index its codons by amino acid.

    Parameters
    ----------
    file_name
        Path of an Excel workbook containing the columns "Amino acid",
        "Codon 1" and "Codon 2".

    Returns
    -------
    tuple
        ``(amino_acids, codons_1, codons_2)``. ``amino_acids`` is the
        "Amino acid" column as a list in row order. ``codons_1`` and
        ``codons_2`` map each amino-acid label to the *list* of values found
        for that label in "Codon 1" / "Codon 2" respectively.
    """
    usage_table = pd.read_excel(file_name)
    rows_by_amino_acid = usage_table.groupby("Amino acid")

    def column_as_dict(column_name):
        # One list per amino acid, collecting every row that carries that label.
        return rows_by_amino_acid[column_name].apply(list).to_dict()

    return (
        usage_table["Amino acid"].tolist(),
        column_as_dict("Codon 1"),
        column_as_dict("Codon 2"),
    )

In [3]:
def load_plasmid(file_name):
    """Read the plasmid description file and extract the gene it contains.

    The file is read line by line: line 1 is the plasmid sequence, line 2 the
    1-based position of the first gene base and line 3 the 1-based position
    of the last gene base (inclusive).

    Returns
    -------
    tuple
        ``(plasmid_seq, start_gene_index, gene_seq, protein_seq)`` where
        ``start_gene_index`` is the 0-based index of the first gene base and
        ``protein_seq`` is ``gene_seq.translate()``.
    """
    with open(file_name, 'r') as handle:
        lines = list(handle)

    plasmid_seq = Seq(lines[0].strip())
    start_gene_index = int(lines[1]) - 1
    # A 1-based inclusive end position equals the exclusive stop of a Python slice.
    gene_stop = int(lines[2])
    gene_seq = plasmid_seq[start_gene_index:gene_stop]

    return plasmid_seq, start_gene_index, gene_seq, gene_seq.translate()

In [4]:
def load_mutations(file_name):
    """Read the list of requested mutations, one per line.

    Leading/trailing whitespace is removed from every line. Lines that are
    empty after stripping are skipped, so a blank line (for example at the
    end of the file) is not counted as a mutation.

    Parameters
    ----------
    file_name
        Path of a text file with one mutation per line (e.g. ``S10A``).

    Returns
    -------
    tuple
        ``(mutations_list, mutations_count)`` where ``mutations_count`` is
        ``len(mutations_list)``.
    """
    mutations_list = []

    with open(file_name, 'r') as mutations_file:
        for line in mutations_file:
            mutation = line.strip()
            if mutation:
                mutations_list.append(mutation)

    return mutations_list, len(mutations_list)

In [5]:
# Identify and separate components in mutations
def separate_mutation(mutation):
    """Split a mutation label such as ``S10A`` into its three components.

    The first occurrence of "letter, 1-4 digits, letter" anywhere in
    ``mutation`` is used. If there is none, an error message is printed and
    the program exits via ``sys.exit()``.

    Returns
    -------
    tuple
        ``(original_AA, position, new_AA)``; all three are strings
        (``position`` is not converted to ``int`` here).
    """
    print('Mutation now is ', mutation)

    found = re.search(r'([A-Za-z])(\d{1,4})([A-Za-z])', mutation)
    if found is None:
        print("Could not find correct mutation pattern.")
        sys.exit()

    original_AA, position, new_AA = found.groups()
    return original_AA, position, new_AA

def separate_all_mutations(mutations_list):
    """Split every mutation label into parallel lists of its components.

    Each entry of ``mutations_list`` is passed to ``separate_mutation``; the
    position component is converted to ``int``.

    Returns
    -------
    tuple
        ``(original_AAs, positions, new_AAs)`` - three lists with one element
        per mutation, in input order.
    """
    original_AAs, positions, new_AAs = [], [], []

    for mutation in mutations_list:
        original_AA, position, new_AA = separate_mutation(mutation)
        original_AAs.append(original_AA)
        positions.append(int(position))
        new_AAs.append(new_AA)

    return original_AAs, positions, new_AAs

In [6]:
# Find position of mutation in the gene:

def make_mutations(mutations_count, mutation_position_AAs, new_AAs, gene_seq, codons_1, start_gene_index, plasmid_seq) :
    """Build one mutated plasmid per requested amino-acid substitution.

    For each of the first ``mutations_count`` mutations the codon at the
    1-based amino-acid position ``mutation_position_AAs[i]`` of ``gene_seq``
    is replaced, in a copy of ``plasmid_seq``, by the codon stored in
    ``codons_1`` for ``new_AAs[i]``.

    Parameters
    ----------
    mutations_count : number of mutations to process.
    mutation_position_AAs : 1-based amino-acid positions within the gene.
    new_AAs : amino-acid labels used as keys into ``codons_1``.
    gene_seq : gene sequence (sliceable).
    codons_1 : mapping amino-acid label -> codon, or -> list/tuple of codons,
        in which case the first entry is used.
    start_gene_index : 0-based index of the first gene base in the plasmid.
    plasmid_seq : full plasmid sequence (sliceable, concatenable).

    Returns
    -------
    tuple
        ``(original_codons, new_codons, new_plasmid_seqs,
        codons_start_index_plasmid)`` - parallel lists, one entry per mutation.

    Raises
    ------
    KeyError
        If an amino-acid label is missing from ``codons_1``.
    IndexError
        If ``codons_1`` holds an empty list for the requested amino acid.
    """
    original_codons = []
    new_codons = []
    codons_start_index_plasmid = []
    new_plasmid_seqs = []

    for mutation in range(mutations_count) :
        codon_start_gene = (mutation_position_AAs[mutation] - 1) * 3
        codon_end_gene = codon_start_gene + 3
        original_codon = gene_seq[codon_start_gene:codon_end_gene]
        print('Original codon is ', original_codon)
        original_codons.append(original_codon)

        # Take the codon itself instead of stringifying the list and stripping
        # "[]'" from it: that trick produced "GCG', 'GCC" for a two-entry list
        # and an empty codon for an empty list.
        codon_choice = codons_1[new_AAs[mutation]]
        if isinstance(codon_choice, (list, tuple)) :
            codon_choice = codon_choice[0]
        new_codon = Seq(str(codon_choice))
        print('New codon is ', new_codon)
        new_codons.append(new_codon)

        # Translate gene coordinates into plasmid coordinates.
        codon_start_plasmid = start_gene_index + codon_start_gene
        codons_start_index_plasmid.append(codon_start_plasmid)
        codon_end_plasmid = start_gene_index + codon_end_gene
        new_plasmid_seq = plasmid_seq[:codon_start_plasmid] + new_codon + plasmid_seq[codon_end_plasmid:]
        new_plasmid_seqs.append(new_plasmid_seq)

    return original_codons, new_codons, new_plasmid_seqs, codons_start_index_plasmid

In [7]:
def find_mutation_positions(original_codon, new_codon, codon_start_plasmid) :
    """Locate the first and last base that differ between two codons.

    The three bases of ``original_codon`` and ``new_codon`` are compared
    position by position (the comparison is case-sensitive).

    Parameters
    ----------
    original_codon, new_codon : indexable sequences of at least three bases.
    codon_start_plasmid : plasmid index of the first base of the codon.

    Returns
    -------
    tuple
        ``(first_mutation_position, last_mutation_position)`` as plasmid
        indices. When only one base differs both values are equal.

    Raises
    ------
    ValueError
        If the two codons are identical, i.e. there is nothing to mutate.
        (Previously this case failed with an UnboundLocalError.)
    """
    first_mutation_position = None
    last_mutation_position = None

    for position in range(0, 3) :
        print('Base in original codon is ', original_codon[position])
        print('Base in new codon is ', new_codon[position])
        if original_codon[position] == new_codon[position] :
            continue
        if first_mutation_position is None :
            first_mutation_position = codon_start_plasmid + position
            print('First base mutation found at position ', first_mutation_position)
        else :
            # Every later mismatch overwrites this, so the last one wins.
            last_mutation_position = codon_start_plasmid + position
            print('Last base mutation found at position ', last_mutation_position)

    if first_mutation_position is None :
        raise ValueError('Original and new codon are identical; there is no base change to design primers for.')

    if last_mutation_position is None :
        last_mutation_position = first_mutation_position
        print('Last base mutation not found, so first base mutation will be used, which is in index ', last_mutation_position)

    return first_mutation_position, last_mutation_position

In [8]:
def find_overlap_flanking_position_left(new_plasmid_seq, start_search_position_left) :
    """Find the nearest T/t strictly to the left of a start position.

    The search begins at ``start_search_position_left - 1`` and walks towards
    index 0. If index 0 is passed without finding a T (including when the
    start position itself is 0), an error is printed and the program exits
    via ``sys.exit()``. Negative indices are never used, so the search cannot
    silently wrap around to the end of the sequence.

    Returns
    -------
    int
        Index of the T that was found.
    """
    current_position = start_search_position_left - 1
    if current_position < 0 :
        print('\nError: T could not be found at the left side of the mutations.')
        sys.exit()

    base_at_current_position = new_plasmid_seq[current_position]
    print('Starting to look at position ', current_position, 'with base ', base_at_current_position)

    while base_at_current_position not in ['T', 't'] :
        current_position -= 1
        if current_position < 0 :
            print('\nError: T could not be found at the left side of the mutations.')
            sys.exit()
        base_at_current_position = new_plasmid_seq[current_position]
        print('Current index is ', current_position, ' whith base ', base_at_current_position)

    print('\nT flanking on the left side of the mutations was found at index ', current_position)

    return current_position


def find_overlap_flanking_position_right(new_plasmid_seq, start_search_position_right) :
    """Find the nearest A/a strictly to the right of a start position.

    The search begins at ``start_search_position_right + 1`` and walks towards
    the end of the sequence. The last base is examined as well (it used to be
    skipped). If the end is passed without finding an A, or the start index
    is already beyond the last base, an error is printed and the program
    exits via ``sys.exit()``.

    Returns
    -------
    int
        Index of the A that was found.
    """
    current_position = start_search_position_right + 1
    last_plasmid_index = len(new_plasmid_seq) - 1
    if current_position > last_plasmid_index :
        print('\nError: A could not be found at the right side of the mutations.')
        sys.exit()

    base_at_current_position = new_plasmid_seq[current_position]
    print('Starting to look at position ', current_position, 'with base ', base_at_current_position)

    while base_at_current_position not in ['A', 'a'] :
        current_position += 1
        # Exit only once we have stepped past the final index, so the final
        # base still gets checked.
        if current_position > last_plasmid_index :
            print('\nError: A could not be found at the right side of the mutations.')
            sys.exit()
        base_at_current_position = new_plasmid_seq[current_position]
        print('Current index is ', current_position, ' whith base ', base_at_current_position)

    print('\nA flanking on the right side of the mutations was found at index ', current_position)

    return current_position
    

def find_overlap_flanking_positions(start_search_position_left, start_search_position_right, new_plasmid_seq):
    """Run the left and the right flanking-base search and return both results.

    The left search is performed first, then the right search.

    Returns
    -------
    tuple
        ``(overlap_flanking_position_left, overlap_flanking_position_right)``.
    """
    return (
        find_overlap_flanking_position_left(new_plasmid_seq, start_search_position_left),
        find_overlap_flanking_position_right(new_plasmid_seq, start_search_position_right),
    )

In [9]:
def find_overlap_end_position_left(overlap_flanking_position_left, plasmid) :
    """Look for the A/a that closes the left overlap.

    The indices ``flank - 5`` down to ``flank - 9`` are inspected in that
    order, where ``flank`` is ``overlap_flanking_position_left``. Indices
    below 0 are not inspected, so a window that reaches past the start of
    the plasmid can no longer wrap around to bases at its end.

    Returns
    -------
    int or bool
        Index of the first A found in the window, or ``False`` when there is
        none. NOTE: index 0 compares equal to ``False`` under ``==``; callers
        that test the result with ``== False`` will treat a hit at index 0 as
        "not found".
    """
    search_range_end_overlap = range((overlap_flanking_position_left - 5), (overlap_flanking_position_left - 10), -1)
    overlap_end_condition = ['A', 'a']

    print('\nCurrently looking for the A to make an overlap flanking the left side of the mutation.')
    print('The index range to look for the last overlap base is', search_range_end_overlap)

    for position in search_range_end_overlap:
        if position < 0:
            # The window is descending, so every remaining index is negative too.
            break
        print('Current base is', plasmid[position])
        if plasmid[position] in overlap_end_condition:
            print('Found end position at', position)
            return position

    return False

    
def find_overlap_end_position_right(overlap_flanking_position_right, plasmid) :
    """Look for the T/t that closes the right overlap.

    The indices ``flank + 5`` up to ``flank + 9`` are inspected in that
    order, where ``flank`` is ``overlap_flanking_position_right``. Indices
    beyond the last base are not inspected, so a window that reaches past
    the end of the plasmid yields ``False`` instead of an IndexError.

    Returns
    -------
    int or bool
        Index of the first T found in the window, or ``False`` when there is
        none.
    """
    search_range_end_overlap = range((overlap_flanking_position_right + 5), (overlap_flanking_position_right + 10))
    overlap_end_condition = ['T', 't']
    plasmid_length = len(plasmid)

    # The message used to say "left side" (copied from the left-hand search).
    print('\nCurrently looking for the T to make an overlap flanking the right side of the mutation.')
    print('The index range to look for the last overlap base is', search_range_end_overlap)

    for position in search_range_end_overlap:
        if position >= plasmid_length:
            # The window is ascending, so every remaining index is out of range too.
            break
        print('Current base is', plasmid[position])
        if plasmid[position] in overlap_end_condition:
            print('Found end position at', position)
            return position

    return False
    

def find_overlap_end_positions(plasmid, overlap_flanking_position_left, overlap_flanking_position_right):
    """Run the left and the right overlap-end search and return both results.

    The left search is performed first, then the right search.

    Returns
    -------
    tuple
        ``(overlap_end_position_left, overlap_end_position_right)``; each is
        whatever the corresponding single-side search returned.
    """
    return (
        find_overlap_end_position_left(overlap_flanking_position_left, plasmid),
        find_overlap_end_position_right(overlap_flanking_position_right, plasmid),
    )

In [10]:
def make_overlap_seq(plasmid, overlap_flanking_position_left, overlap_end_position_left,
                     overlap_flanking_position_right, overlap_end_position_right):
    """Cut the left and right overlap regions out of the plasmid.

    The left overlap spans ``overlap_end_position_left`` to
    ``overlap_flanking_position_left`` and the right overlap spans
    ``overlap_flanking_position_right`` to ``overlap_end_position_right``
    (both ends inclusive). For each region the forward strand and its
    reverse complement are returned.

    Returns
    -------
    tuple
        ``(overlap_left_fw, overlap_left_rv, overlap_right_fw, overlap_right_rv)``.
    """
    left_window = slice(overlap_end_position_left, overlap_flanking_position_left + 1)
    right_window = slice(overlap_flanking_position_right, overlap_end_position_right + 1)

    left_forward = plasmid[left_window]
    left_reverse = left_forward.reverse_complement()
    right_forward = plasmid[right_window]
    right_reverse = right_forward.reverse_complement()

    print('Left forward overlapping sequence is: ', left_forward)
    print('Left reverse overlapping sequence is: ', left_reverse)
    print('Right forward overlapping sequence is: ', right_forward)
    print('Right reverse overlapping sequence is: ', right_reverse)

    return left_forward, left_reverse, right_forward, right_reverse

In [11]:
def make_single_non_overlap(plasmid, start_position, non_overlap_search_range, primer_direction, t_anneal):
    """Grow a non-overlapping primer segment until it melts at ``t_anneal``.

    The segment is anchored at ``start_position + primer_direction``. For
    every offset in ``non_overlap_search_range`` the plasmid slice between
    the anchor and ``anchor + offset`` (both inclusive, always taken in
    ascending index order) is evaluated with ``MeltingTemp.Tm_NN``. The first
    slice whose Tm is at least ``t_anneal`` and whose first or last base is
    G/C is selected.

    The program exits via ``sys.exit()`` when no slice qualifies, when the
    selected Tm exceeds ``t_anneal + 5``, or when the selected slice is
    shorter than 5 bases.

    Returns
    -------
    tuple
        ``(seq, tm)`` - the selected slice and its melting temperature.
    """
    gc_bases = ['C', 'c', 'G', 'g']

    print('\nCurrently calculating the non-overlapping region for the primer.')

    anchor = start_position + primer_direction
    print('\nStart search position is: ', anchor, 'with base', plasmid[anchor])

    for offset in non_overlap_search_range:
        far_end = anchor + offset
        print('\nCurrent position is: ', far_end)
        print('Base in this position is: ', plasmid[far_end])

        # Slice in ascending order regardless of the search direction.
        low, high = (anchor, far_end) if far_end > anchor else (far_end, anchor)
        seq = plasmid[low:(high + 1)]

        tm = MeltingTemp.Tm_NN(seq)

        if tm >= t_anneal and (seq[-1] in gc_bases or seq[0] in gc_bases):
            break
    else:
        print('\nError: could not resolve non-overlapping Tm for the primer.')
        sys.exit()

    if tm > (t_anneal + 5):
        print('\nError: Oh, no! The Tm for one of the non-overlapping regions is too high! **cry**')
        sys.exit()

    if len(seq) < 5:
        print('\nError: Hm... The length of this non-overlapping region is too short. **sad**')
        sys.exit()

    print('\nThe non-overlapping sequence of the primer of this insert is', seq)
    print('and the Tm is', tm)

    return seq, tm

In [12]:
def make_all_non_overlaps(plasmid, last_mutation_position, overlap_end_position_left, first_mutation_position,
                          overlap_end_position_right, t_anneal):
    """Build the four non-overlapping primer segments and their Tm values.

    Forward segments are searched with offsets 10..59 in direction +1,
    reverse segments with offsets -10..-59 in direction -1; reverse segments
    are returned reverse-complemented. Start positions:

    * left forward  - ``last_mutation_position``
    * left reverse  - ``overlap_end_position_left``
    * right forward - ``overlap_end_position_right``
    * right reverse - ``first_mutation_position``

    Returns
    -------
    tuple
        ``(left_fw_seq, tm_left_fw, left_rv_seq, tm_left_rv,
        right_fw_seq, tm_right_fw, right_rv_seq, tm_right_rv)``.
    """
    forward_offsets = range(10, 60)
    reverse_offsets = range(-10, -60, -1)

    left_fw_seq, tm_left_fw = make_single_non_overlap(
        plasmid, last_mutation_position, forward_offsets, 1, t_anneal)

    left_rv_template, tm_left_rv = make_single_non_overlap(
        plasmid, overlap_end_position_left, reverse_offsets, -1, t_anneal)
    left_rv_seq = left_rv_template.reverse_complement()

    right_fw_seq, tm_right_fw = make_single_non_overlap(
        plasmid, overlap_end_position_right, forward_offsets, 1, t_anneal)

    right_rv_template, tm_right_rv = make_single_non_overlap(
        plasmid, first_mutation_position, reverse_offsets, -1, t_anneal)
    right_rv_seq = right_rv_template.reverse_complement()

    return (left_fw_seq, tm_left_fw, left_rv_seq, tm_left_rv,
            right_fw_seq, tm_right_fw, right_rv_seq, tm_right_rv)

In [13]:
def make_inter_sequence(plasmid, overlap_flanking_position_left, last_mutation_position, first_mutation_position,
                        overlap_flanking_position_right):
    """Extract the stretches that join each overlap to the mutated bases.

    * Left forward: the bases after ``overlap_flanking_position_left`` up to
      and including ``last_mutation_position``.
    * Right reverse: the bases from ``first_mutation_position`` up to (not
      including) ``overlap_flanking_position_right``, reverse-complemented.

    Returns
    -------
    tuple
        ``(inter_seq_left_fw, inter_seq_right_rv)``.
    """
    left_start = overlap_flanking_position_left + 1
    left_stop = last_mutation_position + 1
    left_forward = plasmid[left_start:left_stop]

    right_template = plasmid[first_mutation_position:overlap_flanking_position_right]
    right_reverse = right_template.reverse_complement()

    print('Left forward primer inter-sequence is ', left_forward)
    print('Right reverse primer inter-sequence is ', right_reverse)

    return left_forward, right_reverse

In [14]:
def fuse(overlap_left_fw, overlap_left_rv, overlap_right_fw, overlap_right_rv, non_overlap_left_fw, non_overlap_left_rv,
         non_overlap_right_fw, non_overlap_right_rv, inter_seq_left_fw, inter_seq_right_rv):
    """Assemble the four final primers from their parts.

    In every primer the last base of the overlap is replaced by ``'U'``.
    The left forward and right reverse primers additionally carry their
    inter-sequence between the overlap and the non-overlapping segment.

    Returns
    -------
    tuple
        ``(primer_left_fw, primer_left_rv, primer_right_fw, primer_right_rv)``.
    """
    def with_terminal_u(overlap):
        # Drop the last overlap base and put a U in its place.
        return overlap[:-1] + 'U'

    left_forward = with_terminal_u(overlap_left_fw) + inter_seq_left_fw + non_overlap_left_fw
    print('The final left forward primer is ', left_forward)

    left_reverse = with_terminal_u(overlap_left_rv) + non_overlap_left_rv
    print('The final left reverse primer is ', left_reverse)

    right_forward = with_terminal_u(overlap_right_fw) + non_overlap_right_fw
    print('The final right forward primer is ', right_forward)

    right_reverse = with_terminal_u(overlap_right_rv) + inter_seq_right_rv + non_overlap_right_rv
    print('The final right reverse primer is ', right_reverse)

    return left_forward, left_reverse, right_forward, right_reverse

In [15]:
def check_gc_content(primer_sequence):
    """Return the GC content of ``primer_sequence`` as a percentage.

    The value is ``gc_fraction(primer_sequence)`` multiplied by 100.
    """
    return gc_fraction(primer_sequence) * 100


def check_gc_content_all_primers(primer_left_fw, primer_left_rv, primer_right_fw, primer_right_rv):
    """Compute and print the GC percentage of all four primers.

    All four values are computed first (left fw, left rv, right fw, right
    rv), then printed in the same order.

    Returns
    -------
    tuple
        ``(gc_left_fw, gc_left_rv, gc_right_fw, gc_right_rv)``.
    """
    gc_left_fw, gc_left_rv, gc_right_fw, gc_right_rv = [
        check_gc_content(primer)
        for primer in (primer_left_fw, primer_left_rv, primer_right_fw, primer_right_rv)
    ]

    report = (
        ('\nGC content of primer_left_fw is ', gc_left_fw),
        ('\nGC content of primer_left_rv is ', gc_left_rv),
        ('\nGC content of primer_right_fw is ', gc_right_fw),
        ('\nGC content of primer_right_rv is ', gc_right_rv),
    )
    for message, value in report:
        print(message, value)

    return gc_left_fw, gc_left_rv, gc_right_fw, gc_right_rv

In [16]:
def check_primer(primer, gc_content):
    """Flag a primer whose GC content or length is outside the accepted range.

    A primer is unusable when ``gc_content`` is below 30 or above 70, or when
    its length is below 15 or above 60. A message is printed for each rule
    that is violated.

    Returns
    -------
    bool
        ``True`` when at least one rule is violated, otherwise ``False``.
    """
    gc_out_of_range = gc_content < 30 or gc_content > 70
    if gc_out_of_range:
        print('\nThe GC content of at least one primer in this pair is at least 20% off the ideal 50% content, so this primer pair will be considered unusable.')

    primer_length = len(primer)
    length_out_of_range = not (15 <= primer_length <= 60)
    if length_out_of_range:
        print('\nThe length of at least one primer in this pair is too short (<15 bp) or too long (>60 bp), so this primer pair will be considered unusable.')

    return bool(gc_out_of_range or length_out_of_range)

def check_tm_pair(tm_fw, tm_rv, t_anneal):
    """Flag a primer pair whose melting temperatures are incompatible.

    The pair is unusable when the two Tm values differ by more than 5, or
    when either Tm is more than 5 away from ``t_anneal``. A message is
    printed for each rule that is violated.

    Returns
    -------
    bool
        ``True`` when at least one rule is violated, otherwise ``False``.
    """
    pair_gap_too_large = abs(tm_fw - tm_rv) > 5
    if pair_gap_too_large:
        print('\nThe Tm difference between primer pair is too big (>5), so this primer pair will be considered unusable.')

    far_from_annealing = abs(tm_fw - t_anneal) > 5 or abs(tm_rv - t_anneal) > 5
    if far_from_annealing:
        print('\nThe Tm of at least one primer in this pair is too far from the annealing temperature, so this primer pair will be considered unusable.')

    return bool(pair_gap_too_large or far_from_annealing)

def calculate_homopolymeric_penalty(primer) :
    """Score runs of identical consecutive bases in a primer.

    For every run, the number of identical adjacent pairs (run length minus
    one) is counted; each run contributes ``pairs - 4`` when that is positive,
    and the total is multiplied by 10. Runs at the very end of the primer are
    scored as well.

    Previously the pair counter was reset on every loop iteration, so it
    could never exceed 1 and the penalty was always 0.

    Returns
    -------
    int
        The penalty score (0 when no run is long enough). It is also printed.
    """
    total_penalty = 0
    run_pairs = 0   # identical adjacent pairs seen in the current run

    for n in range(1, len(primer)) :
        if primer[n - 1] == primer[n] :
            run_pairs += 1
            continue
        # The run just ended: score it and start counting the next one.
        total_penalty += max(run_pairs - 4, 0)
        run_pairs = 0

    # A run that reaches the last base is never closed inside the loop.
    total_penalty += max(run_pairs - 4, 0)

    score = total_penalty * 10

    print('Homopolymeric penalty for this primer is ', score)

    return score

def score_primers(t_anneal, primer_left_fw, primer_left_rv, primer_right_fw, primer_right_rv, tm_left_fw, tm_left_rv, tm_right_fw,
                  tm_right_rv, gc_content_primer_left_fw, gc_content_primer_left_rv, gc_content_primer_right_fw, gc_content_primer_right_rv) :
    """Choose between the left and the right primer pair.

    Every primer is first checked individually with ``check_primer``. A pair
    whose two primers pass is then checked with ``check_tm_pair``. If exactly
    one pair is usable it is returned. If both are usable, each pair gets a
    score (summed primer lengths + summed GC deviation from 50 + summed
    homopolymer penalties); the pair with the lower score is returned and the
    left pair wins ties.

    Returns
    -------
    tuple
        ``(primer_fw, tm_fw, primer_rv, tm_rv)`` of the selected pair. When
        neither pair is usable the result is ``('Fail', None, 'Fail', None)``;
        previously this case raised UnboundLocalError because the Tm values
        were never assigned.
    """
    # Individual checks run for all four primers, in this order, so that the
    # printed diagnostics are complete.
    left_fw_unusable = check_primer(primer_left_fw, gc_content_primer_left_fw)
    left_rv_unusable = check_primer(primer_left_rv, gc_content_primer_left_rv)
    right_fw_unusable = check_primer(primer_right_fw, gc_content_primer_right_fw)
    right_rv_unusable = check_primer(primer_right_rv, gc_content_primer_right_rv)

    # The Tm compatibility of a pair is only evaluated when both of its
    # primers passed the individual checks.
    primers_unusable_left = bool(left_fw_unusable or left_rv_unusable)
    if not primers_unusable_left :
        primers_unusable_left = bool(check_tm_pair(tm_left_fw, tm_left_rv, t_anneal))

    primers_unusable_right = bool(right_fw_unusable or right_rv_unusable)
    if not primers_unusable_right :
        primers_unusable_right = bool(check_tm_pair(tm_right_fw, tm_right_rv, t_anneal))

    left_pair = (primer_left_fw, tm_left_fw, primer_left_rv, tm_left_rv)
    right_pair = (primer_right_fw, tm_right_fw, primer_right_rv, tm_right_rv)

    if primers_unusable_left and primers_unusable_right :
        print('Error: All designed primers for this mutant violated at least one critical rule for primer design. Hence, no primer will be proposed.')
        # No primer is proposed, so there is no melting temperature to report.
        return 'Fail', None, 'Fail', None

    if primers_unusable_right :
        return left_pair

    if primers_unusable_left :
        return right_pair

    score_length_left = len(primer_left_fw) + len(primer_left_rv)
    score_length_right = len(primer_right_fw) + len(primer_right_rv)
    score_gc_content_left = abs(gc_content_primer_left_fw - 50) + abs(gc_content_primer_left_rv - 50)
    score_gc_content_right = abs(gc_content_primer_right_fw - 50) + abs(gc_content_primer_right_rv - 50)
    score_homopolymeric_penalty_left = calculate_homopolymeric_penalty(primer_left_fw) + calculate_homopolymeric_penalty(primer_left_rv)
    score_homopolymeric_penalty_right = calculate_homopolymeric_penalty(primer_right_fw) + calculate_homopolymeric_penalty(primer_right_rv)

    total_score_left = score_length_left + score_gc_content_left + score_homopolymeric_penalty_left
    total_score_right = score_length_right + score_gc_content_right + score_homopolymeric_penalty_right

    print('Total score left is ', total_score_left)
    print('Total score right is ', total_score_right)

    # Lower score is better; the left pair is kept on a tie.
    if total_score_left > total_score_right :
        return right_pair
    return left_pair

In [17]:
def design_primer(original_codon, new_codon, codon_start_plasmid, mutation, plasmid, t_anneal=56) :
    """Design the primer pair for one mutation on one mutated plasmid.

    Steps, in order: locate the changed bases, find the flanking and end
    positions of the left and right overlap, cut the overlap sequences, build
    the non-overlapping segments, build the inter-sequences, fuse everything
    into four primers, compute their GC content and select the better pair.

    Parameters
    ----------
    original_codon, new_codon : codon before and after the mutation.
    codon_start_plasmid : plasmid index of the first base of the codon.
    mutation : mutation label, used only in progress messages.
    plasmid : the mutated plasmid sequence.
    t_anneal : target annealing temperature passed to the Tm-based steps.
        Defaults to 56, the value that used to be hard-coded here.

    Returns
    -------
    tuple
        ``(primer_fw, tm_fw, primer_rv, tm_rv)`` as returned by
        ``score_primers``.
    """
    print('\nCurrently determining the position of the first and last base change in the mutation ', mutation)
    first_mutation_position, last_mutation_position = find_mutation_positions(original_codon, new_codon, codon_start_plasmid)

    start_search_position_left, start_search_position_right = first_mutation_position, last_mutation_position

    # False is the "not found yet" marker for all four positions.
    # NOTE: the comparisons below use ==, so a position of 0 is treated the
    # same as False.
    overlap_flanking_position_left, overlap_flanking_position_right = False, False
    overlap_end_position_left, overlap_end_position_right = False, False

    while (overlap_end_position_left == False) or (overlap_end_position_right == False) :
        # When a side found a flanking base but no end base, restart that
        # side's flanking search from the flanking base found last time.
        if (overlap_end_position_left == False) and (overlap_flanking_position_left != False):
            start_search_position_left = overlap_flanking_position_left
        if (overlap_end_position_right == False) and (overlap_flanking_position_right != False):
            start_search_position_right = overlap_flanking_position_right

        print('\nCurrently looking for the first base to make an overlap flanking the mutation ', mutation)
        overlap_flanking_position_left, overlap_flanking_position_right = find_overlap_flanking_positions(
            start_search_position_left,
            start_search_position_right,
            plasmid)

        print('\nCurrently looking for the last base to make an overlap flanking the mutation ', mutation)
        overlap_end_position_left, overlap_end_position_right = find_overlap_end_positions(
            plasmid,
            overlap_flanking_position_left,
            overlap_flanking_position_right)

    print('\nMaking the overlapping sequence for the mutation ', mutation)
    overlap_left_fw, overlap_left_rv, overlap_right_fw, overlap_right_rv = make_overlap_seq(
        plasmid,
        overlap_flanking_position_left,
        overlap_end_position_left,
        overlap_flanking_position_right,
        overlap_end_position_right)

    print('\nMaking the non-overlapping sequence for the mutation ', mutation)
    (non_overlap_left_fw, tm_left_fw, non_overlap_left_rv, tm_left_rv,
     non_overlap_right_fw, tm_right_fw, non_overlap_right_rv, tm_right_rv) = make_all_non_overlaps(
        plasmid,
        last_mutation_position,
        overlap_end_position_left,
        first_mutation_position,
        overlap_end_position_right,
        t_anneal)

    print('\nMaking the inter-sequence for the mutation ', mutation)
    inter_seq_left_fw, inter_seq_right_rv = make_inter_sequence(plasmid, overlap_flanking_position_left,
                                                                last_mutation_position, first_mutation_position,
                                                                overlap_flanking_position_right)

    print('\nReplacing T with U in the overlapping region and combining with the non-overlapping region to finalize the primers for the variant ', mutation)
    primer_left_fw, primer_left_rv, primer_right_fw, primer_right_rv = fuse(overlap_left_fw, overlap_left_rv,
                                                                            overlap_right_fw, overlap_right_rv,
                                                                            non_overlap_left_fw, non_overlap_left_rv,
                                                                            non_overlap_right_fw, non_overlap_right_rv,
                                                                            inter_seq_left_fw, inter_seq_right_rv)

    print('\nCalculating GC content of designed primers.')

    (gc_content_primer_left_fw, gc_content_primer_left_rv,
     gc_content_primer_right_fw, gc_content_primer_right_rv) = check_gc_content_all_primers(
        primer_left_fw,
        primer_left_rv,
        primer_right_fw,
        primer_right_rv)

    primer_fw, tm_fw, primer_rv, tm_rv = score_primers(t_anneal,
                                         primer_left_fw, primer_left_rv, primer_right_fw, primer_right_rv,
                                         tm_left_fw, tm_left_rv, tm_right_fw, tm_right_rv,
                                         gc_content_primer_left_fw, gc_content_primer_left_rv, gc_content_primer_right_fw, gc_content_primer_right_rv)

    return primer_fw, tm_fw, primer_rv, tm_rv
        

def bulk_primer_design(mutations_count, original_codons, new_codons, codons_start_index_plasmid, mutations_list,
                       new_plasmid_seqs):
    """Design primers for the first ``mutations_count`` mutations.

    The i-th entries of the parallel input lists are passed together to
    ``design_primer``.

    Returns
    -------
    list
        One ``[primer_fw, tm_fw, primer_rv, tm_rv]`` list per mutation, in
        input order.
    """
    primers_list = []

    for index in range(mutations_count):
        forward_primer, forward_tm, reverse_primer, reverse_tm = design_primer(
            original_codons[index],
            new_codons[index],
            codons_start_index_plasmid[index],
            mutations_list[index],
            new_plasmid_seqs[index],
        )
        primers_list.append([forward_primer, forward_tm, reverse_primer, reverse_tm])

    return primers_list

In [18]:
def transpose_primers(primers_list, mutations_count) :
    """Turn a list of per-mutation rows into a list of per-field columns.

    ``primers_list[j][i]`` becomes ``result[i][j]`` for the first
    ``mutations_count`` rows; the number of columns is taken from the first
    row.

    Returns
    -------
    list
        One list per field. An empty ``primers_list`` yields ``[]`` (it used
        to raise IndexError on ``primers_list[0]``).
    """
    if not primers_list :
        return []

    column_count = len(primers_list[0])
    return [[primers_list[row][column] for row in range(mutations_count)] for column in range(column_count)]

In [19]:
def export_primers(transposed_primers_list, mutations_list, mutations_count):
    """Write the selected primers to ``primers_list.xlsx``.

    Every mutation contributes two rows, forward primer first:
    name ``"<mutation> fw"`` / ``"<mutation> rv"``, number 1 / 2, sequence
    and Tm taken from columns 0/1 (forward) and 2/3 (reverse) of
    ``transposed_primers_list``. The "Short name" column is left empty.
    The sheet is written without the DataFrame index. Nothing is returned.
    """
    names, numbers, sequences, melting_temps = [], [], [], []

    for index in range(mutations_count):
        sequences += [transposed_primers_list[0][index], transposed_primers_list[2][index]]
        names += [mutations_list[index] + ' fw', mutations_list[index] + ' rv']
        melting_temps += [transposed_primers_list[1][index], transposed_primers_list[3][index]]
        numbers += [1, 2]

    primers_table = pd.DataFrame({
        'Name': names,
        'Number': numbers,
        'Short name': None,
        'Sequence': sequences,
        'Tm': melting_temps,
    })
    primers_table.to_excel("primers_list.xlsx", index=False)

In [20]:
def main():
    """Run the whole pipeline: load inputs, mutate, design primers, export.

    Inputs are read from 'codon_usage_frequency_escherichia_coli_b.xlsx',
    'plasmid.txt' and 'mutations.txt'; the result is written by
    ``export_primers``.
    """
    # Only the "Codon 1" mapping is used further down; the other two values
    # returned by the loader are not needed here.
    _amino_acids, codons_1, _codons_2 = load_codon_usage_table('codon_usage_frequency_escherichia_coli_b.xlsx')
    plasmid_seq, start_gene_index, gene_seq, _protein_seq = load_plasmid('plasmid.txt')
    mutations_list, mutations_count = load_mutations('mutations.txt')

    _original_AAs, mutation_position_AAs, new_AAs = separate_all_mutations(mutations_list)

    mutated = make_mutations(mutations_count, mutation_position_AAs, new_AAs, gene_seq, codons_1,
                             start_gene_index, plasmid_seq)
    original_codons, new_codons, new_plasmid_seqs, codons_start_index_plasmid = mutated

    primers_list = bulk_primer_design(mutations_count, original_codons, new_codons, codons_start_index_plasmid,
                                      mutations_list, new_plasmid_seqs)

    export_primers(transpose_primers(primers_list, mutations_count), mutations_list, mutations_count)

In [21]:
# Entry point: run the full pipeline when this code is executed directly
# (i.e. when __name__ is "__main__"), but not when it is imported.
if __name__ == "__main__":
    main()

Mutation now is  S10A
Mutation now is  R13K
Mutation now is  A15H
Original codon is  TCG
New codon is  GCG
Original codon is  CGG
New codon is  AAA
Original codon is  GCT
New codon is  CAT

Currently determining the position of the first and last base change in the mutation  S10A
Base in original codon is  T
Base in new codon is  G
First base mutation found at position  231
Base in original codon is  C
Base in new codon is  C
Base in original codon is  G
Base in new codon is  G
Last base mutation not found, so first base mutation will be used, which is in index  231

Currently looking for the first base to make an overlap flanking the mutation  S10A
Starting to look at position  230 with base  C
Current index is  229  whith base  C
Current index is  228  whith base  G
Current index is  227  whith base  T

T flanking on the left side of the mutations was found at index  227
Starting to look at position  232 with base  C
Current index is  233  whith base  G
Current index is  234  whith b