# Load libraries

In [1]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqUtils import MeltingTemp
import pandas as pd

# Assign Tm calculation parameters

In [2]:
# Define custom concentrations (replace these values with your specific conditions)
dna_conc = 50    # DNA concentration in nanomolar
dntp_conc = 0.8  # dNTP concentration in millimolar
mg_conc = 1.5    # Mg2+ concentration in millimolar
na_conc = 20     # Na+ concentration in millimolar
k_conc = 0.0     # K+ concentration in millimolar

# Load data from files and create plasmids

In [3]:
# Open the file for reading
with open('template.txt', 'r') as template_file:
    # Read each line
    template_lines = template_file.readlines()
print('Template file was loaded.')

template_seq = Seq(template_lines[0].strip())
print('Template sequence was loaded.')

start_insert_index = int(template_lines[1]) - 1
end_insert_index = int(template_lines[2]) - 1
print('Start and end positions were read.')

empty_plasmid = template_seq[:start_insert_index] + template_seq[(end_insert_index + 1):]
print('Empty plasmid was generated.')
print('Empty plasmid is: ', empty_plasmid)
print('Notice: This empty plasmid sequence is merely for troubleshooting and is not used further.')

print('Plasmids are being built.')

plasmid_ids = []
plasmids = []
insert_lengths = []
number_seqs = 0

# Reading from a FASTA file
for sequence in SeqIO.parse("inserts.fasta", "fasta"):
    insert_lengths.append(len(sequence.seq))
    plasmid_ids.append(sequence.id)
    construct = template_seq[:start_insert_index] + sequence.seq + template_seq[(end_insert_index + 1):]
    print('Sequence for plasmid ID ', plasmid_ids[number_seqs], 'is ', construct)
    plasmids.append(construct)
    number_seqs += 1
    
print('Insert IDs and sequences of ', number_seqs, 'entries were fetched from file and the corresponding plasmids were built.')
print('Simultaneously, insert lengths were recorded.')

Template file was loaded.
Template sequence was loaded.
Start and end positions were read.
Empty plasmid was generated.
Empty plasmid is:  gcatctgtgcggtatttcacaccgcaatggtgcactctcagtacaatctgctctgatgccgcatagttaagccagtatacactccgctatcgctacgtgactgggtcatggctgcgccccgacacccgccaacacccgctgacgcgccctgacgggcttgtctgctcccggcatccgcttacagacaagctgtgaccgtctccgggagctgcatgtgtcagaggttttcaccgtcatcaccgaaacgcgcgaggcagctgcggtaaagctcatcagcgtggtcgtgaagcgattcacagatgtctgcctgttcatccgcgtccagctcgttgagtttctccagaagcgttaatgtctggcttctgataaagcgggccatgttaagggcggttttttcctgtttggtcactgatgcctccgtgtaagggggatttctgttcatgggggtaatgataccgatgaaacgagagaggatgctcacgatacgggttactgatgatgaacatgcccggttactggaacgttgtgagggtaaacaactggcggtatggatgcggcgggaccagagaaaaatcactcagggtcaatgccagcgcttcgttaatacagatgtaggtgttccacagggtagccagcagcatcctgcgatgcagatccggaacataatggtgcagggcgctgacttccgcgtttccagactttacgaaacacggaaaccgaagaccattcatgttgttgctcaggtcgcagacgttttgcagcagcagtcgcttcacgttcgctcgcgtatcggtgattcattctgctaaccagtaaggcaaccccgccagcctagccgggtcctcaacgacaggagcacga

# Define essential functions for primer building

In [4]:
def find_overlap_flanking_position(start_insert_index,plasmid,id,overlap_flanking_condition,overlap_end_search_direction) :
    
    print('\n\nCurrently looking for the correct base flanking the insert ID ', id)
    
    print('overlap direction is ', overlap_end_search_direction)
    print('start looking at position ', start_insert_index)
    print('and base is ', plasmid[start_insert_index])
    
    search_range_start_overlap = [overlap_end_search_direction, 2*overlap_end_search_direction, 0]
    
    for position_change in search_range_start_overlap :
        position = start_insert_index + position_change
        print('current base is ',plasmid[position])
        if plasmid[position] in overlap_flanking_condition :
            overlap_flanking_position = position #In this iteration, stores the position of T (start position)
            print('Found start position at ', overlap_flanking_position)
            break
        if position_change == search_range_start_overlap[-1] :
            print('\nError: could not find the correct base flanking the insert ID ', id)
            overlap_flanking_position = None
    
    return overlap_flanking_position

    
def find_overlap_end_position(overlap_flanking_position,plasmid,id,overlap_end_condition,overlap_end_search_direction) :
    
    print('\nCurrently looking for the correct base to make an overlap flanking the insert ID ', id)
    
    print('overlap direction is ', overlap_end_search_direction)
    print('start looking at position ', overlap_flanking_position)
    print('and base is ', plasmid[overlap_flanking_position])
    
    search_range_end_overlap = range((overlap_flanking_position+(5*overlap_end_search_direction)),
                      (overlap_flanking_position+(10*overlap_end_search_direction)),overlap_end_search_direction)
    
    print('The range to look for the last overlap base is ', search_range_end_overlap)
    
    for position in search_range_end_overlap :
        print('current base is ',plasmid[position])
        if plasmid[position] in overlap_end_condition :
            overlap_end_position = position #In this iteration, stores the position of A (end position)
            print('Found end position at ', overlap_end_position)
            break
        if position == search_range_end_overlap[-1] :
            print('\nError: could not find the correct base to make an overlap flanking the insert ID ', id)
            overlap_end_position = None
            
    return overlap_end_position


def calculate_non_overlap(plasmid,last_overlap_position,id,non_overlap_search_range,primer_direction) :
    
    print('\nCurrently calculating the length of the non-overlapping region for the primer of the insert ID ', id)
    
    start_position = last_overlap_position + primer_direction
    
    print('\nStart search position is: ', start_position, ' with base ', plasmid[start_position])
    
    for non_overlap_length in non_overlap_search_range :
        current_position = start_position + non_overlap_length
        print('\nCurrent position is: ', current_position)
        print('Base in this position is: ', plasmid[current_position])
        if current_position > start_position :
            seq = plasmid[start_position:(current_position + 1)]
            tm = MeltingTemp.Tm_NN(seq)
            if tm >= 56 and seq[-1] in ['C','c','G','g'] :
                break
        else :
            seq = plasmid[current_position:(start_position + 1)]
            tm = MeltingTemp.Tm_NN(seq)
            if tm >= 56 and seq[0] in ['C','c','G','g'] :
                break
        print('Sequence now is: ', seq)
        print('Tm now is: ', tm)
        
        if non_overlap_length == non_overlap_search_range[-1] :
            print('\nError: could not resolve non-overlapping Tm for the primer of the insert ID ', id)
            seq = None
            tm = None
    
    print('\nThe non-overlapping sequence of the primer of this insert is ', seq)
    
    return seq, tm

# Define main primer builder function

In [5]:
def primer_calculator(id,plasmid,start_insert_index,overlap_end_search_direction) :

    
    if overlap_end_search_direction == -1 :
        overlap_flanking_condition = ['T','t']
        overlap_end_condition = ['A','a']
        
    elif overlap_end_search_direction == 1 :
        overlap_flanking_condition = ['A','a']
        overlap_end_condition = ['T','t']
        
    else :
        overlap_flanking_condition = None
        overlap_end_condition = None
        print('Wrong value for overlap end search direction.')
        
    
    overlap_flanking_position = find_overlap_flanking_position(start_insert_index,plasmid,id,
                                                               overlap_flanking_condition,overlap_end_search_direction)
    
    
    overlap_end_position = find_overlap_end_position(overlap_flanking_position,plasmid,id,
                                                     overlap_end_condition,overlap_end_search_direction)
    
    
    print('\nMaking the overlapping sequence for the insert ID ', id)
    
    if overlap_end_search_direction == -1 :
        overlap_seq = plasmid[overlap_end_position:(overlap_flanking_position + 1)]
        
    if overlap_end_search_direction == 1 :
        overlap_seq = plasmid[overlap_flanking_position:(overlap_end_position + 1)]    
        
    overlap_seq_rv = overlap_seq.reverse_complement()
        
    print('Overlapping sequence is: ', overlap_seq)
    print('Reverse overlapping sequence is: ', overlap_seq_rv)
    
    
    non_overlap_search_range_fwd = range(10,60)
    non_overlap_search_range_rv = range(-10,-60,-1)
    
    if overlap_end_search_direction == -1 :
        non_overlap_fw = calculate_non_overlap(plasmid,overlap_flanking_position,id,non_overlap_search_range_fwd,1)
        non_overlap_fw_seq = non_overlap_fw[0]
        non_overlap_fw_tm = non_overlap_fw[1]
        non_overlap_rv = calculate_non_overlap(plasmid,overlap_end_position,id,non_overlap_search_range_rv,-1)
        non_overlap_rv_seq = non_overlap_rv[0].reverse_complement()
        non_overlap_rv_tm = non_overlap_rv[1]
        
    elif overlap_end_search_direction == 1 :
        non_overlap_fw = calculate_non_overlap(plasmid,overlap_end_position,id,non_overlap_search_range_fwd,1)
        non_overlap_fw_seq = non_overlap_fw[0]
        non_overlap_fw_tm = non_overlap_fw[1]
        non_overlap_rv = calculate_non_overlap(plasmid,overlap_flanking_position,id,non_overlap_search_range_rv,-1)
        non_overlap_rv_seq = non_overlap_rv[0].reverse_complement()
        non_overlap_rv_tm = non_overlap_rv[1]
        
    else :
        print('Wrong value for overlap end search direction.')
        
    
    print('\nReplacing T with U in the overlapping region and combining with the non-overlapping region to finalize the primer for the insert ID ', id)        
    primer_fw = overlap_seq[:-1] + 'U' + non_overlap_fw_seq
    print('The final primer is ', primer_fw)
    primer_rv = overlap_seq_rv[:-1] + 'U' + non_overlap_rv_seq
    print('The final reverse primer is ', primer_rv)
    
    
    return primer_fw, non_overlap_fw_tm, primer_rv, non_overlap_rv_tm

# Define function for bulk primer construction

In [6]:
def primers_insert_calculator(plasmid_ids,plasmids,start_insert_index,overlap_end_search_direction) :
    
    primers_list = []
    
    for plasmid_count in range(number_seqs) :
        start_point = start_insert_index
        if overlap_end_search_direction == 1 :
            start_point += (insert_lengths[plasmid_count] - 1)
        primer_fw, non_overlap_fw_tm, primer_rv, non_overlap_rv_tm = primer_calculator(plasmid_ids[plasmid_count],plasmids[plasmid_count],start_point,overlap_end_search_direction)
        primers_list.append([primer_fw, non_overlap_fw_tm, primer_rv, non_overlap_rv_tm])
    
    return primers_list

# Build all primers

In [7]:
overlap_end_search_direction_5_line = -1
results_5_line = primers_insert_calculator(plasmid_ids,plasmids,start_insert_index,overlap_end_search_direction_5_line)

overlap_end_search_direction_3_line = 1
results_3_line = primers_insert_calculator(plasmid_ids,plasmids,start_insert_index,overlap_end_search_direction_3_line)



Currently looking for the correct base flanking the insert ID  Caur_1461
overlap direction is  -1
start looking at position  2836
and base is  a
current base is  g
current base is  t
Found start position at  2834

Currently looking for the correct base to make an overlap flanking the insert ID  Caur_1461
overlap direction is  -1
start looking at position  2834
and base is  t
The range to look for the last overlap base is  range(2829, 2824, -1)
current base is  c
current base is  g
current base is  a
Found end position at  2827

Making the overlapping sequence for the insert ID  Caur_1461
Overlapping sequence is:  agccatat
Reverse overlapping sequence is:  atatggct

Currently calculating the length of the non-overlapping region for the primer of the insert ID  Caur_1461

Start search position is:  2835  with base  g

Current position is:  2845
Base in this position is:  a
Sequence now is:  gatgagcgaga
Tm now is:  29.374698125728003

Current position is:  2846
Base in this position is:

# Export all primers built

In [8]:
transposed_results_5_line = [[results_5_line[j][i] for j in range(number_seqs)] for i in range(len(results_5_line[0]))]
transposed_results_3_line = [[results_3_line[j][i] for j in range(number_seqs)] for i in range(len(results_3_line[0]))]

final_primers_seq_list = []
final_primers_names_list = []
final_primers_number_list = []
final_primers_tm_list = []


for number in range(number_seqs) :
    
    final_primers_seq_list.append(transposed_results_5_line[0][number])
    final_primers_seq_list.append(transposed_results_3_line[2][number])
    final_primers_seq_list.append(transposed_results_5_line[2][number])
    final_primers_seq_list.append(transposed_results_3_line[0][number])
    
    final_primers_names_list.append(plasmid_ids[number] + ' insert fw')
    final_primers_names_list.append(plasmid_ids[number] + ' insert rv')
    final_primers_names_list.append(plasmid_ids[number] + ' backbone rv')
    final_primers_names_list.append(plasmid_ids[number] + ' backbone fw')
    
    final_primers_tm_list.append(transposed_results_5_line[1][number])
    final_primers_tm_list.append(transposed_results_3_line[3][number])
    final_primers_tm_list.append(transposed_results_5_line[3][number])
    final_primers_tm_list.append(transposed_results_3_line[1][number])
    
    for n in range(1,5) :
        
        final_primers_number_list.append(n)
        
    
df = pd.DataFrame({'Name': final_primers_names_list, 'Number': final_primers_number_list, 'Short name': None, 'Sequence': final_primers_seq_list, 'Tm': final_primers_tm_list})
df.to_excel("primers_list.xlsx", index=False)