# Load libraries

In [1]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqUtils import MeltingTemp

# Assign Tm calculation parameters

In [2]:
# Reaction conditions intended for the melting-temperature (Tm) calculation.
# Replace these values with your specific conditions.
# NOTE(review): no cell visible in this notebook reads these names, and the Tm
# call inside calculate_non_overlap does not receive them, so editing the values
# currently has no effect on the primers that are built. TODO confirm whether
# they should be forwarded to MeltingTemp.Tm_NN.
dna_conc = 50    # DNA concentration in nanomolar
dntp_conc = 0.8  # dNTP concentration in millimolar
mg_conc = 1.5    # Mg2+ concentration in millimolar
na_conc = 20     # Na+ concentration in millimolar
k_conc = 0.0     # K+ concentration in millimolar

# Load data from files and create plasmids

In [3]:
# Read the template description file. Line 1 holds the template sequence;
# lines 2 and 3 hold the 1-based first and last positions of the region that
# each insert replaces.
with open('template.txt', 'r') as template_file:
    template_lines = template_file.readlines()
print('Template file was loaded.')

# Fail early with a readable message instead of an IndexError further down.
if len(template_lines) < 3:
    raise ValueError('template.txt must contain at least three lines: '
                     'the template sequence, the start position and the end position.')

template_seq = Seq(template_lines[0].strip())
print('Template sequence was loaded.')

# Convert the 1-based positions from the file to 0-based Python indices.
start_insert_index = int(template_lines[1]) - 1
end_insert_index = int(template_lines[2]) - 1
print('Start and end positions were read.')

# Slicing never raises on out-of-range indices, so bad positions would silently
# build wrong plasmids. An end position one before the start is still accepted:
# it removes nothing and describes a pure insertion.
if not (0 <= start_insert_index <= end_insert_index + 1 <= len(template_seq)):
    raise ValueError('Start/end positions in template.txt do not describe a region '
                     'inside the template sequence of length ' + str(len(template_seq)) + '.')

# The two template flanks are identical for every construct, so slice them once.
upstream_flank = template_seq[:start_insert_index]
downstream_flank = template_seq[(end_insert_index + 1):]

empty_plasmid = upstream_flank + downstream_flank
print('Empty plasmid was generated.')
print('Empty plasmid is: ', empty_plasmid)
print('Notice: This empty plasmid sequence is merely for troubleshooting and is not used further.')

print('Plasmids are being built.')

plasmid_ids = []
plasmids = []
insert_lengths = []

# Build one construct per FASTA record: upstream flank + insert + downstream flank.
for sequence in SeqIO.parse("inserts.fasta", "fasta"):
    construct = upstream_flank + sequence.seq + downstream_flank
    insert_lengths.append(len(sequence.seq))
    plasmid_ids.append(sequence.id)
    print('Sequence for plasmid ID ', sequence.id, 'is ', construct)
    plasmids.append(construct)

# Kept as a notebook-level name because later cells read it.
number_seqs = len(plasmids)

print('Insert IDs and sequences of ', number_seqs, 'entries were fetched from file and the corresponding plasmids were built.')
print('Simultaneously, insert lengths were recorded.')

Template file was loaded.
Template sequence was loaded.
Start and end positions were read.
Empty plasmid was generated.
Empty plasmid is:  actggtcccatAgcgcggcTAgctagcaTcgtcgatgcatcatatatatatatattatatggcgcagattcgatatgctagtcgatgctagctctcagtctgatctctagcgcgctagctagctagcatatcga
Notice: This empty plasmid sequence is merely for troubleshooting and is not used further.
Plasmids are being built.
Sequence for plasmid ID  Protein_1 is  actggtcccatAgcgcggcTagactacgggctagctatcgatccgctagcgtacgattatatacgatgcatgctcAgctagcaTcgtcgatgcatcatatatatatatattatatggcgcagattcgatatgctagtcgatgctagctctcagtctgatctctagcgcgctagctagctagcatatcga
Sequence for plasmid ID  Protein_2 is  actggtcccatAgcgcggcTacctttaggaatctcgatcggctctaatgcccgattccattttacctaggtagAgctagcaTcgtcgatgcatcatatatatatatattatatggcgcagattcgatatgctagtcgatgctagctctcagtctgatctctagcgcgctagctagctagcatatcga
Sequence for plasmid ID  Protein_3 is  actggtcccatAgcgcggcTttgtcacaggaaaatcgtatagcttcgcccgcatcggagctagctatagatattatagctagctaAgctagcaTcgtcgatgcatcatatatat

# Define essential functions for primer building

In [4]:
def find_overlap_flanking_position(start_insert_index,plasmid,id,overlap_flanking_condition,overlap_end_search_direction) :
    """Find the base next to the insert boundary that anchors the primer overlap.

    Candidate positions are tried in this order: one base away from
    ``start_insert_index`` in the search direction, two bases away, and finally
    ``start_insert_index`` itself. The first candidate whose base is listed in
    ``overlap_flanking_condition`` is returned.

    Args:
        start_insert_index: 0-based index the search is centred on.
        plasmid: Indexable sequence (``Seq`` or ``str``) to search in.
        id: Insert identifier, used only in the progress messages.
        overlap_flanking_condition: Collection of accepted bases, e.g. ``['T','t']``.
        overlap_end_search_direction: ``-1`` to look towards lower indices,
            ``1`` to look towards higher indices.

    Returns:
        The 0-based index of the matching base, or ``None`` when no candidate
        inside the sequence matches.
    """
    # NOTE: the messages always mention "T" and "5-line", whatever direction is searched.
    print('\n\nCurrently looking for the T flanking the 5-line region of the insert ID ', id)

    print('overlap direction is ', overlap_end_search_direction)
    print('start looking at position ', start_insert_index)

    plasmid_length = len(plasmid)
    if 0 <= start_insert_index < plasmid_length :
        print('and base is ', plasmid[start_insert_index])

    # A missing condition list can never match; treat it like "not found".
    accepted_bases = overlap_flanking_condition or ()

    search_range_start_overlap = [overlap_end_search_direction, 2*overlap_end_search_direction, 0]

    for position_change in search_range_start_overlap :
        position = start_insert_index + position_change
        # Skip candidates outside the sequence: a negative index would silently
        # wrap around to the other end, and an index past the end would raise.
        if not 0 <= position < plasmid_length :
            continue
        print('current base is ',plasmid[position])
        if plasmid[position] in accepted_bases :
            print('Found start position at ', position)
            return position

    print('\nError: could not find a T flanking the 5-line region of the insert ID ', id)
    return None

    
def find_overlap_end_position(overlap_flanking_position,plasmid,id,overlap_end_condition,overlap_end_search_direction) :
    """Find the far end of the primer overlap, starting from the flanking base.

    Positions 5 to 10 bases away from ``overlap_flanking_position`` (in the
    search direction) are examined in order of increasing distance; the first
    one whose base is listed in ``overlap_end_condition`` is returned.

    Args:
        overlap_flanking_position: 0-based index of the flanking base, or
            ``None`` when the flanking search failed.
        plasmid: Indexable sequence (``Seq`` or ``str``) to search in.
        id: Insert identifier, used only in the progress messages.
        overlap_end_condition: Collection of accepted bases, e.g. ``['A','a']``.
        overlap_end_search_direction: ``-1`` to look towards lower indices,
            ``1`` to look towards higher indices.

    Returns:
        The 0-based index of the matching base, or ``None`` when there is no
        usable flanking position or no candidate inside the sequence matches.
    """
    # NOTE: the messages always mention "A" and "5-line", whatever direction is searched.
    print('\nCurrently looking for the A to make an overlap flanking the 5-line region of the insert ID ', id)

    print('overlap direction is ', overlap_end_search_direction)
    print('start looking at position ', overlap_flanking_position)

    # Without a flanking position (or without accepted bases) nothing can be
    # searched; report the failure instead of raising a TypeError.
    if overlap_flanking_position is None or not overlap_end_condition :
        print('\nError: could not find an A to make an overlap flanking the 5-line region of the insert ID ', id)
        return None

    plasmid_length = len(plasmid)
    if 0 <= overlap_flanking_position < plasmid_length :
        print('and base is ', plasmid[overlap_flanking_position])

    # Offsets 5..10 from the flanking base, walking in the search direction.
    search_range_end_overlap = range((overlap_flanking_position+(5*overlap_end_search_direction)),(overlap_flanking_position+(11*overlap_end_search_direction)),overlap_end_search_direction)

    for position in search_range_end_overlap :
        # Skip candidates outside the sequence: a negative index would silently
        # wrap around to the other end, and an index past the end would raise.
        if not 0 <= position < plasmid_length :
            continue
        print('current base is ',plasmid[position])
        if plasmid[position] in overlap_end_condition :
            print('Found end position at ', position)
            return position

    print('\nError: could not find an A to make an overlap flanking the 5-line region of the insert ID ', id)
    return None


def calculate_non_overlap(plasmid,non_overlap_start_position,id,min_tm=59.5,tm_kwargs=None) :
    """Pick the shortest acceptable non-overlapping (annealing) part of a primer.

    Starting at the base right after ``non_overlap_start_position``, candidate
    sequences of 10 to 59 bases are tried in order of increasing length. The
    first candidate whose melting temperature reaches ``min_tm`` and whose last
    base is G or C is returned.

    Args:
        plasmid: Sliceable sequence (``Seq`` or ``str``) to take the candidate from.
        non_overlap_start_position: 0-based index of the last overlap base, or
            ``None`` when the overlap search failed.
        id: Insert identifier, used only in the progress messages.
        min_tm: Minimum melting temperature a candidate must reach.
        tm_kwargs: Optional dict of keyword arguments forwarded to
            ``MeltingTemp.Tm_NN`` (for example salt or DNA concentrations).
            ``None`` keeps the library defaults.

    Returns:
        The selected sequence, or ``None`` when no candidate qualifies.
    """
    print('\nCurrently calculating the length of the non-overlapping region for the forward primer of the insert ID ', id)

    tm_options = dict(tm_kwargs) if tm_kwargs else {}
    selected_seq = None

    # A missing start position means the overlap search already failed.
    if non_overlap_start_position is not None :
        first_base = non_overlap_start_position + 1
        for non_overlapping_length in range(10,60) :
            seq = plasmid[first_base:(first_base + non_overlapping_length)]
            # The slice is shorter than requested once the plasmid runs out.
            # Longer requests would only repeat the same (or an empty) sequence.
            if len(seq) < non_overlapping_length :
                break
            print('\nSequence now is: ', seq)
            tm = MeltingTemp.Tm_NN(seq, **tm_options)
            print('Tm now is: ', tm)
            if tm >= min_tm and seq[-1] in ['C','c','G','g'] :
                selected_seq = seq
                break

    if selected_seq is None :
        print('\nError: could not resolve non-overlapping Tm for the forward primer of the insert ID ', id)

    print('\nThe non-overlapping sequence of the forward primer of this insert is ', selected_seq)

    return selected_seq

# Define main primer builder function

In [5]:
def primer_calculator(id,plasmid,start_insert_index,overlap_end_search_direction) :
    """Build one primer: an overlap ending in U followed by a non-overlapping part.

    The overlap is delimited by a flanking base near ``start_insert_index`` and
    an end base a few positions further along the search direction. Its last
    base is replaced by ``'U'`` and the non-overlapping sequence is appended.

    Args:
        id: Insert identifier, used in the progress messages.
        plasmid: Sequence (``Seq`` or ``str``) of the assembled construct.
        start_insert_index: 0-based index the flanking search is centred on.
        overlap_end_search_direction: ``-1`` (flanking base T, end base A,
            searched towards lower indices) or ``1`` (flanking base A, end
            base T, searched towards higher indices).

    Returns:
        The primer sequence, or ``None`` when the direction is invalid or any
        of the three search steps fails.
    """
    if overlap_end_search_direction == -1 :
        overlap_flanking_condition = ['T','t']
        overlap_end_condition = ['A','a']

    elif overlap_end_search_direction == 1 :
        overlap_flanking_condition = ['A','a']
        overlap_end_condition = ['T','t']

    else :
        # Nothing sensible can be searched with any other direction; stop here
        # instead of failing later on undefined search conditions.
        print('Wrong value for overlap end search direction.')
        return None


    overlap_flanking_position = find_overlap_flanking_position(start_insert_index,plasmid,id,
                                                               overlap_flanking_condition,overlap_end_search_direction)
    if overlap_flanking_position is None :
        print('\nError: no primer could be built for the insert ID ', id)
        return None


    overlap_end_position = find_overlap_end_position(overlap_flanking_position,plasmid,id,
                                                     overlap_end_condition,overlap_end_search_direction)
    # A None end position must not reach the slices below: plasmid[None:n] is a
    # valid slice and would silently produce a wrong overlap.
    if overlap_end_position is None :
        print('\nError: no primer could be built for the insert ID ', id)
        return None


    print('\nMaking the overlapping sequence for the insert ID ', id)

    if overlap_end_search_direction == -1 :
        # End base lies before the flanking base; the primer continues after the flank.
        overlap_seq = plasmid[overlap_end_position:(overlap_flanking_position + 1)]
        non_overlap_start_position = overlap_flanking_position
    else :
        # Flanking base lies before the end base; the primer continues after the end.
        overlap_seq = plasmid[overlap_flanking_position:(overlap_end_position + 1)]
        non_overlap_start_position = overlap_end_position

    print('Overlapping sequence is: ', overlap_seq)

    non_overlap_seq = calculate_non_overlap(plasmid,non_overlap_start_position,id)
    if non_overlap_seq is None :
        print('\nError: no primer could be built for the insert ID ', id)
        return None

    print('\nReplacing T with U in the overlapping region and combining with the non-overlapping region to finalize the primer for the insert ID ', id)        
    # In both directions the overlap slice ends on the T that becomes U.
    primer = overlap_seq[:-1] + 'U' + non_overlap_seq
    print('The final primer is ', primer)

    return primer

# Define function for bulk primer construction

In [6]:
def primers_insert_calculator(plasmid_ids,plasmids,start_insert_index,overlap_end_search_direction,lengths=None) :
    """Build one primer per plasmid by calling ``primer_calculator`` on each.

    Args:
        plasmid_ids: Identifiers, one per plasmid, in the same order as ``plasmids``.
        plasmids: Assembled construct sequences.
        start_insert_index: 0-based index of the first insert base in every plasmid.
        overlap_end_search_direction: ``-1`` starts each search at the first
            insert base; ``1`` starts it at the last insert base
            (``start_insert_index + insert length - 1``).
        lengths: Insert lengths, one per plasmid. Only needed for direction
            ``1``; when omitted, the notebook-level ``insert_lengths`` list is used.

    Returns:
        List with the result of ``primer_calculator`` for each plasmid, in input order.
    """
    # Only direction 1 needs the insert lengths, so the notebook global is
    # touched only in that case.
    if overlap_end_search_direction == 1 and lengths is None :
        lengths = insert_lengths

    primers_list = []

    # Iterate over the arguments themselves rather than a global counter, so
    # the function works for any list of plasmids it is handed.
    for plasmid_count, (plasmid_id, plasmid) in enumerate(zip(plasmid_ids, plasmids)) :
        start_point = start_insert_index
        if overlap_end_search_direction == 1 :
            start_point += (lengths[plasmid_count] - 1)
        primers_list.append(primer_calculator(plasmid_id,plasmid,start_point,overlap_end_search_direction))

    return primers_list

# Build each primer type

In [7]:
# Direction -1: each search starts at the first insert base and walks towards
# lower indices (flanking base T, overlap end base A in primer_calculator).
overlap_end_search_direction_5_line = -1
fwd_primers_insert = primers_insert_calculator(plasmid_ids,plasmids,start_insert_index,overlap_end_search_direction_5_line)

# Direction 1: primers_insert_calculator moves each start point to the last
# insert base and the search walks towards higher indices (flanking base A,
# overlap end base T in primer_calculator).
overlap_end_search_direction_3_line = 1
fwd_primers_vector = primers_insert_calculator(plasmid_ids,plasmids,start_insert_index,overlap_end_search_direction_3_line)



Currently looking for the T flanking the 5-line region of the insert ID  Protein_1
overlap direction is  -1
start looking at position  20
and base is  a
current base is  T
Found start position at  19

Currently looking for the A to make an overlap flanking the 5-line region of the insert ID  Protein_1
overlap direction is  -1
start looking at position  19
and base is  T
current base is  g
current base is  c
current base is  g
current base is  A
Found end position at  11

Making the overlapping sequence for the insert ID  Protein_1
Overlapping sequence is:  AgcgcggcT

Currently calculating the length of the non-overlapping region for the forward primer of the insert ID  Protein_1

Sequence now is:  agactacggg
Tm now is:  24.271883058204082

Sequence now is:  agactacgggc
Tm now is:  32.54976711060988

Sequence now is:  agactacgggct
Tm now is:  35.91847504239939

Sequence now is:  agactacgggcta
Tm now is:  36.82321438962987

Sequence now is:  agactacgggctag
Tm now is:  39.84551738856771

# Print all primers built

In [8]:
# Print both primer sets, one primer per line, with a blank gap between the sets.
primer_groups = (fwd_primers_insert, fwd_primers_vector)

for group_number, primer_group in enumerate(primer_groups) :
    if group_number > 0 :
        print('\n')
    for primer in primer_group :
        print(primer)

AgcgcggcUagactacgggctagctatcgatccgc
AgcgcggcUacctttaggaatctcgatcggctctaatgc
AgcgcggcUttgtcacaggaaaatcgtatagcttcgccc


AgctagcaUcgtcgatgcatcatatatatatatattatatggcgcagattcg
AgctagcaUcgtcgatgcatcatatatatatatattatatggcgcagattcg
AgctagcaUcgtcgatgcatcatatatatatatattatatggcgcagattcg
