# Load libraries

In [1]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqUtils import MeltingTemp

# Assign Tm calculation parameters:

In [2]:
# Define custom concentrations (replace these values with your specific conditions)
dna_conc = 50    # DNA concentration in nanomolar
dntp_conc = 0.8  # dNTP concentration in millimolar
mg_conc = 1.5    # Mg2+ concentration in millimolar
na_conc = 20     # Na+ concentration in millimolar
k_conc = 0.0     # K+ concentration in millimolar

# Load data from files and create plasmids

In [3]:
# Open the file for reading
with open('template.txt', 'r') as template_file:
    # Read each line
    template_lines = template_file.readlines()
print('Template file was loaded.')

template_seq = Seq(template_lines[0].strip())
print('Template sequence was loaded.')

start_insert = int(template_lines[1]) - 1
end_insert = int(template_lines[2])
print('Start and end positions were read.')

empty_plasmid = template_seq[:start_insert] + template_seq[end_insert:]
print('Empty plasmid was generated.')
print('Empty plasmid is: ', empty_plasmid)
print('Notice: This empty plasmid sequence is merely for troubleshooting and is not used further.')

print('Plasmids are being built.')

plasmid_ids = []
plasmids = []
insert_lengths = []
number_seqs = 0

# Reading from a FASTA file
for sequence in SeqIO.parse("inserts.fasta", "fasta"):
    insert_lengths.append(len(sequence.seq))
    plasmid_ids.append(sequence.id)
    construct = template_seq[:start_insert] + sequence.seq + template_seq[end_insert:]
    print('Sequence for plasmid ID ', plasmid_ids[number_seqs], 'is ', construct)
    plasmids.append(construct)
    number_seqs += 1
    
print('Insert IDs and sequences of ', number_seqs, 'entries were fetched from file and the corresponding plasmids were built.')
print('Simultaneously, insert lengths were recorded.')

Template file was loaded.
Template sequence was loaded.
Start and end positions were read.
Empty plasmid was generated.
Empty plasmid is:  ACTGGTCCCATAGCGCGGCTAGCTAGCATCGTCGATGCATCATATATATATATATTATATGGCGC
Notice: This empty plasmid sequence is merely for troubleshooting and is not used further.
Plasmids are being built.
Sequence for plasmid ID  Protein_1 is  ACTGGTCCCATAGCGCGGCTagactacgggctagctatcAGCTAGCATCGTCGATGCATCATATATATATATATTATATGGCGC
Sequence for plasmid ID  Protein_2 is  ACTGGTCCCATAGCGCGGCTacctttaggaatctcgatcgAGCTAGCATCGTCGATGCATCATATATATATATATTATATGGCGC
Sequence for plasmid ID  Protein_3 is  ACTGGTCCCATAGCGCGGCTttgtcacaggaaaatcgtatagcttcgcccgcatcgAGCTAGCATCGTCGATGCATCATATATATATATATTATATGGCGC
Insert IDs and sequences of  3 entries were fetched from file and the corresponding plasmids were built.
Simultaneously, insert lengths were recorded.


# Build primers

In [4]:
print('Currently building primers.')

search_range_start_overlap = [-1, -2, 0]
tm_fwd_inserts = [] #Stores the Tms of the non-overllaping sequences of the insert forward primers (insert 5-line region, towards insert)
primer_fwd_inserts = [] #Stores the final sequence of the forward primer for all inserts (insert 5-line region, towards insert)

for plasmid_count in range(number_seqs) :
    
    print('\n\nCurrently looking for the T flanking the 5-line region of the insert ID ', plasmid_ids[plasmid_count])
    
    for position_change in search_range_start_overlap :
        position = start_insert + position_change
        if plasmids[plasmid_count][position] in ['T','t'] :
            start_position = position + 1 #In this iteration, stores the position of T (start position)
            print('Start position is ', start_position)
            break
        if position_change == search_range_start_overlap[-1] :
            print('\nError: could not find a T flanking the 5-line region of the insert ID ', plasmid_ids[plasmid_count])
            start_position = None
    
    print('\nCurrently looking for the A to make an overlap flanking the 5-line region of the insert ID ', plasmid_ids[plasmid_count])
    
    search_range_end_overlap = range((start_position-6),(start_position-11),-1) # Stores the range to look for A of all primers in the 5-line region
    
    for x in search_range_end_overlap :
        if plasmids[plasmid_count][x] in ['A','a'] :
            end_position = x #In this iteration, stores the position of A (end position)
            print('End position is ', end_position)
            break
        if x == search_range_end_overlap[-1] :
            print('\nError: could not find an A to make an overlap flanking the 5-line region of the insert ID ', plasmid_ids[plasmid_count])
            end_position = None
    
    print('\nMaking the overlapping sequence flanking the 5-line region of the insert ID ', plasmid_ids[plasmid_count])
    
    overlap_start = plasmids[plasmid_count][end_position:start_position]
    
    print('Overlapping sequence is: ', overlap_start)
    
    print('\nCurrently calculating the length of the non-overlapping region for the forward primer of the insert ID ', plasmid_ids[plasmid_count])
    
    for non_overlapping_length in range(10,60) :
        seq = plasmids[plasmid_count][(start_position):(start_position+non_overlapping_length)]
        print('\nSequence now is: ', seq)
        tm = MeltingTemp.Tm_NN(seq)
        print('Tm now is: ', tm)
        if tm >= 60 and seq[-1] in ['C','c','G','g'] :
            break
        if non_overlapping_length == 59 :
            print('\nError: could not resolve non-overlapping Tm for the forward primer of the insert ID ', plasmid_ids[plasmid_count])
            seq = None
            tm = None
    
    print('\nFound Tm = ', tm)
    tm_fwd_inserts.append(tm)
    
    print('\nThe non-overlapping sequence of the forward primer of this insert is ', seq)
    
    print('\nReplacing T with U in the overlapping region and combining with the non-overlapping region to finalize the forward primer towards the insert ID ', plasmid_ids[plasmid_count])
    primer_fwd_insert = overlap_start[:-1] + 'U' + seq
    primer_fwd_inserts.append(primer_fwd_insert)
    print('The final insert forward primer is ', primer_fwd_inserts[plasmid_count])

Currently building primers.


Currently looking for the T flanking the 5-line region of the insert ID  Protein_1
Start position is  20

Currently looking for the A to make an overlap flanking the 5-line region of the insert ID  Protein_1
End position is  11

Making the overlapping sequence flanking the 5-line region of the insert ID  Protein_1
Overlapping sequence is:  AGCGCGGCT

Currently calculating the length of the non-overlapping region for the forward primer of the insert ID  Protein_1

Sequence now is:  agactacggg
Tm now is:  24.271883058204082

Sequence now is:  agactacgggc
Tm now is:  32.54976711060988

Sequence now is:  agactacgggct
Tm now is:  35.91847504239939

Sequence now is:  agactacgggcta
Tm now is:  36.82321438962987

Sequence now is:  agactacgggctag
Tm now is:  39.845517388567714

Sequence now is:  agactacgggctagc
Tm now is:  44.89942814534169

Sequence now is:  agactacgggctagct
Tm now is:  46.939669022100816

Sequence now is:  agactacgggctagcta
Tm now is:  47.0132002

In [5]:


def primer_calculator() :

    start_position = find_T()
        
    end_position = find_A()
        
    overlap_seq = plasmids[plasmid_count][end_position:start_position]
        
    non_overlap_seq = calculate_non_overlap()
        
    primer = overlap_seq[:-1] + 'U' + non_overlap_seq
    
    return primer


def primers_calculator() :
    
    for plasmid_count in range(number_seqs) :
        primers_list.apprend(primer_calculator())
    
    return primers_list

fwd_primers_insert = primers_calculator(fwd_insert_)

NameError: name 'fwd_insert_' is not defined