Variable instructions:



*   "mutations_file_name": name of the file stored in MyDrive
*   "plasmid": capitalised nucleotides for the entire plasmid sequence
*   "insert": capitalised nucleotides for the entire insert sequence (including STOP codon)
*   "desired_tm": integer value for the desired Tm across all primers
*   "tm_range": numeric value (decimals allowed, entered as text) for the accepted deviation (±) from "desired_tm" across all primers (note: 1.2 has worked well previously)
*   "lib_name": library name to be generated using the primers (in 'LIBxxx' format)

In [1]:
#@title Input variables, then hit 'Runtime' -> 'Run all'
# Name of the mutation list file in My Drive; a later cell reads it line by line
mutations_file_name = "FN1_SDM.csv" # @param {type:"string"}
# Full plasmid sequence; the insert is located inside it with plasmid.find(insert)
plasmid = "" # @param {type:"string"}
# Insert sequence that is searched for within the plasmid
insert = "" # @param {type:"string"}
# Target Tm; primers are accepted within desired_tm +/- tm_range
desired_tm = 72 # @param {type:"integer"}
# Entered as text and converted with float() in a later cell
tm_range = "1.2" # @param {type:"string"}
# Appended to the output file name: primer_output_<lib_name>.xlsx
lib_name = "FN1821" # @param {type:"string"}

# The code (click toggle to view) generates the primers for the single point mutants and stores an output Excel file in MyDrive. Do not edit the code.

In [2]:
from google.colab import drive
# The mutation list is read from, and the primer workbook written to, '/content/drive/My Drive/'
drive.mount('/content/drive') # Mount Google Drive at /content/drive

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Installation of packages and libraries

!pip install biopython # Installs Biopython
from Bio.SeqUtils import MeltingTemp as mt # Imports the Tm calculator
from Bio.SeqUtils import Seq # Imports the Seq class
from Bio.SeqUtils import gc_fraction # Imports the GC fraction calculator
import pandas as pd



In [4]:
# Read the requested mutations (one per line) from My Drive and store them in a list

submitted_mutations = [] # Create a list to store the mutants

# The with-block closes the file handle as soon as reading is finished.
# The 'utf-8-sig' codec drops a leading byte-order mark if the file has one.
with open('/content/drive/My Drive/' + mutations_file_name, 'r', encoding='utf-8-sig') as mutations:
  for line in mutations:
    line = line.strip() # Remove surrounding whitespace and the trailing newline
    if line: # Skip blank lines so no empty mutation name reaches the primer design loop
      submitted_mutations.append(line)

In [5]:
# Lookup table: one-letter amino acid code -> the codon used to encode it
# (the most frequently used codon for that amino acid in E. coli)

ecoli_codons = {
    'G': 'GGC', 'E': 'GAA', 'D': 'GAT', 'V': 'GTT', 'A': 'GCG',
    'R': 'CGC', 'S': 'AGC', 'K': 'AAA', 'N': 'AAC', 'M': 'ATG',
    'I': 'ATT', 'T': 'ACC', 'W': 'TGG', 'C': 'TGC', 'Y': 'TAT',
    'L': 'CTG', 'F': 'TTT', 'Q': 'CAG', 'H': 'CAT', 'P': 'CCG',
}

In [6]:
# Normalise the provided form data and derive the accepted Tm window

# Pasted sequences may carry stray whitespace or lowercase letters, which would make
# the case-sensitive plasmid.find(insert) lookup fail; already-clean input is unchanged
plasmid = plasmid.strip().upper()
insert = insert.strip().upper()

tm_tolerance = float(tm_range) # The form supplies tm_range as text, so convert it once
upper_tm = desired_tm + tm_tolerance # Highest accepted primer Tm
lower_tm = desired_tm - tm_tolerance # Lowest accepted primer Tm

In [7]:
# Finds the index where the insert starts in the plasmid

if not insert:
  # An empty insert would "match" at index 0 and silently shift every mutation position
  raise ValueError("'insert' is empty - enter the insert sequence in the form before running.")

start_index = plasmid.find(insert)

if start_index == -1: # str.find returns -1 when the insert is not part of the plasmid
  raise ValueError("The insert sequence was not found in the plasmid sequence - "
                   "check that both are given on the same strand and with the same capitalisation.")

In [8]:
# This cell generates back-to-back Fwd and Rev primers for every submitted mutation
# and adjusts their length one base at a time towards the accepted Tm window

FWD_START_LEN = 30 # Plasmid bases initially placed downstream of the new codon in the Fwd primer
REV_START_LEN = 32 # Plasmid bases initially used for the Rev primer, ending right before the mutated codon
FWD_MIN_LEN = 14 # Fewest downstream bases the Fwd primer may be trimmed to (30 bp minus at most 16 trimming steps)
FWD_MAX_LEN = 51 # Most downstream bases the Fwd primer may be extended to


def _primer_tm(bases):
  """Return the nearest-neighbour Tm of `bases`, calculated with the DNA_NN3 table."""
  return mt.Tm_NN(bases, nn_table=mt.DNA_NN3)


def _fit_to_tm_window(build_primer, start_len, min_len, max_len):
  """Trim or extend a primer one base at a time until its Tm lies within [lower_tm, upper_tm].

  Args:
    build_primer: function taking a number of plasmid bases and returning the primer string.
    start_len: number of plasmid bases to start from.
    min_len: smallest number of plasmid bases allowed while trimming.
    max_len: largest number of plasmid bases allowed while extending.

  Returns:
    A (primer, tm) tuple. The Tm can still lie outside the window when a length limit
    was reached or when a single-base step jumped across the whole window.
  """
  length = start_len
  primer = build_primer(length)
  primer_tm = _primer_tm(primer)

  # Tm too high: remove one base per step
  while primer_tm > upper_tm and length > min_len:
    length -= 1
    primer = build_primer(length)
    primer_tm = _primer_tm(primer)

  # Tm too low: add one base per step, continuing from the current length
  # (not from a fixed offset) so that no intermediate length is skipped
  while primer_tm < lower_tm and length < max_len:
    length += 1
    primer = build_primer(length)
    primer_tm = _primer_tm(primer)

  return primer, primer_tm


fwd_primers = {} # Create a dictionary to store the Fwd primers
rev_primers = {} # Create a dictionary to store the Rev primers

if start_index < 0:
  # plasmid.find(insert) returned -1, so every position computed below would be wrong
  raise ValueError("The insert was not found in the plasmid - cannot position the primers.")

for mutant in submitted_mutations:
  if not mutant:
    continue # Blank entries from the mutation file carry no mutation

  # The last character is the amino acid to introduce and the characters between the first
  # and the last one are the residue number; the first character is not used here
  aa_to_introduce = mutant[-1]
  new_codon = ecoli_codons[aa_to_introduce]
  residue_number = int(mutant[1:-1])

  # Plasmid index of the first base after the codon to replace: the Fwd primer continues from here
  mut_index_in_bp = (residue_number * 3) + start_index
  # Plasmid index of the first base of the codon to replace: the Rev primer ends right before it
  rev_end = mut_index_in_bp - 3

  # The plasmid string is treated as linear, so both starting primers must fit inside it.
  # A negative slice start would otherwise wrap around and yield an empty primer.
  if rev_end - REV_START_LEN < 0 or mut_index_in_bp + FWD_START_LEN > len(plasmid):
    raise ValueError("Mutation " + mutant + " is too close to the start or end of the plasmid sequence "
                     "to place both primers - rotate the plasmid sequence so the insert lies away from its ends.")

  # Fwd primer: the new codon followed by the plasmid sequence immediately downstream of the replaced codon
  fwd_primer, fwd_primer_tm = _fit_to_tm_window(
      lambda length: new_codon + plasmid[mut_index_in_bp:mut_index_in_bp + length],
      FWD_START_LEN,
      FWD_MIN_LEN,
      min(FWD_MAX_LEN, len(plasmid) - mut_index_in_bp)) # Never extend past the end of the sequence
  fwd_primer_gc = gc_fraction(fwd_primer) * 100 # Calculate the GC content for the primer

  # Store the nucleotides, Tm and GC content for the generated Fwd primer rounded to 1 decimal point
  fwd_primers[mutant + '_fwd'] = {'bases':fwd_primer, 'tm':round(fwd_primer_tm, 1), 'gc':round(fwd_primer_gc, 1)}

  # Rev primer: the plasmid sequence immediately upstream of the replaced codon. It is positioned
  # from the known mutation index rather than by searching the plasmid for the Fwd primer again,
  # which could match an earlier repeat. Its length is limited only by the start of the sequence.
  rev_primer, rev_primer_tm = _fit_to_tm_window(
      lambda length: plasmid[rev_end - length:rev_end],
      REV_START_LEN,
      1,
      rev_end)

  rev_primer_rev_comp = str(Seq(rev_primer).reverse_complement()) # The Rev primer is stored as the reverse complement
  rev_primer_gc = gc_fraction(rev_primer) * 100 # GC content is the same for a sequence and its reverse complement

  # Store the nucleotides, Tm and GC content for the generated Rev primer rounded to 1 decimal point
  rev_primers[mutant + '_rev'] = {'bases':rev_primer_rev_comp, 'tm':round(rev_primer_tm, 1), 'gc':round(rev_primer_gc, 1)}

In [9]:
# Build one table with a row per primer (index: '<mutation>_fwd' / '<mutation>_rev'; columns: bases, tm, gc)
df_fwd_primers = pd.DataFrame.from_dict(fwd_primers).transpose() # Convert the Fwd primer dictionary to a dataframe
df_rev_primers = pd.DataFrame.from_dict(rev_primers).transpose() # Convert the Rev primer dictionary to a dataframe

# All Fwd primer rows come first, followed by all Rev primer rows
df_primer_output = pd.concat([df_fwd_primers, df_rev_primers]) # Concatenate the Fwd and Rev primer dataframes

In [10]:
# Write the primer table to an Excel workbook in My Drive, named after the library
# Open question: change the output path to a common folder?

output_path = '/content/drive/My Drive/primer_output_' + lib_name + '.xlsx'
df_primer_output.to_excel(output_path)