# 15/01/2024 NJF MSDM Golden Gate primer design v0.1

This notebook is for generating Golden Gate primers for multisite-directed mutagenesis.

Run the first two cells, fill in the variables below and then click "Runtime" > "Run all".

<br>

General variables

- **project_name**: give the run a name, can be anything, this will be used to name output files
- **input_sequence**: DNA sequence for WT protein
- **input_mutants**: path to a text file which lists the mutations, one on each line and formatted e.g. for a double mutant: E100K_I200V, or a triple mutant E100K_I200V_K300L
- **output_dir**: path to where you want output to be saved

Golden Gate variables
- **cuf**: path to a csv containing the *E. coli* codon usage (frequency) table
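- **chosen_recognition_site**: recognition site sequence of the restriction enzyme used for Golden Gate cloning, e.g. GGTCTC
- **chosen_prefix**: additional nucleobases placed 5' of the recognition site, e.g. TT
- **chosen_suffix**: spacer nucleotides matching the cleavage pattern of the enzyme, e.g. A
- **overhang1**: four base pair overhang complementary to the acceptor vector, added to the forward primer of the first fragment, e.g. AATG
- **overhang2**: four base pair overhang complementary to the acceptor vector, added to the reverse primer of the last fragment, e.g. AAGC
- **target_tm**: target melting temperature of the primer binding sequence in °C, e.g. 60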

<br>

To do:
- include variables for salt and primer concentrations?
- update Tm calculator to the one used by NEB?
- make updates to ensure that no more than 4 fragments are output
- code currently doesn't validate input sequence




In [1]:
# @title Run this first [connects your Google Drive]
# connect to drive
from google.colab import drive
drive.mount('/content/drive')

# load ext to run R alongside py
%load_ext rpy2.ipython



Mounted at /content/drive


In [2]:
# @title Run this second [installs required R code, takes ~3.5 mins, don't worry about the warnings]
%%R

# install and load seqinr
install.packages("seqinr",verbose=0)
library(seqinr)

# load stringR lib
library(stringr)


# classes

# Primerset: the template sequence, the designed primers (a list holding one
# forward/reverse pair per fragment) and the resulting modified sequence.
ps <- setClass(
  "Primerset",
  slots = c(
    oldsequence = "character",
    primers = "list",
    newsequence = "character"
  )
)

# Primer: a single oligo stored as its individual parts plus the melting
# temperature of the binding sequence and the temperature difference.
pc <- setClass(
  "Primer",
  slots = c(
    prefix = "character",
    restriction_enzyme = "character",
    suffix = "character",
    vector = "character",
    overhang = "character",
    extra = "character",
    binding_sequence = "character",
    temperature = "numeric",
    difference = "numeric"
  )
)

# Constructor for Primer (and its subclasses): every slot has a default, so a
# primer can be created from any subset of its parts.
setMethod(
  "initialize", "Primer",
  function(.Object, prefix="", restriction_enzyme="", suffix="", vector=c("", ""), overhang="", extra="", binding_sequence="", temperature=60, difference=0,...) {
    .Object <- callNextMethod(.Object, ...)
    slot_values <- list(
      prefix = prefix,
      restriction_enzyme = restriction_enzyme,
      suffix = suffix,
      vector = vector,
      overhang = overhang,
      extra = extra,
      binding_sequence = binding_sequence,
      temperature = temperature,
      difference = difference
    )
    # Copy each argument into the slot of the same name.
    for (slot_name in names(slot_values)) {
      slot(.Object, slot_name) <- slot_values[[slot_name]]
    }
    .Object
  }
)

# Primer flavours used by mutate_msd() and mutate_spm(); they add no slots.
pc_msd <- setClass("Primer_MSD", contains = "Primer")
pc_spm <- setClass("Primer_SPM", contains = "Primer")

# Fragment: first and last position of a gene fragment plus the mutated
# positions handled at its start and at its stop.
fragment <- setClass(
  "Fragment",
  slots = c(
    start = "numeric",
    stop = "numeric",
    start_mutation = "vector",
    stop_mutation = "vector"
  )
)

# Extended_Primerset: a Primerset that also stores the Fragment objects.
eps <- setClass(
  "Extended_Primerset",
  contains = "Primerset",
  slots = c(fragments = "list")
)

# functions

# S4 generic for printing primers and primer sets to the console; the actual
# output is produced by the class-specific methods.
setGeneric(
  "print_primer",
  function(primer) standardGeneric("print_primer")
)
#' @rdname print_primer-methods
#' @aliases print_primer,Primer-method
setMethod(
  "print_primer", signature(primer="Primer"),
  function(primer){
    # The oligo is printed as one string: all parts concatenated in slot order.
    oligo_parts <- c(
      primer@prefix,
      primer@restriction_enzyme,
      primer@suffix,
      primer@vector,
      primer@overhang,
      primer@extra,
      primer@binding_sequence
    )
    cat(paste0(oligo_parts, collapse=""), "\n", sep="")
    binding_temperature <- primer@temperature
    temperature_difference <- primer@difference
    cat("Temperature of binding site: ", binding_temperature, " \u00b0C" , "\n")
    cat("Temperature difference: ", temperature_difference, " K", "\n")
  }
)
#' @rdname print_primer-methods
#setMethod("print_primer", signature(primer="Primer_MSD"),
#          function(primer){
#            cat(primer@prefix, primer@restriction_enzyme, primer@suffix, primer@vector, primer@overhang, primer@extra, primer@binding_sequence, "\n", sep="")
#            cat("Temperature of binding site: ", primer@temperature, " \u00b0C" , "\n")
#            cat("Temperature difference: ", primer@difference, " K", "\n")
#          }
#)
#' @rdname print_primer-methods
#' @aliases print_primer,Primerset-method
setMethod("print_primer", signature(primer="Primerset"),
          function(primer){
            # Prints the forward and reverse primer of every fragment, followed
            # by the input sequence and the modified sequence.
            primerset<-primer
            # seq_along() gives an empty loop for a set without primers;
            # 1:length() would iterate over c(1, 0) and fail on the subscript.
            for(i in seq_along(primerset@primers)){
              cat("Fragment ", i, "\n", "Forward\n", sep="")
              print_primer(primerset@primers[[i]][[1]])
              cat("Reverse\n")
              print_primer(primerset@primers[[i]][[2]])
              cat("\n")
            }
            cat("Input Sequence:\n", primerset@oldsequence,"\n" )
            cat("\nModified Sequence:\n", primerset@newsequence, "\n")
          }
)
#' @rdname print_primer-methods
#' @aliases print_primer,Extended_Primerset-method
setMethod("print_primer", signature(primer="Extended_Primerset"),
          function(primer){
            # Prints start, stop and length of every fragment together with its
            # forward and reverse primer, followed by the input sequence and
            # the modified sequence.
            primerset<-primer
            # seq_along() gives an empty loop for a set without fragments;
            # 1:length() would iterate over c(1, 0) and fail on the subscript.
            for(i in seq_along(primerset@fragments)){
              cur_fragment<-primerset@fragments[[i]]
              cat("Fragment ", i, "\n", sep="")
              cat("Start ", cur_fragment@start, ", ", sep="")
              cat("Stop ",  cur_fragment@stop, ", ", sep="")
              # Both ends are inclusive, hence the +1.
              cat("Length ",(cur_fragment@stop - cur_fragment@start)+1, "\n", sep="")
              cat("Forward\n")
              print_primer(primerset@primers[[i]][[1]])
              cat("Reverse\n")
              print_primer(primerset@primers[[i]][[2]])
              cat("\n")
            }
            cat("Input Sequence:\n", primerset@oldsequence,"\n" )
            cat("\nModified Sequence:\n", primerset@newsequence, "\n")
          }
)

#' Domestication of the input sequence
#'
#' The domesticate function checks for internal cleavage sites. If corresponding sites are present, silent mutations are suggested to destroy the recognition sites.
#' The function returns a list containing the position of the chosen amino acid residue for each silent mutation.
#'
#' @param input_sequence The sequence which should be modified. This is an object of type character containing the sequence.
#' @param restriction_enzyme Recognition site sequence of the respective restriction enzyme [default: GGTCTC]
#' @param cuf The Codon Usage Table which is being used to select the codon for an exchanged amino acid (and in this case to select the codon which should be replaced). [default: e_coli_316407.csv]
#'
#' @return A list with replacements, one per recognition site found: each element is a character vector with the codon number at the first slot and the amino acid of this position at the second slot. An empty list is returned if no site is found.
#' @export
#' @import seqinr
#' @importFrom seqinr translate
#' @examples
#' #Load the setup of the Point Mutation vignette and run the domestication
#' data(Point_Mutagenesis_BbsI_setup)
#' domesticate(input_sequence, restriction_enzyme=recognition_site_bbsi, cuf=cuf)
domesticate<-function(input_sequence, restriction_enzyme="GGTCTC", cuf="e_coli_316407.csv"){
  cuf_vector<-get_cu_table(cuf, list=FALSE)
  restriction_enzyme<-str_to_upper(restriction_enzyme)
  input_sequence<-str_to_upper(input_sequence)
  sequence<-s2c(input_sequence)
  # The reverse complement is searched as well, so sites on both strands are found.
  restriction_enzyme_reverse<-str_to_upper(paste(rev(comp(s2c(restriction_enzyme))), collapse = ""))
  matches <- do.call(rbind, str_locate_all(input_sequence, c(restriction_enzyme, restriction_enzyme_reverse))) # Returns positions of every match in a string
  if(nrow(matches) == 0) {
    message("No domestication needed.")
    return(list())
  }
  split_seq<-splitseq(sequence)
  replacements<-vector(mode = "list", length = nrow(matches))
  for(i in seq_len(nrow(matches))){
    # Convert the nucleotide positions of the match into codon numbers.
    start<-ceiling(matches[i,"start"]/3)
    end<-ceiling(matches[i, "end"]/3)
    protein_domest<-split_seq[start:end]
    alt_codons<-syncodons(protein_domest) #we could use synsequence for this task
    for(j in seq_along(alt_codons)){ #filter out the codon which we already know
      codons<-alt_codons[[j]]
      # Compare case-insensitively so the existing codon is removed regardless
      # of the case used for the list names.
      codons<-codons[which(str_to_upper(codons) != str_to_upper(names(alt_codons)[j]))]
      alt_codons[[j]]<-cuf_vector[str_to_upper(codons)]
    }
    # Best usage value per codon position. Positions without any alternative
    # codon get -Inf instead of being dropped (as unlist() would do), so the
    # index returned by which.max() stays aligned with alt_codons.
    best_usage<-vapply(alt_codons, function(x) {
      x<-x[!is.na(x)]
      if(length(x) == 0) -Inf else max(x)
    }, numeric(1))
    if(all(is.infinite(best_usage))) {
      stop(paste("There is no syn. codon to remove the recognition site at codons ", start, "-", end, sep=""))
    }
    max_in_list<-which.max(best_usage)
    best_codon<-names(alt_codons[[max_in_list]])[which.max(alt_codons[[max_in_list]])]
    replacements[[i]]<-c(as.numeric((start-1)+max_in_list),seqinr::translate(s2c(str_to_upper(best_codon))))
  }
  return(replacements)
}

#' Calculate Primers for Point Mutagenesis
#'
#' The mutate function designs the necessary set of primers for the desired mutations.
#' An example is given in the vignette at \url{https://github.com/ipb-halle/GoldenMutagenesis/blob/master/vignettes/Point_Mutagenesis.md}
#'
#' @param input_sequence The sequence which should be modified. This is an object of type character containing the sequence.
#' @param prefix Additional nucleobases in 5' position of the recognition site [default: TT]
#' @param restriction_enzyme Recognition site sequence of the respective restriction enzyme [default: GGTCTC]
#' @param suffix Spacer nucleotides matching the cleavage pattern of the enzyme [default: A]
#' @param vector Four basepair overhangs complementary to the created overhangs in the acceptor vector  [default: c("AATG", "AAGC")]
#' @param replacements The desired substitutions
#' @param replacement_range  Maximum distance in amino acid residues between two randomization sites to be incorporated into a single primer (reverse, end of the fragment) - has a cascading effect for following mutations [default: 2]
#' @param binding_min_length The minimal threshold value of the length of the template binding sequence in amino acid residues [default: 4]
#' @param binding_max_length Maximal length of the binding sequence in amino acid residues [default: 9]
#' @param target_temp Melting temperature of the binding sequence in \code{print('\u00B0')}C [default: 60]
#' @param fragment_min_size Minimal size of a generated gene fragment in base pairs [default 100]
#' @param cuf The Codon Usage Table which is being used to select the codon for an exchanged amino acid. [default: e_coli_316407.csv]
#'
#' @return An object of class Extended_Primerset with the designed Primers and the fragments.
#' @export
#'
#' @examples
#' #Load the setup of the Point Mutation vignette and design the primers
#' data(Point_Mutagenesis_BbsI_setup)
#' primers<-mutate_spm(input_sequence, prefix="TT", restriction_enzyme = recognition_site_bbsi,
#' suffix = "AA", vector=c("CTCA", "CTCG"), replacements = mutations, binding_min_length=4 ,
#' binding_max_length=9, target_temp=60, cuf=cuf)
#'
mutate_spm<-function(input_sequence, prefix="TT" ,restriction_enzyme="GGTCTC", suffix="A", vector=c("AATG", "AAGC"), replacements,replacement_range=2, binding_min_length=4, binding_max_length=9, target_temp=60, cuf="e_coli_316407.csv", fragment_min_size=100) {#change to binding_max_length_max? and min?
  cuf_list<-get_cu_table(cuf)
  prefix<-str_to_upper(prefix)
  vector<-str_to_upper(vector)
  suffix<-str_to_upper(suffix)
  replacements<-order_replacements(replacements)
  codon_seq<-sequence_check(input_sequence)
  restriction_enzyme<-str_to_upper(restriction_enzyme)

  # Looks for recognition sites (either strand) in the mutated codon sequence.
  # If a site overlaps a mutated position, the codon chosen for that position
  # is exchanged for the synonymous codon with the highest value in the codon
  # usage table. Returns the (possibly updated) vector of replacement codons.
  check_offtargets<-function(codon_seq, codons, positions_aa, restriction_enzyme="GGTCTC", cuf="e_coli_316407.csv") {
    cuf_vector<-get_cu_table(cuf, list=FALSE)
    seq<-paste(codon_seq, collapse = "")
    restriction_enzyme_reverse<-str_to_upper(paste(rev(comp(s2c(restriction_enzyme))), collapse = ""))
    matches <- do.call(rbind, str_locate_all(seq, c(restriction_enzyme, restriction_enzyme_reverse))) # Returns positions of every match in a string
    for(i in seq_len(nrow(matches))){
      # Convert the nucleotide positions of the match into codon numbers.
      start<-ceiling(matches[i,"start"]/3)
      end<-ceiling(matches[i, "end"]/3)
      positions<-positions_aa[positions_aa >= start & positions_aa <= end]
      if(length(positions) == 0) {
        next # the site does not touch a mutated position
      }
      # Use the function's own argument instead of a variable of the enclosing scope.
      alt_codons<-syncodons(codon_seq[positions])
      for(j in seq_along(alt_codons)){ #filter out the codon which we already know
        codons_tmp<-alt_codons[[j]]
        codons_tmp<-codons_tmp[which(str_to_upper(codons_tmp) != str_to_upper(names(alt_codons)[j]))]
        alt_codons[[j]]<-cuf_vector[str_to_upper(codons_tmp)]
      }
      # Best usage value per position. Positions without an alternative codon
      # get -Inf instead of being dropped (as unlist() would do), so the index
      # returned by which.max() stays aligned with alt_codons and positions.
      best_usage<-vapply(alt_codons, function(x) {
        x<-x[!is.na(x)]
        if(length(x) == 0) -Inf else max(x)
      }, numeric(1))
      if(all(is.infinite(best_usage))) {
        warning(paste("No syn. codon available to remove the recognition site ", restriction_enzyme, " at codons ", start, "-", end, sep=""))
        next
      }
      max_in_list<-which.max(best_usage)
      codons[which(positions_aa==positions[max_in_list])]<-str_to_upper(names(alt_codons[[max_in_list]])[which.max(alt_codons[[max_in_list]])])
    }
    return(codons)
  }

  # If the vector overhang already ends with ATG, the first codon is skipped.
  if(str_sub(vector[1], 2) == "ATG"){
    fragment_start<-2
  } else{
    fragment_start<-1
  }

  positions<-vector()
  positions_aa<-vector()
  aminoacids<-vector()
  codons<-vector()
  codon_seq_tmp<-codon_seq

  # Select a codon for every replacement: the entry with the highest value in
  # the codon usage table for the requested amino acid.
  for(i in seq_along(replacements)) {
      position_aa<-as.numeric(replacements[[i]][1])
      positions_aa<-c(positions_aa, position_aa)
      position<-position_aa*3
      positions<-c(positions, position)
      aminoacid<-replacements[[i]][2]
      aminoacids<-c(aminoacids, aminoacid)
      codon<-str_to_upper(names(which.max(cuf_list[[aminoacid]]))[1])
      if(codon == codon_seq[position_aa]) {
        # The preferred codon is already present (silent mutation requested):
        # fall back to the next best codon. This is only impossible if the
        # amino acid has a single codon. The original test read
        # length(x == 1), which is true for every non-empty x.
        if(length(cuf_list[[aminoacid]]) == 1) {
          stop(paste("There is no syn. codon for ", aminoacid ,sep=""))
        }
        else {
          old_codon<-which.max(cuf_list[[aminoacid]])
          codon<-str_to_upper(names(which.max(cuf_list[[aminoacid]][-old_codon]))[1])
        }
      }
      codons<-c(codons, codon)
      codon_seq_tmp[position_aa]<-codon
  }

  # The new codons must not create recognition sites of the two standard
  # enzymes or of the enzyme chosen by the user.
  codons<-check_offtargets(codon_seq = codon_seq_tmp, codons = codons, positions_aa = positions_aa, restriction_enzyme = "GGTCTC", cuf = cuf)
  codons<-check_offtargets(codon_seq = codon_seq_tmp, codons = codons, positions_aa = positions_aa, restriction_enzyme = "GAAGAC", cuf = cuf)
  # Only check the user's enzyme separately if it is not one of the two above
  # (the original condition with "|" was always TRUE).
  if(!(restriction_enzyme %in% c("GGTCTC", "GAAGAC"))) {
    codons<-check_offtargets(codon_seq = codon_seq_tmp, codons = codons, positions_aa = positions_aa, restriction_enzyme = restriction_enzyme, cuf = cuf)
  }

  fragments<-make_fragments(mutations=positions_aa,seq=codon_seq,start=fragment_start, distance=replacement_range, fsize=fragment_min_size, buffer=0)

  primers<-vector("list", length(fragments))
  for(n in seq_along(fragments)){
    cur_fragment<-fragments[[n]]
    primers[[n]]<-vector("list", 2)
    # Write the new codons into the sequence. The codons vector is ordered by
    # position, so it is consumed from the front: first the mutations at the
    # start of the fragment, then the ones at its stop.
    if(length(cur_fragment@start_mutation) != 0) {
      codon_seq[cur_fragment@start_mutation]<-codons[1:length(cur_fragment@start_mutation)]
      codons<-codons[-(1:length(cur_fragment@start_mutation))]
    }
    if(length(cur_fragment@stop_mutation) != 0) {
      codon_seq[cur_fragment@stop_mutation]<-codons[1:length(cur_fragment@stop_mutation)]
      codons<-codons[-(1:length(cur_fragment@stop_mutation))]
    }
    # Overhangs between neighbouring fragments are built from the last base of
    # the second to last codon plus the last codon of the upstream fragment.
    if(n==1) { #the first primer does not need any existing overlap
      vector_f<-vector[1]
      vector_r<-""
      overhang_f<-""
      overhang_r<-paste(str_to_upper(comp(rev(s2c(paste(str_sub(codon_seq[cur_fragment@stop-1], start=3), codon_seq[cur_fragment@stop], sep=""))))), collapse="")
      suffix_f<-suffix
      suffix_r<-paste(comp(s2c(suffix), forceToLower = FALSE), sep="", collapse = "")
      stop_r<-cur_fragment@stop-2
      if(n==length(fragments)){#if there is just one single fragment - e.g. one mutation at the start or end
        overhang_r<-""
        stop_r<-cur_fragment@stop
        vector_r<-vector[2]
      }
    }
    else if(n==length(fragments)) {
      vector_f<-""
      vector_r<-vector[2]
      overhang_f<-paste(str_to_upper(s2c(paste(str_sub(codon_seq[fragments[[n-1]]@stop-1], start=3), codon_seq[fragments[[n-1]]@stop], sep=""))), collapse="")
      overhang_r<-""
      suffix_f<-suffix
      suffix_r<-paste(comp(s2c(suffix), forceToLower = FALSE), sep="", collapse = "")
      stop_r<-cur_fragment@stop
    }
    else {
      vector_f<-""
      vector_r<-""
      overhang_f<-paste(str_to_upper(s2c(paste(str_sub(codon_seq[fragments[[n-1]]@stop-1], start=3), codon_seq[fragments[[n-1]]@stop], sep=""))), collapse="")
      overhang_r<-paste(str_to_upper(comp(rev(s2c(paste(str_sub(codon_seq[fragments[[n]]@stop-1], start=3), codon_seq[fragments[[n]]@stop], sep=""))))), collapse="")
      suffix_f<-suffix
      suffix_r<-paste(comp(s2c(suffix), forceToLower = FALSE), sep="", collapse = "")
      stop_r<-cur_fragment@stop-2
    }
    #forward
    if(length(cur_fragment@start_mutation)==0) {
      temp_primer<-pc_spm(prefix=prefix ,restriction_enzyme=restriction_enzyme, suffix=suffix_f, vector=vector_f, overhang=overhang_f)
      temp_primer@binding_sequence<-paste(paste(codon_seq[cur_fragment@start:(cur_fragment@start+binding_max_length-1)], collapse=""), sep="")
      temp_primer<-sequence_length_temperature(temp_primer, primer_min=binding_min_length, target_temp=target_temp)
    }
    else {
      # Mutated codons go into the non-binding "extra" part; the binding
      # sequence starts right after the last mutation.
      temp_primer<-pc_spm(prefix=prefix ,restriction_enzyme=restriction_enzyme, suffix=suffix_f, vector=vector_f, overhang=overhang_f)
      temp_primer@extra<-paste(paste(codon_seq[cur_fragment@start:max(cur_fragment@start_mutation)], collapse = ""), sep="")
      temp_primer@binding_sequence<-paste(paste(codon_seq[(max(cur_fragment@start_mutation)+1):((max(cur_fragment@start_mutation)+1)+binding_max_length-1)], collapse=""), sep="")
      temp_primer<-sequence_length_temperature(temp_primer, primer_min=binding_min_length, target_temp=target_temp)
    }
    primers[[n]][[1]]<-temp_primer
    rm(temp_primer)
    #reverse (its target temperature is the temperature of the forward primer)
    if(length(cur_fragment@stop_mutation)<=1){
      temp_primer<-pc_spm(prefix=prefix ,restriction_enzyme=restriction_enzyme, suffix=suffix_r, overhang=overhang_r, vector=vector_r)
      temp_primer@binding_sequence<-paste(paste(codon_seq[(cur_fragment@stop-2-binding_max_length-1):stop_r], collapse=""), sep="")
      if(n!=length(fragments))
        temp_primer@binding_sequence<-paste(temp_primer@binding_sequence, str_sub(codon_seq[cur_fragment@stop-1], end=2) ,sep="")
      #check if the mutation is also the end
      if(length(cur_fragment@stop_mutation)==1){
          if(cur_fragment@stop_mutation[1] != cur_fragment@stop) {
            if(cur_fragment@stop_mutation[1]<=stop_r) # if it is bigger than stop_r, it is partly in the overlap...
              temp_primer@extra<-paste(paste(codon_seq[cur_fragment@stop_mutation[1]:stop_r], collapse = ""), sep="")
            if(n!=length(fragments)) #...and handeled here, for the last fragment stop_r is stop
              temp_primer@extra<-paste(temp_primer@extra, str_sub(codon_seq[cur_fragment@stop-1], end=2) ,sep="")
            temp_primer@extra<-paste(comp(rev(s2c(temp_primer@extra)), ambiguous = TRUE,forceToLower = FALSE), collapse = "")
            temp_primer@binding_sequence<-paste(paste(codon_seq[(cur_fragment@stop_mutation[1]-1-binding_max_length-1):(cur_fragment@stop_mutation[1]-1)], collapse=""), sep="")
          }
      }
      # The reverse primer is the reverse complement of the template region.
      temp_primer@binding_sequence<-paste(str_to_upper(comp(rev(s2c(temp_primer@binding_sequence)))), collapse="")
      temp_primer<-sequence_length_temperature(temp_primer, primer_min=binding_min_length, target_temp=primers[[n]][[1]]@temperature)
    }
    else{
      temp_primer<-pc_spm(prefix=prefix ,restriction_enzyme=restriction_enzyme, suffix=suffix_r, overhang=overhang_r, vector=vector_r)
      temp_primer@extra<-paste(paste(codon_seq[(min(cur_fragment@stop_mutation)):stop_r], collapse=""), sep="")
      temp_primer@extra<-paste(temp_primer@extra, str_sub(codon_seq[cur_fragment@stop-1], end=2), sep="")
      temp_primer@extra<-paste(comp(rev(s2c(temp_primer@extra)), ambiguous = TRUE,forceToLower = FALSE), collapse = "")
      temp_primer@binding_sequence<-paste(paste(codon_seq[(min(cur_fragment@stop_mutation)-1-binding_max_length-1):(min(cur_fragment@stop_mutation)-1)], collapse=""), sep="")
      temp_primer@binding_sequence<-paste(str_to_upper(comp(rev(s2c(temp_primer@binding_sequence)))), collapse="")
      temp_primer<-sequence_length_temperature(temp_primer, primer_min=binding_min_length, target_temp=primers[[n]][[1]]@temperature)
    }
    primers[[n]][[2]]<-temp_primer
    rm(temp_primer)
  }
  #Check the primers with the checkprimer function:
  #Check for primers with same overlap
  #Replace them based on ? -> Primer without mutation/length of the primer in total?
  #It is easier to modify the exisiting primer
  #If it is not possible to correct all overlaps -> return message with position for silent mutation
  primers<-check_primer_overhangs(primers, fragments, binding_min_length, target_temp)
  return(eps(oldsequence=input_sequence, primers=primers, newsequence=paste(codon_seq, collapse = ""), fragments=fragments))
}


#' Calculate Primers for Multiple Site Saturation Mutagenesis
#'
#' The mutate_msd function designs the necessary set of primers for the desired mutations.
#' Note that you can also select TGG in saturation mutagenesis to apply the 22c trick.
#'
#' @param input_sequence The sequence which should be modified. This is an object of type character containing the sequence.
#' @param codon The desired type of MSD mutation [default: NDT]
#' @param prefix Additional nucleobases in 5' position of the recognition site [default: TT]
#' @param restriction_enzyme Recognition site sequence of the respective restriction enzyme [default: GGTCTC]
#' @param suffix Spacer nucleotides matching the cleavage pattern of the enzyme [default: A]
#' @param vector Four basepair overhangs complementary to the created overhangs in the acceptor vector  [default: c("AATG", "AAGC")]
#' @param replacements The desired substitutions. Can be a numeric vector or a list of character vectors with position and codon. See \href{articles/MSD3.html}{\code{vignette("MSD3", package = "goldenmutagenesis")}} for examples.
#' @param replacement_range  Maximum distance in amino acid residues between two randomization sites to be incorporated into a single primer (reverse, end of the fragment) - has a cascading effect for following mutations [default: 3]
#' @param binding_min_length The minimal threshold value of the length of the template binding sequence in amino acid residues [default: 4]
#' @param binding_max_length Maximal length of the binding sequence in amino acid residues [default: 9]
#' @param target_temp Melting temperature of the binding sequence in \code{print('\u00B0')}C [default: 60]
#' @param fragment_min_size Minimal size of a generated gene fragment in base pairs [default 100]
#'
#' @return An object of class Extended_Primerset with the designed Primers and the fragments.
#' @export
#'
#' @examples
#' #Load the setup of the MSD vignette and design the primers
#' data(MSD_BsaI_setup_lv2)
#' print(mutations)
#' print(recognition_site_bsai)
#' primers<-mutate_msd(input_sequence, prefix="TT" ,
#' restriction_enzyme=recognition_site_bsai, suffix="A",
#' vector=c("AATG", "AAGC"), replacements=mutations, replacement_range=5,
#' binding_min_length=4 , binding_max_length=9, target_temp=60,
#' fragment_min_size=60 )
mutate_msd<-function(input_sequence, codon="NDT" ,prefix="TT" ,restriction_enzyme="GGTCTC", suffix="A", vector=c("AATG", "AAGC"), replacements, replacement_range=3, binding_min_length=4 ,binding_max_length=9, target_temp=60, fragment_min_size=100 ) {
  codon<-str_to_upper(codon)
  prefix<-str_to_upper(prefix)
  suffix<-str_to_upper(suffix)
  vector<-str_to_upper(vector)
  restriction_enzyme<-str_to_upper(restriction_enzyme)
  input_sequence<-str_to_upper(input_sequence)
  possible_codons<-c("NNN", "NNK", "NNS", "NDT", "DBK", "NRT", "VHG", "VRK", "NYC", "KST", "TGG")
  if(!(codon %in% possible_codons)) {
    stop(paste(codon, "is not a valid codon. Please select one of the following:", paste(possible_codons, collapse = " ") ,sep=" "))
  }
  # replacements is either a list of c(position, codon) entries or a plain
  # numeric vector of positions which all receive the default codon.
  # is.list() is used because class(x)=="list" fails inside if() for objects
  # with more than one class.
  if(is.list(replacements)){
    replacements<-order_replacements(replacements)
    # vapply() guarantees a character/numeric vector, also for an empty list.
    codons<-vapply(replacements, function(x){str_to_upper(as.character(x[2]))}, character(1))
    if(!all(codons %in% possible_codons)){
      # Collapse the codons first so that a single, readable message is raised.
      stop(paste(paste(codons, collapse = " "), "contains invalid codons. Please select one of the following:", paste(possible_codons, collapse = " ") ,sep=" "))
    }
    replacements<-vapply(replacements, function(x){(as.numeric(x[1]))}, numeric(1))
  }
  else{
    replacements<-sort(replacements)
    codons<-rep(codon, length(replacements))
  }
  codon_seq<-sequence_check(input_sequence)
  # If the vector overhang already ends with ATG, the first codon is skipped.
  if(str_sub(vector[1], 2) == "ATG"){
    fragment_start<-2
  } else{
    fragment_start<-1
  }
  fragments<-make_fragments(mutations=replacements,seq=codon_seq,start=fragment_start, distance=replacement_range, fsize=fragment_min_size, buffer=2)

  primers<-vector("list", length(fragments))
  for(n in seq_along(fragments)){
    cur_fragment<-fragments[[n]]
    primers[[n]]<-vector("list", 2)
    # Overhangs between neighbouring fragments are built from the last base of
    # the second to last codon plus the last codon of the upstream fragment.
    if(n==1) { #the first primer does not need any existing overlap
      vector_f<-vector[1]
      vector_r<-""
      overhang_f<-""
      overhang_r<-paste(str_to_upper(comp(rev(s2c(paste(str_sub(codon_seq[cur_fragment@stop-1], start=3), codon_seq[cur_fragment@stop], sep=""))))), collapse="")
      suffix_f<-suffix
      suffix_r<-paste(comp(s2c(suffix), forceToLower = FALSE), sep="", collapse = "")
      stop_r<-cur_fragment@stop-2
      if(n==length(fragments)){#if there is just one single fragment - e.g. one mutation at the start or end
        overhang_r<-""
        stop_r<-cur_fragment@stop
        vector_r<-vector[2]
      }
    }
    else if(n==length(fragments)) {
      vector_f<-""
      vector_r<-vector[2]
      overhang_f<-paste(str_to_upper(s2c(paste(str_sub(codon_seq[fragments[[n-1]]@stop-1], start=3), codon_seq[fragments[[n-1]]@stop], sep=""))), collapse="")
      overhang_r<-""
      suffix_f<-suffix
      suffix_r<-paste(comp(s2c(suffix), forceToLower = FALSE), sep="", collapse = "")
      stop_r<-cur_fragment@stop
    }
    else {
      vector_f<-""
      vector_r<-""
      overhang_f<-paste(str_to_upper(s2c(paste(str_sub(codon_seq[fragments[[n-1]]@stop-1], start=3), codon_seq[fragments[[n-1]]@stop], sep=""))), collapse="")
      overhang_r<-paste(str_to_upper(comp(rev(s2c(paste(str_sub(codon_seq[fragments[[n]]@stop-1], start=3), codon_seq[fragments[[n]]@stop], sep=""))))), collapse="")
      suffix_f<-suffix
      suffix_r<-paste(comp(s2c(suffix), forceToLower = FALSE), sep="", collapse = "")
      stop_r<-cur_fragment@stop-2
    }
    #forward
    if(length(cur_fragment@start_mutation)==0) {
      temp_primer<-pc_msd(prefix=prefix ,restriction_enzyme=restriction_enzyme, suffix=suffix_f, vector=vector_f, overhang=overhang_f)
      temp_primer@binding_sequence<-paste(paste(codon_seq[cur_fragment@start:(cur_fragment@start+(binding_max_length-1))], collapse=""), sep="")
      temp_primer<-sequence_length_temperature(temp_primer, primer_min=binding_min_length, target_temp=target_temp)
    }
    else {
      temp_primer<-pc_msd(prefix=prefix ,restriction_enzyme=restriction_enzyme, suffix=suffix_f, vector=vector_f, overhang=overhang_f)
      # The codons vector is ordered by position and consumed from the front.
      codon_seq[cur_fragment@start_mutation]<-codons[1:length(cur_fragment@start_mutation)]
      codons<-codons[-(1:length(cur_fragment@start_mutation))]
      # Mutated codons go into the non-binding "extra" part; the binding
      # sequence starts right after the last mutation.
      temp_primer@extra<-paste(paste(codon_seq[cur_fragment@start:max(cur_fragment@start_mutation)], collapse = ""), sep="")
      temp_primer@binding_sequence<-paste(paste(codon_seq[(max(cur_fragment@start_mutation)+1):((max(cur_fragment@start_mutation)+1)+binding_max_length-1)], collapse=""), sep="")
      temp_primer<-sequence_length_temperature(temp_primer, primer_min=binding_min_length, target_temp=target_temp)
    }
    primers[[n]][[1]]<-temp_primer
    rm(temp_primer)
    #reverse (its target temperature is the temperature of the forward primer)
    if(length(cur_fragment@stop_mutation)==0){
      temp_primer<-pc_msd(prefix=prefix ,restriction_enzyme=restriction_enzyme, suffix=suffix_r, overhang=overhang_r, vector=vector_r)
      temp_primer@binding_sequence<-paste(paste(codon_seq[(cur_fragment@stop-2-binding_max_length-1):stop_r], collapse=""), sep="")
      if(n!=length(fragments))
        temp_primer@binding_sequence<-paste(temp_primer@binding_sequence, str_sub(codon_seq[cur_fragment@stop-1], end=2) ,sep="")
      # The reverse primer is the reverse complement of the template region.
      temp_primer@binding_sequence<-paste(str_to_upper(comp(rev(s2c(temp_primer@binding_sequence)))), collapse="")
      temp_primer<-sequence_length_temperature(temp_primer, primer_min=binding_min_length, target_temp=primers[[n]][[1]]@temperature)
    }
    else{
      temp_primer<-pc_msd(prefix=prefix ,restriction_enzyme=restriction_enzyme, suffix=suffix_r, overhang=overhang_r, vector=vector_r)
      codon_seq[cur_fragment@stop_mutation]<-codons[1:length(cur_fragment@stop_mutation)]
      codons<-codons[-(1:length(cur_fragment@stop_mutation))]
      temp_primer@extra<-paste(paste(codon_seq[(min(cur_fragment@stop_mutation)):stop_r], collapse=""), sep="")
      temp_primer@extra<-paste(temp_primer@extra, str_sub(codon_seq[cur_fragment@stop-1], end=2), sep="")
      # ambiguous = TRUE because the extra part contains degenerate codons.
      temp_primer@extra<-paste(comp(rev(s2c(temp_primer@extra)), ambiguous = TRUE,forceToLower = FALSE), collapse = "")
      temp_primer@binding_sequence<-paste(paste(codon_seq[(min(cur_fragment@stop_mutation)-1-binding_max_length-1):(min(cur_fragment@stop_mutation)-1)], collapse=""), sep="")
      temp_primer@binding_sequence<-paste(str_to_upper(comp(rev(s2c(temp_primer@binding_sequence)))), collapse="")
      temp_primer<-sequence_length_temperature(temp_primer, primer_min=binding_min_length, target_temp=primers[[n]][[1]]@temperature)
    }
    primers[[n]][[2]]<-temp_primer
    rm(temp_primer)
  }
  #Check the primers with the checkprimer function:
  #Check for primers with same overlap
  #Replace them based on ? -> Primer without mutation/length of the primer in total?
  #It is easier to modify the exisiting primer
  #If it is not possible to correct all overlaps -> return message with position for silent mutation
  primers<-check_primer_overhangs(primers, fragments, binding_min_length, target_temp)
  return(eps(oldsequence=input_sequence, primers=primers, newsequence=paste(codon_seq, collapse = ""), fragments=fragments))
}

#' Add a level to existing Primerset
#'
#' This function replaces the prefix, the suffix and the restriction enzyme of a given Primerset to change the design to another level.
#' You can use this function to convert an existing Level 2 Primerset to a Level 0 Primerset for example.
#' The vector overhang of every forward primer is set to the first and the one of every reverse primer to the second element of \code{vector}.
#' A primer that had a vector overhang but no fragment overhang keeps its old vector overhang as its overhang.
#'
#' @param primerset An existing Primerset (in Level 2)
#' @param prefix Additional nucleobases in 5' position of the new recognition site [default: TT]
#' @param restriction_enzyme Recognition site sequence of the new restriction enzyme (Level 0) [default: GAAGAC]
#' @param suffix Spacer nucleotides matching the cleavage pattern of the enzyme (Level 0) [default: AA]
#' @param vector Four basepair overhangs complementary to the created overhangs in the acceptor vector [default: c("CTCA", "CTCG")]
#'
#' @return A Primerset in the new Level (Level 0)
#' @export
#'
#' @examples
#' #Load level 2 results of the MSD vignette
#' data(MSD_BsaI_result_lv2)
#' primer_add_level(primers,  prefix="TT",
#' restriction_enzyme="GAAGAC", suffix="AA", vector=c("CTCA", "CTCG"))
#'
primer_add_level<-function(primerset, prefix="TT" ,restriction_enzyme="GAAGAC", suffix="AA", vector=c("CTCA", "CTCG")){
  prefix<-str_to_upper(prefix)
  restriction_enzyme<-str_to_upper(restriction_enzyme)
  suffix<-str_to_upper(suffix)
  vector<-str_to_upper(vector)

  # Moves a single primer to the new level. Forward and reverse primers are
  # treated identically apart from the vector overhang they receive.
  change_level<-function(primer, new_vector){
    # Keep the old vector overhang as overhang if the primer has none yet.
    if(primer@overhang=="" && primer@vector!=""){
      primer@overhang<-primer@vector
    }
    primer@vector<-new_vector
    primer@prefix<-prefix
    primer@restriction_enzyme<-restriction_enzyme
    primer@suffix<-suffix
    primer
  }

  # seq_along() gives an empty loop for a set without primers; 1:length()
  # would iterate over c(1, 0) and fail on the subscript.
  for(i in seq_along(primerset@primers)) {
    primerset@primers[[i]][[1]]<-change_level(primerset@primers[[i]][[1]], vector[1])
    primerset@primers[[i]][[2]]<-change_level(primerset@primers[[i]][[2]], vector[2])
  }
  return(primerset)
}

#' Prepare a Single Point Mutation Primerset to be used in Level 2
#'
#' This function adds defined vector overhangs to Level 0 Primersets to express them in a Level 2 vector.
#'
#' @param primerset An existing Primerset (in Level 0)
#' @param vector Four basepair overhangs complementary to the created overhangs in the acceptor vector [default: c("AATG", "AAGC")]
#'
#' @return A Primerset prepared for expression in Level 2
#' @export
#'
#' @examples
#' #Load level 0 results of the SPM vignette
#' data(Point_Mutagenesis_BbsI_result)
#' primer_prepare_level(primers)
#'
primer_prepare_level<-function(primerset, vector=c("AATG", "AAGC")){
  # Attach the Level 2 vector overhangs to the outermost primers:
  # the forward primer of the first fragment and the reverse primer of the last one.
  vector<-str_to_upper(vector)

  first_forward<-primerset@primers[[1]][[1]]
  if(str_sub(vector[1], 2, 4)!="ATG") {
    # Overhang does not end in ATG: append it unchanged after the existing extra bases.
    warning("Working with unknown sequence for Level 2 vector")
    first_forward@extra<-paste(first_forward@extra, vector[1], sep="")
  } else {
    # Overhang is xATG: the primer must already start with ATG, so only the
    # leading base of the overhang has to be put in front of it.
    leading_codon<-str_sub(paste(first_forward@extra, first_forward@binding_sequence, sep=""), 1, 3)
    if(leading_codon != "ATG") {
      stop("The extra+binding_sequence of the primer did not start with ATG. Something went wrong. Please send a bug report to us.")
    }
    first_forward@extra<-paste(str_sub(vector[1], 1, 1), first_forward@extra, sep="")
  }
  primerset@primers[[1]][[1]]<-first_forward

  # The second overhang is put in front of the extra bases of the last reverse primer.
  last_index<-length(primerset@primers)
  last_reverse<-primerset@primers[[last_index]][[2]]
  last_reverse@extra<-paste(vector[2], last_reverse@extra, sep="")
  primerset@primers[[last_index]][[2]]<-last_reverse

  return(primerset)
}

#' Create a graphical evaluation of sequencing results
#'
#' This function creates a graphical evaluation of the sequencing results to determine the quality of the created library.
#'
#' The function aligns the obtained sequencing results to the target gene sequence.
#' It also tries to align the reverse complement of the obtained sequence.
#' Afterwards it checks for mismatches between the sequences.
#' Mismatches are likely to be successfully mutated nucleotides.
#' Positions regarded as mismatches are displayed as pie charts.
#' The shown distributions are based on the signal intensities of the four nucleobases at the mismatch positions.
#' You can compare the pie charts with the expected pattern of randomization, thereby validating the quality of the created library.
#' @importFrom dplyr slice
#' @importFrom graphics pie
#' @importFrom sangerseqR readsangerseq peakPosMatrix
#' @importFrom Biostrings pairwiseAlignment mismatchTable reverseComplement
#' @import RColorBrewer
#' @param input_sequence The sequence which was modified. This is an object of type character containing the sequence.
#' @param ab1file The path to the ab1file which was provided by the sequencer/sequencing service
#' @param replacements The mutations which were desired.
#' @param trace_cutoff The minimal sum of signals (4 nucleotides) for a position in the sequence. [default: 80]
#'
#' @return Plots on the active/default graphics device.
#' @export
#'
#' @examples
#' \dontrun{
#' data(MSD_BsaI_setup_lv2)
#' abfile<-"activesite_for_200718.ab1"
#' base_distribution(input_sequence=input_sequence, ab1file=abfile, replacements=mutations)
#' }
base_distribution<-function(input_sequence, ab1file, replacements, trace_cutoff=80){
  # Plot, for every codon that contains a mismatch at one of the requested
  # amino-acid positions, the per-base signal intensities of the Sanger trace as pie charts.
  #
  # input_sequence: template sequence the read is aligned against
  # ab1file:        path of the .ab1 trace file
  # replacements:   numeric amino-acid positions (x*3-2 .. x*3 are used as nucleotide positions)
  # trace_cutoff:   minimal summed signal of a position for it to be plotted
  # Returns NULL invisibly; called for the plots it draws.
  sanger_seq<-sangerseqR::readsangerseq(ab1file) #reading in the data
  # Align the template against the read in both orientations and keep the better-scoring one.
  global_Align<-Biostrings::pairwiseAlignment(input_sequence, sanger_seq@primarySeq)
  global_Align_rev<-Biostrings::pairwiseAlignment(input_sequence, Biostrings::reverseComplement(sanger_seq@primarySeq))
  reverse<-FALSE
  if(global_Align_rev@score > global_Align@score) {
    reverse<-TRUE
    global_Align<-global_Align_rev
    print("Reverse sequence detected!")
  }
  mismatches<-Biostrings::mismatchTable(global_Align)
  # The three nucleotide positions of every requested codon.
  replacements_basepairs<-as.vector(sapply(replacements, FUN=function(x){return(c(x*3-2, x*3-1, x*3))}))
  # lapply() + unlist(): the result is flattened anyway, and the former
  # `simplify = array` handed a function to an argument that expects TRUE/FALSE/"array".
  candidates<-unlist(lapply(replacements_basepairs, function(x){which(mismatches[,"PatternStart"]==x)}))
  mismatches_candidates<-mismatches[candidates, ]
  if(nrow(mismatches_candidates) == 0) {
    # Previously 1:nrow() iterated over c(1, 0) here and failed on NA values.
    warning("No mismatches found at the requested positions. Nothing to plot.")
    return(invisible(NULL))
  }
  # 0-based offset of each mismatch inside its codon (0, 1 or 2), then the codon's
  # first position in read (subject) and template (pattern) coordinates.
  codon_offset<-(mismatches_candidates[,"PatternStart"]-1)%%3
  subject_codon_start<-mismatches_candidates[,"SubjectStart"]-codon_offset
  pattern_codon_start<-mismatches_candidates[,"PatternStart"]-codon_offset
  # rbind() + as.vector() lists the three positions of codon 1, then codon 2, ...
  subject_pos<-unique(as.vector(rbind(subject_codon_start, subject_codon_start+1, subject_codon_start+2)))
  pattern_pos<-unique(as.vector(rbind(pattern_codon_start, pattern_codon_start+1, pattern_codon_start+2)))

  if(reverse) {
    # The alignment used the reverse complement; map back to positions of the original read.
    subject_pos<-length(sanger_seq@primarySeq)-subject_pos+1
  }
  # drop=FALSE keeps a matrix even when only a single position is selected.
  tracematrix_subject<-sangerseqR::traceMatrix(sanger_seq)[sangerseqR::peakPosMatrix(sanger_seq)[subject_pos], , drop=FALSE]
  sums_row<-which(rowSums(tracematrix_subject)>=trace_cutoff)
  # The unfiltered matrix is indexed by `element`, so the plotted row always belongs
  # to pattern_pos[element]/subject_pos[element]. (Filtering the matrix first and then
  # indexing it with the pre-filter row numbers picked wrong or non-existent rows.)
  for(element in sums_row) {
    # plotting as pie chart
    slices <- as.numeric(tracematrix_subject[element, ])
    lbls <- c("Adenine", "Cytosine", "Guanine", "Thymine")
    if(reverse) {
      # Trace channels belong to the read; report the complementary template base.
      lbls <- c("Thymine", "Guanine", "Cytosine", "Adenine")
    }
    pct <- round(slices/sum(slices)*100)
    lbls <- paste(lbls, pct) # add percents to labels
    lbls <- paste(lbls,"%",sep="") # add % to labels
    graphics::pie(slices,labels = lbls, col=RColorBrewer::brewer.pal(4,"Spectral"),main = paste("Peak intensity distribution for \nPosition", pattern_pos[element], "(Template) -", subject_pos[element], "(Sequencing)", sep=" "))
  }
  invisible(NULL)
}

# Sort a list of replacements by the numeric value of each entry's first element
# (the position), in ascending order.
order_replacements<-function(replacements){
  positions<-sapply(replacements, function(entry){as.numeric(entry[1])})
  ascending<-order(positions)
  return(replacements[ascending])
}

# Strip every line break (CRLF, LF or a lone CR) from the strings in x.
remove_newline<-function(x){
  line_break<-"\r?\n|\r"
  without_breaks<-gsub(pattern=line_break, replacement="", x=x)
  return(without_breaks)
}



#' Select a Codon Usage Table
#'
#' Get a list or an array of values for the selected Codon Usage Table
#'
#' @param name The filename of the codon usage table. You can use list_cu_table() to get the overview.
#' @param list A boolean parameter that decides if an array or a list is returned. The array output is required by the domesticate function
#'
#' @return An array or a list with values for the codons/amino acids.
#' @export
#'
#' @examples
#' list_cu_table()
#' \dontrun{
#' get_cu_table("e_coli_316407.csv")
#' }
#'
get_cu_table<-function(name, list=T) {
  # Read a codon usage table from the csv file `name` (columns used: codon,
  # amino_acid, relative_frequency) and return it either per amino acid
  # (list=T) or as one flat named vector (list=F). RNA codons are rewritten to DNA (U -> T).
  stopifnot(is.character(name))
  #file<-system.file("cuf", name, package="GoldenMutagenesis")
  usage<-read.csv(name)
  usage[, "codon"]<-str_replace_all(usage[,"codon"], "U", "T")

  # Flat form: relative frequency of every codon, named by the (upper-case) codon.
  flat_frequencies<-as.vector(usage$relative_frequency)
  names(flat_frequencies)<-gsub("U","T",usage$codon)

  # Per-amino-acid form: one table per amino acid holding frequency*1000,
  # named by the lower-case codon.
  amino_acids<-unique(usage$amino_acid)
  per_amino_acid<-lapply(amino_acids, function(aa){
    rows<-which(usage$amino_acid==aa)
    scaled<-usage[rows,"relative_frequency"]
    scaled<-scaled*1000
    names(scaled)<-str_to_lower(usage[rows,"codon"])
    return(as.table(scaled))
  })
  names(per_amino_acid)<-amino_acids

  if(list==T) {
    return(per_amino_acid)
  }
  return(flat_frequencies)
}

# Partition a sequence into consecutive fragment objects so that the requested
# mutation positions end up close to fragment borders.
#
# mutations: numeric mutation positions; they are compared against `start` and
#            length(seq) -- presumably sorted ascending and in the same unit as
#            `seq` (codons?) -- TODO confirm against the caller
# fsize:     minimal fragment size; divided by 3 and rounded up before use
#            (looks like a nucleotide -> codon conversion -- confirm)
# buffer:    offset added to a mutation position when a fragment end is derived from it
# seq:       the sequence; only length(seq) is used here
# start:     first position of the first fragment
# distance:  neighbouring mutations whose positions differ by less than distance+1
#            are kept on the same fragment end
# Returns the fragments collected with c(), each with start/stop and the
# start_mutation/stop_mutation slots filled.
make_fragments<-function(mutations, fsize, buffer=0, seq, start, distance=2){
  start=start
  end=NULL
  fsize<-ceiling(fsize/3)
  # cm: index of the first mutation that is not yet assigned to a fragment
  cm<-1
  fragments<-c()
  distance<-distance+1
  # NOTE: this check runs after `distance` was incremented by one
  if(buffer > distance){
    stop("You can not try to use a buffer bigger than the allowed distance to integrate two mutations on the same primer!")
  }
  #Make fragments
  repeat{
    this_fragment<-fragment(start=start)
    # endm: index of the last mutation that belongs to the current fragment
    endm<-0
    if(cm > length(mutations)){#This generates a last fragment without any mutations
      this_fragment@stop<-length(seq)
      fragments<-c(fragments, this_fragment)
      break
    }
    i<-cm
    # Inner loop: walk over the remaining mutations until one is far enough from
    # `start` to close the fragment, or the mutations run out.
    repeat{
      if((mutations[i] + buffer - start) >= fsize){#Iterate until we have a mutation on the end of the fragment
          if(length(seq) - mutations[i] < fsize){# the last fragment would be too small
             #next mutation should have at least 1 or buffer +1 difference
              # If there is room for two fragments of at least fsize (and a fragment
              # already exists), close the current one fsize before the sequence end and
              # open a final fragment; otherwise extend the current one to the end.
              # In both cases all remaining mutations are assigned (endm = last mutation).
              if(((length(seq) - fsize) - start >= fsize) & !is.null(fragments)) {
                this_fragment@stop<-length(seq)-fsize
                fragments<-c(fragments, this_fragment)
                start=length(seq)-(fsize-1)
                this_fragment<-fragment(start=start)
                end=length(seq)
                endm=length(mutations)
              } else {
                end<-length(seq)
                endm=length(mutations)
              }
              break
            }
          if(i<length(mutations)){
            if((mutations[i+1] - mutations[i])<distance){#Integrate next mutation
              i<-i+1
              next
            }
          }
        # Close the fragment right after mutation i (plus buffer).
        endm<-i
        end<-mutations[i] + buffer
        if(i == 1) { #if there was more than one mutation, we are already safe
          # Only the first mutation: if the remainder would be shorter than fsize,
          # store a first fragment that stops earlier and let the current one
          # run from there to the end of the sequence.
          if(length(seq)-end < fsize) {
            this_fragment@stop<-length(seq)-fsize-1-buffer
            fragments<-c(fragments, this_fragment)
            this_fragment@start<-length(seq)-fsize-buffer
            end<-length(seq)
            endm<-1
          }
        }
        #buffer and distance must be at least equal
        break
      }
      else{
        if(i==length(mutations)){#If there is a mutation at the end, but the end of the fragment has to be the end of the sequence
          endm<-i
          end<-length(seq)
          break
        }
      }
      i<-i+1
    }
    #Is the mutation at the end or start of a fragment?
    this_fragment@stop=end
    # Mutations before the fragment midpoint are recorded as start mutations,
    # all others as stop mutations.
    mid<-this_fragment@start+round((this_fragment@stop - this_fragment@start)/2)
    if(cm<=length(mutations)){
      for(j in cm:endm){
        if(mutations[j] < mid){
          this_fragment@start_mutation<-c(this_fragment@start_mutation, mutations[j])
        }
        else{
          this_fragment@stop_mutation<-c(this_fragment@stop_mutation, mutations[j])
        }
      }
    }
    fragments<-c(fragments, this_fragment)
    if(end==length(seq)){
      break
    }
    else{
      cm<-endm+1
    }
    # The next fragment starts directly behind the one just stored.
    start<-this_fragment@stop+1
  }
  #Optimize Fragments - Shift start and stop
  # Where a fragment ends with mutations and its successor starts with mutations,
  # the shared border may be moved towards the midpoint between them; it is never
  # moved left of the current stop nor onto/past the successor's first start mutation.
  if(length(fragments)>1){
    for (k in 1:(length(fragments)-1)) {
      if(length(fragments[[k]]@stop_mutation)>0){
        stop_pos<-fragments[[k]]@stop_mutation[1]
        if(length(fragments[[k+1]]@start_mutation)>0){
          start_pos<-fragments[[k+1]]@start_mutation[length(fragments[[k+1]]@start_mutation)]
          mid_stop<-stop_pos+(round((start_pos-stop_pos)/2))-1
          newstop<-max(fragments[[k]]@stop, min(mid_stop, (fragments[[k+1]]@start_mutation[1]-1)))
          fragments[[k]]@stop<-newstop
          fragments[[k+1]]@start<-newstop+1
        }
        else{
          next
        }
      }
      else{
        next
        #This should never be the case
      }
    }
  }
  return(fragments)
}

# Melting temperature from base composition:
#   Tm = 100.5 + (41 * (G+C)/(A+T+G+C)) - (820/(A+T+G+C)) + 16.6*log10([Na+])
#
# x:                    oligo sequence as a single upper-case string
# salt_concentration:   Na+ concentration; divided by 1000 before the log10 term [default: 50]
# primer_concentration: accepted for signature compatibility, not used by this formula
# offset:               number of leading bases that are excluded from the calculation [default: 0]
# Returns the melting temperature as a plain numeric value.
calculate_tm<-function(x, salt_concentration=50, primer_concentration=50, offset=0){
  bases<-strsplit(x, "", fixed=TRUE)[[1]]
  # Drop the first `offset` bases. The counts are taken from the trimmed
  # sequence; previously they were taken from the untrimmed input, so
  # `offset` had no effect at all.
  bases<-bases[seq_along(bases) > offset]
  n_gc<-sum(bases %in% c("G", "C"))
  n_total<-sum(bases %in% c("A", "C", "G", "T"))
  tm<-100.5 + (41 * n_gc/n_total) - (820/n_total) + 16.6*log10(salt_concentration/1000)
  return(as.numeric(tm))
}

# Expand an IUPAC-ambiguous sequence into all concrete sequences, score each one
# with `func` and return the single best one (upper case).
#
# x:         sequence that may contain IUPAC ambiguity codes
# func:      scoring function taking one sequence string [default: calculate_tm]
# selection: "max" (highest score), "min" (lowest score) or "diff" (score closest to `temp`)
# temp:      reference value for selection="diff"
# cuf:       path of the codon usage table; only read when several sequences tie
# Ties are broken by the summed codon-usage value of all triplets of a sequence;
# if that still ties, the first candidate wins.
calculate_UPAC<-function(x, func=calculate_tm, selection="max", temp=0, cuf="e_coli_316407.csv") {
  # Fail early and readably on an unknown selection (used to end in "object 'candidates' not found").
  selection<-match.arg(selection, c("max", "min", "diff"))
  # lapply() keeps one element per position. sapply() collapsed the result into a
  # vector/matrix whenever every position had the same number of alternatives,
  # which made expand.grid() build a single column instead of one per position.
  alternatives<-lapply(s2c(x), function(base){amb(base, forceToLower = TRUE, checkBase = TRUE, IUPAC = s2c("acgturymkswbdhvn"), u2t = TRUE)})
  x_matrix<-expand.grid(alternatives, stringsAsFactors = FALSE)
  variants<-str_to_upper(apply(x_matrix, 1, paste, collapse=""))
  results<-vapply(variants, function(v){as.numeric(func(v))}, numeric(1), USE.NAMES = FALSE)
  # Turn all three modes into "larger is better".
  score<-switch(selection,
                max=results,
                min=-results,
                diff=-abs(results-temp))
  candidates<-which(score==max(score))
  if(length(candidates)>1){
    #select sequence with highest probability
    cuf_vector<-get_cu_table(cuf, list=F)
    # NOTE(review): wordsize=3 with by=1 counts every overlapping triplet, not only in-frame codons -- confirm this is intended.
    sum_of_prob<-vapply(variants[candidates], function(v){
      c_x<-count(s2c(v), wordsize = 3, by=1, alphabet = c("A", "C", "G", "T"))
      return(sum(c_x*cuf_vector[names(c_x)]))
    }, numeric(1), USE.NAMES = FALSE)
    # which.max() returns exactly one index, so tied sequences are no longer
    # pasted together into one over-long string.
    best<-which.max(sum_of_prob)
    if(length(best)==1){
      candidates<-candidates[best]
    }
  }
  return(variants[candidates[1]])
}

# Sum the per-dinucleotide free-energy increments over all overlapping
# dinucleotides of x, starting from an initial value of -5.0.
calculate_DeltaG<-function(x){
  # Dinucleotide groups sharing one increment, in accumulation order.
  steps<-list(c("AA", "TT"), "AT", "TA", c("CA", "TG"), c("GT", "AC"),
              c("CT", "AG"), c("GA", "TC"), "CG", "GC", c("GG", "CC"))
  increments<-c(1.2, 0.9, 0.9, 1.7, 1.5, 1.5, 1.5, 2.8, 2.3, 2.1)
  pair_counts<-count(s2c(x), wordsize=2, by=1, alphabet = c("A", "C", "G", "T"))
  total<- -5.0
  for(k in seq_along(steps)){
    occurrences<-sum(as.numeric(pair_counts[steps[[k]]]))
    total<-total + occurrences*increments[k]
  }
  return(total)
}

# Sum the per-dinucleotide enthalpy increments over all overlapping
# dinucleotides of x, starting from 0.
calculate_DeltaH<-function(x){
  # Dinucleotide groups sharing one increment, in accumulation order.
  steps<-list(c("AA", "TT"), "AT", "TA", c("CA", "TG"), c("GT", "AC"),
              c("CT", "AG"), c("GA", "TC"), "CG", "GC", c("GG", "CC"))
  increments<-c(8, 5.6, 6.6, 8.2, 9.4, 6.6, 8.8, 11.8, 10.5, 10.9)
  pair_counts<-count(s2c(x), wordsize=2, by=1, alphabet = c("A", "C", "G", "T"))
  total<-0.0
  for(k in seq_along(steps)){
    occurrences<-sum(as.numeric(pair_counts[steps[[k]]]))
    total<-total + occurrences*increments[k]
  }
  return(total)
}

# Sum the per-dinucleotide entropy increments over all overlapping
# dinucleotides of x, starting from 0.
calculate_DeltaS<-function(x){
  # Dinucleotide groups sharing one increment, in accumulation order.
  steps<-list(c("AA", "TT"), "AT", "TA", c("CA", "TG"), c("GT", "AC"),
              c("CT", "AG"), c("GA", "TC"), "CG", "GC", c("GG", "CC"))
  increments<-c(21.9, 15.2, 18.4, 21.0, 25.5, 16.4, 23.5, 29.0, 26.4, 28.4)
  pair_counts<-count(s2c(x), wordsize=2, by=1, alphabet = c("A", "C", "G", "T"))
  total<-0.0
  for(k in seq_along(steps)){
    occurrences<-sum(as.numeric(pair_counts[steps[[k]]]))
    total<-total + occurrences*increments[k]
  }
  return(total)
}

#' Calculate melting temperature based on nearest-neighbor calculation
#'
#' The implementation is based on the explanations of \url{http://biotools.nubic.northwestern.edu/OligoCalc.html}.
#'
#' More details at \url{https://doi.org/10.1093/nar/gkm234}
#'
#' @param oligo_sequence A string containing an oligo sequence.
#' @param primer_concentration The concentration of the primer in nanomolar (nM) [default: 50]
#' @param salt_concentration The concentration of Na+ in millimolar (mM) [default: 50]
#' @param offset You can skip a prefix of your oligo sequence with this parameter. The first n bases are not considered in the calculation. [default: 0]
#' @return The melting temperature in \code{print('\u00B0')}C
#'
#' @examples
#' \dontrun{
#' GoldenMutagenesis::calculate_tm_nnb("AAAAAATGGTGTGTGATGTGTCCCTCTATC")
#' }
#'
calculate_tm_nnb<-function(oligo_sequence, primer_concentration=50, salt_concentration=50, offset=0){
  # Skip exactly the first `offset` bases. The previous index range
  # offset:length only skipped offset-1 bases for offset >= 1 (index 0 is
  # silently ignored by R, which is why offset=0 happened to work).
  oligo_sequence<-substring(oligo_sequence, offset+1)
  K<-1/(primer_concentration*1e-9) #Convert from nanomoles to moles
  R<-1.987
  RlnK<-R*log(K)
  # Thermodynamic term from the summed dinucleotide enthalpy and entropy values.
  result<-(1000*(calculate_DeltaH(oligo_sequence)-3.4)/(calculate_DeltaS(oligo_sequence)+RlnK)-272.9)
  # Salt adjustment; salt_concentration is divided by 1000 before taking the natural log.
  result<-result+7.21*log(salt_concentration/1000)
  return(result)
}


# Trim the binding sequence of a primer to the length whose melting temperature
# is closest to `target_temp`.
#
# primer:      a Primer (or subclass) object; its binding_sequence is shortened from the end
# temp_func:   function(sequence, offset) returning a melting temperature
# primer_min:  minimal binding length; multiplied by 3 to get the minimal number of bases
# target_temp: temperature the primer should come close to
# gc_filter:   if TRUE, prefer candidates whose last five bases contain 2 or 3 A/T
# Returns the primer with binding_sequence, temperature and difference updated.
setGeneric("sequence_length_temperature" , function(primer, temp_func=calculate_tm_nnb, primer_min=3, target_temp=60,  gc_filter=F) {
  standardGeneric("sequence_length_temperature")
})

setMethod("sequence_length_temperature", signature(primer="Primer"),
          function(primer, temp_func=calculate_tm_nnb, primer_min=3, target_temp=60, gc_filter=F){
            primer_seq_s2c<-s2c(primer@binding_sequence)
            n_bases<-length(primer_seq_s2c)
            if(n_bases == 0) {
              warning("The binding sequence is empty. The primer was left unchanged.")
              return(primer)
            }
            # Never start beyond the available bases: a range like 9:5 counted
            # downwards and pasted "NA" into the candidate sequences.
            shortest<-max(1, min(primer_min*3, n_bases))
            # All prefixes of the binding sequence from the minimal to the full length.
            sequences_i<-vapply(shortest:n_bases, function(i){paste(primer_seq_s2c[1:i],collapse="")}, character(1))
            temperatures<-vapply(sequences_i, function(s){as.numeric(temp_func(s, offset=0))}, numeric(1))
            diff<-abs(temperatures-target_temp)
            #check for at least two A or T
            at_in_tail<-str_count(str_sub(sequences_i, start=-5), "A|T")
            candidates_with_AT<-which(at_in_tail>=2 & at_in_tail<4)
            if(length(candidates_with_AT) == 0 || gc_filter==F) {
              pool<-seq_along(sequences_i)
              if(gc_filter==T) {
                warning("The end (last five bases) of the binding sequence is not optimal. The primers are maybe inefficient.")
              }
            }
            else{
              pool<-candidates_with_AT
            }
            # which.min() yields a single index (the shortest candidate on a tie),
            # so the slots below always receive exactly one value.
            best<-pool[which.min(diff[pool])]
            primer@binding_sequence<-sequences_i[best]
            primer@temperature<-as.numeric(temperatures[best])
            primer@difference<-as.numeric(diff[best])
            return(primer)
          }
)

# Primer_MSD uses the implementation of the next method in the dispatch chain.
setMethod("sequence_length_temperature", signature(primer="Primer_MSD"),
          function(primer, temp_func=calculate_tm_nnb, primer_min=3, target_temp=60, gc_filter=F){
            callNextMethod()
          }
)

# Primer_SPM also defers to the next method. An earlier SPM-specific variant that
# evaluated extra + binding_sequence together was disabled in favour of this.
setMethod("sequence_length_temperature", signature(primer="Primer_SPM"),
          function(primer, temp_func=calculate_tm_nnb, primer_min=3, target_temp=60, gc_filter=F){
            callNextMethod()
          }
)

# Validate a coding DNA sequence and return it split into codons.
# Stops when the length is not a multiple of 3, when characters other than
# A/C/G/T occur, or when no ATG / no stop codon is present. Codons before the
# first ATG and after the last stop codon are cut off with a warning.
sequence_check<-function(input_sequence){
  cleaned<-str_trim(str_to_upper(input_sequence))
  n_bases<-nchar(cleaned)
  if(n_bases%%3!=0) {
    stop(paste("The length of the sequence is no factor of 3. Please check your sequence.", "The length of the sequence was:", n_bases,  sep=" "))
  }

  if(!str_detect(cleaned, "^(A|C|G|T)+$")) {
    stop(paste("The sequence contains invalid characters that are not A|C|G|T."))
  }

  codons<-splitseq(s2c(cleaned))

  # Start: the first ATG codon in frame.
  met_positions<-which(str_detect(codons, "ATG"))
  if(length(met_positions) == 0) {
    stop("No Methionine in the provided sequence. Stopping here. Please check the provided sequence.")
  }
  first_met<-min(met_positions)
  if(first_met != 1){
    warning(paste("No Methionine at first codon found! Please check the provided sequence! Took codon #", first_met, "as start.", sep=" "))
    codons<-codons[first_met:length(codons)]
  }

  # End: the last stop codon in frame (positions relative to the possibly trimmed codons).
  stop_positions<-which(str_detect(codons, "(TAA)|(TGA)|(TAG)"))
  if(length(stop_positions) == 0) {
    stop("No stop codon in the provided sequence. Stopping here. Please check the provided sequence!")
  }
  last_stop<-max(stop_positions)
  if(last_stop != length(codons)) {
    warning(paste("There is no stop codon at the end of the sequence. Please check the provided sequence! Took codon #", last_stop, "as end.", sep= " "))
    codons<-codons[1:last_stop]
  }
  return(codons)
}

# Repair problematic overhangs by shifting a fragment junction one base to the left.
#
# An overhang is considered bad when it occurs more than once or, with
# check_repetitive, when it consists only of A/T or only of G/C. The first bad
# overhang is handled, then the function calls itself again until no bad
# overhang is left or a junction cannot be shifted (warning, primers returned as they are).
#
# primers:            list of primer pairs ([[1]] forward, [[2]] reverse), one pair per fragment
# fragments:          fragment objects; only @start is read, for the warning text
# binding_min_length: reverse binding sequences shorter than 3*binding_min_length are not shortened
# target_temp:        reference temperature for the `difference` slot of the forward primer
# check_repetitive:   also treat A/T-only and G/C-only overhangs as bad
# Returns the (possibly modified) list of primer pairs.
check_primer_overhangs<-function(primers, fragments, binding_min_length=4, target_temp=60, check_repetitive=T) {
  #ToDo: Add paramter for temperature calculation method
  # 2 x n matrix: row 1 forward, row 2 reverse overhang of each fragment. Its linear
  # index therefore matches the position of a primer in unlist(primers).
  overhangs<-sapply(primers, function(x){return(c(x[[1]]@overhang, x[[2]]@overhang))})
  duplicates<-table(overhangs)
  duplicates<-duplicates[names(duplicates)!="" & duplicates > 1]
  if(check_repetitive == T) {
    #Repetitive overhangs
    rep<-table(overhangs)
    rep<-rep[names(rep)!=""]
    rep_temp<-names(rep)
    rep<-str_count(names(rep), ("(^(A|T){4}$)|(^(G|C){4}$)"))
    names(rep)<-rep_temp
    rep<-rep[rep > 0]
    rm(rep_temp)
    bad_overhangs<-union(names(duplicates), names(rep))
  } else {
    # Use the overhang sequences, not the table of counts: the counts were later
    # compared against overhang strings and could never match.
    bad_overhangs<-names(duplicates)
  }
  if(length(bad_overhangs)==0) {
    return(primers)
  }
  bad_overhang<-bad_overhangs[1]
  primer_num<-which(overhangs==bad_overhang)
  primer_unlist<-unlist(primers)
  # 1 = forward primer (odd index), 2 = reverse primer (even index)
  primer_num2<-primer_num %% 2
  primer_num2[primer_num2==0]<-2
  for(i in seq_along(primer_num)) {
    # Pick the two primers of the junction that carries the bad overhang: the
    # reverse primer of one fragment and the forward primer of the following one.
    # NOTE(review): a bad overhang on the very first forward or very last reverse
    # primer indexes outside primer_unlist here -- confirm this cannot occur.
    if(primer_num2[i]==1) {
      primer_fd_num<-primer_num[i]
      primer_rv_num<-primer_num[i] - 1
    } else {
      primer_fd_num<-primer_num[i] + 1
      primer_rv_num<-primer_num[i]
    }
    primer_fd<-primer_unlist[[primer_fd_num]]
    primer_rv<-primer_unlist[[primer_rv_num]]
    #we will move to the left direction
    #check if primer_rv is an NDT primer
    if(class(primer_rv)=="Primer_MSD") {
      # Reverse complements of the randomisation codons; such a codon at the start
      # of `extra` must not be split by the shift.
      msd_mut<-sapply(c("NNN", "NNK", "NNS", "NDT", "DBK", "NRT"), FUN = function(x){paste(stringr::str_to_upper(rev(seqinr::comp(seqinr::s2c(x), ambiguous=T))), sep="", collapse="")}, USE.NAMES = F)
      if(str_sub(primer_rv@extra, 1, 3) %in% msd_mut) {
        if(i == length(primer_num)) {
          warning(paste("We can not fix overlaps or palyndromic sequences in the primers. Please consider a silent mutation at position ", fragments[[ceiling((primer_rv_num+1)/2)]]@start, ".", sep=""))
          return(primers)
        }
        else {
          next
        }
      }
      else{
        # Move the first base of `extra` to the end of the overhang and drop the
        # overhang's first base, i.e. slide the overhang window by one base.
        shift_base<-str_sub(primer_rv@extra, 1, 1)
        primer_rv@overhang<-paste(primer_rv@overhang,shift_base, sep="")
        primer_rv@extra<-str_sub(primer_rv@extra, 2)
        primer_rv@overhang<-str_sub(primer_rv@overhang, 2)
        primer_rv@temperature<-calculate_tm_nnb(primer_rv@binding_sequence, offset = 0)
        primer_rv@difference<-abs(primer_rv@temperature - primer_unlist[[primer_rv_num -1 ]]@temperature)
      }
    } else {
      if(nchar(primer_rv@binding_sequence) < 3 * binding_min_length) {
        if(i == length(primer_num)) {
          warning(paste("We can not fix overlaps or palyndromic sequences in the primers. Please consider a silent mutation at position ", fragments[[ceiling((primer_rv_num+1)/2)]]@start, ".", sep=""))
          return(primers)
        }
        else{
          next
        }
      }
      else{
        # Same slide as above, but the base that enters `extra` is taken from the
        # start of the binding sequence, which becomes one base shorter.
        shift_base<-str_sub(primer_rv@binding_sequence, 1, 1)
        primer_rv@extra<-paste(primer_rv@extra, shift_base, sep="")
        primer_rv@binding_sequence<-str_sub(primer_rv@binding_sequence, 2)
        shift_base<-str_sub(primer_rv@extra, 1, 1)
        primer_rv@extra<-str_sub(primer_rv@extra, 2)
        primer_rv@overhang<-paste(primer_rv@overhang,shift_base, sep="")
        primer_rv@overhang<-str_sub(primer_rv@overhang, 2)
        primer_rv@temperature<-calculate_tm_nnb(primer_rv@binding_sequence, offset = 0)
        primer_rv@difference<-abs(primer_rv@temperature - primer_unlist[[primer_rv_num -1 ]]@temperature)
      }
    }
    # Mirror the shift on the forward primer: the complement of the shifted base is
    # put in front of its overhang and the base pushed out at the end goes to `extra`.
    primer_fd@overhang<-paste(comp(shift_base, forceToLower = F), primer_fd@overhang, sep="")
    primer_fd@extra<-paste(str_sub(primer_fd@overhang, 5), primer_fd@extra ,sep="")
    if(class(primer_fd)=="Primer_SPM") {
      # For Primer_SPM the last base of `extra` is handed on to the binding sequence.
      primer_fd@binding_sequence<-paste(str_sub(primer_fd@extra, -1), primer_fd@binding_sequence ,sep="")
      primer_fd@extra<-str_sub(primer_fd@extra, 1, -2)
    }
    primer_fd@overhang<-str_sub(primer_fd@overhang, 1, 4)
    primer_fd@temperature<-calculate_tm_nnb(primer_fd@binding_sequence, offset = 0)
    primer_fd@difference<-abs(target_temp - primer_fd@temperature)
    primers[[ceiling(primer_fd_num/2)]][[1]]<-primer_fd
    primers[[ceiling(primer_rv_num/2)]][[2]]<-primer_rv
    # One junction is fixed per call; the recursion below re-checks everything.
    break
  }

  # Re-check with the caller's settings (check_repetitive used to be reset to its default here).
  return(check_primer_overhangs(primers = primers, fragments = fragments, binding_min_length = binding_min_length, target_temp = target_temp, check_repetitive = check_repetitive))
}

# Print primers to the console and append the same report to a text file.
#
# primer:    a Primer or an Extended_Primerset
# file_name: path of the report file (appended to). When omitted, the global
#            variable `file_name` is used, which is what the methods relied on before.
setGeneric("print_primer_NF" , function(primer,file_name) {
  standardGeneric("print_primer_NF")
})

# Write the same text to the console and append it to `file_name`.
.print_primer_NF_emit<-function(..., file_name, sep=" "){
  cat(..., sep=sep)
  cat(..., sep=sep, file=file_name, append=TRUE)
}

setMethod("print_primer_NF", signature(primer="Primer"),
          function(primer, file_name){
            # The methods now take `file_name` like the generic does, so it can be
            # passed explicitly; the global is only the fallback.
            if(missing(file_name)) {
              file_name<-get("file_name", envir=globalenv())
            }
            # Full primer in 5'->3' order of its parts.
            .print_primer_NF_emit(primer@prefix, primer@restriction_enzyme, primer@suffix, primer@vector, primer@overhang, primer@extra, primer@binding_sequence, "\n", sep="", file_name=file_name)
            # The degree sign is only written to the console, not to the file.
            cat("Tm: ", primer@temperature, " \u00b0C" , "\n")
            cat("Tm: ", primer@temperature, "\n",file=file_name,append=TRUE)
            #cat("Temperature difference: ", primer@difference, " K", "\n")
          }
)

setMethod("print_primer_NF", signature(primer="Extended_Primerset"),
          function(primer, file_name){
            if(missing(file_name)) {
              file_name<-get("file_name", envir=globalenv())
            }
            primerset<-primer
            # One section per fragment: borders, then forward and reverse primer.
            for(i in seq_along(primerset@fragments)){
              .print_primer_NF_emit("Fragment ", i, "\n", sep="", file_name=file_name)
              .print_primer_NF_emit("Start ", primerset@fragments[[i]]@start, ", ", sep="", file_name=file_name)
              .print_primer_NF_emit("Stop ",  primerset@fragments[[i]]@stop, ", ", sep="", file_name=file_name)
              #cat("Length ",(primerset@fragments[[i]]@stop - primerset@fragments[[i]]@start)+1, "\n", sep="")
              .print_primer_NF_emit("Forward\n", file_name=file_name)
              print_primer_NF(primerset@primers[[i]][[1]], file_name)
              .print_primer_NF_emit("Reverse\n", file_name=file_name)
              print_primer_NF(primerset@primers[[i]][[2]], file_name)
              .print_primer_NF_emit("\n", file_name=file_name)
            }
            .print_primer_NF_emit("Input Sequence:\n", primerset@oldsequence,"\n", file_name=file_name)
            .print_primer_NF_emit("\nModified Sequence:\n", primerset@newsequence, "\n", file_name=file_name)
          }
)


(as ‘lib’ is unspecified)







































	‘/tmp/RtmpcKhcg4/downloaded_packages’



In [None]:
# @title Input variables, then click 'Runtime' -> 'Run all'
%%R

# @markdown **General variables**
# Run name; used as the prefix of every per-variant ".out" file and as the
# name of the combined "<project_name>.csv" written by the Python cell.
project_name = "Lib061_FN665" # @param {type:"string"}
# DNA sequence for the WT protein; passed unchanged as the first argument of
# mutate_spm(). It is not validated anywhere in this notebook.
input_sequence<-"ATGGCTAAAGTATTGTGTGTTTTATATGATGACCCTGTGGATGGTTATCCAAAGACATATGCCCGCGACGATCTCCCCAAGATCGACCATTACCCGGGTGGTCAAATCCTGCCCACGCCGAAGGCCATTGACTTCACGCCCGGCCAACTGCTGGGGTCCGTTTCGGGCGAACTGGGCCTTCGTGAGTACTTAGAAAGCAATGGACACACGCTTGTAGTCACTTCCGATAAGGACGGGCCCGACTCGGTGTTCGAGCGCGAGTTAGTCGACGCGGACGTAGTGATTTCACAACCATTCTGGCCTGCGTATCTGACGCCAGAGCGTATTGCTAAGGCGAAGAATTTGAAGCTGGCACTTACCGCGGGCATCGGTTCAGACCACGTGGACCTTCAGTCAGCGATCGATCGCAATGTCACCGTCGCAGAAGTGACCTACTGTAATTCAATTAGTGTGGCGGAGCACGTGGTCATGATGATCCTCAGCCTGGTCCGTAATTACTTACCCAGCCACGAGTGGGCGCGCAAGGGTGGCTGGAACATCGCCGATTGCGTCAGCCATGCCTACGACCTCGAAGCTATGCATGTGGGTACTGTCGCGGCAGGTCGCATCGGTTTGGCGGTTCTTCGTCGCCTCGCACCGTTCGATGTCCACTTGCACTATACAGACCGCCACCGCTTACCAGAGAGCGTTGAGAAGGAGCTCAACCTGACGTGGCATGCAACCCGCGAAGATATGTACCCCGTGTGTGACGTGGTTACGTTAAACTGTCCACTCCATCCTGAGACAGAGCATATGATTAACGATGAGACTTTAAAACTCTTTAAACGTGGTGCATACATTGTGAATACGGCCCGTGGCAAGTTATGCGACCGTGATGCAGTTGCACGCGCCCTTGAGAGCGGTCGTCTGGCAGGTTACGCGGGCGATGTTTGGTTTCCCCAGCCGGCACCAAAGGACCACCCCTGGCGTACGATGCCGTATAACGGTATGACTCCGCATATTTCGGGTACAACTCTGACTGCTCAAGCCCGTTATGCGGCGGGTACTCGTGAAATCCTTGAATGTTTCTTTGAGGGACGCCCGATCCGTGACGAATACCTTATTGTGCAAGGCGGGGCGCTGGCAGGCACCGGTGCACACAGCTACAGCAAAGGTAACGCCACAGGCGGGTCGGAAGAAGCGGCCAAGTTCAAGAAGGCCGTATGA" # @param {type:"string"}
# Path to a text file with one variant per line; the mutations of a variant
# are joined with "_" (e.g. E100K_I200V). The compute cell reads it with
# readLines().
input_mutants="/content/drive/MyDrive/fun_with_code/MSDM_script_test/fdh_msdm.txt" # @param {type:"string"}
# Directory that receives the "individual_muts" sub-directory and the combined
# CSV; the compute cell creates it if needed.
output_dir="/content/drive/MyDrive/fun_with_code/MSDM_script_test/output" # @param {type:"string"}

# @markdown **Golden Gate variables**
# Path to the E. coli codon frequency table; forwarded to mutate_spm() as `cuf`.
cuf<-"/content/drive/MyDrive/fun_with_code/MSDM_script_test/ecoli_codon_freq.txt" # @param {type:"string"}
# Forwarded to mutate_spm() as `restriction_enzyme`.
chosen_recognition_site<-"GGTCTC" # @param {type:"string"}
# Forwarded to mutate_spm() as `prefix`.
chosen_prefix<-"ggaa" # @param {type:"string"}
# Forwarded to mutate_spm() as `suffix`.
chosen_suffix<-"G" # @param {type:"string"}
# overhang1 and overhang2 are combined, in this order, into the two-element
# `vector` argument of mutate_spm().
overhang1<-"TATG" # @param {type:"string"}
overhang2<-"GCTT" # @param {type:"string"}
# Forwarded to mutate_spm() as `target_temp`; the sample output reports primer
# Tm values in °C, so this is presumably °C as well — TODO confirm.
target_tm<-60 # @param {type:"number"}

In [None]:
# @title Run this third [runs R code to compute primers]
%%R

# Compute Golden Gate primers for every variant listed in the mutants file.
#
# Reads:  input_sequence, input_mutants (a path), output_dir, project_name,
#         cuf, chosen_recognition_site, chosen_prefix, chosen_suffix,
#         overhang1, overhang2 and target_tm from the input-variables cell.
# Writes: one "<project_name>_<variant>.out" file per variant under
#         "<output_dir>/individual_muts".
# Calls:  mutate_spm() and print_primer_NF(), defined elsewhere in this
#         notebook; str_split() and fixed() from stringr.

chosen_overhangs <- c(overhang1, overhang2)

# readLines() opens and closes its own connection when handed a path; the
# earlier readLines(file(path)) left an unclosed connection object behind.
# The lines are kept in their own variable so that `input_mutants` still holds
# the path and this cell can be re-run without re-running the input cell.
mutant_lines <- readLines(input_mutants)

# make output directory (stay quiet if it already exists from an earlier run)
individual_output_dir <- paste0(output_dir, "/individual_muts")
dir.create(individual_output_dir, recursive = TRUE, showWarnings = FALSE)

# largest number of mutations per variant that is processed
max_mutations <- 5L

# run
for (line in mutant_lines) {

  # tolerate stray whitespace and blank lines in the mutants file
  line <- trimws(line)
  if (!nzchar(line)) {
    next
  }

  # one element per mutation, e.g. "E100K_I200V" -> c("E100K", "I200V")
  muts <- str_split(line, fixed("_"))[[1]]

  if (length(muts) > max_mutations) {
    print("more than 5 mutations not supported")
    next
  }

  # every mutation must look like <char><digits><char>; anything else would
  # reach mutate_spm() as an NA position
  if (!all(grepl("^.[0-9]+.$", muts))) {
    warning("skipping malformed mutant line: '", line, "'", call. = FALSE)
    next
  }

  # residue number = everything between the first and last character,
  # mutant amino acid = last character
  resi <- strtoi(substr(muts, 2, nchar(muts) - 1), base = 10L)
  mut_aa <- substr(muts, nchar(muts), nchar(muts))

  # strtoi() yields NA when the number does not fit into an integer
  if (anyNA(resi)) {
    warning("skipping mutant line with unusable residue number: '", line, "'",
            call. = FALSE)
    next
  }

  # list of c(position, amino acid) pairs; c() coerces the position to
  # character, which is the shape the hand-written branches produced before
  mutations <- lapply(seq_along(muts), function(i) c(resi[i], mut_aa[i]))

  file_name <- paste0(individual_output_dir, "/", project_name, "_",
                      paste(muts, collapse = "_"), ".out")

  primers <- mutate_spm(input_sequence,
                        prefix = chosen_prefix,
                        restriction_enzyme = chosen_recognition_site,
                        suffix = chosen_suffix,
                        vector = chosen_overhangs,
                        replacements = mutations,
                        binding_min_length = 4,
                        binding_max_length = 9,
                        target_temp = target_tm,
                        fragment_min_size = 60,
                        cuf = cuf)

  # start from a fresh file; print_primer_NF() is handed the same path
  writeLines("", file_name)
  print_primer_NF(primers, file_name)
}

Fragment 1
Start 2, Stop 114, Forward
GGAAGGTCTCGTATGAAAAAAGTATTGTGTGTTTTATATGATGAC
Tm:  50.56517  °C 
Reverse
GGAAGGTCTCCTTTCTTCGCCTTAGCAATACGC
Tm:  51.14711  °C 

Fragment 2
Start 115, Stop 402, Forward
GGAAGGTCTCGGAAATTGAAGCTGGCACTTACCGCGG
Tm:  60.60636  °C 
Reverse
GGAAGGTCTCCGCTTTCATACGGCCTTCTTGAACTTGGCC
Tm:  59.78997  °C 

Input Sequence:
 ATGGCTAAAGTATTGTGTGTTTTATATGATGACCCTGTGGATGGTTATCCAAAGACATATGCCCGCGACGATCTCCCCAAGATCGACCATTACCCGGGTGGTCAAATCCTGCCCACGCCGAAGGCCATTGACTTCACGCCCGGCCAACTGCTGGGGTCCGTTTCGGGCGAACTGGGCCTTCGTGAGTACTTAGAAAGCAATGGACACACGCTTGTAGTCACTTCCGATAAGGACGGGCCCGACTCGGTGTTCGAGCGCGAGTTAGTCGACGCGGACGTAGTGATTTCACAACCATTCTGGCCTGCGTATCTGACGCCAGAGCGTATTGCTAAGGCGAAGAATTTGAAGCTGGCACTTACCGCGGGCATCGGTTCAGACCACGTGGACCTTCAGTCAGCGATCGATCGCAATGTCACCGTCGCAGAAGTGACCTACTGTAATTCAATTAGTGTGGCGGAGCACGTGGTCATGATGATCCTCAGCCTGGTCCGTAATTACTTACCCAGCCACGAGTGGGCGCGCAAGGGTGGCTGGAACATCGCCGATTGCGTCAGCCATGCCTACGACCTCGAAGCTATGCATGTGGGTACTGTCGCGGCAGGTCGCATCGGTTTGGCGGTTCTTCGTCGCCTCGCACCGTTCGATGTCCACT

In [None]:
# @title Run this fourth [runs python code to produce a single output csv]
import glob,os

# magic commands for turing R variables into python variables
output_dir = %R output_dir
output_dir = str(output_dir[0])

project_name = %R project_name
project_name = str(project_name[0])

output_files = glob.glob(output_dir+"/individual_muts/"+project_name+"*.out")

mut_primers = {}

fwd = False
rev = False
tm_fwd=False
tm_rev=False

for f in output_files:

  mut = os.path.splitext(os.path.basename(f))[0].replace(project_name+"_","")
  mut_primers[mut] = {}

  for line in open(f,"r"):

    if tm_fwd:
      fwd_tm = float(line.strip().replace("Tm:",""))
      tm_fwd=False
    elif tm_rev:
      rev_tm = float(line.strip().replace("Tm:",""))
      tm_rev=False

      #print(frag_id,fwd_primer,fwd_tm,rev_primer,rev_tm)

      mut_primers[mut][frag_id] = {"fwd":{"primer":fwd_primer,"primer_binding":fwd_primer.replace("CACACCAGGTCTCG",""),"tm-GM":fwd_tm},"rev":{"primer":rev_primer,"primer_binding":rev_primer.replace("CACACCAGGTCTCC",""),"tm-GM":rev_tm}}

    if fwd:
      fwd_primer = line.strip()

      fwd=False
      tm_fwd=True

    elif rev:
      rev_primer = line.strip()

      rev=False
      tm_rev=True

    if "Fragment" in line:
      frag_id = line.strip().replace("Fragment ","")
    elif "Forward" in line:
      fwd=True
    elif "Reverse" in line:
      rev=True

# write output file
out = open(output_dir+"/"+project_name+".csv","w")

for mut in mut_primers:
    for frag in mut_primers[mut]:
        for direction in mut_primers[mut][frag]:
            out.write(mut+"_"+direction[0].upper()+frag+","+mut_primers[mut][frag][direction]["primer"]+","+str(round(mut_primers[mut][frag][direction]["tm-GM"],1))+","+str(len(mut_primers[mut][frag][direction]["primer"]))+"\n")
            #print(mut+"_"+direction[0].upper()+frag+","+mut_primers[mut][frag][direction]["primer"]+","+str(round(mut_primers[mut][frag][direction]["tm-GM"],1))+","+str(len(mut_primers[mut][frag][direction]["primer"]))+"\n")
out.close()