# Only required on your first run

In [1]:
pip install git+git://github.com/MatthewHiggins2017/bioconda-PrimedRPA.git

Collecting git+git://github.com/MatthewHiggins2017/bioconda-PrimedRPA.git
  Cloning git://github.com/MatthewHiggins2017/bioconda-PrimedRPA.git to /tmp/pip-req-build-6izb9_rc
  Running command git clone -q git://github.com/MatthewHiggins2017/bioconda-PrimedRPA.git /tmp/pip-req-build-6izb9_rc
  Resolved git://github.com/MatthewHiggins2017/bioconda-PrimedRPA.git to commit 9ac6d7ac1b8503a1aaf27d80d1e6160777c78583
Note: you may need to restart the kernel to use updated packages.


# Your sequences

In [60]:
from Bio import Seq
from Bio import SeqIO

In [77]:
# PCR primers for the target, if they are already known
forwardPCR, reversePCR = (
    Seq.Seq("GACCCCAAAATCAGCGAAAT"),
    Seq.Seq("TCTGGTTACTGCCAGTTGAATCTG"),
)

# FASTA file that holds the target genome
genomeFile = "sequence.fasta"

# Load the genome record from that file
genome = SeqIO.read(genomeFile, "fasta")


In [78]:
# Echo the forward PCR primer so the cell output shows the Seq object that was built.
forwardPCR

Seq('GACCCCAAAATCAGCGAAAT')

In [79]:
# Sequence of the region the probe should target.
# NOTE(review): probeRegion is not referenced by any other visible cell — confirm where it is used.
probeRegion = Seq.Seq("GCCAAGCAGAACATCCAACTGATCA")

In [87]:
# Number of flanking bases to keep on each side of the PCR amplicon.
padding = 100

# Locate both primer sites on the genome strand as given. The reverse primer is
# searched as its reverse complement, matching the original lookup.
forwardSite = genome.seq.find(forwardPCR)
reverseSite = genome.seq.find(reversePCR.reverse_complement())

# find() returns -1 when the sequence is absent; fail loudly instead of silently
# producing a meaningless window (e.g. start = -101, end = 99).
if forwardSite == -1:
    raise ValueError("forwardPCR was not found in the genome sequence")
if reverseSite == -1:
    raise ValueError("The reverse complement of reversePCR was not found in the genome sequence")

# Window start: `padding` bases upstream of the forward primer, clamped so it
# never becomes a negative (wrap-around) index.
start = max(forwardSite - padding, 0)

# Window end: find() gives the first base of the reverse-primer site, so the
# primer length is added before the padding; clamped to the genome length.
end = min(reverseSite + len(reversePCR) + padding, len(genome.seq))

In [None]:
# Path, relative to the current working directory, of the input fasta file.
# NOTE(review): no visible cell writes "trimmed.fasta" (start/end are computed but never used
# to slice the genome), and the same assignment is repeated in the parameters cell — confirm
# how this file is produced.
fastaFile = "trimmed.fasta"

# Input Parameters

In [2]:
# Parameters:
# These values guide the PrimedRPA-based primer and probe design process. They are substituted,
# in the order listed here, into Parameters_template.txt by the cell that writes
# PrimedRPA_Parameters.txt. Please follow the instructions outlined below:

# Reference name for this PrimedRPA run (also used as the prefix of the result CSV files read later):
name = "N1_Aug5"

# Use a previously generated alignment file? [NO or file path]
preAligned = "NO"

# Use previously generated binding sites? [NO or file path]
knownBindingSites = "NO"

# Path, from your current working directory, to the input fasta file:
fastaFile = "trimmed.fasta"

# Classification of the input fasta file contents, one of [SS, MS, MAS], where:
# SS = Single sequence
# MS = Multiple unaligned sequences
# MAS = Multiple aligned sequences
fastaType = "SS"

# If multiple sequences are present in the input fasta file (classification MS or MAS), the
# percentage identity required for the primer and probe target binding sites:
identityRequirement = 99

# Is a primer identity anchor required? [NO or length of anchor]
identityAnchor = "NO"

# Desired primer length (this can be a range, e.g. 28-32, or a fixed value, e.g. 32):
primerLength = "28-32"

# Should a probe be designed and, if so, of what type? [NO, EXO, NFO]
probeType = "EXO"

# Desired probe length (this can be a range, e.g. 45-50, or a fixed value, e.g. 50):
probeLength = 34

# Maximum amplicon length:
maxAmpliconLength = 150

# Repeat nucleotide cut-off in bp (e.g. 5 bp will exclude sequences containing GGGGG):
repeatCutoff = 5

# Minimum percentage GC content for primer/probe:
minGCContent = 40

# Maximum percentage GC content for primer/probe:
maxGCContent = 60

# Percentage match tolerance for primer-probe dimerisation and secondary structure formation:
matchTolerance = 60

# Perform a background DNA binding check? [NO or path to background file]
backgroundCheck = "NO"

# Percentage background cross-reactivity threshold:
backgroundThreshold = 65

# Implement a Background Hard Fail Filter? [NO, YES]
bgHardFail = "NO"

# Maximum number of sets you would like to identify:
numPrimerSets = 20

# Number of threads available:
numThreads = 8

# Blastn cross-reactivity search settings [Basic, Advanced or Fast]:
xReactivity = "Fast"

# Blastn E-value:
blastEval = 1000


Write the parameter file and run PrimedRPA

In [3]:
# Fill the PrimedRPA parameter template with the values defined in the parameters
# cell and write the result to PrimedRPA_Parameters.txt.

# Read the template text; the `with` block closes the file even if reading fails.
with open("Parameters_template.txt") as tFile:
    template = tFile.read()

# The values are passed positionally to str.format, so their order must match the
# order of the replacement fields in Parameters_template.txt.
parameterValues = (
    name, preAligned, knownBindingSites, fastaFile, fastaType,
    identityRequirement, identityAnchor, primerLength, probeType, probeLength,
    maxAmpliconLength, repeatCutoff, minGCContent, maxGCContent, matchTolerance,
    backgroundCheck, backgroundThreshold, bgHardFail, numPrimerSets, numThreads,
    xReactivity, blastEval,
)

# Closing the output file flushes the buffered text to disk, so the complete
# parameter file exists before PrimedRPA is launched on it.
with open("PrimedRPA_Parameters.txt", 'w') as pFile:
    print(template.format(*parameterValues), file=pFile)

In [4]:
# Run the PrimedRPA command-line tool on the generated parameter file.
# NOTE(review): the recorded output of this cell reports "Parameters File Could Not Be Opened";
# check that PrimedRPA_Parameters.txt is fully written and sits in the working directory — TODO confirm cause.
!PrimedRPA PrimedRPA_Parameters.txt




-------------------------------------------
----------------PrimedRPA------------------
-----Finding RPA Primer and Probe Sets-----
-------------Higgins M et al.--------------
-------------------------------------------


Parameters File Could Not Be Opened
Check File Path.
Please run PrimedRPA --help to see valid options


# Analysis and Picking a good primer set
These steps can be done in a spreadsheet or using the scripts below.
## Steps:
- Ensure the target region is part of the amplicon
- Minimize the target-primer overlap
- Minimize amplicon length
- Minimize dimerization score
- Optimize GC content


In [7]:
import pandas as pd
import numpy as np
import matplotlib as mpl 


Read the data in

In [44]:
# The three result CSV files share the run name as their file-name prefix
# (presumably written by the PrimedRPA run above — verify against the tool's output).
reportPaths = (
    f"{name}_Alignment_Summary.csv",
    f"{name}_Output_Sets.csv",
    f"{name}_PrimedRPA_Oligo_Binding_Sites.csv",
)

# Load each report into its own DataFrame, in the same order as the paths above.
alignmentData, primerSets, bindingSites = map(pd.read_csv, reportPaths)

Look at the primer sets

In [46]:
# Display the candidate primer/probe sets (pandas truncates long frames in the rendered output).
primerSets

Unnamed: 0,Forward Primer (FP),FP GC%,FP Binding Start Site,Reverse Primer (RP),RP GC%,RP Binding Start Site,Amplicon Size,Max Dimerisation Percentage Score,Max Dimerisation String,Forward Primer Length,Reverse Primer Length,Minimum Primer 3' Identity Anchor,Probe (P),Probe GC%,Probe Binding Start Site,Probe Length
0,TCTGATAATGGACCCCAAAATCAGCGAAATGC,43.750,77.0,CCACTGCGTTCTCCATTCTGGTTACTGCCAGT,53.125,143.0,98.0,37.500,TCTGATAATGGACCCCAAAATCAGCGAAATGC ...,32.0,32.0,32.0,ACCCCGCATTACGTTTGGTGGACCCTCAGATTCA,52.941176,109.0,34.0
1,TCTGATAATGGACCCCAAAATCAGCGAAATGC,43.750,77.0,CCCACTGCGTTCTCCATTCTGGTTACTGCCAG,56.250,144.0,99.0,37.500,TCTGATAATGGACCCCAAAATCAGCGAAATGC ...,32.0,32.0,32.0,ACCCCGCATTACGTTTGGTGGACCCTCAGATTCA,52.941176,109.0,34.0
2,TCTGATAATGGACCCCAAAATCAGCGAAATGC,43.750,77.0,CCCCACTGCGTTCTCCATTCTGGTTACTGCCA,56.250,145.0,100.0,37.500,TCTGATAATGGACCCCAAAATCAGCGAAATGC ...,32.0,32.0,32.0,ACCCCGCATTACGTTTGGTGGACCCTCAGATTCA,52.941176,109.0,34.0
3,TCTGATAATGGACCCCAAAATCAGCGAAATGC,43.750,77.0,GCCCCACTGCGTTCTCCATTCTGGTTACTGCC,59.375,146.0,101.0,40.625,TCTGATAATGGACCCCAAAATCAGCGAAATGC ...,32.0,32.0,32.0,ACCCCGCATTACGTTTGGTGGACCCTCAGATTCA,52.941176,109.0,34.0
4,TCTGATAATGGACCCCAAAATCAGCGAAATGC,43.750,77.0,CGCCCCACTGCGTTCTCCATTCTGGTTACTGC,59.375,147.0,102.0,40.625,TCTGATAATGGACCCCAAAATCAGCGAAATGC ...,32.0,32.0,32.0,ACCCCGCATTACGTTTGGTGGACCCTCAGATTCA,52.941176,109.0,34.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143,AATGTCTGATAATGGACCCCAAAATCAGCGAA,40.625,73.0,CGTTGTTTTGATCGCGCCCCACTGCGTTCTCC,59.375,161.0,120.0,37.500,AATGTCTGATAATGGACCCCAAAATCAGCGAA ...,32.0,32.0,32.0,ACCCCGCATTACGTTTGGTGGACCCTCAGATTCA,52.941176,109.0,34.0
144,AATGTCTGATAATGGACCCCAAAATCAGCGAA,40.625,73.0,ACGTTGTTTTGATCGCGCCCCACTGCGTTCTC,56.250,162.0,121.0,37.500,AATGTCTGATAATGGACCCCAAAATCAGCGAA ...,32.0,32.0,32.0,ACCCCGCATTACGTTTGGTGGACCCTCAGATTCA,52.941176,109.0,34.0
145,AATGTCTGATAATGGACCCCAAAATCAGCGAA,40.625,73.0,GACGTTGTTTTGATCGCGCCCCACTGCGTTCT,56.250,163.0,122.0,34.375,AATGTCTGATAATGGACCCCAAAATCAGCGAA ...,32.0,32.0,32.0,ACCCCGCATTACGTTTGGTGGACCCTCAGATTCA,52.941176,109.0,34.0
146,AATGTCTGATAATGGACCCCAAAATCAGCGAA,40.625,73.0,CGACGTTGTTTTGATCGCGCCCCACTGCGTTC,59.375,164.0,123.0,31.250,AATGTCTGATAATGGACCCCAAAATCAGCGAA ...,32.0,32.0,32.0,ACCCCGCATTACGTTTGGTGGACCCTCAGATTCA,52.941176,109.0,34.0
