In [36]:
# Provenance: show the most recent commit of the enclosing git checkout.
!git log -1

[33mcommit 2213ea5854e8855191c347c0409500aba66d3173[m
Author: John McCallum <cfljam@users.noreply.github.com>
Date:   Tue Feb 23 14:52:01 2016 +1300

    updated notebook


In [37]:
# Provenance: list the configured git remotes with their fetch/push URLs.
!git remote -v

origin	https://github.com/cfljam/pcr_marker_design (fetch)
origin	https://github.com/cfljam/pcr_marker_design (push)
upstream	https://github.com/PlantandFoodResearch/pcr_marker_design (fetch)
upstream	https://github.com/PlantandFoodResearch/pcr_marker_design (push)


In [38]:
# Provenance: snapshot of the package versions installed in this environment.
!pip freeze

appnope==0.1.0
backports-abc==0.4
backports.ssl-match-hostname==3.4.0.2
bcbio-gff==0.6.2
biopython==1.66
Cython==0.23.4
decorator==4.0.6
gister==1.0.23
ipykernel==4.2.2
ipython==4.1.1
ipython-genutils==0.1.0
ipywidgets==4.1.1
Jinja2==2.8
jsonschema==2.4.0
jupyter==1.0.0
jupyter-client==4.1.1
jupyter-console==4.1.0
jupyter-core==4.0.6
MarkupSafe==0.23
mistune==0.7.1
nbconvert==4.1.0
nbformat==4.0.1
notebook==4.1.0
numpy==1.10.4
pandas==0.17.1
path.py==0.0.0
-e git+https://github.com/cfljam/pcr_marker_design@2213ea5854e8855191c347c0409500aba66d3173#egg=pcr_marker_design
-e git+https://github.com/cfljam/galaxy-pcr-markers@1767a0e5204a2ddb49e08648b5d8e3cf78eacb95#egg=PCRMarkerDesign
pexpect==3.3
pickleshare==0.5
primer3-py==0.4.3
ptyprocess==0.5
py==1.4.31
pybedtools==0.7.6
pyfaidx==0.4.4
Pygments==2.1
pysam==0.8.4
pytest==2.8.7
python-dateutil==2.4.2
pytz==2015.7
pyzmq==15.2.0
qtconsole==4.1.1
requests==2.9.1
scipy==0.17.0
simplegeneric==0.8.1


In [92]:

from pybedtools import BedTool
from pyfaidx import Fasta

In [4]:

# Relative path to the GFF annotation test file.
# NOTE(review): later cells bind this same path to `annfile`; `test_ann` is not
# referenced again in the visible cells.
test_ann="../test/test-data/targets.gff"

In [94]:
# Show the kernel's working directory; the relative "../test/..." paths used in
# this notebook are resolved against it.
pwd

u'/Users/cfljam/Documents/pcr_marker_design/docs'

In [16]:
# Primer3 global ("PRIMER_*") settings, passed as `global_args` to the design
# call further down.  Tags are grouped by the quantity their name refers to.
p3_globals = dict(
    # primer size
    PRIMER_MIN_SIZE=18,
    PRIMER_OPT_SIZE=20,
    PRIMER_MAX_SIZE=25,
    # melting temperature
    PRIMER_MIN_TM=57.0,
    PRIMER_OPT_TM=60.0,
    PRIMER_MAX_TM=63.0,
    # GC content
    PRIMER_MIN_GC=20.0,
    PRIMER_MAX_GC=80.0,
    # base composition
    PRIMER_MAX_POLY_X=100,
    PRIMER_MAX_NS_ACCEPTED=0,
    # self / pair complementarity
    PRIMER_MAX_SELF_ANY=12,
    PRIMER_MAX_SELF_END=8,
    PRIMER_PAIR_MAX_COMPL_ANY=12,
    PRIMER_PAIR_MAX_COMPL_END=8,
    # salt and DNA concentration
    PRIMER_SALT_MONOVALENT=50.0,
    PRIMER_DNA_CONC=50.0,
    # internal oligo
    PRIMER_PICK_INTERNAL_OLIGO=0,
    PRIMER_INTERNAL_MAX_SELF_END=8,
    PRIMER_INTERNAL_MAX_POLY_X=100,
    # product size range; element [1] is reused below as the flank width
    # handed to getseqslicedict
    PRIMER_PRODUCT_SIZE_RANGE=[60, 250],
)

In [263]:
class PrimerDesign(object):
    """A primer design object that is primed with genome reference
    and variant data.

    Attributes:
        reference: the object returned by ``Fasta(reference)``.
        annotations: the ``BedTool`` built from ``annot_file``.
        desc: value emitted as ``SEQUENCE_ID`` by :meth:`getseqslicedict`.
        genome: path of the reference's ``.fai`` index, handed to
            ``BedTool.slop`` as its genome (``g``) file.
    """

    def __init__(self, reference, annot_file, desc):
        """Open the reference and annotation files.

        Args:
            reference: path to the reference FASTA file.
            annot_file: path to the annotation file, loaded with ``BedTool``.
            desc: description stored on the instance and used as
                ``SEQUENCE_ID``.
        """
        self.reference = Fasta(reference)
        self.annotations = BedTool(annot_file)
        self.desc = desc
        # pyfaidx names its index "<fasta path>.fai".  Append the suffix rather
        # than str.replace("fasta", "fasta.fai"): replace rewrote *every*
        # "fasta" in the path (directory names included) and left names such
        # as "ref.fa" unchanged, so the FASTA itself was used as genome file.
        self.genome = self.reference.filename + ".fai"

    def getseqslice(self, contig, start, end):
        """Return the reference slice ``contig[start:end]`` and the
        annotations contained in that window.

        Args:
            contig: name of the contig to slice.
            start: slice start, used directly as a Python slice bound.
            end: slice end, used directly as a Python slice bound.

        Returns:
            dict with ``seq`` (the sliced reference) and ``ann`` (list of
            annotation intervals on ``contig`` with ``start <= X.start`` and
            ``X.end <= end``).  Features touching the window boundary are
            included; features only partly inside the window are not.
        """
        seqslice = self.reference[contig][start:end]
        annslice = [feat for feat in self.annotations
                    if feat.chrom == contig
                    and start <= feat.start
                    and feat.end <= end]
        return dict(seq=seqslice, ann=annslice)

    def getseqslicedict(self, target, max_size):
        """Pass a target to a designer and get a dictionary
        slice that we can pass to P3.

        Args:
            target: BedTool whose first interval is the site to design around.
            max_size: padding added on both sides of the target (passed to
                ``slop`` as ``b``).

        Returns:
            dict with ``SEQUENCE_ID``, ``TARGET_ID``, ``SEQUENCE_TEMPLATE``,
            ``SEQUENCE_EXCLUDED_REGION`` (list of ``(start, length)`` tuples)
            and ``SEQUENCE_TARGET`` (a ``(start, length)`` tuple).  All
            coordinates are relative to the start of the padded window.
        """
        # Index each BedTool once and reuse the interval instead of
        # re-indexing for every coordinate lookup.
        window = target.slop(b=max_size, g=self.genome)[0]
        site = target[0]
        chrom = site.chrom
        win_start = window.start
        win_end = window.end

        # Other annotated features inside the window (the target itself is
        # subtracted first) become regions primers must avoid.
        excluded = [(feat.start - win_start, feat.length)
                    for feat in (self.annotations - target)
                    if feat.chrom == chrom
                    and win_start <= feat.start
                    and feat.end <= win_end]

        sldic = dict(SEQUENCE_ID=self.desc)
        sldic['TARGET_ID'] = chrom + "_" + str(win_start) + "_" + str(win_end)
        sldic['SEQUENCE_TEMPLATE'] = self.reference[chrom][win_start:win_end].seq
        sldic['SEQUENCE_EXCLUDED_REGION'] = excluded
        # SEQUENCE_TARGET asks Primer3 for primers that flank the site.
        # SEQUENCE_INCLUDED_REGION would instead confine primer picking to
        # the (typically 1 bp) site itself, leaving no room for any primer.
        sldic['SEQUENCE_TARGET'] = (site.start - win_start, site.length)
        return sldic


In [39]:
from pcr_marker_design import design as d
from pybedtools import BedTool

# Test fixtures, given relative to the notebook's working directory.
test_seq = "../test/test-data/targets.fasta"
annfile = "../test/test-data/targets.gff"

# Designer from the packaged module; the third argument is the description.
foo = d.PrimerDesign(test_seq, annfile, "target")

In [38]:
# Re-import the design module so on-disk edits to design.py are picked up by
# this kernel without a restart.
# NOTE(review): bare `reload` is a Python 2 builtin — presumably this notebook
# runs on a Python 2 kernel (outputs show u'' strings); confirm before reuse.
reload(d)

<module 'pcr_marker_design.design' from '/Users/cfljam/Documents/pcr_marker_design/pcr_marker_design/design.py'>

In [29]:
# One-interval BedTool built from an inline "chrom start end" string.
target = BedTool(
    "k69_93535 1146 1147",
    from_string=True,
)
print(target)

k69_93535	1146	1147



In [30]:
# Start coordinate of the first interval in `target`.
target[0].start

1146

In [31]:
# Second element of the product size range; the following cells pass it to
# getseqslicedict as `max_size`.
p3_globals['PRIMER_PRODUCT_SIZE_RANGE'][1]

250

In [40]:
# Sequence-argument dictionary for `target`, padded by the upper product-size
# bound; the bare name on the last line displays it.
seq_test = foo.getseqslicedict(
    target,
    p3_globals['PRIMER_PRODUCT_SIZE_RANGE'][1],
)
seq_test

{'SEQUENCE_EXCLUDED_REGION': [(244, 2), (439, 2)],
 'SEQUENCE_ID': 'target',
 'SEQUENCE_TARGET': (250, 1),
 'SEQUENCE_TEMPLATE': 'AAATAATGGAGAATAGATGGTTCAAGAATGGATTCGAGCCTGTGAAATATTACATTGAGAATGATAGGTTTCATAAGTGGTGTAGCTTAGACGAAGAGAATGCTAATGACAACGAGGAGGTAGAATCTGGAGATGAATCAGACTCTTCAGTTGCTTCCTGCCCTCCTACACTTAATGAAGGAAAGAAAAAAAGGACAGGGAAGCTTCATAGGCCTTTGAGTCTGAACGCATTTGACATAATTTCCTTTTCCAGAGGATTTGATCTTTCAGGTTTGTTTGAAGAAACGGGAGATGAAACAAGATTTGTGTCGGGTGAAACGATACCAAACATCATATCGAAATTGGAGGAGATTGCAAAAGTGGGTAGTTTCACGTTTAGGAAGAAGGATTGTAGGGTTAGTTTAGAAGGAACGCGAGAAGGAGTGAAGGGCCCTCTTACGATTGGAGCTGAGATATTTGAGCTTACGCCTAGTTTGGTTGTTGTTGAGCTTAAGAAGAAAG',
 'TARGET_ID': 'k69_93535_896_1397'}

In [27]:
# Coerce the TARGET_ID entry to str and display it.
str(seq_test['TARGET_ID'])

'k69_93535_896_1397'

In [20]:
import primer3 as P3

In [41]:
# Run Primer3: per-sequence arguments come from the designer, global settings
# from p3_globals.
P3.designPrimers(
    seq_args=foo.getseqslicedict(
        target,
        p3_globals['PRIMER_PRODUCT_SIZE_RANGE'][1],
    ),
    global_args=p3_globals,
)

{'PRIMER_INTERNAL_0': (304L, 26L),
 'PRIMER_INTERNAL_0_GC_PERCENT': 46.15384615384615,
 'PRIMER_INTERNAL_0_HAIRPIN_TH': 42.41935812013236,
 'PRIMER_INTERNAL_0_PENALTY': 7.074487853621633,
 'PRIMER_INTERNAL_0_SELF_ANY_TH': 0.844755456581197,
 'PRIMER_INTERNAL_0_SELF_END_TH': 0.0,
 'PRIMER_INTERNAL_0_SEQUENCE': 'TGTGTCGGGTGAAACGATACCAAACA',
 'PRIMER_INTERNAL_0_TM': 58.92551214637837,
 'PRIMER_INTERNAL_1': (304L, 26L),
 'PRIMER_INTERNAL_1_GC_PERCENT': 46.15384615384615,
 'PRIMER_INTERNAL_1_HAIRPIN_TH': 42.41935812013236,
 'PRIMER_INTERNAL_1_PENALTY': 7.074487853621633,
 'PRIMER_INTERNAL_1_SELF_ANY_TH': 0.844755456581197,
 'PRIMER_INTERNAL_1_SELF_END_TH': 0.0,
 'PRIMER_INTERNAL_1_SEQUENCE': 'TGTGTCGGGTGAAACGATACCAAACA',
 'PRIMER_INTERNAL_1_TM': 58.92551214637837,
 'PRIMER_INTERNAL_2': (304L, 26L),
 'PRIMER_INTERNAL_2_GC_PERCENT': 46.15384615384615,
 'PRIMER_INTERNAL_2_HAIRPIN_TH': 42.41935812013236,
 'PRIMER_INTERNAL_2_PENALTY': 7.074487853621633,
 'PRIMER_INTERNAL_2_SELF_ANY_TH': 0.844755

In [203]:
# Contig name of the first interval in `target`.
target[0].chrom

u'k69_93535'

In [210]:
# Pad the target by 100 with slop, using the designer's .fai path as the
# genome file, and print the resulting interval.
print(target.slop(b=100,g=foo.genome))

k69_93535	1046	1247



In [212]:
# Start coordinate of the padded interval.
target.slop(b=100,g=foo.genome)[0].start

1046

In [111]:
# Scratch check: build a dict, add a second key afterwards, display the result.
test = dict(seqid='test1')
test.update(foo='bar')
test

{'foo': 'bar', 'seqid': 'test1'}

In [100]:
from pcr_marker_design import design as d
# Test fixtures, given relative to the notebook's working directory.
test_seq = "../test/test-data/targets.fasta"
annfile = "../test/test-data/targets.gff"
# Sequence expected for k69_93535[250:290]; it is defined here but not
# asserted in this cell.
k69_93535_250_290 = 'GACAAAGAGAAAATCCTCAAATCCGGCCTCGTCAACCACA'
# PrimerDesign.__init__ takes (reference, annot_file, desc); the description
# was missing from this call.  'target' mirrors the earlier `foo` designer.
designer = d.PrimerDesign(test_seq, annfile, 'target')
seqslice = designer.getseqslice('k69_93535', 250, 290)

In [96]:
# Fetch the slice of contig k69_93535 between 1000 and 1500 from the designer.
designer.getseqslice('k69_93535',1000,1500)

u'AATGACAACGAGGAGGTAGAATCTGGAGATGAATCAGACTCTTCAGTTGCTTCCTGCCCTCCTACACTTAATGAAGGAAAGAAAAAAAGGACAGGGAAGCTTCATAGGCCTTTGAGTCTGAACGCATTTGACATAATTTCCTTTTCCAGAGGATTTGATCTTTCAGGTTTGTTTGAAGAAACGGGAGATGAAACAAGATTTGTGTCGGGTGAAACGATACCAAACATCATATCGAAATTGGAGGAGATTGCAAAAGTGGGTAGTTTCACGTTTAGGAAGAAGGATTGTAGGGTTAGTTTAGAAGGAACGCGAGAAGGAGTGAAGGGCCCTCTTACGATTGGAGCTGAGATATTTGAGCTTACGCCTAGTTTGGTTGTTGTTGAGCTTAAGAAGAAAGCAGGAGACAAAGCAGAGTACGAGGAGTTTTGTAACAAGGAGTTGAAACCTGGGTTGCTACATCTCATGTTTCCTAATGGCGGTGTTCCTTCCAACACAACTTC'

In [77]:
# All annotation intervals located on contig k69_93535.
[X for X in designer.annotations if X.chrom=="k69_93535"]

[Interval(k69_93535:2-7),
 Interval(k69_93535:4-6),
 Interval(k69_93535:1140-1142),
 Interval(k69_93535:1146-1148),
 Interval(k69_93535:1335-1337),
 Interval(k69_93535:2-4),
 Interval(k69_93535:660-662),
 Interval(k69_93535:750-752),
 Interval(k69_93535:886-888)]

In [81]:
# Annotations on k69_93535 lying strictly inside the window
# (start > 1000 and end < 1500).
[X for X in designer.annotations if (X.chrom=="k69_93535") & (X.start >1000)& (X.end < 1500)]

[Interval(k69_93535:1140-1142),
 Interval(k69_93535:1146-1148),
 Interval(k69_93535:1335-1337)]

In [60]:
# NOTE(review): this cell does not parse — the `=` sits inside the subscript
# (see the SyntaxError below).  Presumably an item assignment was intended;
# the `test['foo']='bar'` cell covers that experiment, so this scratch cell
# looks safe to delete — confirm.
foo['zip'=1234]

SyntaxError: invalid syntax (<ipython-input-60-584824af9423>, line 1)

In [55]:
# Display the slice fetched for k69_93535 between 250 and 290.
seqslice

u'GACAAAGAGAAAATCCTCAAATCCGGCCTCGTCAACCACA'

In [14]:
# List every interval in the designer's annotation set.
[X  for X in designer.annotations]

[Interval(k69_93535:2-7),
 Interval(k69_93535:4-6),
 Interval(k69_93535:1140-1142),
 Interval(k69_93535:1146-1148),
 Interval(k69_93535:1335-1337),
 Interval(k69_93535:2-4),
 Interval(k69_93535:660-662),
 Interval(k69_93535:750-752),
 Interval(k69_93535:886-888),
 Interval(k69_98089:1-6),
 Interval(k69_98089:2-6),
 Interval(k69_98089:3-6),
 Interval(k69_98089:4-6),
 Interval(k69_98089:721-723),
 Interval(k69_98089:725-727),
 Interval(k69_98089:728-731),
 Interval(k69_98089:740-748),
 Interval(k69_98089:741-748),
 Interval(k69_98089:742-748),
 Interval(k69_98089:1-3),
 Interval(k69_98089:29-31),
 Interval(k69_98089:2-4),
 Interval(k69_98089:452-454),
 Interval(k69_98089:514-516),
 Interval(k69_98089:521-523),
 Interval(k69_98089:549-551),
 Interval(k69_98089:624-626),
 Interval(k69_98089:692-694),
 Interval(k69_98089:716-718),
 Interval(k69_98089:722-724),
 Interval(k69_98089:725-727)]

In [17]:
# Attribute dictionary of the annotation record at index 3.
designer.annotations[3].attrs

{'AC1': '8',
 'AF1': '0.3527',
 'DP': '2645',
 'DP4': '733,804,447,519',
 'FQ': '999',
 'G3': '0.2771,0.7229,6.934e-153',
 'HWE': '0.0248',
 'ID': 'k69_93535:SAMTOOLS:SNP:1147',
 'MQ': '42',
 'PV4': '0.51,0,0.027,1',
 'Reference_seq': 'C',
 'VDB': '0.0371',
 'Variant_seq': 'G'}

In [20]:
# Ad-hoc window (k69_93535, 1000-1500) built from an inline string; used for
# the intersect check in the following cell.
b=BedTool("k69_93535 1000 1500",from_string=True)

In [22]:
# Intersect window `b` with the designer's annotations and print the result.
print(b.intersect(designer.annotations))

k69_93535	1141	1142
k69_93535	1147	1148
k69_93535	1336	1337



In [26]:
# FreeBayes VCF from the galaxy-pcr-markers test data.  Given relative to the
# notebook's working directory (.../pcr_marker_design/docs), like the other
# test-data paths, instead of an absolute per-user path; this assumes
# galaxy-pcr-markers is checked out next to pcr_marker_design.
vcf_test = '../../galaxy-pcr-markers/test-data/freebayes_out.vcf'

In [29]:
# `c` wraps the VCF file; `d` is an inline Chr1 1000-1500 window.
# NOTE(review): `d` is also the alias the earlier cells give to the
# pcr_marker_design.design module (`import design as d`); after this cell any
# `d.PrimerDesign(...)` call fails until the module is re-imported.
c=BedTool(vcf_test)
d=BedTool("Chr1 1000 1500",from_string=True)

In [34]:
# (start, length) pairs for the records of c-intersect-d selected by the
# [10:120] slice.
[(x. start,x.length) for x in c.intersect(d)[10:120]]

[(1460, 1), (1480, 1)]

In [30]:
# Print the result of intersecting the VCF-backed `c` with window `d`.
print(c.intersect(d))

Chr1	1007	.	T	A	17.1701	.	AB=0;ABP=0;AC=2;AF=1;AN=2;AO=1;CIGAR=1X;DP=1;DPRA=0;EPP=5.18177;EPPR=0;HWE=-0;LEN=1;MEANALT=1;MQM=40;MQMR=0;NS=1;NUMALT=1;ODDS=3.91202;PAIRED=0;PAIREDR=0;RO=0;RPP=5.18177;RPPR=0;RUN=1;SAP=5.18177;SRP=0;TYPE=snp;XAI=0.0103093;XAM=0.0309278;XAS=0.0206186;XRI=0;XRM=0;XRS=0;BVAR	GT:GQ:DP:RO:QR:AO:QA:GL	1/1:14.1547:1:0:0:1:17:-1.7,-0.30103,0
Chr1	1012	.	TGT	AGA	17.1701	.	AB=0;ABP=0;AC=2;AF=1;AN=2;AO=1;CIGAR=1X1M1X;DP=1;DPRA=0;EPP=5.18177;EPPR=0;HWE=-0;LEN=3;MEANALT=1;MQM=40;MQMR=0;NS=1;NUMALT=1;ODDS=3.91202;PAIRED=0;PAIREDR=0;RO=0;RPP=5.18177;RPPR=0;RUN=1;SAP=5.18177;SRP=0;TYPE=complex;XAI=0;XAM=0.0309278;XAS=0.0309278;XRI=0;XRM=0;XRS=0;BVAR	GT:GQ:DP:RO:QR:AO:QA:GL	1/1:14.1547:1:0:0:1:17:-1.7,-0.30103,0
Chr1	1056	.	A	C	17.1701	.	AB=0;ABP=0;AC=2;AF=1;AN=2;AO=1;CIGAR=1X;DP=1;DPRA=0;EPP=5.18177;EPPR=0;HWE=-0;LEN=1;MEANALT=1;MQM=40;MQMR=0;NS=1;NUMALT=1;ODDS=3.91202;PAIRED=0;PAIREDR=0;RO=0;RPP=5.18177;RPPR=0;RUN=1;SAP=5.18177;SRP=0;TYPE=snp;XAI=0.0103093;XAM=0.0309278;X