# Prototyping Design_primers for Design from VCF

### John McCallum Feb 2017

Use autoreload for development to reload modules automagically

In [1]:
%load_ext autoreload
%autoreload 2
import random

### Load up Module

In [2]:
from pybedtools import BedTool, Interval
from pyfaidx import Fasta , FastaVariant
import pandas as pd

In [3]:
from pcr_marker_design import design as d 
from pcr_marker_design import run_p3 as P3


In [4]:
ls ../test/test-data

384um_251453690362217.txt      Chr9_Myb210.vcf
AcCHR1_test.fasta              Chr9_Myb210.vcf.gz
AcCHR1_test.fasta.fai          targets
AcCHR1_test.phased.vcf.gz      targets.fasta
AcCHR1_test.phased.vcf.gz.tbi  targets.fasta.fai
AcCHR1_test.vcf.gz             targets.gff
AcCHR1_test.vcf.gz.tbi         targets.snps.bed
CHR9.1.68.5.fasta.gz


## Initialise Designer

In [5]:
# Paths (relative to this notebook) to the test reference FASTA and the
# gzip-compressed VCF of variants called against it.
test_seq = "../test/test-data/AcCHR1_test.fasta"
vcffile = "../test/test-data/AcCHR1_test.vcf.gz"
# Build the designer from the reference sequence and the variant file.
# NOTE(review): the meaning of the third argument ("TestCHR1") is not visible
# here; presumably a label/description for the design run. Confirm against
# VcfPrimerDesign.
designer = d.VcfPrimerDesign(test_seq, vcffile, "TestCHR1")

### Create a BedTool Target

In [6]:
import numpy


In [7]:
# Pick 7 variant records at random (with replacement) from the VCF to use as
# design targets.
# The VCF-backed BedTool is created once instead of once per draw, and a
# dedicated fixed-seed generator makes the selection repeatable after a kernel
# restart without touching the global `random` state.
vcf_records = BedTool(vcffile)
target_rng = random.Random(2017)
targetBed = BedTool([target_rng.choice(vcf_records) for _ in range(7)])

In [8]:
# Tabulate the targets using VCF columns 0, 1, 3 and 4 (CHROM, POS, REF, ALT).
# .iloc selects by position, so this works whatever column labels
# to_dataframe() assigns; .copy() makes Tdf an independent frame so that adding
# a column below cannot trigger SettingWithCopyWarning.
Tdf = targetBed.to_dataframe().iloc[:, [0, 1, 3, 4]].copy()
Tdf.columns = ['CHR', 'POS', 'REF', 'ALT']
# TARGET_ID is "<CHR>:<POS>-<POS>", built column-wise rather than row by row.
pos_text = Tdf['POS'].astype(str)
Tdf['TARGET_ID'] = Tdf['CHR'] + ":" + pos_text + "-" + pos_text
Tdf

Unnamed: 0,CHR,POS,REF,ALT,TARGET_ID
0,CHR1,9371,G,A,CHR1:9371-9371
1,CHR1,9238,TGG,TGGG,CHR1:9238-9238
2,CHR1,3162,G,C,CHR1:3162-3162
3,CHR1,436,A,C,CHR1:436-436
4,CHR1,2908,T,G,CHR1:2908-2908
5,CHR1,8823,T,G,CHR1:8823-8823
6,CHR1,7055,A,C,CHR1:7055-7055


In [9]:
# Print the selected targets in their text form (one full VCF record per line).
print(targetBed)

CHR1	9371	.	G	A	1072.8	.	AB=0.547945;ABP=4.46786;AC=1;AF=0.0384615;AN=26;AO=40;CIGAR=1X;DP=942;DPB=942;DPRA=1.00806;EPP=3.87889;EPPR=18.8228;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=13;NUMALT=1;ODDS=42.9316;PAIRED=0.975;PAIREDR=0.984462;PAO=0;PQA=0;PQR=0;PRO=0;QA=1488;QR=33304;RO=901;RPP=3.22745;RPPR=3.70681;RUN=1;SAF=19;SAP=3.22745;SAR=21;SRF=489;SRP=17.2996;SRR=412;TYPE=snp;technology.Illumina=1	GT:GQ:DP:RO:QR:AO:QA:GL	0/1:0:73:33:1175:40:1488:-10,0,-10	0/0:0:65:65:2455:0:0:0,-10,-10	0/0:0:85:84:3140:0:0:0,-10,-10	0/0:0:99:99:3702:0:0:0,-10,-10	0/0:0:50:50:1866:0:0:0,-10,-10	0/0:0:76:76:2756:0:0:0,-10,-10	0/0:0:81:81:2994:0:0:0,-10,-10	0/0:0:51:51:1989:0:0:0,-10,-10	0/0:0:85:85:2882:0:0:0,-10,-10	0/0:0:77:77:2845:0:0:0,-10,-10	0/0:0:70:70:2671:0:0:0,-10,-10	0/0:0:66:66:2480:0:0:0,-10,-10	0/0:0:64:64:2349:0:0:0,-10,-10
CHR1	9238	.	TGG	TGGG	6626.6	.	AB=0.416327;ABP=32.8082;AC=9;AF=0.346154;AN=26;AO=267;CIGAR=1M1I2M;DP=879;DPB=1189.33;DPRA=1.11353;EPP=4.38475;EPPR=10.4378;GTI=0;LEN=1;MEA

### Indexing into fasta reference

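Note the start and end coordinates: the Python slice `[6669:6670]` is 0-based and half-open, but the returned record is labelled with the 1-based region `CHR1:6670-6670`.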

In [10]:
# Slice a single base out of the reference. The Python slice [6669:6670] is
# 0-based and half-open; the header echoed in the output reports the same base
# as the 1-based region CHR1:6670-6670.
designer.reference['CHR1'][6669:6670]

>CHR1:6670-6670
G

In [11]:
# First selected target, displayed as a pybedtools Interval.
targetBed[0]

Interval(CHR1:9371-9372)

In [12]:
# Start coordinate of the first target interval.
targetBed[0].start

9371

In [13]:
# End coordinate of the first target interval.
targetBed[0].end

9372

## Generate Primer3 Dict

In [14]:
# Build the per-target Primer3 input dictionary for the first target.
# NOTE(review): 120 looks like the flank length taken on each side of the
# target (in the output REF_OFFSET is start - 120 and SEQUENCE_TARGET begins at
# offset 120). Confirm against getseqslicedict.
designer.getseqslicedict(targetBed[0],120)

{'REF_OFFSET': 9251,
 'SEQUENCE_EXCLUDED_REGION': [(59, 1), (119, 1), (162, 1), (173, 1), (218, 1)],
 'SEQUENCE_ID': 'CHR1:9251-9492',
 'SEQUENCE_TARGET': (120, 1),
 'SEQUENCE_TEMPLATE': 'GTGTATTCGCCTCTAAAACCTGCTACAGGCCTACGTGTAGTATAGAGGTCATTATACCACCTTGGTATGACCATTACACAAAATATATCCTCCCACTTTCACTTATCATTATGGTCCTGTGTGATAACAAATTTCAATTTAATGGGATTAGCAATCCAATCCCAATCCCCTTACATTAATGGCTCATGGAATTGGATTAGTAATACCAACCCCCATGGAATACCTAATCCTTCACTTTTGG',
 'TARGET_ID': 'CHR1:9371-9371'}

In [15]:
# Primer3 global settings shared by every design run in this notebook.
p3_globals = dict(
    PRIMER_OPT_SIZE=20,
    PRIMER_PICK_INTERNAL_OLIGO=0,
    PRIMER_INTERNAL_MAX_SELF_END=8,
    # primer length limits
    PRIMER_MIN_SIZE=18,
    PRIMER_MAX_SIZE=25,
    # melting-temperature window
    PRIMER_OPT_TM=60.0,
    PRIMER_MIN_TM=57.0,
    PRIMER_MAX_TM=63.0,
    # GC-content limits
    PRIMER_MIN_GC=20.0,
    PRIMER_MAX_GC=80.0,
    # homopolymer-run limits
    PRIMER_MAX_POLY_X=100,
    PRIMER_INTERNAL_MAX_POLY_X=100,
    # reaction conditions
    PRIMER_SALT_MONOVALENT=50.0,
    PRIMER_DNA_CONC=50.0,
    PRIMER_MAX_NS_ACCEPTED=0,
    # self- and pair-complementarity limits
    PRIMER_MAX_SELF_ANY=12,
    PRIMER_MAX_SELF_END=8,
    PRIMER_PAIR_MAX_COMPL_ANY=12,
    PRIMER_PAIR_MAX_COMPL_END=8,
    # product size range and number of results requested
    PRIMER_PRODUCT_SIZE_RANGE=[60, 250],
    PRIMER_NUM_RETURN=2,
)

### Design a single target

In [16]:
# Design primers for the second target (index 1): first build its Primer3
# input slice, then run Primer3 with the shared global settings.
second_target_slice = designer.getseqslicedict(targetBed[1], 120)
P3.run_P3(global_dict=p3_globals, target_dict=second_target_slice)

[{'AMPLICON_REGION': 'CHR1:9168-9291',
  'PRIMER_LEFT': (9167, 20),
  'PRIMER_LEFT_SEQUENCE': 'AAGGCCCAAACCAATCCACA',
  'PRIMER_RIGHT': (9290, 21),
  'PRIMER_RIGHT_SEQUENCE': 'CTACACGTAGGCCTGTAGCAG',
  'SEQUENCE_ID': 'CHR1:9118-9359',
  'TARGET_ID': 'CHR1:9238-9238'}]

### Design to all targets

In [17]:
# Start coordinate of every selected target, in selection order.
[interval.start for interval in targetBed]

[9371, 9238, 3162, 436, 2908, 8823, 7055]

In [18]:
def f(x):
    """Design primers for one target interval.

    Builds the Primer3 input for ``x`` with ``designer.getseqslicedict(x, 120)``
    and passes it to ``P3.run_P3`` together with the notebook-wide
    ``p3_globals``. Returns the result of ``run_P3`` unchanged; the
    single-target cell earlier in this notebook shows it as a list of amplicon
    dictionaries.
    """
    return P3.run_P3(global_dict=p3_globals,
                     target_dict=designer.getseqslicedict(x, 120))


# Each target yields a list of amplicon dicts; flatten them into one list of
# records so that every amplicon becomes one DataFrame row.
amps = [amplicon for target in targetBed for amplicon in f(target)]
TAmp = pd.DataFrame(amps)
# Select columns by name. Integer positions such as [6, 0, 2, 4] only work when
# the dict keys happen to be sorted alphabetically and pandas falls back to
# positional lookup, which current pandas versions no longer do.
TAmp[['TARGET_ID', 'AMPLICON_REGION', 'PRIMER_LEFT_SEQUENCE', 'PRIMER_RIGHT_SEQUENCE']]

Unnamed: 0,TARGET_ID,AMPLICON_REGION,PRIMER_LEFT_SEQUENCE,PRIMER_RIGHT_SEQUENCE
0,CHR1:9371-9371,CHR1:9271-9466,CTGCTACAGGCCTACGTGTA,TGGGGGTTGGTATTACTAATCCA
1,CHR1:9238-9238,CHR1:9168-9291,AAGGCCCAAACCAATCCACA,CTACACGTAGGCCTGTAGCAG
2,CHR1:3162-3162,CHR1:3054-3184,TGAGCAAATCAGTGACACTAGGT,AGTATCACTGTGCCATGCTCC
3,CHR1:436-436,CHR1:337-481,GTTGCTCATGGTGGCTGTTG,GCCCAGTTGGAGATGGTGAG
4,CHR1:2908-2908,CHR1:2879-2986,ACTGTGTCGCACTGGGTTTT,TGCGTGTGCTTTATCGGAGG
5,CHR1:8823-8823,CHR1:8728-8844,ACCACCAAGTCGACATGTCC,AGGCCTTTGAAAGTGTCCGA
6,CHR1:7055-7055,CHR1:7025-7139,TGCCAAATTTTGCCAAGCCT,AGGCTTCGAGCATCTCCAAC


Generate lists of amplicons.

NB: the nested comprehension is needed to flatten the nested lists (one list of amplicons per target) into a single list.

In [19]:
%%bash
# Set the HTTP(S) proxy variables to proxy.pfr.co.nz:8080 and echo the result.
# NOTE(review): %%bash runs this cell in a child shell, so these exports do not
# persist into the Python kernel or any later cell. Use %env (or os.environ) if
# a kernel-wide proxy setting is intended.
export https_proxy=https://proxy.pfr.co.nz:8080
export http_proxy=http://proxy.pfr.co.nz:8080
env | grep proxy

http_proxy=http://proxy.pfr.co.nz:8080
https_proxy=https://proxy.pfr.co.nz:8080


In [23]:
%%bash
# Blank the HTTP(S) proxy variables for this shell and echo the result.
# NOTE(review): %%bash runs this cell in a child shell, so the change does not
# persist into the Python kernel or any later cell. Use %env (or os.environ) if
# a kernel-wide change is intended.
export https_proxy=
export http_proxy=
env | grep proxy

http_proxy=
https_proxy=


### Melt a single  Amplicon

Melts the reference and consensus amplicons, returning a tuple of the two predicted melting temperatures.

In [24]:
# Predict melting temperatures for a single amplicon region given as a
# "CHR:start-end" string; the result is displayed as a 2-tuple of floats.
designer.meltSlice('CHR1:6640-6755')

(85.629055007052187, 88.733427362482374)

### Run Prediction on All Amplicons in our results set

In [25]:
# Melt every designed amplicon; each Tm entry holds the tuple returned by
# meltSlice for that row's AMPLICON_REGION.
TAmp['Tm'] = TAmp['AMPLICON_REGION'].apply(designer.meltSlice)

In [26]:
# Absolute difference between the two melting temperatures in each Tm tuple.
TAmp['TmDiff'] = TAmp['Tm'].apply(lambda tm_pair: abs(tm_pair[0] - tm_pair[1]))
# Select the summary columns by name. Integer positions such as [6, 0, 8]
# depend on column ordering and on a positional fallback that current pandas
# versions no longer perform.
TAmp[['TARGET_ID', 'AMPLICON_REGION', 'TmDiff']]

Unnamed: 0,TARGET_ID,AMPLICON_REGION,TmDiff
0,CHR1:9371-9371,CHR1:9271-9466,0.0
1,CHR1:9238-9238,CHR1:9168-9291,0.150212
2,CHR1:3162-3162,CHR1:3054-3184,0.100141
3,CHR1:436-436,CHR1:337-481,0.0
4,CHR1:2908-2908,CHR1:2879-2986,0.300423
5,CHR1:8823-8823,CHR1:8728-8844,0.150212
6,CHR1:7055-7055,CHR1:7025-7139,0.0


In [27]:
# Show the target table again.
Tdf

Unnamed: 0,CHR,POS,REF,ALT,TARGET_ID
0,CHR1,9371,G,A,CHR1:9371-9371
1,CHR1,9238,TGG,TGGG,CHR1:9238-9238
2,CHR1,3162,G,C,CHR1:3162-3162
3,CHR1,436,A,C,CHR1:436-436
4,CHR1,2908,T,G,CHR1:2908-2908
5,CHR1,8823,T,G,CHR1:8823-8823
6,CHR1,7055,A,C,CHR1:7055-7055


### Gist this 

In [30]:
# Publish this notebook as a gist with the `gister` command-line tool.
# NOTE(review): -d presumably sets the gist description; confirm with gister's help.
!gister -d 'Walkthrough with Designer and Melt' ./2017-02-28PrototypingDesignPrimers.ipynb

https://gist.github.com/c37bd25d648bb11f47e653248e13d47f


See this gist as a slideshow at [Nbviewer](http://nbviewer.jupyter.org/format/slides/gist/cfljam/c37bd25d648bb11f47e653248e13d47f#/).

###  Update this Gist

In [34]:
# Push the current state of this notebook to the existing gist.
# NOTE(review): -e presumably edits/updates the gist at the given URL; confirm with gister's help.
!gister -e https://gist.github.com/c37bd25d648bb11f47e653248e13d47f ./2017-02-28PrototypingDesignPrimers.ipynb


https://gist.github.com/c37bd25d648bb11f47e653248e13d47f
