# Prototyping Design_primers for Design from VCF

### John McCallum Feb 2017

Use autoreload for development to reload modules automagically

In [1]:
# Development convenience: with autoreload enabled, edits to the imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import random

import pandas as pd
from pybedtools import BedTool, Interval
from pyfaidx import Fasta, FastaVariant

from pcr_marker_design import design as d
from pcr_marker_design import run_p3

In [3]:
ls ../test/test-data

384um_251453690362217.txt      Chr9_Myb210.vcf
AcCHR1_test.fasta              Chr9_Myb210.vcf.gz
AcCHR1_test.fasta.fai          targets
AcCHR1_test.phased.vcf.gz      targets.fasta
AcCHR1_test.phased.vcf.gz.tbi  targets.fasta.fai
AcCHR1_test.vcf.gz             targets.gff
AcCHR1_test.vcf.gz.tbi         targets.snps.bed
CHR9.1.68.5.fasta.gz


In [5]:
# Reference FASTA and VCF taken from the repository's test-data directory.
test_seq = "../test/test-data/AcCHR1_test.fasta"
vcffile = "../test/test-data/AcCHR1_test.vcf.gz"
# Designer object built from the reference, the VCF and the label "TestCHR1".
# NOTE(review): the role of the third argument is not visible in this notebook;
# confirm against VcfPrimerDesign.
designer = d.VcfPrimerDesign(test_seq, vcffile, "TestCHR1")

### Create a BedTool Target

In [52]:
# Pick three VCF records at random to use as design targets.
# - Seed the generator so that Restart & Run All selects the same records.
# - Parse the VCF once instead of re-opening it for every pick.
# Relies on `import random` in the imports cell.
random.seed(2017)
vcf_records = list(BedTool(vcffile))
targetBed = BedTool([random.choice(vcf_records) for _ in range(3)])

In [104]:
# Summarise the targets: keep columns 0, 1, 3 and 4 of the VCF records and
# give them readable names.
Tdf = (
    targetBed.to_dataframe()[[0, 1, 3, 4]]
    .rename(columns={0: 'CHR', 1: 'POS', 3: 'REF', 4: 'ALT'})
)
Tdf

Unnamed: 0,CHR,POS,REF,ALT
0,CHR1,6669,G,A
1,CHR1,4498,GAAAAAAAAT,GAAAAAAAAAT
2,CHR1,4948,C,T


In [84]:
# Printing the BedTool writes out the full text of each selected VCF record.
print(targetBed)

CHR1	6669	.	G	A	254.986	.	AB=0.413793;ABP=4.88226;AC=1;AF=0.0384615;AN=26;AO=12;CIGAR=1X;DP=368;DPB=368;DPRA=1.02655;EPP=3.0103;EPPR=13.7701;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=13;NUMALT=1;ODDS=20.758;PAIRED=1;PAIREDR=0.991573;PAO=0;PQA=0;PQR=0;PRO=0;QA=436;QR=12158;RO=356;RPP=3.0103;RPPR=4.98658;RUN=1;SAF=4;SAP=5.9056;SAR=8;SRF=110;SRP=115.829;SRR=246;TYPE=snp;technology.Illumina=1	GT:GQ:DP:RO:QR:AO:QA:GL	0/0:99:36:36:1277:0:0:0,-9.77628,-10	0/0:95.558:21:21:746:0:0:0,-5.71503,-10	0/0:99:36:36:1260:0:0:0,-9.77219,-10	0/0:99:41:41:1432:0:0:0,-10,-10	0/0:99:28:28:908:0:0:0,-7.45519,-10	0/0:99:27:27:889:0:0:0,-7.23979,-10	0/0:99:27:27:923:0:0:0,-7.33558,-10	0/1:0:29:17:599:12:436:-10,0,-10	0/0:90.1507:19:19:639:0:0:0,-5.17431,-10	0/0:99:24:24:769:0:0:0,-6.50756,-10	0/0:99:25:25:873:0:0:0,-6.78862,-10	0/0:98.2818:22:22:785:0:0:0,-5.98746,-10	0/0:99:33:33:1058:0:0:0,-8.88142,-10
CHR1	4498	.	GAAAAAAAAT	GAAAAAAAAAT	1027.82	.	AB=0.385965;ABP=15.8868;AC=2;AF=0.0769231;AN=26;AO=45;CIGAR=1M1

### Indexing into fasta reference

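Note the start and end coordinates: the Python slice `[6669:6670]` is 0-based and half-open, whereas the FASTA header in the output (`CHR1:6670-6670`) reports the same base in 1-based inclusive coordinates.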

In [78]:
# Slice a single base out of the reference; the output header reports this
# slice as CHR1:6670-6670.
designer.reference['CHR1'][6669:6670]

>CHR1:6670-6670
G

In [62]:
# Indexing the BedTool returns a pybedtools Interval for the first target.
targetBed[0]

Interval(CHR1:6669-6670)

In [63]:
# Start coordinate of the first target interval.
targetBed[0].start

6669

In [65]:
# End coordinate of the first target interval.
targetBed[0].end

6670

## Generate Primer3 Dict

In [85]:
# Build the Primer3 input dict for the first target.
# The second argument (120) presumably sets the flank on each side of the
# target: REF_OFFSET in the output is 6669 - 120. TODO confirm in getseqslicedict.
designer.getseqslicedict(targetBed[0],120)

{'REF_OFFSET': 6549,
 'SEQUENCE_EXCLUDED_REGION': [(18, 1),
  (65, 1),
  (82, 1),
  (118, 1),
  (119, 1),
  (125, 1),
  (151, 12),
  (172, 1),
  (233, 3)],
 'SEQUENCE_ID': 'CHR1:6549-6790',
 'SEQUENCE_TARGET': (120, 1),
 'SEQUENCE_TEMPLATE': 'CAAAATATCTAGAAAAGGAACTCTCAGAAAGATTCATGAGCGCTCTTTCAGAAATGCAATTTCTATAATTTTTGCAATTTCCGGAAGCTATTAGGGCTTCTTACGGAGGCGAGGGGCCGAGGCCCCGAAGAGTGGTTTTTTACTGGTGATATCTCTCTCTCTAAGATTCATGCAGATGAGAATGTCTCTAATATGTTGACGAAGCAAGTTATTAGAGATAAGTTCAAGCATTACTTGGACT',
 'TARGET_ID': 'CHR1:6669-6669'}

In [86]:
# Global Primer3 settings shared by every design run in this notebook.
# Keys are Primer3 global tag names; see the Primer3 manual for their meaning.
p3_globals = dict(
    PRIMER_OPT_SIZE=20,
    PRIMER_PICK_INTERNAL_OLIGO=0,
    PRIMER_INTERNAL_MAX_SELF_END=8,
    PRIMER_MIN_SIZE=18,
    PRIMER_MAX_SIZE=25,
    PRIMER_OPT_TM=60.0,
    PRIMER_MIN_TM=57.0,
    PRIMER_MAX_TM=63.0,
    PRIMER_MIN_GC=20.0,
    PRIMER_MAX_GC=80.0,
    PRIMER_MAX_POLY_X=100,
    PRIMER_INTERNAL_MAX_POLY_X=100,
    PRIMER_SALT_MONOVALENT=50.0,
    PRIMER_DNA_CONC=50.0,
    PRIMER_MAX_NS_ACCEPTED=0,
    PRIMER_MAX_SELF_ANY=12,
    PRIMER_MAX_SELF_END=8,
    PRIMER_PAIR_MAX_COMPL_ANY=12,
    PRIMER_PAIR_MAX_COMPL_END=8,
    PRIMER_PRODUCT_SIZE_RANGE=[60, 250],
)

### Design a single target

In [109]:
# Design primers for the second target only: build its Primer3 input dict
# first, then hand it to Primer3 together with the shared global settings.
second_target_slice = designer.getseqslicedict(targetBed[1], 120)
run_p3.run_P3(global_dict=p3_globals, target_dict=second_target_slice)

[{'AMPLICON_REGION': 'CHR1:4464-4603',
  'PRIMER_LEFT': (4463, 20),
  'PRIMER_LEFT_SEQUENCE': 'AGGCAGTGGAAAACAAGACA',
  'PRIMER_RIGHT': (4602, 22),
  'PRIMER_RIGHT_SEQUENCE': 'TGGAATGAGAGATGGGTGATCT',
  'SEQUENCE_ID': 'CHR1:4378-4619',
  'TARGET_ID': 'CHR1:4498-4498'},
 {'AMPLICON_REGION': 'CHR1:4434-4603',
  'PRIMER_LEFT': (4433, 21),
  'PRIMER_LEFT_SEQUENCE': 'TGGCTCGTTTCTCCTAATCCT',
  'PRIMER_RIGHT': (4602, 22),
  'PRIMER_RIGHT_SEQUENCE': 'TGGAATGAGAGATGGGTGATCT',
  'SEQUENCE_ID': 'CHR1:4378-4619',
  'TARGET_ID': 'CHR1:4498-4498'},
 {'AMPLICON_REGION': 'CHR1:4464-4614',
  'PRIMER_LEFT': (4463, 20),
  'PRIMER_LEFT_SEQUENCE': 'AGGCAGTGGAAAACAAGACA',
  'PRIMER_RIGHT': (4613, 23),
  'PRIMER_RIGHT_SEQUENCE': 'AGCTGAGGAAATGGAATGAGAGA',
  'SEQUENCE_ID': 'CHR1:4378-4619',
  'TARGET_ID': 'CHR1:4498-4498'},
 {'AMPLICON_REGION': 'CHR1:4464-4609',
  'PRIMER_LEFT': (4463, 20),
  'PRIMER_LEFT_SEQUENCE': 'AGGCAGTGGAAAACAAGACA',
  'PRIMER_RIGHT': (4608, 23),
  'PRIMER_RIGHT_SEQUENCE': 'AGGAAATGGAAT

### Design to all targets

In [126]:
# Start coordinate of every target, in BedTool order.
[interval.start for interval in targetBed]

[6669, 4498, 4948]

In [127]:
def f(x, flank=120):
    """Run Primer3 for one target interval and return its primer designs.

    Parameters
    ----------
    x : target interval (an element of ``targetBed``).
    flank : value passed as the second argument of
        ``designer.getseqslicedict``; defaults to the 120 used throughout
        this notebook.

    Returns
    -------
    Whatever ``run_p3.run_P3`` returns for this target, using the shared
    ``p3_globals`` settings.
    """
    return run_p3.run_P3(global_dict=p3_globals,
                         target_dict=designer.getseqslicedict(x, flank))

Generate a list of amplicons for all targets.

NB: the nested comprehension is needed to flatten the per-target lists into a single list.

In [144]:
# Design every target and flatten the per-target result lists into one list.
amps = [amplicon for target in targetBed for amplicon in f(target)]

In [146]:
# Tabulate the amplicons: one row per design, one column per result key.
TAmp=pd.DataFrame(amps)
TAmp

Unnamed: 0,AMPLICON_REGION,PRIMER_LEFT,PRIMER_LEFT_SEQUENCE,PRIMER_RIGHT,PRIMER_RIGHT_SEQUENCE,SEQUENCE_ID,TARGET_ID
0,CHR1:6640-6755,"(6639, 20)",TTAGGGCTTCTTACGGAGGC,"(6754, 24)",TGCTTCGTCAACATATTAGAGACA,CHR1:6549-6790,CHR1:6669-6669
1,CHR1:6645-6755,"(6644, 19)",GCTTCTTACGGAGGCGAGG,"(6754, 24)",TGCTTCGTCAACATATTAGAGACA,CHR1:6549-6790,CHR1:6669-6669
2,CHR1:6644-6755,"(6643, 19)",GGCTTCTTACGGAGGCGAG,"(6754, 24)",TGCTTCGTCAACATATTAGAGACA,CHR1:6549-6790,CHR1:6669-6669
3,CHR1:6640-6756,"(6639, 20)",TTAGGGCTTCTTACGGAGGC,"(6755, 25)",TTGCTTCGTCAACATATTAGAGACA,CHR1:6549-6790,CHR1:6669-6669
4,CHR1:4464-4603,"(4463, 20)",AGGCAGTGGAAAACAAGACA,"(4602, 22)",TGGAATGAGAGATGGGTGATCT,CHR1:4378-4619,CHR1:4498-4498
5,CHR1:4434-4603,"(4433, 21)",TGGCTCGTTTCTCCTAATCCT,"(4602, 22)",TGGAATGAGAGATGGGTGATCT,CHR1:4378-4619,CHR1:4498-4498
6,CHR1:4464-4614,"(4463, 20)",AGGCAGTGGAAAACAAGACA,"(4613, 23)",AGCTGAGGAAATGGAATGAGAGA,CHR1:4378-4619,CHR1:4498-4498
7,CHR1:4464-4609,"(4463, 20)",AGGCAGTGGAAAACAAGACA,"(4608, 23)",AGGAAATGGAATGAGAGATGGGT,CHR1:4378-4619,CHR1:4498-4498
8,CHR1:4883-4969,"(4882, 20)",CTTGCGTTTTGGGCACTTCA,"(4968, 20)",AAGTGCTTGAAGTTTGCCCT,CHR1:4828-5069,CHR1:4948-4948
9,CHR1:4882-4969,"(4881, 20)",ACTTGCGTTTTGGGCACTTC,"(4968, 20)",AAGTGCTTGAAGTTTGCCCT,CHR1:4828-5069,CHR1:4948-4948


In [152]:
%%bash
export https_proxy=https://proxy.pfr.co.nz:8080
export http_proxy=http://proxy.pfr.co.nz:8080

In [153]:
# Melt prediction for the first amplicon region. As the error output shows,
# this call makes an HTTPS request to www.dna.utah.edu, so it needs network
# access.
designer.meltSlice('CHR1:6640-6755')

ConnectionError: HTTPSConnectionPool(host='www.dna.utah.edu', port=443): Max retries exceeded with url: /db/services/cgi-bin/udesign.cgi?rs=0&dmso=0&cation=20&seq=TTAGGGCTTCTTACGGAGGCGAGGGGCCGAGGCCCCGAAGAGTGGTTTTTTACTGGTGATATCTCTCTCTCTAAGATTCATGCAGATGAGAATGTCTCTAATATGTTGACGAAGCA&mg=2 (Caused by NewConnectionError('<requests.packages.urllib3.connection.VerifiedHTTPSConnection object at 0x10dd98a90>: Failed to establish a new connection: [Errno 60] Operation timed out',))

### Check on Prototyped melt method

In [41]:
# Check of the prototyped melt method on a fixed region; the saved output is
# a pair of floats.
designer.meltSlice('CHR1:2879-3022')

(87.381523272214395, 86.980959097320167)

In [154]:
!/Users/johnmccallum/miniconda3/envs/Py3PCR/bin/faidx ../test/test-data/AcCHR1_test.fasta CHR1:2879-3022

/bin/sh: /Users/johnmccallum/miniconda3/envs/Py3PCR/bin/faidx: No such file or directory


In [94]:
# NOTE(review): a bare name only echoes the imported pybedtools Interval
# class; this looks like leftover exploration and can probably be removed.
Interval