# Prototyping Usage for VCF Design

Use autoreload during development to reload modules automatically.

In [1]:
%load_ext autoreload
%autoreload 2

In [18]:
from pcr_marker_design import design as d 
from pcr_marker_design import run_p3
from pybedtools import BedTool

In [3]:
ls ../test/test-data

384um_251453690362217.txt      Chr9_Myb210.vcf
AcCHR1_test.fasta              Chr9_Myb210.vcf.gz
AcCHR1_test.fasta.fai          targets
AcCHR1_test.phased.vcf.gz      targets.fasta
AcCHR1_test.phased.vcf.gz.tbi  targets.fasta.fai
AcCHR1_test.vcf.gz             targets.gff
AcCHR1_test.vcf.gz.tbi         targets.snps.bed
CHR9.1.68.5.fasta.gz


In [4]:
# Reference FASTA and VCF taken from the repository's test data
# (both files appear in the directory listing above).
test_seq = "../test/test-data/AcCHR1_test.fasta"
vcffile = "../test/test-data/AcCHR1_test.vcf.gz"
# NOTE(review): the third argument looks like a free-text label for this
# design run -- confirm against VcfPrimerDesign's signature.
designer = d.VcfPrimerDesign(test_seq, vcffile, "TestCHR1")

In [9]:
# Single target interval "CHR1 3000 3001", built from an inline string
# (from_string=True) rather than read from a BED file on disk.
target = BedTool('CHR1 3000 3001', from_string=True)

In [43]:
# Preview the slice dictionary for the target with a second argument of 100:
# in the output below the slice starts at 2900 (REF_OFFSET) and the target
# sits at offset 100 within the template.
designer.getseqslicedict(target,100)

{'REF_OFFSET': 2900,
 'SEQUENCE_EXCLUDED_REGION': [(7, 1), (26, 1), (65, 1), (93, 1), (139, 1)],
 'SEQUENCE_ID': 'CHR1:2900-3101',
 'SEQUENCE_TARGET': (100, 1),
 'SEQUENCE_TEMPLATE': 'AGGAAATAAATAAATATGGAATAAAACATTGATATTACAAATAAAGGGTGCTTCTAGCTGAGTAGTCCTCCGATAAAGCACACGCATACAAAGGAATGAGAGAGAGAGAGAGAGGCGCTACCACATATAAAAGGGACAGCAAACATTTTAACATGAGCAAATCAGTGACACTAGGTAGGTGTTAGCACAAAAATGAACCTT',
 'TARGET_ID': 'CHR1:3000-3001'}

### Design

- Set the global defaults

In [22]:
# Global Primer3 settings for this notebook; the run_p3.run_P3 calls below
# receive this dict through their global_dict argument.
p3_globals = {
    "PRIMER_OPT_SIZE": 20,
    "PRIMER_PICK_INTERNAL_OLIGO": 0,
    "PRIMER_INTERNAL_MAX_SELF_END": 8,
    # primer length window
    "PRIMER_MIN_SIZE": 18,
    "PRIMER_MAX_SIZE": 25,
    # melting-temperature window
    "PRIMER_OPT_TM": 60.0,
    "PRIMER_MIN_TM": 57.0,
    "PRIMER_MAX_TM": 63.0,
    # GC-content window
    "PRIMER_MIN_GC": 20.0,
    "PRIMER_MAX_GC": 80.0,
    "PRIMER_MAX_POLY_X": 100,
    "PRIMER_INTERNAL_MAX_POLY_X": 100,
    "PRIMER_SALT_MONOVALENT": 50.0,
    "PRIMER_DNA_CONC": 50.0,
    "PRIMER_MAX_NS_ACCEPTED": 0,
    # self / pair complementarity limits
    "PRIMER_MAX_SELF_ANY": 12,
    "PRIMER_MAX_SELF_END": 8,
    "PRIMER_PAIR_MAX_COMPL_ANY": 12,
    "PRIMER_PAIR_MAX_COMPL_END": 8,
    # a single [min, max] product-size range
    "PRIMER_PRODUCT_SIZE_RANGE": [60, 250],
}

In [36]:
# Same preview with a second argument of 250: in the output below the slice
# starts at 2750 (REF_OFFSET) and the target sits at offset 250.
designer.getseqslicedict(target,250)

{'REF_OFFSET': 2750,
 'SEQUENCE_EXCLUDED_REGION': [(43, 1),
  (122, 1),
  (157, 1),
  (176, 1),
  (215, 1),
  (243, 1),
  (289, 1),
  (411, 1),
  (472, 1)],
 'SEQUENCE_ID': 'CHR1:2750-3251',
 'SEQUENCE_TARGET': (250, 1),
 'SEQUENCE_TEMPLATE': 'CTCAATTTCTTTAGAAGCTTCCAGAGTTGTTGAATTGGCAGCGGCAACTACAGTCGCAACTGTTCCTAGCTTTGCAGAACCATTCCCACTCAAGGAATTCACGGACTCTTTATGTGCCTTCAGAACCAACTGTGTCGCACTGGGTTTTAAAGGAAATAAATAAATATGGAATAAAACATTGATATTACAAATAAAGGGTGCTTCTAGCTGAGTAGTCCTCCGATAAAGCACACGCATACAAAGGAATGAGAGAGAGAGAGAGAGGCGCTACCACATATAAAAGGGACAGCAAACATTTTAACATGAGCAAATCAGTGACACTAGGTAGGTGTTAGCACAAAAATGAACCTTGTTTACATCTGTTCACCACATCCTAGAACATCTTAGACACACACTGCAATAACATATGAGGTGGAGCATGGCACAGTGATACTGCAACAGTAGGATTCCCTGTAACTCTAATGCAACTTTTCATGTACTCAGCCTCTCAAATGATATCGC',
 'TARGET_ID': 'CHR1:3000-3001'}

In [40]:
# Design primers for the target slice, using the notebook-wide Primer3
# globals.
run_p3.run_P3(
    global_dict=p3_globals,
    target_dict=designer.getseqslicedict(target, 250),
)

[{'AMPLICON_REGION': 'CHR1:2879-3022',
  'PRIMER_LEFT': (2878, 20),
  'PRIMER_LEFT_SEQUENCE': 'ACTGTGTCGCACTGGGTTTT',
  'PRIMER_RIGHT': (3021, 20),
  'PRIMER_RIGHT_SEQUENCE': 'GGTAGCGCCTCTCTCTCTCT',
  'SEQUENCE_ID': 'CHR1:2750-3251',
  'TARGET_ID': 'CHR1:3000-3001'},
 {'AMPLICON_REGION': 'CHR1:2878-3022',
  'PRIMER_LEFT': (2877, 20),
  'PRIMER_LEFT_SEQUENCE': 'AACTGTGTCGCACTGGGTTT',
  'PRIMER_RIGHT': (3021, 20),
  'PRIMER_RIGHT_SEQUENCE': 'GGTAGCGCCTCTCTCTCTCT',
  'SEQUENCE_ID': 'CHR1:2750-3251',
  'TARGET_ID': 'CHR1:3000-3001'},
 {'AMPLICON_REGION': 'CHR1:2879-3026',
  'PRIMER_LEFT': (2878, 20),
  'PRIMER_LEFT_SEQUENCE': 'ACTGTGTCGCACTGGGTTTT',
  'PRIMER_RIGHT': (3025, 20),
  'PRIMER_RIGHT_SEQUENCE': 'ATGTGGTAGCGCCTCTCTCT',
  'SEQUENCE_ID': 'CHR1:2750-3251',
  'TARGET_ID': 'CHR1:3000-3001'},
 {'AMPLICON_REGION': 'CHR1:2878-3026',
  'PRIMER_LEFT': (2877, 20),
  'PRIMER_LEFT_SEQUENCE': 'AACTGTGTCGCACTGGGTTT',
  'PRIMER_RIGHT': (3025, 20),
  'PRIMER_RIGHT_SEQUENCE': 'ATGTGGTAGCGCCTCTCTCT

### Check out this Built-in Function!



In [42]:
!faidx ../test/test-data/AcCHR1_test.fasta CHR1:2879-3022 CHR1:2878-3022 CHR1:2879-3026 CHR1:2878-3026

>CHR1:2879-3022
ACTGTGTCGCACTGGGTTTTAAAGGAAATAAATAAATATGGAATAAAACATTGATATTACAAATAAAGGGTGCTTCTAGC
TGAGTAGTCCTCCGATAAAGCACACGCATACAAAGGAATGAGAGAGAGAGAGAGAGGCGCTACC
>CHR1:2878-3022
AACTGTGTCGCACTGGGTTTTAAAGGAAATAAATAAATATGGAATAAAACATTGATATTACAAATAAAGGGTGCTTCTAG
CTGAGTAGTCCTCCGATAAAGCACACGCATACAAAGGAATGAGAGAGAGAGAGAGAGGCGCTACC
>CHR1:2879-3026
ACTGTGTCGCACTGGGTTTTAAAGGAAATAAATAAATATGGAATAAAACATTGATATTACAAATAAAGGGTGCTTCTAGC
TGAGTAGTCCTCCGATAAAGCACACGCATACAAAGGAATGAGAGAGAGAGAGAGAGGCGCTACCACAT
>CHR1:2878-3026
AACTGTGTCGCACTGGGTTTTAAAGGAAATAAATAAATATGGAATAAAACATTGATATTACAAATAAAGGGTGCTTCTAG
CTGAGTAGTCCTCCGATAAAGCACACGCATACAAAGGAATGAGAGAGAGAGAGAGAGGCGCTACCACAT


In [44]:
# Hand-written target record with an inline template sequence, as opposed to
# one produced by designer.getseqslicedict.
test = {
    "SEQUENCE_ID": "MH1000",
    "TARGET_ID": "MH1000:",
    "SEQUENCE_TEMPLATE": (
        "GCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCCTACATTTT"
        "AGCATCAGTGAGTACAGCATGCTTACTGGAAGAGAGGGTCATGCA"
        "ACAGATTAGGAGGTAAGTTTGCAAAGGCAGGCTAAGGAGGAGACG"
        "CACTGAATGCCATGGTAAGAACTCTGGACATAAAAATATTGGAAG"
        "TTGTTGAGCAAGTAAAAAAAATGTTTGGAAGTGTTACTTTAGCAA"
        "TGGCAAGAATGATAGTATGGAATAGATTGGCAGAATGAAGGCAAA"
        "ATGATTAGACATATTGCATTAAGGTAAAAAATGATAACTGAAGAA"
        "TTATGTGCCACACTTATTAATAAGAAAGAATATGTGAACCTTGCA"
        "GATGTTTCCCTCTAGTAG"
    ),
    "SEQUENCE_INCLUDED_REGION": [36, 342],
}

# Alternative Primer3 global settings. Compared with p3_globals, only two
# entries differ: PRIMER_PICK_INTERNAL_OLIGO is 1 instead of 0, and
# PRIMER_PRODUCT_SIZE_RANGE is a list of [min, max] ranges instead of a
# single [60, 250] range.
# The original line read `test_glob== {`, a comparison rather than an
# assignment: it raises NameError on a fresh kernel and never binds the name.
# NOTE(review): no visible cell passes test_glob to run_P3 -- confirm whether
# the following call was meant to use it.
test_glob = {
    'PRIMER_OPT_SIZE': 20,
    'PRIMER_PICK_INTERNAL_OLIGO': 1,
    'PRIMER_INTERNAL_MAX_SELF_END': 8,
    'PRIMER_MIN_SIZE': 18,
    'PRIMER_MAX_SIZE': 25,
    'PRIMER_OPT_TM': 60.0,
    'PRIMER_MIN_TM': 57.0,
    'PRIMER_MAX_TM': 63.0,
    'PRIMER_MIN_GC': 20.0,
    'PRIMER_MAX_GC': 80.0,
    'PRIMER_MAX_POLY_X': 100,
    'PRIMER_INTERNAL_MAX_POLY_X': 100,
    'PRIMER_SALT_MONOVALENT': 50.0,
    'PRIMER_DNA_CONC': 50.0,
    'PRIMER_MAX_NS_ACCEPTED': 0,
    'PRIMER_MAX_SELF_ANY': 12,
    'PRIMER_MAX_SELF_END': 8,
    'PRIMER_PAIR_MAX_COMPL_ANY': 12,
    'PRIMER_PAIR_MAX_COMPL_END': 8,
    'PRIMER_PRODUCT_SIZE_RANGE': [
        [75, 100], [100, 125], [125, 150],
        [150, 175], [175, 200], [200, 225],
    ],
}

In [45]:
# Design primers for the hand-written `test` record.
# NOTE(review): the globals passed here are p3_globals, not the test_glob
# dict from the previous cell -- confirm that is intended.
run_p3.run_P3(
    global_dict=p3_globals,
    target_dict=test,
)

[{'AMPLICON_REGION': 'MH1000:71-133',
  'PRIMER_LEFT': (70, 20),
  'PRIMER_LEFT_SEQUENCE': 'CTGGAAGAGAGGGTCATGCA',
  'PRIMER_RIGHT': (132, 20),
  'PRIMER_RIGHT_SEQUENCE': 'TCTCCTCCTTAGCCTGCCTT',
  'SEQUENCE_ID': 'MH1000',
  'TARGET_ID': 'MH1000:'},
 {'AMPLICON_REGION': 'MH1000:70-133',
  'PRIMER_LEFT': (69, 20),
  'PRIMER_LEFT_SEQUENCE': 'ACTGGAAGAGAGGGTCATGC',
  'PRIMER_RIGHT': (132, 20),
  'PRIMER_RIGHT_SEQUENCE': 'TCTCCTCCTTAGCCTGCCTT',
  'SEQUENCE_ID': 'MH1000',
  'TARGET_ID': 'MH1000:'},
 {'AMPLICON_REGION': 'MH1000:73-133',
  'PRIMER_LEFT': (72, 21),
  'PRIMER_LEFT_SEQUENCE': 'GGAAGAGAGGGTCATGCAACA',
  'PRIMER_RIGHT': (132, 20),
  'PRIMER_RIGHT_SEQUENCE': 'TCTCCTCCTTAGCCTGCCTT',
  'SEQUENCE_ID': 'MH1000',
  'TARGET_ID': 'MH1000:'},
 {'AMPLICON_REGION': 'MH1000:72-133',
  'PRIMER_LEFT': (71, 21),
  'PRIMER_LEFT_SEQUENCE': 'TGGAAGAGAGGGTCATGCAAC',
  'PRIMER_RIGHT': (132, 20),
  'PRIMER_RIGHT_SEQUENCE': 'TCTCCTCCTTAGCCTGCCTT',
  'SEQUENCE_ID': 'MH1000',
  'TARGET_ID': 'MH1000:'}]