# Prototyping Usage for VCF Design

### John McCallum Feb 2017

Use autoreload for development to reload modules automagically

In [64]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [63]:
from pcr_marker_design import design as d 
from pcr_marker_design import run_p3
from pybedtools import BedTool, Interval
from pyfaidx import Fasta , FastaVariant

In [6]:
ls ../test/test-data

384um_251453690362217.txt      Chr9_Myb210.vcf
AcCHR1_test.fasta              Chr9_Myb210.vcf.gz
AcCHR1_test.fasta.fai          targets
AcCHR1_test.phased.vcf.gz      targets.fasta
AcCHR1_test.phased.vcf.gz.tbi  targets.fasta.fai
AcCHR1_test.vcf.gz             targets.gff
AcCHR1_test.vcf.gz.tbi         targets.snps.bed
CHR9.1.68.5.fasta.gz


In [7]:
test_seq = "../test/test-data/AcCHR1_test.fasta"
vcffile = "../test/test-data/AcCHR1_test.vcf.gz"
designer = d.VcfPrimerDesign(test_seq, vcffile, "TestCHR1")



In [8]:
designer.annot.samples

['CK51_02',
 'CK51_05',
 'CK51_06',
 'CK51_09',
 'CK51_11',
 'Hort16A',
 'Hort22d',
 'Russell',
 'T03.51-11-28f',
 'T94.30-03-10f',
 'T94.30-04-08b',
 'T94.30-04-08c',
 'T94.30-04-08d']

### Create a BedTool Target

In [9]:
target = BedTool('CHR1 3000 3001', from_string=True)

### Create a target Interval

In [91]:
target_interval=Interval('CHR1',3000,3001)

In [84]:
target_interval.chrom

'CHR1'

In [85]:
target_interval.start

3000

In [90]:
target_interval.length

1

In [None]:
BedTool.

at 2c01be2662fe6466c3826b5a06ec81b9472f9f4c this needs a Bedtool length 1

In [93]:
designer.getseqslicedict(target,100)

AttributeError: 'BedTool' object has no attribute 'chrom'

Doesn't work with INterval

In [106]:
designer.getseqslicedict(target_interval,100)

NameError: name 'target' is not defined

In [100]:
print('\t'.join(map(str,['1dd','2','3'])))

1dd	2	3


### Design

-set globals defaults

In [11]:
p3_globals={
        'PRIMER_OPT_SIZE': 20,
        'PRIMER_PICK_INTERNAL_OLIGO': 0,
        'PRIMER_INTERNAL_MAX_SELF_END': 8,
        'PRIMER_MIN_SIZE': 18,
        'PRIMER_MAX_SIZE': 25,
        'PRIMER_OPT_TM': 60.0,
        'PRIMER_MIN_TM': 57.0,
        'PRIMER_MAX_TM': 63.0,
        'PRIMER_MIN_GC': 20.0,
        'PRIMER_MAX_GC': 80.0,
        'PRIMER_MAX_POLY_X': 100,
        'PRIMER_INTERNAL_MAX_POLY_X': 100,
        'PRIMER_SALT_MONOVALENT': 50.0,
        'PRIMER_DNA_CONC': 50.0,
        'PRIMER_MAX_NS_ACCEPTED': 0,
        'PRIMER_MAX_SELF_ANY': 12,
        'PRIMER_MAX_SELF_END': 8,
        'PRIMER_PAIR_MAX_COMPL_ANY': 12,
        'PRIMER_PAIR_MAX_COMPL_END': 8,
        'PRIMER_PRODUCT_SIZE_RANGE': [60,250],
    }

In [48]:
designer.getseqslicedict(target,250)

{'REF_OFFSET': 2750,
 'SEQUENCE_EXCLUDED_REGION': [(43, 1),
  (122, 1),
  (157, 1),
  (176, 1),
  (215, 1),
  (243, 1),
  (289, 1),
  (411, 1),
  (472, 1)],
 'SEQUENCE_ID': 'CHR1:2750-3251',
 'SEQUENCE_TARGET': (250, 1),
 'SEQUENCE_TEMPLATE': 'CTCAATTTCTTTAGAAGCTTCCAGAGTTGTTGAATTGGCAGCGGCAACTACAGTCGCAACTGTTCCTAGCTTTGCAGAACCATTCCCACTCAAGGAATTCACGGACTCTTTATGTGCCTTCAGAACCAACTGTGTCGCACTGGGTTTTAAAGGAAATAAATAAATATGGAATAAAACATTGATATTACAAATAAAGGGTGCTTCTAGCTGAGTAGTCCTCCGATAAAGCACACGCATACAAAGGAATGAGAGAGAGAGAGAGAGGCGCTACCACATATAAAAGGGACAGCAAACATTTTAACATGAGCAAATCAGTGACACTAGGTAGGTGTTAGCACAAAAATGAACCTTGTTTACATCTGTTCACCACATCCTAGAACATCTTAGACACACACTGCAATAACATATGAGGTGGAGCATGGCACAGTGATACTGCAACAGTAGGATTCCCTGTAACTCTAATGCAACTTTTCATGTACTCAGCCTCTCAAATGATATCGC',
 'TARGET_ID': 'CHR1:3001-3001'}

In [49]:
run_p3.run_P3(global_dict=p3_globals,target_dict=designer.getseqslicedict(target,250))

[{'AMPLICON_REGION': 'CHR1:2879-3022',
  'PRIMER_LEFT': (2878, 20),
  'PRIMER_LEFT_SEQUENCE': 'ACTGTGTCGCACTGGGTTTT',
  'PRIMER_RIGHT': (3021, 20),
  'PRIMER_RIGHT_SEQUENCE': 'GGTAGCGCCTCTCTCTCTCT',
  'SEQUENCE_ID': 'CHR1:2750-3251',
  'TARGET_ID': 'CHR1:3001-3001'},
 {'AMPLICON_REGION': 'CHR1:2878-3022',
  'PRIMER_LEFT': (2877, 20),
  'PRIMER_LEFT_SEQUENCE': 'AACTGTGTCGCACTGGGTTT',
  'PRIMER_RIGHT': (3021, 20),
  'PRIMER_RIGHT_SEQUENCE': 'GGTAGCGCCTCTCTCTCTCT',
  'SEQUENCE_ID': 'CHR1:2750-3251',
  'TARGET_ID': 'CHR1:3001-3001'},
 {'AMPLICON_REGION': 'CHR1:2879-3026',
  'PRIMER_LEFT': (2878, 20),
  'PRIMER_LEFT_SEQUENCE': 'ACTGTGTCGCACTGGGTTTT',
  'PRIMER_RIGHT': (3025, 20),
  'PRIMER_RIGHT_SEQUENCE': 'ATGTGGTAGCGCCTCTCTCT',
  'SEQUENCE_ID': 'CHR1:2750-3251',
  'TARGET_ID': 'CHR1:3001-3001'},
 {'AMPLICON_REGION': 'CHR1:2878-3026',
  'PRIMER_LEFT': (2877, 20),
  'PRIMER_LEFT_SEQUENCE': 'AACTGTGTCGCACTGGGTTT',
  'PRIMER_RIGHT': (3025, 20),
  'PRIMER_RIGHT_SEQUENCE': 'ATGTGGTAGCGCCTCTCTCT

### Check out this Built-in Function!

eg

```
faidx ../test/test-data/AcCHR1_test.fasta CHR1:2879-3022 CHR1:2878-3022 CHR1:2879-3026 CHR1:2878-3026
```


### Try out Variant Features in PyFaidx

In [50]:
from pyfaidx import FastaVariant, Fasta
ref=Fasta(test_seq,as_raw=True)
alt=FastaVariant(test_seq,vcffile,het=True, hom=True,sample=None, as_raw=True)
consensus_1=FastaVariant(test_seq,vcffile,het=True, hom=True,sample='CK51_02')
consensus_2=FastaVariant(test_seq,vcffile,het=True, hom=True,sample='CK51_09')



In [51]:
alt['CHR1'].variant_sites[:10]

(436, 542, 1024, 1218, 1604, 1634, 1893, 2085, 2143, 2241)

In [52]:
ref['CHR1'][430:450]

'TCCTAGCTTCTGTTGCTGGA'

In [53]:
alt['CHR1'][430:450]

'TCCTACCTTCTGTTGCTGGA'

### Melt the Ref and Alt consensus sequences at uMelt

In [54]:
from pcr_marker_design import umelt_service as um

In [26]:
## !export https_proxy=

In [27]:
umelt=um.UmeltService()
refmelt= um.MeltSeq(ref['CHR1'][430:450])
altmelt= um.MeltSeq(ref['CHR1'][430:450])

In [28]:
ref_melt_Tm=umelt.get_helicity_info(umelt.get_response(refmelt)).get_melting_temp()

In [29]:
alt_melt_Tm=umelt.get_helicity_info(umelt.get_response(altmelt)).get_melting_temp()

In [30]:
ref_melt_Tm

68.905500705218614

In [31]:
alt_melt_Tm

68.905500705218614

In [32]:
run_p3.run_P3(global_dict=p3_globals,
              target_dict=designer.getseqslicedict(BedTool('CHR1 2492 2493', from_string=True),250))

[{'AMPLICON_REGION': 'CHR1:2465-2699',
  'PRIMER_LEFT': (2464, 21),
  'PRIMER_LEFT_SEQUENCE': 'AGAGTTCTGAGTTCCTCGTGT',
  'PRIMER_RIGHT': (2698, 20),
  'PRIMER_RIGHT_SEQUENCE': 'CCAACCGATGGTCCAATGGA',
  'SEQUENCE_ID': 'CHR1:2242-2743',
  'TARGET_ID': 'CHR1:2493-2493'},
 {'AMPLICON_REGION': 'CHR1:2464-2699',
  'PRIMER_LEFT': (2463, 22),
  'PRIMER_LEFT_SEQUENCE': 'AAGAGTTCTGAGTTCCTCGTGT',
  'PRIMER_RIGHT': (2698, 20),
  'PRIMER_RIGHT_SEQUENCE': 'CCAACCGATGGTCCAATGGA',
  'SEQUENCE_ID': 'CHR1:2242-2743',
  'TARGET_ID': 'CHR1:2493-2493'},
 {'AMPLICON_REGION': 'CHR1:2465-2688',
  'PRIMER_LEFT': (2464, 21),
  'PRIMER_LEFT_SEQUENCE': 'AGAGTTCTGAGTTCCTCGTGT',
  'PRIMER_RIGHT': (2687, 20),
  'PRIMER_RIGHT_SEQUENCE': 'TCCAATGGATGACCTCACGG',
  'SEQUENCE_ID': 'CHR1:2242-2743',
  'TARGET_ID': 'CHR1:2493-2493'},
 {'AMPLICON_REGION': 'CHR1:2463-2699',
  'PRIMER_LEFT': (2462, 23),
  'PRIMER_LEFT_SEQUENCE': 'AAAGAGTTCTGAGTTCCTCGTGT',
  'PRIMER_RIGHT': (2698, 20),
  'PRIMER_RIGHT_SEQUENCE': 'CCAACCGATGGTC

### Try Flanking= False option and Melt Amplicons

In [55]:
import pandas as pd

In [56]:
test=run_p3.run_P3(global_dict=p3_globals,
              target_dict=designer.getseqslicedict(BedTool('CHR1 2400 2700', from_string=True),250,flanking= False))

In [57]:
test

[{'AMPLICON_REGION': 'CHR1:2547-2699',
  'PRIMER_LEFT': (2546, 20),
  'PRIMER_LEFT_SEQUENCE': 'TGATGTAGGTCGTTTGGCCT',
  'PRIMER_RIGHT': (2698, 20),
  'PRIMER_RIGHT_SEQUENCE': 'CCAACCGATGGTCCAATGGA',
  'SEQUENCE_ID': 'CHR1:2400-2700',
  'TARGET_ID': 'CHR1:2401-2700'},
 {'AMPLICON_REGION': 'CHR1:2554-2699',
  'PRIMER_LEFT': (2553, 20),
  'PRIMER_LEFT_SEQUENCE': 'GGTCGTTTGGCCTAATTCCC',
  'PRIMER_RIGHT': (2698, 20),
  'PRIMER_RIGHT_SEQUENCE': 'CCAACCGATGGTCCAATGGA',
  'SEQUENCE_ID': 'CHR1:2400-2700',
  'TARGET_ID': 'CHR1:2401-2700'},
 {'AMPLICON_REGION': 'CHR1:2544-2699',
  'PRIMER_LEFT': (2543, 21),
  'PRIMER_LEFT_SEQUENCE': 'GCTTGATGTAGGTCGTTTGGC',
  'PRIMER_RIGHT': (2698, 20),
  'PRIMER_RIGHT_SEQUENCE': 'CCAACCGATGGTCCAATGGA',
  'SEQUENCE_ID': 'CHR1:2400-2700',
  'TARGET_ID': 'CHR1:2401-2700'},
 {'AMPLICON_REGION': 'CHR1:2553-2699',
  'PRIMER_LEFT': (2552, 21),
  'PRIMER_LEFT_SEQUENCE': 'AGGTCGTTTGGCCTAATTCCC',
  'PRIMER_RIGHT': (2698, 20),
  'PRIMER_RIGHT_SEQUENCE': 'CCAACCGATGGTCCAATG

In [58]:
test_df=pd.DataFrame.from_dict(test)
test_df

Unnamed: 0,AMPLICON_REGION,PRIMER_LEFT,PRIMER_LEFT_SEQUENCE,PRIMER_RIGHT,PRIMER_RIGHT_SEQUENCE,SEQUENCE_ID,TARGET_ID
0,CHR1:2547-2699,"(2546, 20)",TGATGTAGGTCGTTTGGCCT,"(2698, 20)",CCAACCGATGGTCCAATGGA,CHR1:2400-2700,CHR1:2401-2700
1,CHR1:2554-2699,"(2553, 20)",GGTCGTTTGGCCTAATTCCC,"(2698, 20)",CCAACCGATGGTCCAATGGA,CHR1:2400-2700,CHR1:2401-2700
2,CHR1:2544-2699,"(2543, 21)",GCTTGATGTAGGTCGTTTGGC,"(2698, 20)",CCAACCGATGGTCCAATGGA,CHR1:2400-2700,CHR1:2401-2700
3,CHR1:2553-2699,"(2552, 21)",AGGTCGTTTGGCCTAATTCCC,"(2698, 20)",CCAACCGATGGTCCAATGGA,CHR1:2400-2700,CHR1:2401-2700


In [59]:
test_df.AMPLICON_REGION

0    CHR1:2547-2699
1    CHR1:2554-2699
2    CHR1:2544-2699
3    CHR1:2553-2699
Name: AMPLICON_REGION, dtype: object

In [60]:
designer.meltSlice(test_df.AMPLICON_REGION[0])

(86.380112834978846, 86.680535966149506)

In [61]:
test_df['Melt']=test_df['AMPLICON_REGION'].apply(designer.meltSlice)

In [62]:
test_df['TmDiff']=test_df.Melt.apply(lambda X : abs(X[0]-X[1]))
test_df

Unnamed: 0,AMPLICON_REGION,PRIMER_LEFT,PRIMER_LEFT_SEQUENCE,PRIMER_RIGHT,PRIMER_RIGHT_SEQUENCE,SEQUENCE_ID,TARGET_ID,Melt,TmDiff
0,CHR1:2547-2699,"(2546, 20)",TGATGTAGGTCGTTTGGCCT,"(2698, 20)",CCAACCGATGGTCCAATGGA,CHR1:2400-2700,CHR1:2401-2700,"(86.380112835, 86.6805359661)",0.300423
1,CHR1:2554-2699,"(2553, 20)",GGTCGTTTGGCCTAATTCCC,"(2698, 20)",CCAACCGATGGTCCAATGGA,CHR1:2400-2700,CHR1:2401-2700,"(86.2299012694, 86.3300423131)",0.100141
2,CHR1:2544-2699,"(2543, 21)",GCTTGATGTAGGTCGTTTGGC,"(2698, 20)",CCAACCGATGGTCCAATGGA,CHR1:2400-2700,CHR1:2401-2700,"(86.4802538787, 86.730606488)",0.250353
3,CHR1:2553-2699,"(2552, 21)",AGGTCGTTTGGCCTAATTCCC,"(2698, 20)",CCAACCGATGGTCCAATGGA,CHR1:2400-2700,CHR1:2401-2700,"(86.2799717913, 86.380112835)",0.100141


### Check on Prototyped melt method

In [41]:
designer.meltSlice('CHR1:2879-3022')

(87.381523272214395, 86.980959097320167)

In [42]:
!/Users/johnmccallum/miniconda3/envs/Py3PCR/bin/faidx ../test/test-data/AcCHR1_test.fasta CHR1:2879-3022

>CHR1:2879-3022
ACTGTGTCGCACTGGGTTTTAAAGGAAATAAATAAATATGGAATAAAACATTGATATTACAAATAAAGGGTGCTTCTAGC
TGAGTAGTCCTCCGATAAAGCACACGCATACAAAGGAATGAGAGAGAGAGAGAGAGGCGCTACC
