# Idea: ABcache
## Getting a precomputed ABcache file containing the beta-binomial distribution parameters derived from the PoN list

# Running EBfilter createCache on testdata

## setting the config

In [1]:
cd ..

/Users/martinscience/Sites/Python/EBFilter


In [2]:
from codes import run

# Nested configuration in the shape the snakemake workflow would hand over.
params = {
    'map_quality': 20,
    'base_quality': 15,
    'filter_flags': 'UNMAP,SECONDARY,QCFAIL,DUP',
    'fitting_penalty': .5,
    'caching': True,
}
config = {
    'EB': {
        'run': True,
        'threads': 1,  # to simulate snakemake behavior
        'params': params,
        'log': 'output/logfile',
    },
    'annovar': {'sep': '\t'},
}

# Unpack the nested settings into the flat values that run.main() is given.
params = config['EB']['params']
threads, log = config['EB']['threads'], config['EB']['log']
sep = config['annovar']['sep']
_q = str(params['map_quality'])   # mapping quality threshold, kept as a string ('20')
_Q = params['base_quality']       # base quality threshold (15)
fit_pen = params['fitting_penalty']
_ff = params['filter_flags']      # 'UNMAP,SECONDARY,QCFAIL,DUP'

# Characters with ASCII codes 33 .. 33 + _Q - 1: the quality symbols to filter out
# (presumably Phred+33 encoded base qualities below _Q - confirm against the tool).
filter_quals = ''.join(map(chr, range(33, 33 + _Q)))

# From here on `config` is the flat dict; the nested snakemake-style dict is replaced.
config = dict(
    q=_q,
    Q=_Q,
    filter_quals=filter_quals,
    log=log,
    fitting_penalty=fit_pen,
    ff=_ff,
    threads=threads,
    sep=sep,
)


## running makeEBcache on testdata

In [3]:
# Arguments for generating the cache from the bundled test data.
args = {
    'pon_list': 'testdata/PoN_list.txt',
    'cache_folder': 'testdata/testdata_cache',  # provide a folder for storing the data (snakemake config)
    'force_caching': True,                      # force cache generation although no bed_file is provided
    'bed_file': None,                           # 'testdata/input.bed'
    'generate_cache': True,
}

In [None]:
config['threads'] = 3
config['debug_mode'] = False
run.main(args, config)
!ls output

Generating Cache...
Generating  pileup for chromosome chr11..
Splitting bam files for chromosome chr11..
Writing pileup of Chr chr11 to file testdata/testdata_cache/cache_pileups/cache_chr11.pileup
Reading pileup testdata/testdata_cache/cache_pileups/cache_chr11.pileup for AB computation
Splitting the 457702 lines of chr11.pileup into 3 chunks for multithreaded computation..
Process 7973: Computing ABs for Chr chr11 	(lines 0	 to 	152568)
Process 7974: Computing ABs for Chr chr11 	(lines 152568	 to 	305135)
Process 7975: Computing ABs for Chr chr11 	(lines 305135	 to 	457702)


### using CLI:

In [None]:
# Command-line counterpart of the Python cache-generation cell for the test data:
# -t3 and -force_caching mirror threads=3 / force_caching=True there (flag meanings
# inferred from that cell); the positional arguments are the PoN list and the cache folder.
!makeEBcache -t3 -force_caching testdata/PoN_list.txt testdata/testdata_cache

## running EBscore in cache_mode on testdata

In [None]:
# Arguments for scoring the test mutations against the previously built cache.
args = {
    'mut_file': 'testdata/input.anno',
    'tumor_bam': 'testdata/tumor.bam',
    'pon_list': 'testdata/PoN_list.txt',
    'output_path': 'output/testdata_eb.csv',
    'use_cache': 'testdata/testdata_cache',
    'chromosome': 'chr11',
    'generate_cache': False,
}

In [None]:
config['threads'] = 20
config['debug_mode'] = False
run.main(args, config)
!ls output

### using CLI:

In [None]:
# Command-line counterpart of the Python scoring cell for the test data.
# Positional arguments, in order: mutation file, tumor BAM, PoN list, output file.
# NOTE(review): this differs from the Python cell - it uses -t 3 (vs. threads=20),
# passes no chromosome, and writes output/testdata_EB.csv (vs. output/testdata_eb.csv).
!EBscore -t 3 -use_cache testdata/testdata_cache testdata/input.anno testdata/tumor.bam testdata/PoN_list.txt output/testdata_EB.csv

## Running EBfilter createCache on my data

In [None]:
import os

# The home directory is read from the environment so the same cell runs on different Macs.
HOME = os.environ['HOME']

# Arguments for generating the cache from my own AML panel of normals.
args = {
    'pon_list': f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_pon.list',
    'cache_folder': f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_cache',
    'force_caching': True,
    'bed_file': None,
    'generate_cache': True,
}

In [None]:
# Settings for the cache run on my data: threads=3, debug mode on.
# (This cell only adjusts the config; it does not start a run.)
config.update(threads=3, debug_mode=True)

### using CLI:

In [None]:
# Command-line cache generation for my own data: -t3 and -force_caching, followed by
# the PoN list and the cache folder (both under ~/Dropbox/.../EBFilter/mytestdata).
!makeEBcache -t3 -force_caching ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_pon.list ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_cache


## Running EBscore in cache_mode on my data

In [None]:
import os

# Arguments for scoring my own AML test sample against the precomputed cache.
# All paths are rooted at the user's home directory so the cell runs on different Macs.
HOME = os.environ['HOME'] # set HOME to run on different Macs
args = {}
# Fixed: these two paths used the undefined name `Home` (NameError); the variable is HOME.
args['mut_file'] = f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/anno/test_rel.csv'
args['tumor_bam'] = f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/bam/test_Rel1.bam'
args['pon_list'] = f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_pon.list'
args['use_cache'] = f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_cache'
args['output_path'] = f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/output/test_eb.csv'
args['chromosome'] = 'chr11'        # restrict the run to chr11
args['generate_cache'] = False      # use the existing cache instead of generating one

In [None]:
# Score my data in cache mode with threads=40 and debug mode on.
config.update(threads=40, debug_mode=True)
run.main(args, config)

### using CLI:

In [None]:
# Command-line scoring of my own data in cache mode.
# Positional arguments, in order: mutation file, tumor BAM, PoN list, output file.
# NOTE(review): the mutation-file path starts with '~//Dropbox' (doubled slash); this
# still resolves on POSIX systems but is most likely a typo for '~/Dropbox'.
!EBscore -t 3 -use_cache ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_cache ~//Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/anno/test_rel.csv ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/bam/test_Rel1.bam ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_pon.list ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/output/test_eb.csv


## Running EBfilter createCache in BIHCluster on real data

In [None]:
import os

# Paths are rooted at the user's home directory, read from the environment.
HOME = os.environ['HOME']

# Arguments for generating the cache on the cluster: no forced caching, limited to
# chromosome '22', with a bed file supplied.
args = {
    'pon_list': f'{HOME}/work/static/ref/PoN/AML_Pon.txt',
    'cache_folder': f'{HOME}/work/static/ref/PoN/AML_cache',
    'force_caching': False,
    'chromosome': '22',
    'bed_file': f'{HOME}/work/static/ref/bed_files/SureSelect/hg19/SS_HAEv6r2_hg19_Padded_nochr.bed',
    'generate_cache': True,
}


In [None]:
# Generate the cache on the cluster with threads=40 and debug mode on.
config.update(threads=40, debug_mode=True)
run.main(args, config)

### using CLI:

In [None]:
# Command-line cache generation on the cluster: -t3, the SureSelect bed file via
# -bed_file, then the PoN list and the cache folder as positional arguments.
# NOTE(review): unlike the Python cell, this uses 3 threads (vs. 40) and passes no chromosome.
!makeEBcache -t3 -bed_file ~/work/static/ref/bed_files/SureSelect/hg19/SS_HAEv6r2_hg19_Padded_nochr.bed ~/work/static/ref/PoN/AML_Pon.txt ~/work/static/ref/PoN/AML_cache


## Running EBscore in cache_mode in BIHCluster on real data

In [None]:
from codes import run

# Nested configuration in the shape the snakemake workflow would hand over.
params = {
    'map_quality': 20,
    'base_quality': 15,
    'filter_flags': 'UNMAP,SECONDARY,QCFAIL,DUP',
    'fitting_penalty': .5,
    'caching': True,
}
config = {
    'EB': {
        'run': True,
        'threads': 1,  # to simulate snakemake behavior
        'params': params,
        'log': 'output/logfile',
    },
    'annovar': {'sep': '\t'},
}

# Unpack the nested settings into the flat values that run.main() is given.
params = config['EB']['params']
threads, log = config['EB']['threads'], config['EB']['log']
sep = config['annovar']['sep']
_q = str(params['map_quality'])   # mapping quality threshold, kept as a string ('20')
_Q = params['base_quality']       # base quality threshold (15)
fit_pen = params['fitting_penalty']
_ff = params['filter_flags']      # 'UNMAP,SECONDARY,QCFAIL,DUP'

# Characters with ASCII codes 33 .. 33 + _Q - 1: the quality symbols to filter out
# (presumably Phred+33 encoded base qualities below _Q - confirm against the tool).
filter_quals = ''.join(map(chr, range(33, 33 + _Q)))

# From here on `config` is the flat dict; the nested snakemake-style dict is replaced.
config = dict(
    q=_q,
    Q=_Q,
    filter_quals=filter_quals,
    log=log,
    fitting_penalty=fit_pen,
    ff=_ff,
    threads=threads,
    sep=sep,
)


import os

# Paths to the shared reference data are rooted at the user's home directory.
HOME = os.environ['HOME']

# Arguments for scoring the test_D sample on the cluster against the AML cache,
# limited to chromosome '22'; the mutation file is declared as comma-separated.
args = {
    'mut_file': 'testdata2/anno/test_D-CR1_bull.csv',
    'sep': ',',
    'output_path': 'output/test_D_EB.csv',
    'tumor_bam': 'testdata2/bam/test_D.bam',
    'pon_list': f'{HOME}/work/static/ref/PoN/AML_Pon.txt',
    'use_cache': f'{HOME}/work/static/ref/PoN/AML_cache',
    'chromosome': '22',
    'generate_cache': False,
}


# Score on the cluster in cache mode with threads=40 and debug mode off.
config.update(threads=40, debug_mode=False)
run.main(args, config)

In [None]:
# Re-run of the cluster scoring with the same settings (threads=40, debug mode off),
# using whatever `args` and `config` currently hold.
config.update(threads=40, debug_mode=False)
run.main(args, config)

In [61]:
import pandas as pd

# Read the tab-separated scoring result and keep only the rows whose 'Chr' column
# equals 22; the filtered frame is displayed as the cell output.
eb = (
    pd.read_csv('output/test_D_EB.csv', sep='\t')
    .loc[lambda table: table['Chr'] == 22]
)
pon_list = ['22']  # not used anywhere in this cell
eb

Unnamed: 0,Chr,Start,End,Ref,Alt,mut_type,somatic_status,gene,detail,func,...,TR2+,TR2-,superDups,esp6500_all,1000g,snp138,cosmic70,ljb23_pp2hdiv,ljb23_sift,EB_score
0,22,24087023,24087023,C,G,snp,Germline,ZNF70,,exonic,...,2,5,,,0.0002,,,"0.001,B","0.91,0.09,T",2.034
1,22,38119755,38119757,CAA,-,indel,Germline,TRIOBP,,exonic,...,7,1,,,0.336262,,,,,0.47
2,22,38120176,38120178,CCT,-,indel,Germline,TRIOBP,,exonic,...,6,3,,,0.250399,rs36219868,ID=COSM111672;OCCURENCE=1(ovary),,,0.427
3,22,39777822,39777822,-,CAA,indel,Germline,SYNGR1,,exonic,...,4,3,,,0.860823,rs149306472,ID=COSM1484245;OCCURENCE=2(breast),,,0.0
4,22,44282293,44282293,G,C,snp,Germline,PNPLA5,,exonic,...,3,2,,,0.0002,,,"0.022,B","0.37,0.63,T",1.96


In [2]:
# Make sure output/test exists: opening in append mode creates the file if it is
# missing and leaves existing content untouched, since nothing is written.
with open('output/test', 'a'):
    pass