# Idea: ABcache
## Getting a precomputed ABcache file containing the parameters of the beta-binomial distribution from the PoN list

# Running EBfilter createCache on testdata

## setting the config

In [1]:
# Move one directory up so that the local `codes` package can be imported.
# Written as an explicit magic so it does not depend on automagic.
# NOTE: not idempotent - re-running this cell moves up one more level.
%cd ..

/Users/martinscience/Sites/Python/EBFilter


In [2]:
from codes import run

# Nested settings in the shape the snakemake pipeline would provide them.
# Kept under its own name so that `config` below is only ever the flat dict
# handed to run.main().
snake_config = {
    'EB': {
        'run': True,
        'threads': 1,
        'params': {
            'map_quality': 20,
            'base_quality': 15,
            'filter_flags': 'UNMAP,SECONDARY,QCFAIL,DUP',
            'fitting_penalty': .5,
            # NOTE(review): 'caching' is not copied into the flat `config`
            # below - confirm whether run.main needs it.
            'caching': True,
        },
        'log': 'output/logfile',
    },
    'annovar': {'sep': '\t'},
}

# to simulate snakemake behavior: unpack the nested settings
params = snake_config['EB']['params']
threads = snake_config['EB']['threads']
log = snake_config['EB']['log']
sep = snake_config['annovar']['sep']
_q = str(params['map_quality'])  # mapping quality, passed on as a string
_Q = params['base_quality']      # base quality, passed on as an int
fit_pen = params['fitting_penalty']
_ff = params['filter_flags']

# qual asciis for filtering out: the characters chr(33) .. chr(33 + _Q - 1)
# (presumably Phred+33 encoded base qualities below _Q - confirm against the consumer)
filter_quals = ''.join(chr(code) for code in range(33, 33 + _Q))

# Flat configuration dict that is passed to run.main().
config = {
    'q': _q,
    'Q': _Q,
    'filter_quals': filter_quals,
    'log': log,
    'fitting_penalty': fit_pen,
    'ff': _ff,
    'threads': threads,
    'sep': sep,
}


## running makeEBcache on testdata (takes ~2h on 3 cores)
* this step is not necessary for testing the cache mode
* a precomputed EBcache is stored in testdata/precom_testcache

In [3]:
# Arguments for building the cache from the panel of normals (test data).
args = dict(
    pon_list='testdata/PoN_list.txt',
    cache_folder='testdata/testdata_cache',  # folder for storing the data (snakemake config)
    force_caching=True,                      # force cache generation although no bed_file is provided
    bed_file=None,                           # alternative: 'testdata/input.bed'
    generate_cache=True,
)


In [4]:
# Use 3 threads with debug mode off, call run.main with the current
# args/config, then list the contents of the output folder.
config.update(threads=3, debug_mode=False)
run.main(args, config)
!ls output

Validating PoN list testdata/PoN_list.txt..
2019-05-04 10:55:17 Generating Cache for chromosome chr11.. 
10:55:17 Splitting bam files of PoN for chromosome chr11..
10:55:21 Writing pon list for chromosome chr11..
10:55:21 Generating pileup for chromosome chr11..
10:55:50: Writing pileup of Chr chr11 to file testdata/testdata_cache/cache_pileups/cache_chr11.pileup
[{'file': 'testdata/testdata_cache/cache_pileups/cache_chr11.pileup', 'chr': 'chr11', 'pileup_len': 457702}]
10:56:02: Reading pileup testdata/testdata_cache/cache_pileups/cache_chr11.pileup for AB computation
Process 13229: 0 lines (0.0%) of Chr chr11.	|                         |
Process 13230: 10000 lines (2.2%) of Chr chr11.	|.                        |
Process 13231: 20000 lines (4.4%) of Chr chr11.	|..                       |
Process 13229: 30000 lines (6.6%) of Chr chr11.	|..                       |
Process 13230: 40000 lines (8.7%) of Chr chr11.	|...                      |
Process 13231: 50000 lines (10.9%) of Chr chr11.

### using CLI:

In [5]:
# Calls the makeEBcache script from the current directory on the test PoN list.
# NOTE(review): the recorded output shows this failing with ModuleNotFoundError
# because the script imports `ebfilter.run`; that package must be importable.
!./makeEBcache -t 3 -force_caching testdata/PoN_list.txt testdata/testdata_cache

Traceback (most recent call last):
  File "./makeEBcache", line 3, in <module>
    from ebfilter.run import main
ModuleNotFoundError: No module named 'ebfilter'


## running EBscore in cache_mode on testdata
* you can use the cache file generated in the last step or the precomputed one
* to use the precomputed cache instead, set `args['use_cache']` to `'testdata/precom_testcache'`

In [6]:
# Arguments for scoring the test mutations against a cached panel of normals.
args = {
    'mut_file': 'testdata/input.anno',
    'tumor_bam': 'testdata/tumor.bam',
    'pon_list': 'testdata/PoN_list.txt',
    'output_path': 'output/testdata_eb.csv',
    # cache folder to read from; replace the value with
    # 'testdata/precom_testcache' to use the precomputed cache file
    'use_cache': 'testdata/testdata_cache',
    'chromosome': 'chr11',
    'generate_cache': False,
}

In [7]:
# Runtime options for this call: 3 threads, debug mode off.
config.update({'threads': 3, 'debug_mode': False})
run.main(args, config)
# show what has been written to the output folder
!ls output

Validating PoN list testdata/PoN_list.txt..
Running EBscore in EBcache mode...
Loading annotation file testdata/input.anno..
Loading cache testdata/testdata_cache/chr11.cache..
Drawing BB parameters from cache and piling up the target bam..
Writing annotation file output/testdata_eb.csv with EBscores to disc..
EBscore is finished!
testdata_eb.csv


### using CLI:

In [None]:
# Calls the EBscore command with the test-data cache folder, annotation file,
# tumor bam and PoN list.
# NOTE(review): the output file here is output/testdata_EB.csv, whereas the
# Python cell above writes output/testdata_eb.csv.
!EBscore -t 3 -use_cache testdata/testdata_cache testdata/input.anno testdata/tumor.bam testdata/PoN_list.txt output/testdata_EB.csv

## Running EBfilter createCache on my data (takes ~5h on 3 cores)

In [None]:
import os

# The home directory is read from the environment so the paths resolve on different Macs.
HOME = os.environ['HOME']
data_dir = f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata'

# Arguments for generating the cache from the AML panel of normals.
args = {
    'pon_list': f'{data_dir}/aml_pon.list',
    'cache_folder': f'{data_dir}/aml_cache',
    'force_caching': False,
    'bed_file': f'{data_dir}/HAEv7.bed',
    # NOTE(review): other cells in this notebook use the key 'chromosome' -
    # confirm which key run.main reads.
    'chrom': '22',
    'generate_cache': True,
}

In [None]:
# 3 threads, no debug mode; afterwards list the output folder.
config.update(threads=3, debug_mode=False)
run.main(args, config)
!ls output

### using CLI:

In [None]:
# Calls makeEBcache with -force_caching on the AML PoN list; the second path is the cache folder.
# NOTE(review): unlike the Python cell above, no bed file is passed here.
!makeEBcache -t3 -force_caching ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_pon.list ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_cache

## Running EBscore in cache_mode on my data

In [None]:
import os

# The home directory is read from the environment so the paths resolve on different Macs.
HOME = os.environ['HOME']
data_dir = f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata'

# Arguments for an EBscore run that reads from the AML cache folder.
args = {
    'mut_file': f'{data_dir}/anno/test_rel.csv',
    'tumor_bam': f'{data_dir}/bam/test_Rel1.bam',
    'pon_list': f'{data_dir}/aml_pon.list',
    'use_cache': f'{data_dir}/aml_cache',
    'output_path': f'{data_dir}/output/test_eb.csv',
    'chromosome': 'chr11',
    'generate_cache': False,
}

In [None]:
# Runtime options for this call: 40 threads, debug mode switched on.
config.update(threads=40, debug_mode=True)
run.main(args, config)

### using CLI:

In [None]:
# Calls the EBscore command with the AML cache folder, annotation file,
# tumor bam, PoN list and output file (same paths as the Python cell above).
!EBscore -t 3 -use_cache ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_cache ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/anno/test_rel.csv ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/bam/test_Rel1.bam ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_pon.list ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/output/test_eb.csv


## Running EBfilter createCache in BIHCluster on real data

In [None]:
import os

# The home directory is read from the environment so the paths resolve per user/machine.
HOME = os.environ['HOME']
ref_dir = f'{HOME}/work/static/ref'

# Arguments for generating the cache from the AML panel of normals.
args = {
    'pon_list': f'{ref_dir}/PoN/AML_Pon.txt',
    'cache_folder': f'{ref_dir}/PoN/AML_cache',
    'force_caching': False,
    'chromosome': '22',
    'bed_file': f'{ref_dir}/bed_files/SureSelect/hg19/SS_HAEv6r2_hg19_Padded_nochr.bed',
    'generate_cache': True,
}


In [None]:
# 40 threads with debug mode on for this call.
config.update({'threads': 40, 'debug_mode': True})
run.main(args, config)

In [None]:
from codes import run

# Nested settings in the shape the snakemake pipeline would provide them.
# Kept under its own name so that `config` below is only ever the flat dict
# handed to run.main().
snake_config = {
    'EB': {
        'run': True,
        'threads': 1,
        'params': {
            'map_quality': 20,
            'base_quality': 15,
            'filter_flags': 'UNMAP,SECONDARY,QCFAIL,DUP',
            'fitting_penalty': .5,
            # NOTE(review): 'caching' is not copied into the flat `config`
            # below - confirm whether run.main needs it.
            'caching': True,
        },
        'log': 'output/logfile',
    },
    'annovar': {'sep': '\t'},
}

# to simulate snakemake behavior: unpack the nested settings
params = snake_config['EB']['params']
threads = snake_config['EB']['threads']
log = snake_config['EB']['log']
sep = snake_config['annovar']['sep']
_q = str(params['map_quality'])  # mapping quality, passed on as a string
_Q = params['base_quality']      # base quality, passed on as an int
fit_pen = params['fitting_penalty']
_ff = params['filter_flags']

# qual asciis for filtering out: the characters chr(33) .. chr(33 + _Q - 1)
# (presumably Phred+33 encoded base qualities below _Q - confirm against the consumer)
filter_quals = ''.join(chr(code) for code in range(33, 33 + _Q))

# Flat configuration dict that is passed to run.main().
config = {
    'q': _q,
    'Q': _Q,
    'filter_quals': filter_quals,
    'log': log,
    'fitting_penalty': fit_pen,
    'ff': _ff,
    'threads': threads,
    'sep': sep,
}

import os

# The home directory is read from the environment so the paths resolve per user/machine.
HOME = os.environ['HOME']
ref_dir = f'{HOME}/work/static/ref'

# Arguments for generating the cache from the AML panel of normals.
args = {
    'pon_list': f'{ref_dir}/PoN/AML_Pon.txt',
    'cache_folder': f'{ref_dir}/PoN/AML_cache',
    'force_caching': False,
    # NOTE(review): other cells in this notebook use the key 'chromosome' -
    # confirm which key run.main reads.
    'chrom': '14',
    'bed_file': f'{ref_dir}/bed_files/SureSelect/hg19/SS_HAEv6r2_hg19_Padded_nochr.bed',
    'generate_cache': True,
}

# Override the thread count (20) and enable debug mode before calling run.main.
config.update(threads=20, debug_mode=True)
run.main(args, config)

### using CLI:

In [None]:
# Calls makeEBcache with the SureSelect bed file passed via -bed_file,
# followed by the AML PoN list and the cache folder.
!makeEBcache -t3 -bed_file ~/work/static/ref/bed_files/SureSelect/hg19/SS_HAEv6r2_hg19_Padded_nochr.bed ~/work/static/ref/PoN/AML_Pon.txt ~/work/static/ref/PoN/AML_cache


## Running EBscore in cache_mode in BIHCluster on real data

In [None]:
from codes import run

# Nested settings in the shape the snakemake pipeline would provide them.
# Kept under its own name so that `config` below is only ever the flat dict
# handed to run.main().
snake_config = {
    'EB': {
        'run': True,
        'threads': 1,
        'params': {
            'map_quality': 20,
            'base_quality': 15,
            'filter_flags': 'UNMAP,SECONDARY,QCFAIL,DUP',
            'fitting_penalty': .5,
            # NOTE(review): 'caching' is not copied into the flat `config`
            # below - confirm whether run.main needs it.
            'caching': True,
        },
        'log': 'output/logfile',
    },
    'annovar': {'sep': '\t'},
}

# to simulate snakemake behavior: unpack the nested settings
params = snake_config['EB']['params']
threads = snake_config['EB']['threads']
log = snake_config['EB']['log']
sep = snake_config['annovar']['sep']
_q = str(params['map_quality'])  # mapping quality, passed on as a string
_Q = params['base_quality']      # base quality, passed on as an int
fit_pen = params['fitting_penalty']
_ff = params['filter_flags']

# qual asciis for filtering out: the characters chr(33) .. chr(33 + _Q - 1)
# (presumably Phred+33 encoded base qualities below _Q - confirm against the consumer)
filter_quals = ''.join(chr(code) for code in range(33, 33 + _Q))

# Flat configuration dict that is passed to run.main().
config = {
    'q': _q,
    'Q': _Q,
    'filter_quals': filter_quals,
    'log': log,
    'fitting_penalty': fit_pen,
    'ff': _ff,
    'threads': threads,
    'sep': sep,
}


import os

# The home directory is read from the environment so the paths resolve per user/machine.
HOME = os.environ['HOME']
pon_dir = f'{HOME}/work/static/ref/PoN'

# Arguments for an EBscore run: local test inputs, PoN list and cache below HOME.
args = {
    'mut_file': 'testdata2/anno/test_D-CR1_bull.csv',
    'sep': ',',
    'output_path': 'output/test_D_EB.csv',
    'tumor_bam': 'testdata2/bam/test_D.bam',
    'pon_list': f'{pon_dir}/AML_Pon.txt',
    'use_cache': f'{pon_dir}/AML_cache',
    'chromosome': '22',
    'generate_cache': False,
}


# Override the thread count (40) and keep debug mode off before calling run.main.
config.update(threads=40, debug_mode=False)
run.main(args, config)

In [None]:
# Calls run.main once more with 40 threads and debug mode off, using
# whatever `args` and `config` are currently defined in the kernel.
config.update({'threads': 40, 'debug_mode': False})
run.main(args, config)