# ABcache
## Getting a precomputed ABcache file containing the beta-binomial distribution parameters derived from the PoN list

# Running EBfilter createCache on testdata

## setting the config

In [None]:
cd ..

In [None]:
from codes import run

# --- snakemake-style nested config (built by hand to simulate snakemake behavior) ---
params = {
    'map_quality': 20,
    'base_quality': 15,
    'filter_flags': 'UNMAP,SECONDARY,QCFAIL,DUP',
    'fitting_penalty': .5,
    'caching': True,
}
config = {
    'EB': {
        'run': True,
        'threads': 1,
        'params': params,
        'log': 'output/logfile',
    },
    'annovar': {'sep': '\t'},
}

# --- flatten the nested config into the flat dict that is passed to run.main() ---
params = config['EB']['params']
threads = config['EB']['threads']
log = config['EB']['log']
sep = config['annovar']['sep']
_q = str(params['map_quality'])  # mapping quality (20), kept as a string
_Q = params['base_quality']      # base quality (15), kept as an int
fit_pen = params['fitting_penalty']
# characters with ASCII codes 33 .. 33 + _Q - 1: the quality chars to filter out
# (offset 33 presumably corresponds to Phred+33 encoding - confirm)
filter_quals = ''.join(chr(qual) for qual in range(33, 33 + _Q))
_ff = params['filter_flags']     # 'UNMAP,SECONDARY,QCFAIL,DUP'
# NOTE: `config` is rebound here from the nested snakemake shape to the flat shape
config = {
    'q': _q,
    'Q': _Q,
    'filter_quals': filter_quals,
    'log': log,
    'fitting_penalty': fit_pen,
    'ff': _ff,
    'threads': threads,
    'sep': sep,
}


## running makeEBcache on testdata (takes ~2h on 3 cores)
* this step is not necessary for testing the cache mode
* a precomputed EBcache is stored in testdata/precom_testcache

In [None]:
# Arguments for generating the cache from the bundled test data.
args = {
    'pon_list': 'testdata/PoN_list.txt',
    # provide a folder for storing the data (snakemake config)
    'cache_folder': 'testdata/testdata_cache',
    # force cache generation although no bed_file is provided
    'force_caching': True,
    'bed_file': None,  # alternative: 'testdata/input.bed'
    'generate_cache': True,
}


In [None]:
config['threads'] = 4
config['debug_mode'] = False
run.main(args, config)
!ls output

### using CLI:

In [None]:
# Command-line variant of the cache generation, run through IPython's `!` shell escape.
# NOTE(review): this passes -t 3 while the Python cell sets config['threads'] = 4,
# and it calls ./makeEBcache whereas the other makeEBcache CLI cells omit the ./ prefix - confirm.
!./makeEBcache -t 3 -force_caching testdata/PoN_list.txt testdata/testdata_cache

## running EBscore in cache_mode on testdata
* you can use the cache file generated in the last step or the precomputed one
* to use the precomputed file, just change the args['use_cache'] value

In [None]:
# Arguments for an EBscore run in cache mode on the bundled test data.
args = {
    'mut_file': 'testdata/input.anno',
    'tumor_bam': 'testdata/tumor.bam',
    'pon_list': 'testdata/PoN_list.txt',
    'output_path': 'output/testdata_eb.csv',
    # swap the comment marker between the next two entries to use the precomputed cache
    'use_cache': 'testdata/testdata_cache',
    # 'use_cache': 'testdata/precom_testcache',
    'chromosome': 'chr11',
    'generate_cache': False,
}

In [None]:
config['threads'] = 3
config['debug_mode'] = False
run.main(args, config)
!ls output

### using CLI:

In [None]:
# Command-line variant of the cache-mode EBscore run, via IPython's `!` shell escape.
# NOTE(review): the output file here is output/testdata_EB.csv, while the Python cell
# writes output/testdata_eb.csv (a different file on case-sensitive filesystems), and the
# Python cell additionally sets args['chromosome'] = 'chr11' - confirm both are intended.
!EBscore -t 3 -use_cache testdata/testdata_cache testdata/input.anno testdata/tumor.bam testdata/PoN_list.txt output/testdata_EB.csv

## Running EBfilter createCache on my data (takes ~5h on 3 cores)

In [None]:
import os

# Home directory is read from the environment so the cell runs on different Macs.
HOME = os.environ['HOME']

# Arguments for cache generation on local data (bed file given, caching not forced).
args = {
    'pon_list': f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_pon.list',
    'cache_folder': f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_cache',
    'force_caching': False,
    'bed_file': f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/HAEv7.bed',
    'chrom': '22',
    'generate_cache': True,
}

In [None]:
config['threads'] = 3
config['debug_mode'] = False
run.main(args, config)
!ls output

### using CLI:

In [None]:
# Command-line variant of the cache generation on local data, via IPython's `!` shell escape.
# NOTE(review): unlike the Python cell (force_caching False, bed_file set, chrom '22'),
# this passes -force_caching and neither a bed file nor a chromosome - confirm they are meant to match.
!makeEBcache -t3 -force_caching ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_pon.list ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_cache

## Running EBscore in cache_mode on my data

In [None]:
import os

# Home directory is read from the environment so the cell runs on different Macs.
HOME = os.environ['HOME']

# Arguments for an EBscore run in cache mode on local data.
args = {
    'mut_file': f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/anno/test_rel.csv',
    'tumor_bam': f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/bam/test_Rel1.bam',
    'pon_list': f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_pon.list',
    'use_cache': f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_cache',
    'output_path': f'{HOME}/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/output/test_eb.csv',
    'chromosome': 'chr11',
    'generate_cache': False,
}

In [None]:
# Set thread count and enable debug mode on the flat config, then call run.main()
# with the local-data cache-mode args from the previous cell.
config.update(threads=40, debug_mode=True)
run.main(args, config)

### using CLI:

In [None]:
!EBscore -t 3 -use_cache ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_cache ~//Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/anno/test_rel.csv ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/bam/test_Rel1.bam ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/aml_pon.list ~/Dropbox/Icke/Work/somVar/tools/EBFilter/mytestdata/output/test_eb.csv


## Running EBfilter createCache in BIHCluster on real data

In [None]:
import os

# Home directory is read from the environment so the same cell runs on different machines.
HOME = os.environ['HOME']

# Arguments for cache generation on the cluster data (bed file given, caching not forced).
# NOTE(review): this cell uses the key 'chromosome', other cache-generation cells use 'chrom' -
# confirm which key run.main() actually reads.
args = {
    'pon_list': f'{HOME}/work/static/ref/PoN/AML_Pon.txt',
    'cache_folder': f'{HOME}/work/static/ref/PoN/AML_cache',
    'force_caching': False,
    'chromosome': 'Y',
    'bed_file': f'{HOME}/work/static/ref/bed_files/SureSelect/hg19/SS_HAEv6r2_hg19_Padded_nochr.bed',
    'generate_cache': True,
}


In [None]:
# Set thread count and enable debug mode on the flat config, then call run.main()
# with the cluster cache-generation args from the previous cell.
config.update(threads=40, debug_mode=True)
run.main(args, config)

In [None]:
import os

from codes import run

# --- snakemake-style nested config (built by hand to simulate snakemake behavior) ---
params = {
    'map_quality': 20,
    'base_quality': 15,
    'filter_flags': 'UNMAP,SECONDARY,QCFAIL,DUP',
    'fitting_penalty': .5,
    'caching': True,
}
config = {
    'EB': {
        'run': True,
        'threads': 1,
        'params': params,
        'log': 'output/logfile',
    },
    'annovar': {'sep': '\t'},
}

# --- flatten the nested config into the flat dict that is passed to run.main() ---
params = config['EB']['params']
threads = config['EB']['threads']
log = config['EB']['log']
sep = config['annovar']['sep']
_q = str(params['map_quality'])  # mapping quality (20), kept as a string
_Q = params['base_quality']      # base quality (15), kept as an int
fit_pen = params['fitting_penalty']
# characters with ASCII codes 33 .. 33 + _Q - 1: the quality chars to filter out
# (offset 33 presumably corresponds to Phred+33 encoding - confirm)
filter_quals = ''.join(chr(qual) for qual in range(33, 33 + _Q))
_ff = params['filter_flags']     # 'UNMAP,SECONDARY,QCFAIL,DUP'
# NOTE: `config` is rebound here from the nested snakemake shape to the flat shape
config = {
    'q': _q,
    'Q': _Q,
    'filter_quals': filter_quals,
    'log': log,
    'fitting_penalty': fit_pen,
    'ff': _ff,
    'threads': threads,
    'sep': sep,
}

# --- arguments for cache generation on the cluster data ---
HOME = os.environ['HOME']  # read from the environment so the cell runs on different machines
# HOME = f"{HOME}/mount"   # uncomment to work below a "mount" folder inside HOME
# NOTE(review): this cell uses the key 'chrom', another cluster cell uses 'chromosome' -
# confirm which key run.main() actually reads.
args = {
    'pon_list': f'{HOME}/work/static/ref/PoN/AML_Pon.txt',
    'cache_folder': f'{HOME}/work/static/ref/PoN/AML_cache',
    'force_caching': False,
    'chrom': 'Y',
    'bed_file': f'{HOME}/work/static/ref/bed_files/SureSelect/hg19/SS_HAEv6r2_hg19_Padded_nochr.bed',
    'generate_cache': True,
}

# --- override the thread count from the snakemake config, enable debug mode, and run ---
config['threads'] = 20
config['debug_mode'] = True
run.main(args, config)

### using CLI:

In [None]:
# Command-line variant of the cluster cache generation, via IPython's `!` shell escape;
# the bed file is passed with -bed_file, followed by the PoN list and the cache folder.
# NOTE(review): uses -t3 while the Python cell sets config['threads'] = 20, and passes no
# chromosome although the Python cell sets args['chrom'] = 'Y' - confirm they are meant to match.
!makeEBcache -t3 -bed_file ~/work/static/ref/bed_files/SureSelect/hg19/SS_HAEv6r2_hg19_Padded_nochr.bed ~/work/static/ref/PoN/AML_Pon.txt ~/work/static/ref/PoN/AML_cache


## Running EBscore in cache_mode in BIHCluster on real data

In [None]:
cd ..

In [None]:
import os

from codes import run

# --- snakemake-style nested config (built by hand to simulate snakemake behavior) ---
params = {
    'map_quality': 20,
    'base_quality': 15,
    'filter_flags': 'UNMAP,SECONDARY,QCFAIL,DUP',
    'fitting_penalty': .5,
    'caching': True,
}
config = {
    'EB': {
        'run': True,
        'threads': 1,
        'params': params,
        'log': 'output/logfile',
    },
    'annovar': {'sep': ','},
}

# --- flatten the nested config into the flat dict that is passed to run.main() ---
params = config['EB']['params']
threads = config['EB']['threads']
log = config['EB']['log']
sep = config['annovar']['sep']
_q = str(params['map_quality'])  # mapping quality (20), kept as a string
_Q = params['base_quality']      # base quality (15), kept as an int
fit_pen = params['fitting_penalty']
# characters with ASCII codes 33 .. 33 + _Q - 1: the quality chars to filter out
# (offset 33 presumably corresponds to Phred+33 encoding - confirm)
filter_quals = ''.join(chr(qual) for qual in range(33, 33 + _Q))
_ff = params['filter_flags']     # 'UNMAP,SECONDARY,QCFAIL,DUP'
# NOTE: `config` is rebound here from the nested snakemake shape to the flat shape
config = {
    'q': _q,
    'Q': _Q,
    'filter_quals': filter_quals,
    'log': log,
    'fitting_penalty': fit_pen,
    'ff': _ff,
    'threads': threads,
    'sep': sep,
}

# --- arguments for the cache-mode EBscore run on the cluster data ---
HOME = os.environ['HOME']  # read from the environment so the cell runs on different machines
# HOME = f"{HOME}/mount"   # uncomment to work below a "mount" folder inside HOME
# NOTE(review): args['sep'] is 'tab' while config['sep'] (from the annovar section) is ',' -
# confirm which separator run.main() uses for the mutation file.
args = {
    'mut_file': f'{HOME}/work/utils/EBFilter/mut_file3.csv',
    'sep': 'tab',
    'output_path': f'{HOME}/work/utils/EBFilter/eb_file3.csv',
    'tumor_bam': f'{HOME}/scratch/projects/somVar/recalib/test_Rel1.bam',
    'pon_list': f'{HOME}/work/static/ref/PoN/AML_Pon.txt',
    'use_cache': f'{HOME}/work/static/ref/PoN/AML_cache',
    'generate_cache': False,
}

# --- override the thread count from the snakemake config, enable debug mode, and run ---
config['threads'] = 32
config['debug_mode'] = True
run.main(args, config)

In [None]:
# Re-run cell: applies the same thread count and debug flag as the preceding cell
# and calls run.main() again with the current args/config.
config.update(threads=32, debug_mode=True)
run.main(args, config)