In [2]:
ls /Users/martinscience/Dropbox/Icke/Work/somVar/testdata/EBdata/

001_A-B-chr1.EB           002_chr7.bed              test.cleanpileup
001_A-B-chr1.mutmatrix    002_chr7.pon              test.cutpileup
002.csv                   003_A-B-chr1.EB           test.filterCount
002.matrix                003_A-B-chr1.mutmatrix    test.pileup
002_A-B-chr1.EB           chr7.pilecount.gz         test.pon
002_A-B-chr1.EBmutmatrix  stop.file
002_A-B-chr1.mutmatrix    test.bed


In [3]:
# get the code
import sys
sys.path.append('../code')

# set the paths
home = '/Users/mahtin'
home = '/Users/martinscience'
testdata = f"{home}/Dropbox/Icke/Work/somVar/testdata"
ebdata = f"{testdata}/EBdata"
pon_path = f"{testdata}/testpon"
static = f"{home}/Dropbox/Icke/Work/static"

# squeezing all data and shell paths into config
EBconfig = {
    "cleanpileup": "../shell/cleanpileup.mawk",
    "makeponlist": "../shell/makeponlist.sh",
    "csv2bed":"../shell/csv2bed.mawk",
    "pon2cols": "../shell/pon2cols.mawk",
    "pile2count": "../shell/pile2count2.mawk",
    "filterVar": "../shell/filterVar.mawk",
    "pon2tumor": "../shell/pon2tumor.mawk",
    "pon_path": pon_path,
    "genome_split": f"{static}/genome/gatk/hg38/split",
    "MAPQ": 20,
    "Q": 25,
    "fit_pen": 0.5,
    "threads":6,
    "count_dict": {0:"alt+", 1:"alt-", 2:"depth+", 3:"depth-"},
    "debug":True
}


In [28]:
from ebraw import bam2matrix
from ebconvert import matrix2AB, AB2EBscore
from script_utils import show_output

def run_ebscore(mut_file, bam_file, output_file='test.csv', pon_list='', chrom=None, EBconfig=EBconfig, debug=False):
    '''
    master function to start eb_computation
    '''
    
    # ############## LOAD DATA ###############################
    show_output(f"Computing EBscore for chrom {chrom} on target {bam_file}", color='normal')

    ############### bam2matrix #######################
    show_output(
        f"Piling up {chrom} of {bam_file} with Pon List.", color='normal')  
    df = bam2matrix(bam_file, mut_file, chrom, pon_list, EBconfig)
    if debug:
        matrix_file = f"{os.path.splitext(output_file)[0]}.EBmatrix"
        df.to_csv(matrix_file, sep='\t', index=False)
    
    cols = list(df.columns)[:5]
    # check if matrix_df is empty
    if df.empty:
        show_output(
            f"Pileup for {chrom} of {bam_file} was empty!")
        if debug:
            show_output(f"Created empty file {matrix_file}", color='warning')
        return
    
    
    # if no error:
    show_output(
        f"Pileup matrix for chrom {chrom} of {bam_file} completed.", color='success')

    # ############## matrix2AB ######################
    show_output(
        f"Computing ABparams from PoN data on {chrom}.", color='normal')
    df['ABparams'] = df.apply(matrix2AB, config=EBconfig, axis=1)
    show_output(
        f"Computing ABparams finished.", color='success')
    if debug:
        AB_file = f"{os.path.splitext(output_file)[0]}.AB"
        df.to_csv(AB_file, sep='\t', index=False)

    # multithreaded computation
    # EB_df = compute_matrix2EB_multi(
    #     eb_matrix, EBparams['fitting_penalty'], threads)

    # ############# AB2EB ##########################
    df['EBscore'] = df.apply(AB2EBscore, config=EBconfig, axis=1)  
    # add EBscore to columns
    cols.append('EBscore')

    
    # copy PON:ALT=Depth into separate columns
    df[['PonAlt', 'PonDepth']] = df['PON:ALT=Depth'].str.split('=', expand=True)
    # if using ABcache, matrix has to be added anyway for complete output
    # get the pon_matrix containing the Pon coverages in Alt and Ref
    # transfer PoN-Ref and PoN-Alt to EB_df
    # EB_df[['PoN-Ref', 'PoN-Alt']] = pon_matrix[['PoN-Ref', 'PoN-Alt']]
    # mut_cols += ['PoN-Ref', 'PoN-Alt']

    # rm unnecessary columns
    EB_df = df[cols]

    # ######### WRITE TO FILE ##############################################

    EB_df.to_csv(output_file, sep='\t', index=False)

    # cleanup
    # shell(f"rm {matrix_file} {EB_matrix_input_file}")  # {mutmatrix_file}
    show_output(
        f"Created EBscore for chrom {chrom} of {bam_file} and written to {output_file}", color='success')
    return EB_df

In [35]:
# get the input files
bam_file = f"{testdata}/bam/002_A.bam"
mut_file = f"{ebdata}/002.csv"
pon_list = "Pon_chr7short.txt"
chrom = "chr7"
output = "../output/002.EB"
df = run_ebscore(mut_file, bam_file, output_file=output, pon_list=pon_list, chrom=chrom, EBconfig=EBconfig, debug=True)

[1;35;2m11:00:16[0m : [1;30;1mComputing EBscore for chrom chr7 on target /Users/martinscience/Dropbox/Icke/Work/somVar/testdata/bam/002_A.bam[0m
[1;35;2m11:00:16[0m : [1;30;1mPiling up chr7 of /Users/martinscience/Dropbox/Icke/Work/somVar/testdata/bam/002_A.bam with Pon List.[0m
[1;35;2m11:01:01[0m : [1;36;1mPileup matrix for chrom chr7 of /Users/martinscience/Dropbox/Icke/Work/somVar/testdata/bam/002_A.bam completed.[0m
[1;35;2m11:01:01[0m : [1;30;1mComputing ABparams from PoN data on chr7.[0m
[1;35;2m11:01:02[0m : [1;36;1mComputing ABparams finished.[0m
[1;35;2m11:01:02[0m : [1;36;1mCreated EBscore for chrom chr7 of /Users/martinscience/Dropbox/Icke/Work/somVar/testdata/bam/002_A.bam and written to ../output/002.EB[0m


In [36]:
df2 = df.copy()
df2

Unnamed: 0,Chr,Start,End,Ref,Alt,EBscore,PON:ALT=Depth
0,chr7,680651,680651,T,G,0.055,0|0|0|0-5|2|3|3=129|92|148|166-10|6|16|8
1,chr7,35670232,35670232,C,T,1.38,102|0|0|0-18|0|0|0=102|92|167|229-18|24|42|44
2,chr7,72826970,72826970,G,A,0.871,40|0|0|99-26|0|0|50=126|189|187|291-80|100|110...
3,chr7,75147946,75147946,C,T,0.154,21|46|31|14-12|13|10|2=84|130|120|57-57|84|86|42
4,chr7,99722302,99722302,A,G,1.422,0|0|0|0-0|0|0|0=144|182|211|217-19|31|50|31
5,chr7,100958075,100958075,C,T,0.001,29|23|38|32-8|4|7|5=1096|1476|1547|1792-554|77...
6,chr7,100959332,100959332,C,G,0.042,23|32|43|44-2|2|1|0=281|354|342|486-91|149|161...
7,chr7,100959344,100959344,G,C,0.036,30|36|50|60-3|2|1|0=290|369|340|504-108|148|16...
8,chr7,101033998,101033998,G,C,0.338,139|0|188|0-22|0|15|0=536|588|796|690-301|395|...
9,chr7,101034004,101034004,G,A,0.349,142|0|187|0-24|0|16|0=553|579|803|691-299|414|...


In [39]:
df2[['PonAlt', 'PonDepth']] = df['PON:ALT=Depth'].str.split('=', expand=True)
df2

Unnamed: 0,Chr,Start,End,Ref,Alt,EBscore,PON:ALT=Depth,PonAlt,PonDepth
0,chr7,680651,680651,T,G,0.055,0|0|0|0-5|2|3|3=129|92|148|166-10|6|16|8,0|0|0|0-5|2|3|3,129|92|148|166-10|6|16|8
1,chr7,35670232,35670232,C,T,1.38,102|0|0|0-18|0|0|0=102|92|167|229-18|24|42|44,102|0|0|0-18|0|0|0,102|92|167|229-18|24|42|44
2,chr7,72826970,72826970,G,A,0.871,40|0|0|99-26|0|0|50=126|189|187|291-80|100|110...,40|0|0|99-26|0|0|50,126|189|187|291-80|100|110|124
3,chr7,75147946,75147946,C,T,0.154,21|46|31|14-12|13|10|2=84|130|120|57-57|84|86|42,21|46|31|14-12|13|10|2,84|130|120|57-57|84|86|42
4,chr7,99722302,99722302,A,G,1.422,0|0|0|0-0|0|0|0=144|182|211|217-19|31|50|31,0|0|0|0-0|0|0|0,144|182|211|217-19|31|50|31
5,chr7,100958075,100958075,C,T,0.001,29|23|38|32-8|4|7|5=1096|1476|1547|1792-554|77...,29|23|38|32-8|4|7|5,1096|1476|1547|1792-554|778|822|723
6,chr7,100959332,100959332,C,G,0.042,23|32|43|44-2|2|1|0=281|354|342|486-91|149|161...,23|32|43|44-2|2|1|0,281|354|342|486-91|149|161|135
7,chr7,100959344,100959344,G,C,0.036,30|36|50|60-3|2|1|0=290|369|340|504-108|148|16...,30|36|50|60-3|2|1|0,290|369|340|504-108|148|167|131
8,chr7,101033998,101033998,G,C,0.338,139|0|188|0-22|0|15|0=536|588|796|690-301|395|...,139|0|188|0-22|0|15|0,536|588|796|690-301|395|426|345
9,chr7,101034004,101034004,G,A,0.349,142|0|187|0-24|0|16|0=553|579|803|691-299|414|...,142|0|187|0-24|0|16|0,553|579|803|691-299|414|439|348


In [6]:
# get the code
import sys
sys.path.append('../code')

# set the paths
home = '/Users/mahtin'
home = '/Users/martinscience'
testdata = f"{home}/Dropbox/Icke/Work/somVar/testdata"
ebdata = f"{testdata}/EBdata"
pon_path = f"{testdata}/testpon"
static = f"{home}/Dropbox/Icke/Work/static"


# df.to_csv(f"{ebdata}/002.matrix", sep='\t', index=False)
df = pd.read_csv(f"{ebdata}/002.matrix", sep='\t')
df[:3]

Unnamed: 0,Chr,Start,End,Ref,Alt,Tumor:Alt=Depth,PON:ALT=Depth
0,chr7,680651,680651,T,G,0-1=28-2,0|0|0|0-5|2|3|3=129|92|148|166-10|6|16|8
1,chr7,35670232,35670232,C,T,4-3=13-4,102|0|0|0-18|0|0|0=102|92|167|229-18|24|42|44
2,chr7,72826970,72826970,G,A,2-5=10-10,40|0|0|99-26|0|0|50=126|189|187|291-80|100|110...
