In [1]:
##################
### Author: Adriano Fonzino. email: adriano.fonzino@uniba.it
##################
import os
import pandas as pd
import numpy as np


# define util function for downstream analysis
def join_bonafides(bonafide_fp, bonafide_anno_filt):
    """Join a bona fide table carrying REDItools info with its annotated/filtered version.

    Both inputs are read as tab-separated tables. The first is matched on
    ("wt_Region", "wt_Position") against ("Region", "Position") of the second.
    The inner join is displayed, written next to the input as
    ``bonafide_fp + ".anno.filt_refseq.tsv"`` and returned. Sites of the first
    table with no match in the second are displayed as discarded sites.

    Parameters
    ----------
    bonafide_fp : str
        Path of the bona fide table with REDItools alignment profiles/data.
        Also used as the prefix of the output file path.
    bonafide_anno_filt : str
        Path of the annotated and filtered bona fide table (per the original
        comments: NON-REP only Refseq).

    Returns
    -------
    pandas.DataFrame
        Inner join of the two tables (columns present in both, other than the
        join keys, get pandas' default "_x"/"_y" suffixes).

    Notes
    -----
    Relies on ``display`` being available in the notebook namespace; it is not
    imported here.
    """
    # load bonafide with reditools alignment profiles/data
    bonafide = pd.read_table(bonafide_fp)
    print("Bonafide not-filted with REDItools info:")
    display(bonafide)

    # load annotated table filtered for positives NON-REP being in Refseq.
    # Read from the function argument: the previous version read the notebook
    # global `bonafide_anno_filt_fp`, so the result depended on hidden state.
    anno_filt = pd.read_table(bonafide_anno_filt)
    print("Bonafide annotated and filtered (NON-REP only Refseq):")
    display(anno_filt)

    left_keys = ["wt_Region", "wt_Position"]
    right_keys = ["Region", "Position"]

    # inner join between bonafide with and without annotation/filtering
    joined = pd.merge(bonafide, anno_filt,
                      left_on=left_keys, right_on=right_keys, how="inner")
    print("Common sites annotated with REDItools info:")
    display(joined)

    # print not common site (not Refseq): rows of the left table without a match.
    # The merge indicator identifies them without assuming that both tables
    # share a "Class" column (which is what produced "Class_y" before).
    left_join = pd.merge(bonafide, anno_filt,
                         left_on=left_keys, right_on=right_keys,
                         how="left", indicator=True)
    discarded = left_join[left_join["_merge"] == "left_only"].drop(columns="_merge")
    print("Discared sites (via left join):")
    display(discarded)

    output_file = bonafide_fp + ".anno.filt_refseq.tsv"
    print("Save on disk joined with annotation and filtering to :", output_file)
    joined.to_csv(output_file, index=False, sep="\t")

    return joined

## A549

In [2]:
### Couple 1
# Per the file names: control runs SRR12492043/SRR12492044 vs siADARs runs
# SRR12492025/SRR12492026 (outTable_773331943 vs outTable_302610513), a549 folder.
# inputs
# bona fide sites table (*.bonafide_final.tsv); also the prefix of the file written by join_bonafides
bonafide_fp = "/lustre/bio_running/new_basecaller/REDINET_TEST_30_07_2024/REDInet/Package/Results/a549/SRR12492043.SRR12492044.control.outTable_773331943.gz_vs_SRR12492025.SRR12492026.siADARs.outTable_302610513.gz.bonafide_final.tsv"
# annotated/filtered bona fide sites table for the same outTable pair
bonafide_anno_filt_fp = "/lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_773331943_outTable_302610513_bona_fide_sites_2.tsv"

# join the two tables; the call also displays the intermediate tables and
# writes bonafide_fp + ".anno.filt_refseq.tsv" to disk
couple1_join = join_bonafides(bonafide_fp, bonafide_anno_filt_fp)

Bonafide not-filted with REDItools info:


Unnamed: 0,wt_Region,wt_Position,wt_Reference,wt_Strand,wt_Coverage-q30,wt_MeanQ,"wt_BaseCount[A,C,G,T]",wt_AllSubs,wt_Frequency,ko_Region,...,gStrand,gCoverage-q30,gMeanQ,"gBaseCount[A,C,G,T]",gAllSubs,gFrequency,Class,Class_binary,wt_sample,ko_sample
0,chrX,216177,A,1,193,51.64,"[189, 0, 4, 0]",AG,0.02,chrX,...,1,47,36.77,"[47, 0, 0, 0]",-,0.00,Editing,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...
1,chrX,216207,A,1,188,52.04,"[184, 0, 4, 0]",AG,0.02,chrX,...,1,45,36.64,"[45, 0, 0, 0]",-,0.00,Editing,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...
2,chrX,216235,A,1,188,51.60,"[185, 0, 3, 0]",AG,0.02,chrX,...,1,31,37.39,"[31, 0, 0, 0]",-,0.00,Editing,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...
3,chrX,216241,A,1,198,52.23,"[190, 0, 8, 0]",AG,0.04,chrX,...,1,35,37.29,"[35, 0, 0, 0]",-,0.00,Editing,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...
4,chrX,216248,A,1,213,51.35,"[204, 0, 9, 0]",AG,0.04,chrX,...,1,41,36.59,"[41, 0, 0, 0]",-,0.00,Editing,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8828,chr8,145066886,A,1,2337,51.77,"[0, 0, 2337, 0]",AG,1.00,chr8,...,1,106,36.79,"[0, 0, 106, 0]",AG,1.00,Not-Editing,0,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...
8829,chr8,145665516,A,0,71,47.69,"[1, 0, 70, 0]",AG,0.99,chr8,...,0,116,36.78,"[0, 0, 116, 0]",AG,1.00,Not-Editing,0,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...
8830,chr8,145677011,A,0,126,47.74,"[0, 0, 126, 0]",AG,1.00,chr8,...,0,98,36.48,"[0, 0, 98, 0]",AG,1.00,Not-Editing,0,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...
8831,chr8,145693720,A,1,160,54.27,"[0, 0, 160, 0]",AG,1.00,chr8,...,1,133,35.52,"[0, 0, 133, 0]",AG,1.00,Not-Editing,0,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...


Bonafide annotated and filtered (NON-REP only Refseq):


Unnamed: 0,Region,Position,Class,RMSK-Rep,RMSK-Reg,RefSeq-Rep,RefSeq-Reg
0,chr1,879875,1,-,-,"transcript,exon,3UTR&3UTR,exon,transcript",SAMD11&NOC2L
1,chr1,888639,0,-,-,"transcript,CDS,exon",NOC2L
2,chr1,888659,0,-,-,"transcript,CDS,exon",NOC2L
3,chr1,949654,0,-,-,"transcript,CDS,exon",ISG15
4,chr1,979632,1,-,-,"transcript,CDS,exon",AGRN
...,...,...,...,...,...,...,...
8815,chrX,154349801,1,LINE,L1MD2,"transcript,3UTR,exon",BRCC3
8816,chrX,154456747,0,-,-,"transcript,CDS,exon",VBP1
8817,chrX,155252736,0,-,-,"exon,transcript",WASH6P
8818,chrX,155252758,0,-,-,"exon,transcript",WASH6P


Common sites annotated with REDItools info:


Unnamed: 0,wt_Region,wt_Position,wt_Reference,wt_Strand,wt_Coverage-q30,wt_MeanQ,"wt_BaseCount[A,C,G,T]",wt_AllSubs,wt_Frequency,ko_Region,...,Class_binary,wt_sample,ko_sample,Region,Position,Class_y,RMSK-Rep,RMSK-Reg,RefSeq-Rep,RefSeq-Reg
0,chrX,216241,A,1,198,52.23,"[190, 0, 8, 0]",AG,0.04,chrX,...,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,chrX,216241,1,SINE,AluSx1,"transcript,exon,3UTR",PLCXD1
1,chrX,216248,A,1,213,51.35,"[204, 0, 9, 0]",AG,0.04,chrX,...,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,chrX,216248,1,SINE,AluSx1,"transcript,exon,3UTR",PLCXD1
2,chrX,216249,A,1,211,51.49,"[208, 0, 3, 0]",AG,0.01,chrX,...,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,chrX,216249,1,SINE,AluSx1,"transcript,exon,3UTR",PLCXD1
3,chrX,216254,A,1,252,49.59,"[244, 0, 8, 0]",AG,0.03,chrX,...,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,chrX,216254,1,SINE,AluSx1,"transcript,exon,3UTR",PLCXD1
4,chrX,216260,A,1,264,50.41,"[245, 0, 19, 0]",AG,0.07,chrX,...,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,chrX,216260,1,SINE,AluSx1,"transcript,exon,3UTR",PLCXD1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8815,chr8,145066886,A,1,2337,51.77,"[0, 0, 2337, 0]",AG,1.00,chr8,...,0,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,chr8,145066886,0,-,-,"transcript,CDS,exon",GRINA
8816,chr8,145665516,A,0,71,47.69,"[1, 0, 70, 0]",AG,0.99,chr8,...,0,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,chr8,145665516,0,-,-,"transcript,CDS,exon",TONSL
8817,chr8,145677011,A,0,126,47.74,"[0, 0, 126, 0]",AG,1.00,chr8,...,0,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,chr8,145677011,0,-,-,transcript,CYHR1
8818,chr8,145693720,A,1,160,54.27,"[0, 0, 160, 0]",AG,1.00,chr8,...,0,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,chr8,145693720,0,-,-,"transcript,CDS,exon",KIFC2


Discared sites (via left join):


Unnamed: 0,wt_Region,wt_Position,wt_Reference,wt_Strand,wt_Coverage-q30,wt_MeanQ,"wt_BaseCount[A,C,G,T]",wt_AllSubs,wt_Frequency,ko_Region,...,Class_binary,wt_sample,ko_sample,Region,Position,Class_y,RMSK-Rep,RMSK-Reg,RefSeq-Rep,RefSeq-Reg
0,chrX,216177,A,1,193,51.64,"[189, 0, 4, 0]",AG,0.02,chrX,...,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,,,,,,,
1,chrX,216207,A,1,188,52.04,"[184, 0, 4, 0]",AG,0.02,chrX,...,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,,,,,,,
2,chrX,216235,A,1,188,51.6,"[185, 0, 3, 0]",AG,0.02,chrX,...,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,,,,,,,
1177,chr10,33176866,A,0,89,49.3,"[86, 0, 3, 0]",AG,0.03,chr10,...,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,,,,,,,
1255,chr10,75537313,A,1,76,56.67,"[69, 0, 7, 0]",AG,0.09,chr10,...,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,,,,,,,
1822,chr17,66043302,A,1,78,52.21,"[75, 0, 3, 0]",AG,0.04,chr17,...,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,,,,,,,
2820,chr19,18668650,A,1,106,60.35,"[103, 0, 3, 0]",AG,0.03,chr19,...,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,,,,,,,
3036,chr19,49588692,A,1,55,46.69,"[52, 0, 3, 0]",AG,0.05,chr19,...,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,,,,,,,
3461,chr1,16126655,A,0,146,56.97,"[143, 0, 3, 0]",AG,0.02,chr1,...,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,,,,,,,
3705,chr1,145117054,A,1,165,52.72,"[162, 0, 3, 0]",AG,0.02,chr1,...,1,SRR12492043.SRR12492044.control.outTable_77333...,SRR12492025.SRR12492026.siADARs.outTable_30261...,,,,,,,


Save on disk joined with annotation and filtering to : /lustre/bio_running/new_basecaller/REDINET_TEST_30_07_2024/REDInet/Package/Results/a549/SRR12492043.SRR12492044.control.outTable_773331943.gz_vs_SRR12492025.SRR12492026.siADARs.outTable_302610513.gz.bonafide_final.tsv.anno.filt_refseq.tsv


In [3]:
### Couple 2
# Per the file names: control runs SRR12492045/SRR12492046 vs siADARs runs
# SRR12492027/SRR12492028 (outTable_535670354 vs outTable_396704193), a549 folder.
# inputs
# bona fide sites table (*.bonafide_final.tsv); also the prefix of the file written by join_bonafides
bonafide_fp = "/lustre/bio_running/new_basecaller/REDINET_TEST_30_07_2024/REDInet/Package/Results/a549/SRR12492045.SRR12492046.control.outTable_535670354.gz_vs_SRR12492027.SRR12492028.siADARs.outTable_396704193.gz.bonafide_final.tsv"
# annotated/filtered bona fide sites table for the same outTable pair
bonafide_anno_filt_fp = "/lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_535670354_outTable_396704193_bona_fide_sites_2.tsv"

# join the two tables; the call also displays the intermediate tables and
# writes bonafide_fp + ".anno.filt_refseq.tsv" to disk
couple2_join = join_bonafides(bonafide_fp, bonafide_anno_filt_fp)

Bonafide not-filted with REDItools info:


Unnamed: 0,wt_Region,wt_Position,wt_Reference,wt_Strand,wt_Coverage-q30,wt_MeanQ,"wt_BaseCount[A,C,G,T]",wt_AllSubs,wt_Frequency,ko_Region,...,gStrand,gCoverage-q30,gMeanQ,"gBaseCount[A,C,G,T]",gAllSubs,gFrequency,Class,Class_binary,wt_sample,ko_sample
0,chrX,216177,A,1,194,51.60,"[188, 0, 6, 0]",AG,0.03,chrX,...,1,47,36.77,"[47, 0, 0, 0]",-,0.00,Editing,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...
1,chrX,216226,A,1,232,51.86,"[225, 0, 7, 0]",AG,0.03,chrX,...,1,41,36.37,"[41, 0, 0, 0]",-,0.00,Editing,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...
2,chrX,216238,A,1,231,52.09,"[228, 0, 3, 0]",AG,0.01,chrX,...,1,32,38.22,"[32, 0, 0, 0]",-,0.00,Editing,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...
3,chrX,216239,A,1,237,51.42,"[228, 0, 9, 0]",AG,0.04,chrX,...,1,31,37.87,"[31, 0, 0, 0]",-,0.00,Editing,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...
4,chrX,216241,A,1,229,51.96,"[221, 0, 8, 0]",AG,0.03,chrX,...,1,35,37.29,"[35, 0, 0, 0]",-,0.00,Editing,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9333,chr8,145150832,A,1,2357,49.45,"[1, 0, 2356, 0]",AG,1.00,chr8,...,1,101,34.99,"[0, 0, 101, 0]",AG,1.00,Not-Editing,0,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...
9334,chr8,145665516,A,0,96,47.65,"[3, 0, 93, 0]",AG,0.97,chr8,...,0,116,36.78,"[0, 0, 116, 0]",AG,1.00,Not-Editing,0,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...
9335,chr8,145677011,A,0,149,48.46,"[0, 0, 149, 0]",AG,1.00,chr8,...,0,98,36.48,"[0, 0, 98, 0]",AG,1.00,Not-Editing,0,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...
9336,chr8,145693720,A,1,157,57.26,"[0, 0, 157, 0]",AG,1.00,chr8,...,1,133,35.52,"[0, 0, 133, 0]",AG,1.00,Not-Editing,0,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...


Bonafide annotated and filtered (NON-REP only Refseq):


Unnamed: 0,Region,Position,Class,RMSK-Rep,RMSK-Reg,RefSeq-Rep,RefSeq-Reg
0,chr1,888639,0,-,-,"transcript,CDS,exon",NOC2L
1,chr1,888659,0,-,-,"transcript,CDS,exon",NOC2L
2,chr1,981931,0,-,-,"transcript,CDS,exon",AGRN
3,chr1,981995,1,-,-,"transcript,CDS,exon",AGRN
4,chr1,1018144,0,-,-,"3UTR,exon,transcript",C1orf159
...,...,...,...,...,...,...,...
9318,chrX,154456747,0,-,-,"transcript,CDS,exon",VBP1
9319,chrX,155171995,1,-,-,"transcript,exon,3UTR",VAMP7
9320,chrX,155252736,0,-,-,"exon,transcript",WASH6P
9321,chrX,155252758,0,-,-,"exon,transcript",WASH6P


Common sites annotated with REDItools info:


Unnamed: 0,wt_Region,wt_Position,wt_Reference,wt_Strand,wt_Coverage-q30,wt_MeanQ,"wt_BaseCount[A,C,G,T]",wt_AllSubs,wt_Frequency,ko_Region,...,Class_binary,wt_sample,ko_sample,Region,Position,Class_y,RMSK-Rep,RMSK-Reg,RefSeq-Rep,RefSeq-Reg
0,chrX,216239,A,1,237,51.42,"[228, 0, 9, 0]",AG,0.04,chrX,...,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,chrX,216239,1,SINE,AluSx1,"transcript,exon,3UTR",PLCXD1
1,chrX,216241,A,1,229,51.96,"[221, 0, 8, 0]",AG,0.03,chrX,...,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,chrX,216241,1,SINE,AluSx1,"transcript,exon,3UTR",PLCXD1
2,chrX,216247,A,1,248,50.85,"[233, 0, 15, 0]",AG,0.06,chrX,...,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,chrX,216247,1,SINE,AluSx1,"transcript,exon,3UTR",PLCXD1
3,chrX,216249,A,1,245,50.73,"[240, 0, 5, 0]",AG,0.02,chrX,...,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,chrX,216249,1,SINE,AluSx1,"transcript,exon,3UTR",PLCXD1
4,chrX,216254,A,1,273,49.57,"[266, 0, 7, 0]",AG,0.03,chrX,...,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,chrX,216254,1,SINE,AluSx1,"transcript,exon,3UTR",PLCXD1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9318,chr8,145150832,A,1,2357,49.45,"[1, 0, 2356, 0]",AG,1.00,chr8,...,0,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,chr8,145150832,0,-,-,"transcript,CDS,exon",CYC1
9319,chr8,145665516,A,0,96,47.65,"[3, 0, 93, 0]",AG,0.97,chr8,...,0,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,chr8,145665516,0,-,-,"transcript,CDS,exon",TONSL
9320,chr8,145677011,A,0,149,48.46,"[0, 0, 149, 0]",AG,1.00,chr8,...,0,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,chr8,145677011,0,-,-,transcript,CYHR1
9321,chr8,145693720,A,1,157,57.26,"[0, 0, 157, 0]",AG,1.00,chr8,...,0,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,chr8,145693720,0,-,-,"transcript,CDS,exon",KIFC2


Discared sites (via left join):


Unnamed: 0,wt_Region,wt_Position,wt_Reference,wt_Strand,wt_Coverage-q30,wt_MeanQ,"wt_BaseCount[A,C,G,T]",wt_AllSubs,wt_Frequency,ko_Region,...,Class_binary,wt_sample,ko_sample,Region,Position,Class_y,RMSK-Rep,RMSK-Reg,RefSeq-Rep,RefSeq-Reg
0,chrX,216177,A,1,194,51.6,"[188, 0, 6, 0]",AG,0.03,chrX,...,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,,,,,,,
1,chrX,216226,A,1,232,51.86,"[225, 0, 7, 0]",AG,0.03,chrX,...,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,,,,,,,
2,chrX,216238,A,1,231,52.09,"[228, 0, 3, 0]",AG,0.01,chrX,...,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,,,,,,,
1045,chr11,61566244,A,0,110,51.11,"[107, 0, 3, 0]",AG,0.03,chr11,...,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,,,,,,,
1290,chr10,75537313,A,1,72,50.79,"[69, 0, 3, 0]",AG,0.04,chr10,...,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,,,,,,,
1297,chr10,75538589,A,1,57,57.33,"[53, 0, 4, 0]",AG,0.07,chr10,...,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,,,,,,,
1508,chr17,6544347,A,1,147,48.06,"[144, 0, 3, 0]",AG,0.02,chr17,...,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,,,,,,,
1895,chr17,61920198,A,0,86,52.64,"[83, 0, 3, 0]",AG,0.03,chr17,...,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,,,,,,,
3221,chr19,48972610,A,1,148,48.66,"[145, 0, 3, 0]",AG,0.02,chr19,...,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,,,,,,,
3266,chr19,56165648,A,1,504,57.19,"[501, 0, 3, 0]",AG,0.01,chr19,...,1,SRR12492045.SRR12492046.control.outTable_53567...,SRR12492027.SRR12492028.siADARs.outTable_39670...,,,,,,,


Save on disk joined with annotation and filtering to : /lustre/bio_running/new_basecaller/REDINET_TEST_30_07_2024/REDInet/Package/Results/a549/SRR12492045.SRR12492046.control.outTable_535670354.gz_vs_SRR12492027.SRR12492028.siADARs.outTable_396704193.gz.bonafide_final.tsv.anno.filt_refseq.tsv


In [4]:
### Couple 3
# Per the file names: control runs SRR12492047/SRR12492048 vs siADARs runs
# SRR12492029/SRR12492030 (outTable_192318299 vs outTable_436061877), a549 folder.
# inputs
# bona fide sites table (*.bonafide_final.tsv); also the prefix of the file written by join_bonafides
bonafide_fp = "/lustre/bio_running/new_basecaller/REDINET_TEST_30_07_2024/REDInet/Package/Results/a549/SRR12492047.SRR12492048.control.outTable_192318299.gz_vs_SRR12492029.SRR12492030.siADARs.outTable_436061877.gz.bonafide_final.tsv"
# annotated/filtered bona fide sites table for the same outTable pair
bonafide_anno_filt_fp = "/lustrehome/pietrolucamazzacuva/filezilla-recas/tissues/independent_datasets/a549/outTable_192318299_outTable_436061877_bona_fide_sites_2.tsv"

# join the two tables; the call also displays the intermediate tables and
# writes bonafide_fp + ".anno.filt_refseq.tsv" to disk
couple3_join = join_bonafides(bonafide_fp, bonafide_anno_filt_fp)

Bonafide not-filted with REDItools info:


Unnamed: 0,wt_Region,wt_Position,wt_Reference,wt_Strand,wt_Coverage-q30,wt_MeanQ,"wt_BaseCount[A,C,G,T]",wt_AllSubs,wt_Frequency,ko_Region,...,gStrand,gCoverage-q30,gMeanQ,"gBaseCount[A,C,G,T]",gAllSubs,gFrequency,Class,Class_binary,wt_sample,ko_sample
0,chrX,216254,A,1,273,48.92,"[261, 0, 12, 0]",AG,0.04,chrX,...,1,37,37.54,"[37, 0, 0, 0]",-,0.00,Editing,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...
1,chrX,216269,A,1,313,50.32,"[304, 0, 9, 0]",AG,0.03,chrX,...,1,35,36.34,"[35, 0, 0, 0]",-,0.00,Editing,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...
2,chrX,216272,A,1,332,49.94,"[312, 0, 20, 0]",AG,0.06,chrX,...,1,35,37.43,"[35, 0, 0, 0]",-,0.00,Editing,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...
3,chrX,216273,A,1,331,49.74,"[320, 0, 11, 0]",AG,0.03,chrX,...,1,34,37.21,"[34, 0, 0, 0]",-,0.00,Editing,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...
4,chrX,216309,A,1,425,54.68,"[416, 0, 9, 0]",AG,0.02,chrX,...,1,35,35.94,"[35, 0, 0, 0]",-,0.00,Editing,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8830,chr8,144992361,A,0,1099,50.93,"[604, 0, 495, 0]",AG,0.45,chr8,...,0,79,34.58,"[33, 0, 46, 0]",AG,0.58,Not-Editing,0,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...
8831,chr8,145001031,A,0,926,50.92,"[513, 0, 413, 0]",AG,0.45,chr8,...,0,75,35.93,"[43, 0, 32, 0]",AG,0.43,Not-Editing,0,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...
8832,chr8,145677011,A,0,182,46.22,"[0, 0, 182, 0]",AG,1.00,chr8,...,0,98,36.48,"[0, 0, 98, 0]",AG,1.00,Not-Editing,0,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...
8833,chr8,145693720,A,1,157,55.40,"[0, 0, 157, 0]",AG,1.00,chr8,...,1,133,35.52,"[0, 0, 133, 0]",AG,1.00,Not-Editing,0,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...


Bonafide annotated and filtered (NON-REP only Refseq):


Unnamed: 0,Region,Position,Class,RMSK-Rep,RMSK-Reg,RefSeq-Rep,RefSeq-Reg
0,chr1,888639,0,-,-,"transcript,CDS,exon",NOC2L
1,chr1,888659,0,-,-,"transcript,CDS,exon",NOC2L
2,chr1,949654,0,-,-,"transcript,CDS,exon",ISG15
3,chr1,979099,1,-,-,"transcript,CDS,exon",AGRN
4,chr1,981931,0,-,-,"transcript,CDS,exon",AGRN
...,...,...,...,...,...,...,...
8820,chrX,154283100,1,-,-,"transcript,exon,3UTR",FUNDC2
8821,chrX,154350220,1,SINE,AluJr,"transcript,3UTR,exon",BRCC3
8822,chrX,155252736,0,-,-,"exon,transcript",WASH6P
8823,chrX,155252758,0,-,-,"exon,transcript",WASH6P


Common sites annotated with REDItools info:


Unnamed: 0,wt_Region,wt_Position,wt_Reference,wt_Strand,wt_Coverage-q30,wt_MeanQ,"wt_BaseCount[A,C,G,T]",wt_AllSubs,wt_Frequency,ko_Region,...,Class_binary,wt_sample,ko_sample,Region,Position,Class_y,RMSK-Rep,RMSK-Reg,RefSeq-Rep,RefSeq-Reg
0,chrX,216273,A,1,331,49.74,"[320, 0, 11, 0]",AG,0.03,chrX,...,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,chrX,216273,1,SINE,AluSx1,"transcript,exon,3UTR",PLCXD1
1,chrX,216309,A,1,425,54.68,"[416, 0, 9, 0]",AG,0.02,chrX,...,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,chrX,216309,1,SINE,AluSx1,"transcript,exon,3UTR",PLCXD1
2,chrX,216326,A,1,406,57.01,"[403, 0, 3, 0]",AG,0.01,chrX,...,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,chrX,216326,1,SINE,AluSx1,"transcript,exon,3UTR",PLCXD1
3,chrX,216330,A,1,419,57.22,"[403, 0, 16, 0]",AG,0.04,chrX,...,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,chrX,216330,1,SINE,AluSx1,"transcript,exon,3UTR",PLCXD1
4,chrX,216336,A,1,431,56.97,"[422, 0, 9, 0]",AG,0.02,chrX,...,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,chrX,216336,1,SINE,AluSx1,"transcript,exon,3UTR",PLCXD1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8820,chr8,144992361,A,0,1099,50.93,"[604, 0, 495, 0]",AG,0.45,chr8,...,0,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,chr8,144992361,0,-,-,"exon,transcript,CDS",PLEC
8821,chr8,145001031,A,0,926,50.92,"[513, 0, 413, 0]",AG,0.45,chr8,...,0,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,chr8,145001031,0,-,-,"transcript,CDS,exon",PLEC
8822,chr8,145677011,A,0,182,46.22,"[0, 0, 182, 0]",AG,1.00,chr8,...,0,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,chr8,145677011,0,-,-,transcript,CYHR1
8823,chr8,145693720,A,1,157,55.40,"[0, 0, 157, 0]",AG,1.00,chr8,...,0,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,chr8,145693720,0,-,-,"transcript,CDS,exon",KIFC2


Discared sites (via left join):


Unnamed: 0,wt_Region,wt_Position,wt_Reference,wt_Strand,wt_Coverage-q30,wt_MeanQ,"wt_BaseCount[A,C,G,T]",wt_AllSubs,wt_Frequency,ko_Region,...,Class_binary,wt_sample,ko_sample,Region,Position,Class_y,RMSK-Rep,RMSK-Reg,RefSeq-Rep,RefSeq-Reg
0,chrX,216254,A,1,273,48.92,"[261, 0, 12, 0]",AG,0.04,chrX,...,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,,,,,,,
1,chrX,216269,A,1,313,50.32,"[304, 0, 9, 0]",AG,0.03,chrX,...,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,,,,,,,
2,chrX,216272,A,1,332,49.94,"[312, 0, 20, 0]",AG,0.06,chrX,...,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,,,,,,,
122,chrX,40440213,A,1,138,42.3,"[135, 0, 3, 0]",AG,0.02,chrX,...,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,,,,,,,
1172,chr11,93917323,A,1,128,51.74,"[124, 0, 4, 0]",AG,0.03,chr11,...,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,,,,,,,
2281,chr15,40687855,A,1,50,48.24,"[46, 0, 4, 0]",AG,0.08,chr15,...,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,,,,,,,
2909,chr19,36605791,A,0,671,49.13,"[667, 0, 4, 0]",AG,0.01,chr19,...,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,,,,,,,
3082,chr19,50640340,A,0,311,49.35,"[308, 0, 3, 0]",AG,0.01,chr19,...,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,,,,,,,
5607,chr1,16126621,A,0,175,57.59,"[172, 0, 3, 0]",AG,0.02,chr1,...,1,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,,,,,,,
6634,chrY,21153609,A,0,5083,54.07,"[0, 0, 5083, 0]",AG,1.0,chrY,...,0,SRR12492047.SRR12492048.control.outTable_19231...,SRR12492029.SRR12492030.siADARs.outTable_43606...,,,,,,,


Save on disk joined with annotation and filtering to : /lustre/bio_running/new_basecaller/REDINET_TEST_30_07_2024/REDInet/Package/Results/a549/SRR12492047.SRR12492048.control.outTable_192318299.gz_vs_SRR12492029.SRR12492030.siADARs.outTable_436061877.gz.bonafide_final.tsv.anno.filt_refseq.tsv
