Skip to content

Commit

Permalink
Merge pull request yhoogstrate#42 from yhoogstrate/3_0_6_arriba
Browse files Browse the repository at this point in the history
changes to support arriba
  • Loading branch information
yhoogstrate committed May 6, 2020
2 parents d382a92 + 1e103a0 commit dac3af2
Show file tree
Hide file tree
Showing 7 changed files with 103 additions and 31 deletions.
4 changes: 4 additions & 0 deletions Changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
2020-05-06 Youri Hoogstrate

* Version 3.0.6: Support for ARRIBA - thanks to Alexandre Rouette

2016-07-11 Youri Hoogstrate

* Version 3.0.5: Changes in deployment & two fixes in export function
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,7 @@ FuMa supports the following file formats:
| Fusion Catcher | final-list_cand*.txt | fusion-catcher_final
| FusionMap | | fusionmap
| JAFFA | jaffa_results.csv | jaffa
| ARRIBA | | arriba
| Trinity + GMAP | | trinity-gmap
| OncoFuse | | oncofuse
| RNA STAR | Chimeric.out.junction | rna-star_chimeric
Expand Down
7 changes: 7 additions & 0 deletions bin/fuma
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,13 @@ if __name__ == "__main__":
except Exception as e:
raise Exception("Sample '"+sample_name+ "' could not be parsed as filetype: "+input_format+"\n\n"+str(e))

# ARRIBA
elif(input_format_stripped in ["arriba"]):
try:
samples[sample_name] = ReadArribaResults(sample_filename,sample_name)
except Exception as e:
raise Exception("Sample '"+sample_name+ "' could not be parsed as filetype: "+input_format+"\n\n"+str(e))

# 1-2-3-SV
elif(input_format_stripped in ["123sv"]):
try:
Expand Down
55 changes: 55 additions & 0 deletions fuma/Readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1539,3 +1539,58 @@ def parse_path(self,path_chunk):
m = re.search(self.regexes[key],line)
keys[key] = m.groups()
return keys






class ReadArribaResults(FusionDetectionExperiment):
    """Load fusion calls from a tab-delimited ARRIBA result table.

    The first non-empty line of the file is treated as the header and is
    skipped; every following non-empty line is turned into one ``Fusion``
    and registered on this experiment through ``add_fusion``.

    The breakpoints are read from two fixed, 0-based columns
    (``parse_left_column`` and ``parse_right_column``), each expected to
    look like ``<chromosome>:<position>``.
    """

    # 0-based indices of the two breakpoint columns in a data row
    parse_left_column = 4
    parse_right_column = 5

    logger = logging.getLogger("FuMa::Readers::ReadArribaResults")

    def __init__(self, arg_filename, name):
        """Remember the input path and parse it right away.

        arg_filename -- path of the ARRIBA result file to read
        name         -- dataset name, forwarded to the base class and
                        attached to every fusion created by this reader
        """
        FusionDetectionExperiment.__init__(self, name)
        self.filename = arg_filename
        self.parse()

    def parse(self):
        """Walk through the file and hand every data row to ``parse_line``.

        ``self.i`` counts the non-empty lines seen so far; it doubles as
        the header detector (count 0) and as the identifier given to each
        fusion in ``parse_line``.
        """
        self.logger.info("Parsing file: " + str(self.filename))

        self.i = 0
        with open(self.filename, "r") as handle:
            for raw_line in handle:
                record = raw_line.strip()
                if not record:
                    # blank lines are ignored and do not advance the counter
                    continue

                # the very first non-empty line is the column header
                if self.i > 0:
                    self.parse_line(record)
                self.i += 1

        self.logger.debug("Parsed fusion genes: " + str(len(self)))

    def parse_line(self, line):
        """Convert one tab-separated data row into a ``Fusion`` and add it.

        line -- a single data row (surrounding whitespace is stripped here)

        Raises ``IndexError`` when the row has too few columns or when a
        breakpoint field contains no ``:`` separator.
        """
        columns = line.strip().split("\t")

        # each breakpoint field is "<chromosome>:<position>", optionally
        # wrapped in double quotes
        left_breakpoint = columns[self.parse_left_column].strip('"').split(':')
        right_breakpoint = columns[self.parse_right_column].strip('"').split(':')

        # The two None arguments are presumably the left/right strands,
        # deliberately left unset - TODO confirm against Fusion.__init__.
        # NOTE(review): the final False was annotated in the original as
        # "acceptor-donor strand is not preserved by this tool"; verify
        # that this really holds for ARRIBA output.
        fusion = Fusion(
            left_breakpoint[0],
            right_breakpoint[0],
            left_breakpoint[1],
            right_breakpoint[1],
            None,
            None,
            self.name,
            str(self.i),
            False
        )
        self.add_fusion(fusion)


2 changes: 1 addition & 1 deletion fuma/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<http://epydoc.sourceforge.net/manual-fields.html#fields-synonyms>
"""

__version_info__ = ('3', '0', '5')
__version_info__ = ('3', '0', '6')
__version__ = '.'.join(__version_info__) if (len(__version_info__) == 3) else '.'.join(__version_info__[0:3])+"-"+__version_info__[3]
__author__ = 'Youri Hoogstrate'
__author_email__ = '_@.'
Expand Down
26 changes: 26 additions & 0 deletions tests/data/test_Readers.TestReadARRIBAfile.test_01.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#gene1 gene2 strand1(gene/fusion) strand2(gene/fusion) breakpoint1 breakpoint2 site1 site2 type direction1 direction2 split_reads1 split_reads2 discordant_mates coverage1 coverage2 confidence closest_genomic_breakpoint1 closest_genomic_breakpoint2 filters fusion_transcript reading_frame peptide_sequence read_identifiers
KMT2A MLLT10 +/+ +/+ 11:118482495 10:21651673 splice-site splice-site translocation downstream upstream 13 2 0 99 302 high . . duplicates(31) ATCCAAACAGGCCACCACTCCAGCTTCCAGGAAGTCAAGCAAGCAGGTCTCCCAGCCAGCACTGGTCATCCCGCCTCAGCCACCTACTACAGGACCGCCAAGAAAAGAAGTTCCCAAAACCACTCCTAGTGAGCCCAAGAAAAAGCAGCCTCCACCACCAGAATCAG___GTCCAGAGCAGAGCAAACAGAAAAAAGTGGCTCCCCGCCCAAGTATCCCTGTAAAACAAAAACCAAAAGAAAAG|AAATATAAAGAGAAGGACAAACACAAACAGAAACACAAGAAGCAGCCAGAACCATCACCTGCATTGGTTCCATCCTTGACTGTTACTACAGAAAAA___ACTTATACAAGCACTAGCAAC in-frame SKQATTPASRKSSKQVSQPALVIPPQPPTTGPPRKEVPKTTPSEPKKKQPPPPESGPEQSKQKKVAPRPSIPVKQKPKEK|KYKEKDKHKQKHKKQPEPSPALVPSLTVTTEKTYTSTS .
KMT2A MLLT10 +/+ +/+ 11:118482092 10:21651673 splice-site splice-site translocation downstream upstream 1 0 0 194 302 low . . . CACTGGTCATCCCGCCTCAGCCACCTACTACAGGACCGCCAAGAAAAGAAGTTCCCAAAACCACTCCTAGTGAGCCCAAGAAAAAGCAGCCTCCACCACCAGAATCAG|AAATATAAAGAGAAGGACAAACACAAACAGAAACACAAGAAGCAGCC out-of-frame LVIPPQPPTTGPPRKEVPKTTPSEPKKKQPPPPES|ei* .
MLLT10 RP11-460I19.2(14723),RNF212(12555) +/+ ./+ 10:21538912 4:1043695 splice-site intergenic translocation downstream upstream 1 7 2 138 42 high . . duplicates(26) ACCCGCTGGTTTATTGCGACGGGCACGGCTGCAGCGTCGCGGTGCATCAAG___CTTGCTATGGCATTGTTCAAGTACCCACTGGACCaTGGTTTTGCAGGAAATGTGAATCTCAGGAGAGAGCAGCCAGAGTG|CTCCTGCCAATTCCAACACTGAGAGTCTGTGTTTGGACGCCTCCAGTGACCACTGGCTCCGCGAGTGATGACTCACACCCATCACTCATCCCCTTCCTGAGTCAGCCGCGGGACCCTCTGCAGGGAGGCGTGCGAGTCATCGAG___AGAACAGCATTATTAGTGGC out-of-frame PLVYCDGHGCSVAVHQACYGIVQVPTGPWFCRKCESQERAARV|llpiptlrvcvwtppvttgsasddshpslipflsqprdplqggvrviertallv .
RP11-397P14.3 AKR1E2 -/+ +/+ 10:4889923 10:4830675 exon splice-site duplication/3'-3' downstream upstream 5 2 2 60 445 high . . duplicates(37) AGAATATCCAG___GCTCTCTTTTTGCCTGCTGCCATCCATGTAAGACGCGACTTGTTTCTCCTTGCCTTCTGTCATGATTGTGAGGCTTCCCCAGACACTTGGAACT___TGCCGTTGGCAATCTTCTTTCGGATGGCCTGCCCGATGCCTTCTTCATTCAG|GCTTCTCCAGGGAAAGTGACCGAGGCAGTGAAAGAGGCCATTGACGCAGGGTACCGGCACTTCGACTGTGCTTACTTTTACCACAATGAGAGGGAGGTTGGAGCAGGGATCCGTTGCAAGATCAAGGAAGGCGCTGTAAGACGGGAG . . .
KANSL1 RP11-707O23.1(5132),RP11-707O23.5(2368) -/- ./- 17:46170855 17:45598501 splice-site intergenic deletion upstream downstream 1 2 1 470 90 high . . duplicates(11) AGAGATTTACAGCTAGTGGCATAGCCAACTTGAGGTGCAGTGAACAGGCATTTGATTCAGATGTCACTGACAGTAGTTCAGGAGGGGAGTCTGATATTGAAGAGGAAGAACTGACCAGAGCTGATCCCGAGCAGCGTCATGTACCCCT|AATTCTCAATCGCAATCCTCTGACAACTGTTGAAGATCCGTATCTCTTTAAATTACTGGCATTAAAATATCt___AGACGTGGGAACAACGCAAGTCCCACTTACAACACTTAAGAACATTCTCATG out-of-frame RFTASGIANLRCSEQAFDSDVTDSSSGGESDIEEEELTRADPEQRHVPl|ilnrnplttvedpylfkllalkyldvgttqvplttlknil .
HELLPAR C12orf48 +/+ +/+ 12:102205152 12:102148230 exon splice-site duplication downstream upstream 0 1 3 51 289 high . . duplicates(27) CTCTTGTTTTGCTCCAAGAATTGGAGTGG|CACAGTGGAGAATTTACAGTCTCTCTCAGTGATGTTTTATTGACATGGAAATACTTGCTCCATGAGAAATTG...TGGACGTGACTGACCATTATGAGGACGTTAGGAAGATTTATGATGATTTCTTGAAGAACAGTAATATGTTAGATCTGATTGATGTTTATCAAAAATGTAGG . . .
CDH12P3(63905),RP11-1415C14.4(377) GUSBP3 ./- -/- 5:70196878 5:69642289 intergenic splice-site deletion upstream downstream 1 2 0 20 76 high . . duplicates(9) CCAAGGATGAAGGAAAAACTGGACCTCATTATGGATTTACTTTTGGGATACACTCATTATTCCAGAGGAGGGTAAAAGGCTGAGAAGCTTAAG|GATGGTGATTGCTCACACCAAAGCCTTGGACCCCTCCCAGCCTGTGACCTTTGTGACCAACTCCACCTACGCAGCAGACAAGGGGG . . .
RP11-524N5.1 CTC-436K13.2(38552),RP11-524N5.1(3318) -/+ ./+ 5:158359646 5:158317365 intron intergenic duplication downstream upstream 27 14 3 104 470 medium . . duplicates(134) CGAGAAGGGAGGCAGATCAG___GTTTTTCCTTTTACCAGTGTGGACTAGAGGAAAAGTGATCGACAAAGAGGCAAGGAGAGAATGACAAACAGGAG___...CTTACATTCTAGCCAGGAAAAATGGATAGAAATAAGAAAATGAATACATGAATGATCTCAGAGAAGG___GTATATAGTATTGAGCAATGATAGCATTTTGCTTAAGGAAACTGACTGCATACAGTTGTTCCCAAG|ATGAGAAAACTGAGTCCCAAGGGGTTAAGTGATTTGCTCAAGGCAATGGCAGAGGAGGAACCAGGACCCAATTTTCTCAACTGTGAGCCAAG___GTTTTTCCTTTTACCAGTGTGGACTAGAGGAAAAGTGATCGACAAAGAGGCAAGGAGAGAATGACAAACAGGAG . . .
RP11-392E22.9 ANKRD18A -/- -/- 9:38575476 9:38588663 splice-site splice-site duplication upstream downstream 14 10 1 208 980 medium . . duplicates(84),small_insert_size(3) GGAAGCCTTTGCAGGAGCAGTGAAAGCTAACAATTCCATGTCAAAAAAATTAATGAA___ATCGGATAAGAAAATAGCAGTGATCAGCACCAAGCTCTTTACGGAGAAACAGCGGATGAAATATTTTCTCAGCACTCTTCCTACAAGGCCAGAACCAGAGTTACCTTGTGTTGAAAATCTTAATAGTATAGAACTCAACAGAAAATATATTCCCAAAACGGCCATAAGAATTCCTACTTCAAACCCACAGACTTCAAATAACTGCAAGAACTTCTTGACTGAG|CCTGAAGAAAAACATGAAGAATTCAGAAAACTTTTTGAATTAATATCATTACTGAACTATACTGCGGACCAAATAAGAAAGAAAAATCGTGAATTAGAAGAAGAGGCAACTGG___ATATAAGAAATGCCTAGAAATGACAATAAATATGTTAAATGCATTTGCAAATGAGGACTTCAGTTGCCATGGAGACTTAAATACAGACC . . .
RP11-392E22.9 ANKRD18A -/- -/- 9:38575476 9:38586312 splice-site splice-site duplication upstream downstream 6 12 0 208 262 medium . . duplicates(40) AGCAGTGATCAGCACCAAGCTCTTTACGGAGAAACAGCGGATGAAATATTTTCTCAGCACTCTTCCTACAAGGCCAGAACCAGAGTTACCTTGTGTTGAAAATCTTAATAGTATAGAACTCAACAGAAAATATATTCCCAAAACGGCCATAAGAATTCCTACTTCAAACCCACAGACTTCAAATAACTGCAAGAACTTCTTGACTGAG|ATATAAGAAATGCCTAGAAATGACAATAAATATGTTAAATGCATTTGCAAATGAGGACTTCAGTTGCCATGGAGACTTAAATACAGACC out-of-frame AVISTKLFTEKQRMKYFLSTLPTRPEPELPCVENLNSIELNRKYIPKTAIRIPTSNPQTSNNCKNFLTE|i* .
RP11-392E22.9 ANKRD18A -/- -/- 9:38575476 9:38578148 splice-site splice-site duplication upstream downstream 0 1 0 208 212 low . . duplicates(5) CAAATAACTGCAAGAACTTCTTGACTGAG|TTTAATGATCTTGTGGCCGAGAAGGAAGCTGTGTCTTCAGAATGTGTCAATTTGGCCAAAGACAATGAAGTTCTTCATCAGGAGTTATTATCTATGAGAAATGTACAAGAG in-frame NNCKNFLTE|FNDLVAEKEAVSSECVNLAKDNEVLHQELLSMRNVQ .
RNU6-3P SNORA71B -/- -/- X:141118029 20:38425322 splice-site exon translocation upstream downstream 25 0 0 866 3 medium . . duplicates(69) AGCACATATACTAAAATTGGAACGATACAGAGAAGATTAGCATGGCCCCTGCGCAAGGATGACACGCAAATTCGTGAAGCGTTCCATATTTTTT|CGAAAGTGATCGTGGGCTGCCTTTGCCCTGGTCATTGATAGTGCAGGGAGAGGAATCAATGAAAGCGCTTCCCCGTGTTTGAAGG . . .
RNU6-3P SNORA71D -/- -/- X:141118029 20:38433989 splice-site exon translocation upstream downstream 19 0 0 866 13 medium . . duplicates(49) CAGCACATATACTAAAATTGGAACGATACAGAGAAGATTAGCATGGCCCCTGCGCAAGGATGACACGCAAATTCGTGAAGCGTTCCATATTTTTT|CGAAAGTGATCGTGGGCTGCCTGTGCCCTGGTCATTGATAGTGCAGGGAGAG . . .
RNU6-9 SNORA20 +/+ -/- 19:893592 6:159780371 splice-site exon translocation downstream downstream 17 0 0 322 62 medium . . duplicates(62) CTTCGGCAGCACATATACTAAAATTGGAACGATACAGAGAAGATTAGCATGGCCCCTGCGCAAGGATGACACGCAAATTCGTGAAGCGTTCCATATTTTTTT|ATTTGCTGCTTGTAGTCTCACAGTGATACGAGCAGTTATACGCATGGGATAAAATAACATTGGGC . . .
OR10Z1 CTD-2341M24.1(92506),RP11-26L16.1(110092) +/- ./- 1:158611080 14:86222284 3'UTR intergenic translocation upstream downstream 0 2 1 1155 10 medium . . duplicates(9) CGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGCTTGTGTTTGTGTGCATATTACATTTATTGTAGGATCT?AAAAAATCTCAAG|TTTGGAAGGATGAGAAGGC...AAGACTAAGTAAGAACATGTATACAGGAAGCAATAAAGTATTGACAAGCCAATAATCATGACTGAGGGACAAATGAGATAACCCAAAGTCCAGCTCACATC . . .
ANKRD18A ANKRD18A -/- -/- 9:38575476 9:38588663 splice-site splice-site duplication/non-canonical_splicing upstream downstream 14 10 1 208 980 low . . duplicates(84) GGAAGCCTTTGCAGGAGCAGTGAAAGCTAACAATTCCATGTCAAAAAAATTAATGAA___ATCGGATAAGAAAATAGCAGTGATCAGCACCAAGCTCTTTACGGAGAAACAGCGGATGAAATATTTTCTCAGCACTCTTCCTACAAGGCCAGAACCAGAGTTACCTTGTGTTGAAAATCTTAATAGTATAGAACTCAACAGAAAATATATTCCCAAAACGGCCATAAGAATTCCTACTTCAAACCCACAGACTTCAAATAACTGCAAGAACTTCTTGACTGAG|CCTGAAGAAAAACATGAAGAATTCAGAAAACTTTTTGAATTAATATCATTACTGAACTATACTGCGGACCAAATAAGAAAGAAAAATCGTGAATTAGAAGAAGAGGCAACTGG___ATATAAGAAATGCCTAGAAATGACAATAAATATGTTAAATGCATTTGCAAATGAGGACTTCAGTTGCCATGGAGACTTAAATACAGACC in-frame EAFAGAVKANNSMSKKLMKSDKKIAVISTKLFTEKQRMKYFLSTLPTRPEPELPCVENLNSIELNRKYIPKTAIRIPTSNPQTSNNCKNFLTE|PEEKHEEFRKLFELISLLNYTADQIRKKNRELEEEATGYKKCLEMTINMLNAFANEDFSCHGDLNTD .
WASHC2A GLCCI1 +/+ +/+ 10:50110138 7:8086902 CDS 3'UTR translocation downstream upstream 0 0 4 74 402 low . . duplicates(3) GCAAAAGCCTCCGAGCTCTCCAAAAAGAAAGCATCTGCCCTGTTGTTCAGCAGTGATGAGGAG___GACCAGTGGAATATTCCTGCTTCACAGACCCACTTAGC...|...ATGGAAAATACTGTAATTCAGGATTATGTTTACAATTGATCCAGGTGTTTGTTTCTAACTTCTGTAATACATACAATGCAAAAAAAAAAAAAAAAAAATGGC . . .
COX7C RP11-477B16.2(107232),RP11-74A12.2(91794) +/+ ./+ 5:86620690 13:94868247 3'UTR intergenic translocation downstream upstream 0 0 4 5370 13 low . . duplicates(2) GCATTTGCTACACCCTTCCTTGTAGTAAGACACCAACTGCTTAAAACATAAGGATGTTTCAGTTCCTCCATTTAACAG___ATATGAAGAGCATTTTAAGAGGT...|...AACAAGTGCTGACAAGGATGTGTAGAAATTGAAACCCTCATGCATTCCTGGTGGGAATGCAAAATGGTGCATTCACTGCAAACCAAAAAAAAAAAAAAAGTTT . . .
CMTM6 AC093113.1(165002),RP11-638D14.1(79189) -/- ./+ 3:32483823 2:146754312 3'UTR intergenic translocation upstream upstream 0 0 4 4080 17 low . . . CCTGCATTGTGGTGCCTGAGCCCTGGCAGAAGCTCTTGTAAAATTTGTTAATTGTTTAAACCACTTCTTTTGGAGAGCAAGGGGAAGGTCAAGAAGGCAGT...|...ATgGAGAAGTAGTGGGAGAAAAAAAGTTCAAATGATAGAAGTTGGCAAATGCCAATCTAAGGATTTCAAATTTATCTTTTTTTTTTTTTTTTTTTTTT . . .
PYURF KDM6A(32878),CXorf36(2894) -/- ./+ 4:88521038 X:45145480 3'UTR intergenic translocation upstream upstream 0 0 3 186 8 low . . duplicates(1) CGAATGTGATATAAAACCACATAATCAAATAGAAACTTCATGTACTTACAAAAACTGAGTTTGTAAAATTACCTTCATTTCTTTGACATTAAATGCTTATA...|...TACAAAAAAATTTTAAAAAATTAGCCAGGCGTGGTGCTACCACTGCACTCCAGCCTGGACAAAGCCCTGTCTCcAAAAAAAAAAAAAAAAAAAAGAGTG . . .
AC006369.3(15150),AC010878.3(54719) CDC42EP3 ./- -/- 2:37762196 2:37719944 intergenic splice-site deletion/read-through upstream downstream 1 2 0 13 14 low . . duplicates(10) CACCGTGATCCTTAACCAGAAGTCATTTTGTGCTCTTTCAACTGCATTCTGCAAATTTGTATTCCACAGTGCAGGAAAGATC|GCAGAAACGAGTAATGTGAGAGCCTGCAATTGTAATCTTCATAAAGTATATCACATACCGTTCAAAAACATCAAGTGGCATGTTAAATAAGCTTTTGGAGAATAAGTTGAGCGTGAC . . .
SMARCA5 RP1-241P17.1(12064),RP11-761E20.1(62157) +/+ ./+ 4:143546801 X:115855114 CDS intergenic translocation downstream upstream 0 0 3 5123 1 low . . . AACAGCTTAAAATTGATGAAGCTGAATCCCTTAATGATGAAGAGTTAGAGGAAAAAGAGAAGCTTCTAACACAG___GGATTTACCAATTGGAATAAGAGAGA...|...TTcTGAGtgTGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAAAGAGAAAGAGAGAGAGAAAGAGAAAGAGACAGAGAAAAGAAACTATGTTG . . .
CKAP4 SGCB -/- -/- 12:106238010 4:52031703 3'UTR intron translocation upstream downstream 0 3 0 207 8 low . . . CTGAGCTGTATTGTTCTTTAATGGCTGTCTTGCCCTTCCAAAAAAAATTGAAAA|CTCCTAAACCATGGAAAAAATAGAAACTCATGGAATAGAGAAAATGC...TCAATGAAACAAGAATAACATGCCATTTAAAAGGCACAAGCCAGGCACAGTGGCTCATGCCTGATAAAGTGAGACCCTGTCTCTACCAAAAAAAAAAAAAAT . . .
CTD-2013M15.1(4379018),EMB(121204) EMB ./- -/- 5:50274988 5:50410965 intergenic splice-site duplication upstream downstream 1 2 0 83 16723 low . . duplicates(6) TACTAAAGGCTTACTATTGGTAAGACCTATAGAACAGTGCGTTTGGAAGCGGACTCACCcAGCATTCTATTTAATCACTCTGTAGAAGCTAAG|GTTCACCATCATTAATAGCAAACAAATGGGAAGTTATTCTTGTTTCTTTCGAGAGGAAAAGGAACAAAGGGGAACATTTAATTTCAAAG___TCCCTGAACTTCATG . . .
KIAA1586(4626),ZNF451(26979) KIAA1586 ./+ +/+ 6:57059865 6:57052686 intergenic splice-site duplication downstream upstream 1 0 0 16 373 low . . duplicates(3) TAAACAGACTCAAAGAAG___GCATTTTAAACTGAATAGGTCCAAAGCTGAATTTATTCATCTCTCATCATATCTGTTTTGAAAAATGGCCCAACCATTTCCCTTGATGCTCAAACTAGAAAG|ATTCTGTTTCCTAAAATGCCAAAACGACAG . . .
39 changes: 9 additions & 30 deletions tests/test_Readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from fuma.Readers import ReadSOAPFuseTranscripts
from fuma.Readers import ReadEricScriptResultsTotal
from fuma.Readers import ReadJaffaResults
from fuma.Readers import ReadArribaResults


class TestReadChimeraScanAbsoluteBEDPE(unittest.TestCase):
Expand Down Expand Up @@ -225,41 +226,19 @@ def test_01(self):
self.assertEqual(fusions[1].right_strand , STRAND_FORWARD )


class TestReadJaffaResults(unittest.TestCase):
class TestReadArribaResults(unittest.TestCase):
def test_01(self):
""" Tests whether files of input format from SOAPFusion can
be parsed
Fusion (from dataset 'test'): chr20:46365686(?)<->chr20:47538547(?)
Fusion (from dataset 'test'): chr17:59445688(?)<->chr20:49411710(?)
Fusion (from dataset 'test'): chr17:37793484(?)<->chr20:53259997(?)
Fusion (from dataset 'test'): chr17:57917129(?)<->chr17:57992064(?)
"""
fusions = ReadArribaResults("tests/data/test_Readers.TestReadARRIBAfile.test_01.txt","test")
self.assertEqual(len(fusions) , 25)

fusions = ReadJaffaResults("tests/data/test_Readers.TestReadJaffaResults.test_01.txt","test")

self.assertEqual(len(fusions) , 4)

self.assertEqual(fusions[0].get_left_chromosome(True) , 'chr20')
self.assertEqual(fusions[0].get_right_chromosome(True) , 'chr20')
self.assertEqual(fusions[0].left_break_position , 46365686)
self.assertEqual(fusions[0].right_break_position , 47538547)
self.assertEqual(fusions[0].get_left_chromosome(True) , 'chr10')
self.assertEqual(fusions[0].get_right_chromosome(True) , 'chr11')
self.assertEqual(fusions[0].left_break_position , 21651673)
self.assertEqual(fusions[0].right_break_position , 118482495)
self.assertEqual(fusions[0].left_strand , None)
self.assertEqual(fusions[0].right_strand , None)
self.assertEqual(fusions[0].acceptor_donor_direction , None)

#dataset 'test'): chr17:59445688(?)<-chr20:49411710(?)
self.assertEqual(fusions[1].get_left_chromosome(True) , 'chr17')
self.assertEqual(fusions[1].get_right_chromosome(True) , 'chr20')
self.assertEqual(fusions[1].left_break_position , 59445688)
self.assertEqual(fusions[1].right_break_position , 49411710)
self.assertEqual(fusions[1].left_strand , None)
self.assertEqual(fusions[1].right_strand , None)
self.assertEqual(fusions[0].acceptor_donor_direction , None)

# @todo
# comparing 2x test read jaffa results should give a exception:
# raise Exception("A fusion gene without an annotated acceptor-donor direction was used for acceptor-donor-order-specific-matching.\n\n"+fusion_1.__str__()+"\n"+fusion_2.__str__())



def main():
Expand Down

0 comments on commit dac3af2

Please sign in to comment.