Permalink
Browse files

Added cutting contigs by minimum length (cut_min_len)

	modified:   fatool/fa.py
	modified:   fatool/tests/test_fa.py
  • Loading branch information...
1 parent 3f0ed87 commit 8d73b4acfcaebf57ab6004ba150cfebca03bf78e @blazejmarciniak blazejmarciniak committed Jan 29, 2017
Showing with 29 additions and 19 deletions.
  1. +12 −5 fatool/fa.py
  2. +17 −14 fatool/tests/test_fa.py
View
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
-import re
+import re #gex as re
import math
from fatool import Sequence
import logging
@@ -51,11 +51,10 @@ def load_from_file(file):
@staticmethod
def load_content(content):
- #print content
- nc = content.split('>')
+ ncs = re.findall(re.compile('(?=(^>[\S\s]+?)(^>|\Z))',re.M), content)
contigs_list = []
- for r in nc[1:]:
- contigs_list.append(Sequence('>'+r.split('\n', 1)[0].rstrip(), re.sub('^>.*\n', '', '>'+r.rstrip())))
+ for r in ncs:
+ contigs_list.append(Sequence(r[0].split('\n', 1)[0].rstrip(), re.sub('^>.*\n', '', r[0].rstrip())))
return contigs_list
def write(self, fafile):
@@ -127,6 +126,14 @@ def remove(self, contigs_name_list):
if not r.name in contigs_name_list:
new_contig_list.append(r)
return Fa(new_contig_list, 'rem_'+self.name)
+
+ def cut_min_len(self, min_len):
+ nc = []
+ for r in self.contigs:
+ if len(r) > min_len:
+ nc.append(r)
+ return Fa(nc,'cutof_'+str(min_len)+self.name)
+
def validate(self):
'''
View
@@ -24,11 +24,11 @@ def test_setUpFa(self):
f = Fa(cl, 'test-fa')
self.assertEqual(cl, f.contigs)
self.assertEqual('test-fa', f.name)
- self.assertEqual({'name':0, 'name2':1, 'name3':2}, f.contigs_idx)
+ self.assertEqual({'>name':0, '>name2':1, '>name3':2}, f.contigs_idx)
cl.append('something')
with self.assertRaises(TypeError):
Fa(cl, 'name4')
- '''
+
def test_str(self):
cl = []
cl.append(Sequence('>name', 'ACTGactg'))
@@ -69,29 +69,32 @@ def test_add_contigs(self):
def test_show_names(self):
cl = [Sequence('>name', 'ACTGactg'), Sequence('>name2', 'NNNNNNNNNACTGNNNN'), Sequence('>name3', 'CTNACtacgatNNNNNNN')]
f = Fa(cl, 'test-fa')
- self.assertEqual(['name','name2','name3'], f.show_names())
+ self.assertEqual(['>name','>name2','>name3'], f.show_names())
f.add_contig(Sequence('>name2', 'ACTGaaaaaaa'), 1)
- self.assertEqual(['name','name3','name2'], f.show_names())
+ self.assertEqual(['>name','>name3','>name2'], f.show_names())
f.add_contig(Sequence('>name7', 'ACTGaaaaaaa'), 1)
- self.assertEqual(['name','name3','name2','name7'], f.show_names())
+ self.assertEqual(['>name','>name3','>name2','>name7'], f.show_names())
def test_extract(self):
cl = [Sequence('>name', 'ACTGactg'), Sequence('>name2', 'NNNNNNNNNACTGNNNN'), Sequence('>name3', 'CTNACtacgatNNNNNNN')]
f = Fa(cl, 'test-fa')
self.assertEqual(cl, f.contigs)
cl2 = [Sequence('>name2', 'NNNNNNNNNACTGNNNN'), Sequence('>name3', 'CTNACtacgatNNNNNNN')]
- self.assertEqual(cl2, f.extract(['name2', 'name3']).contigs)
- self.assertEqual('extr_test-fa', f.extract(['name2', 'name3']).name)
- self.assertEqual(cl2, f.extract(['name2', 'name3', 'name321']).contigs)
+ self.assertEqual(cl2, f.extract(['>name2', '>name3']).contigs)
+ print 'printing contigs'
+ for c in f.extract(['name2', 'name3']).contigs:
+ print c
+ self.assertEqual('>extr_test-fa', f.extract(['>name2', '>name3']).name)
+ self.assertEqual(cl2, f.extract(['>name2', '>name3', '>name321']).contigs)
def test_remove(self):
cl = [Sequence('>name', 'ACTGactg'), Sequence('>name2', 'NNNNNNNNNACTGNNNN'), Sequence('>name3', 'CTNACtacgatNNNNNNN')]
f = Fa(cl, 'test-fa')
- self.assertEqual([Sequence('>name2', 'NNNNNNNNNACTGNNNN'), Sequence('>name3', 'CTNACtacgatNNNNNNN')], f.remove(['name']).contigs)
- self.assertEqual([Sequence('>name', 'ACTGactg')], f.remove(['name2','name3']).contigs)
- self.assertEqual([Sequence('>name', 'ACTGactg')], f.remove(['name2','name3','name234']).contigs)
- self.assertEqual([Sequence('>name2', 'NNNNNNNNNACTGNNNN'), Sequence('>name3', 'CTNACtacgatNNNNNNN')], f.remove(['name']).contigs)
+ self.assertEqual([Sequence('>name2', 'NNNNNNNNNACTGNNNN'), Sequence('>name3', 'CTNACtacgatNNNNNNN')], f.remove(['>name']).contigs)
+ self.assertEqual([Sequence('>name', 'ACTGactg')], f.remove(['>name2','>name3']).contigs)
+ self.assertEqual([Sequence('>name', 'ACTGactg')], f.remove(['>name2','>name3','>name234']).contigs)
+ self.assertEqual([Sequence('>name2', 'NNNNNNNNNACTGNNNN'), Sequence('>name3', 'CTNACtacgatNNNNNNN')], f.remove(['>name']).contigs)
def test_statistics(self):
cl = [Sequence('>name', 'ACTGactg'), Sequence('>name2', 'NNNNNNNNNACTGNNNN'), Sequence('>name3', 'CTNACtacgatNNNNNNN'), Sequence('>name4', 'CTNAC')]
@@ -187,7 +190,7 @@ def tearDown(self):
os.remove('f2.fa')
os.remove('test.fa')
pass
- '''
+
def test_conv_to_fq(self):
cl = []
test = 'ATGGAATCGGCTTTTAATACTGCAGGGGCGTTAAGTTGGCATGAACTCACAACCAATAATACCGAAGAGGCCATGCGCTTCTATGCTGAGATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGGGATCAGCATTGGCGGAATTACCGACAGTTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGGATCAAGTGGCTATCAGTGCTAAAAAACTCGGCGGTGACATTCTGTTTGGCCCTGAAGACATTCCAGAGGTGGGCCGTTTTTGTTGGATAAAAGACCCACAGGGCGCCATTATTGCGGCCATTAGCTATTTAAAACGTTGATGTAA'
@@ -198,4 +201,4 @@ def test_conv_to_fq(self):
print fq
if __name__ == "__main__":
- unittest.main()
+ unittest.main()

0 comments on commit 8d73b4a

Please sign in to comment.