Permalink
Browse files

Added cutNameMarkers functionality.

	modified:   README.md
	modified:   bin/cmdfatool.py
	modified:   fatool/fa.py
	modified:   fatool/sequence.py
	modified:   fatool/tests/test_sequence.py
  • Loading branch information...
1 parent c419afd commit 396276980e6e3d109747e0a253ec02fd9998ccfa @blazejmarciniak blazejmarciniak committed Jun 23, 2016
Showing with 135 additions and 17 deletions.
  1. +36 −0 README.md
  2. +44 −8 bin/cmdfatool.py
  3. +0 −1 fatool/fa.py
  4. +30 −4 fatool/sequence.py
  5. +25 −4 fatool/tests/test_sequence.py
View
@@ -174,3 +174,39 @@ optional arguments:
-f FAFILE, --fafile FAFILE file to show statistics, usually *.fa
--report REPORT log file if not supplied stdout
--operator [OPERATOR [OPERATOR ...]] user who ran the script; it will be noted in the log
+
+ findPrimer:
+
+usage: cmdfatool.py findPrimer [-h] -f FAFILE --start START --stop STOP --mode
+ {FF,FR} [--minlen MINLEN] [--maxlen MAXLEN]
+ [--mml MML] [--report REPORT]
+ [--operator [OPERATOR [OPERATOR ...]]]
+
+optional arguments:
+ -h, --help show this help message and exit
+ -f FAFILE, --fafile FAFILE
+ file to show statistics, usually *.fa
+ --start START start codon 5'
+ --stop STOP stop codon 3'
+ --mode {FF,FR} FF (start forward, stop forward) or FR (start 5' stop 3')
+ --minlen MINLEN minimum length (default 50bp)
+ --maxlen MAXLEN max length (default 1000bp)
+ --mml MML mismatch level: number of allowed mismatches in primers (default 0)
+ --report REPORT report results into file if not supplied stdout
+ --operator [OPERATOR [OPERATOR ...]]
+ user who ran the script; it will be noted in the report
+
+
+ cutNameMarker:
+
+
+usage: cmdfatool.py cutNameMarker [-h] -f FAFILE -m MARKER -l LENGTH
+ --keepMarker KEEPMARKER [-o OUTPUT]
+
+optional arguments:
+ -h, --help show this help message and exit
+ -f FAFILE, --fafile FAFILE file to show statistics, usually *.fa
+ -m MARKER, --marker MARKER marker that indicates start of cut
+ -l LENGTH, --length LENGTH length of cut
+ --keepMarker KEEPMARKER whether to keep the marker or not; default 1 (Yes)
+ -o OUTPUT, --output OUTPUT output file default: output.fa
View
@@ -88,25 +88,43 @@ def main():
sub_s.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w'))
sub_s.add_argument('--operator', help='user who have fired script it will be noted in report', nargs='*', type=str)
sub_s.set_defaults(func=statistics)
-
- sub_fm = subparsers.add_parser('findMotif', help='finding given motif; display motif and its position in contig')
+ '''
+ sub_fm = subparsers.add_parser('findMotif', help='display motifs position in contig')
sub_fm.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True)
sub_fm.add_argument('--mml', help='mismatch level number of allowed missmatches in primers (detfault 0)', type=str, default=0)
sub_fm.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w'))
sub_fm.add_argument('--operator', help='user who have fired script it will be noted in report', nargs='*', type=str)
sub_fm.set_defaults(func=find_motif)
-
+ '''
sub_fp = subparsers.add_parser('findPrimer', help='display list of founded primers')
sub_fp.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True)
sub_fp.add_argument('--start', help='strat codon 5\'', type=str, required=True)
sub_fp.add_argument('--stop', help='stop codon 3\'', type=str, required=True)
- sub_fp.add_argument('--mode', help='FF (start forward, stop forward) or FR (start 5\' stop 3\')', type=str, choices=['FF', 'FR'], required=True)
+ sub_fp.add_argument('--mode', help='FF (start forward, stop forward) or FR (start 5\' stop 3\')', type=str, choices=['FF', 'FR'], default = 'FR', required=True)
sub_fp.add_argument('--minlen', help='minimum length (detfault 50bp)', type=int, default=50)
sub_fp.add_argument('--maxlen', help='max length (detfault 1000bp)', type=int, default=1000)
sub_fp.add_argument('--mml', help='mismatch level number of allowed missmatches in primers (detfault 0)', type=int, default=0)
sub_fp.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w'))
sub_fp.add_argument('--operator', help='user who have fired script it will be noted in report', nargs='*', type=str)
sub_fp.set_defaults(func=find_primers)
+
+ sub_cn = subparsers.add_parser('cutName', help='cuts name from position to given length')
+ sub_cn.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True)
+ sub_cn.add_argument('--start', help='start of cut', type=int, required=True)
+ sub_cn.add_argument('-l', '--length', help='length of cut', type=int, required=True)
+ sub_cn.set_defaults(func=cut_name)
+
+ sub_lnam = subparsers.add_parser('cutNameMarker', help='cuts name leaving defined number of chars after begining of marker')
+ sub_lnam.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True)
+ sub_lnam.add_argument('-m', '--marker', help='marker that indicates start of cut', type=str, required=True)
+ sub_lnam.add_argument('-l', '--length', help='length of cut', type=int, required=True)
+ sub_lnam.add_argument('--keepMarker', help='weather to keep marker or not default 1 (Yes)', type=int, required=True)
+ sub_lnam.add_argument('-o', '--output', help='output file default: output.fa', type=argparse.FileType('w'), default='output.fa')
+ #sub_lnam.add_argument('-d', '--outputDir', help='output directory where multiple contigs will be saved', type=str)
+ sub_lnam.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w'))
+ sub_lnam.add_argument('--operator', help='user who have fired script it will be noted in report', nargs='*', type=str)
+ sub_lnam.set_defaults(func=cut_name_pattern)
+
'''
sub_fap = subparsers.add_parser('findPrimer', help='show statistics of fa file')
sub_fap.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True)
@@ -122,9 +140,7 @@ def main():
#parser.add_argument('--report', help='log file if not supplied stdout', type=argparse.FileType('w'))
args = parser.parse_args()
- #if args.version:
- # print version
- # exit(0)
+
args.func(args)
@@ -366,17 +382,21 @@ def reverse(args):
fa = Fa.load_from_file(args.fafile)
fa.reverse()
fa.write(args.output)
+ rep += '\n\n------------------------------------------------------'
+ rep += '\nFinished:\t'+str(datetime.datetime.now())
if args.report:
with args.report as log_file:
log_file.write(rep)
else:
print rep
+
def find_motif(args):
    """Placeholder handler for the ``findMotif`` sub-command.

    Motif search is not implemented; the handler only tells the user so.

    :param args: parsed command-line namespace (not used).
    :return: None
    """
    # Call form of print behaves the same for a single string on
    # Python 2 and Python 3.
    print('not available yet')
def find_primers(args):
+ rep = str(make_log_header('reverse', args.operator))
fa = Fa.load_from_file(args.fafile)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
@@ -386,8 +406,24 @@ def find_primers(args):
rep += '\n================\n\t\t'+r.name+'\n'
for q in r.find_aprox_primers(args.start, args.stop, str(args.mode), int(args.mml), args.minlen, args.maxlen):
rep += q+'\n'
+ rep += '\n\n------------------------------------------------------'
+ rep += '\nFinished:\t'+str(datetime.datetime.now())
+ if args.report:
+ with args.report as log_file:
+ log_file.write(rep)
+ else:
+ print rep
+
def cut_name_pattern(args):
    """Handle the ``cutNameMarker`` sub-command.

    Loads the fasta file, shortens every contig name with
    ``leave_name_after_marker`` and writes the result to ``args.output``.
    The run report is then written to ``args.report`` when supplied,
    otherwise printed to stdout.

    :param args: parsed command-line namespace; reads ``fafile``,
        ``marker``, ``length``, ``keepMarker``, ``output``, ``report``
        and ``operator``.
    :return: None
    """
    rep = str(make_log_header('cutNameMarker', args.operator))
    fa = Fa.load_from_file(args.fafile)
    for r in fa.contigs:
        r.leave_name_after_marker(args.marker, args.length, args.keepMarker)
    fa.write(args.output)
    # Previously the report was built but never emitted; close it with the
    # same footer as the other sub-commands and write it out.
    rep += '\n\n------------------------------------------------------'
    rep += '\nFinished:\t' + str(datetime.datetime.now())
    if args.report:
        with args.report as log_file:
            log_file.write(rep)
    else:
        print(rep)
- print rep
def cut_name(args):
    """Placeholder handler for the ``cutName`` sub-command.

    The sub-command is registered in the argument parser but has no
    implementation yet. It prints the same notice as the other
    unimplemented command instead of exiting silently.

    :param args: parsed command-line namespace (not used).
    :return: None
    """
    print('not available yet')
+
if __name__ == '__main__':
exit(main())
View
@@ -11,7 +11,6 @@ def __init__(self, contigs_list, name):
logger = logging.getLogger(__name__)
logger.debug('creating Fa object')
- logger.info('but info works')
self.name = name
self.contigs = []
self.contigs_idx = {}
View
@@ -10,7 +10,7 @@
class Sequence(object):
def __init__(self, name, seq):
if Sequence.validate_name_string(name):
- self.name = name.lstrip('>')
+ self.name = name
else:
raise NameError('Sequence name have to start with ">"')
self.seq = seq
@@ -193,9 +193,35 @@ def cut(self, length, step):
contig_end = len(self.seq) # last position of contig
contig_list = [] # contig list returning by function
while i+length <= contig_end:
- contig_list.append(Sequence('>'+self.name+'_frag_'+str(i + 1)+':'+str(i + length), str(self.seq[i:i+length])))
+ contig_list.append(Sequence(self.name+'_frag_'+str(i + 1)+':'+str(i + length), str(self.seq[i:i+length])))
i = i+step
return contig_list
+
def cut_name(self, length, start=0):
    """Shorten ``self.name`` to ``length`` characters taken from ``start``.

    The result always keeps a single leading ``'>'``, as
    ``leave_name_after_marker`` does, so the name stays a valid header
    when ``start`` is greater than 0.

    :param length: number of characters to keep, counted from ``start``.
    :param start: index of the first character to keep (default 0).
    :return: None; ``self.name`` is changed in place.
    """
    # The slice end is start + length; the old [start:length] only gave
    # the requested length when start was 0.
    fragment = self.name[start:start + length]
    self.name = '>' + fragment.lstrip('>')
+
def leave_name_after_marker(self, mark, length=0, keep_marker=1):
    """Trim ``self.name`` so it begins at (or just after) ``mark``.

    :param mark: literal text to look for in the name; it is escaped
        before searching, so regex metacharacters have no special meaning.
    :param length: number of characters to keep from the cut point;
        values of 0 or less keep everything up to the end of the name.
    :param keep_marker: 1 keeps the marker in the name, any other
        value cuts right after it.
    :return: 1 when the marker was found and the name rewritten, else 0.
    """
    found = re.search(re.escape(mark), self.name)
    log = logging.getLogger(__name__)
    log.setLevel(logging.DEBUG)
    log.debug(found)
    log.debug(keep_marker)

    if not found:
        return 0

    # cut point: start of the marker to keep it, its end to drop it
    begin = found.start() if keep_marker == 1 else found.end()
    # None as the slice end means "up to the end of the name"
    finish = begin + length if length > 0 else None
    self.name = '>' + self.name[begin:finish].lstrip('>')
    return 1
+
def reverse(self):
'''
@@ -207,7 +233,7 @@ def reverse(self):
rev = rev.translate(maketrans('ACTGactg', 'TGACtgac'))
# creating 80 chars lines
#rev = re.sub("(.{80})", '\\1\n', rev, 0)
- return Sequence('>rev_'+self.name, rev)
+ return Sequence('>rev_'+self.name.lstrip('>'), rev)
def normalize(self):
@@ -367,7 +393,7 @@ def __str__(self):
'''
creates nicely outputed string
'''
- return '>'+self.name+'\n'+re.sub("(.{80})", '\\1\n', self.seq, 0)+'\n'
+ return self.name+'\n'+re.sub("(.{80})", '\\1\n', self.seq, 0)+'\n'
def __len__(self):
@@ -12,7 +12,7 @@ def setUp(self):
def test_setUpSequence(self):
c = Sequence('>name', 'ACTGactg')
self.assertTrue( isinstance(c, Sequence) )
- self.assertEqual(c.name, 'name')
+ self.assertEqual(c.name, '>name')
self.assertEqual(c.seq, 'ACTGactg')
def test_contig_str(self):
@@ -376,11 +376,32 @@ def test_find_aprox_primers(self):
'TTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG',
]
- for r in c.find_aprox_primers('TTTT', 'GGGG', 'FF', 1,60,65):
- print r
-
+ #for r in c.find_aprox_primers('TTTT', 'GGGG', 'FF', 1,60,65):
+ # print r
+
self.assertEqual(t_TTTT_GGGG_FF_60_65, c.find_aprox_primers('TTTT', 'CCCC', 'FR', 1,60,65))
def test_leave_name_after_marker(self):
    """Check name trimming for several length / keep_marker settings."""
    header = '>test_something_special_gene=qwerty_ready'
    bases = 'ACTGTACGGA'
    # (arguments passed after the marker, expected resulting name)
    scenarios = [
        ((20,), '>gene=qwerty_ready'),
        ((11,), '>gene=qwerty'),
        ((), '>gene=qwerty_ready'),
        ((6, 0), '>qwerty'),
    ]
    for extra, expected in scenarios:
        contig = Sequence(header, bases)
        self.assertEqual(1, contig.leave_name_after_marker('gene=', *extra))
        self.assertEqual(expected, contig.name)
+
def test_cut_name(self):
    """Cutting to 5 chars, then asking for 10, must both leave '>test'."""
    contig = Sequence('>test_something_special_gene=qwerty_ready', 'ACTGTACGGA')
    for requested in (5, 10):
        contig.cut_name(requested)
        self.assertEqual('>test', contig.name)
if __name__ == "__main__":

0 comments on commit 3962769

Please sign in to comment.