From 0065ac7b5e73e44c52c9b30755cf7fc9f42996cd Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 28 Jun 2016 00:20:10 +0200 Subject: [PATCH] Builds removed modified: build/lib/fatool/fa.py modified: build/lib/fatool/fuzzy.py modified: build/lib/fatool/sequence.py modified: build/scripts-2.7/cmdfatool.py --- build/lib/fatool/fa.py | 14 ++- build/lib/fatool/fuzzy.py | 31 ++--- build/lib/fatool/sequence.py | 102 +++++++++++---- build/scripts-2.7/cmdfatool.py | 275 ++++++++++++++++++++++++++++++++--------- 4 files changed, 312 insertions(+), 110 deletions(-) diff --git a/build/lib/fatool/fa.py b/build/lib/fatool/fa.py index 827df1b..dfbad5a 100644 --- a/build/lib/fatool/fa.py +++ b/build/lib/fatool/fa.py @@ -4,27 +4,34 @@ import re import math from fatool import Sequence +import logging class Fa(object): def __init__(self, contigs_list, name): - #print contigs_list - # do poprawki + logger = logging.getLogger(__name__) + + logger.debug('creating Fa object') self.name = name self.contigs = [] self.contigs_idx = {} for r in contigs_list: if not isinstance(r, Sequence): + logger.error('Supplied param is not Sequence object') raise TypeError('Wrong param supplied Sequence was expected') if not r.name in self.contigs_idx: if len(self.contigs) > 0: + logger.debug('appending contig: '+r.name) self.contigs.append(r) else: + logger.debug('adding first contig: '+r.name) self.contigs = [r] self.contigs_idx[r.name] = len(self.contigs) - 1 else: + logger.error('Sequence name: '+r.name+' already exists in file') raise NameError('Sequence name already exists: '+r.name) - # self.stats{'A':0,'C':0,'T':0,'G':0,'N':0, 'L':0, } + + @staticmethod def load_from_file(file): if isinstance(file, str): @@ -110,6 +117,7 @@ def validate(self): def nl_statistics(self, g, percent): ''' Counts statistics of N50, L50, N75 etc. + g array containing sorted contigs by length, from biggest to lowest ''' ncount = -1 # index & number of contigs with +1 nsum = 0 diff --git a/build/lib/fatool/fuzzy.py b/build/lib/fatool/fuzzy.py index 2177397..1e41376 100644 --- a/build/lib/fatool/fuzzy.py +++ b/build/lib/fatool/fuzzy.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- #import math +import logging def find_aprox_match_iter(needle, hstack, missmatch_level, hs_start_pos = 0): i = hs_start_pos # start iterate from start position @@ -11,8 +12,6 @@ def find_aprox_match_iter(needle, hstack, missmatch_level, hs_start_pos = 0): while i < len(hstack): if hstack[i] != needle[j]: mmatch_count += 1 - #print mmatch_count - #print 'j = '+str(j) if mmatch_count > missmatch_level: # if missmatch level oversized back to strat + 1 and start again i -= j @@ -41,7 +40,6 @@ def find_all_aprox_matches(needle, hstack, missmatch_level, hs_start_pos): i = r[0]+1 # match not found - no more maches in hstack else: - #print 'not found' break return ret_list @@ -49,10 +47,8 @@ def find_all_aprox_matches(needle, hstack, missmatch_level, hs_start_pos): def find_motif_in_aprox_range(start_motif, stop_motif, hstack, missmatch_level, hs_start_pos = 0): start = 0 stop = 0 - #print 'startm: '+start_motif+'\tstop_motif: '+stop_motif start = find_aprox_match_iter(start_motif, hstack, missmatch_level, hs_start_pos = 0) stop = find_aprox_match_iter(stop_motif, hstack, missmatch_level, start[1]) - #print start,stop if start and stop: return hstack[start[1]:stop[0]] @@ -61,21 +57,21 @@ def find_all_motifs_in_aprox_range(start_motif, stop_motif, hstack, missmatch_le start = 0 stop = 0 ret_list = [] - print 'hstack in fuzzy' - print hstack + logger = logging.getLogger(__name__) + #logger.setLevel(logging.DEBUG) + logger.debug([start_motif, stop_motif, hstack, missmatch_level, hs_start_pos, len_min, len_max]) + logger.debug(hstack) + while i <= len(hstack): start = find_aprox_match_iter(start_motif, hstack, missmatch_level, i) stop = find_aprox_match_iter(stop_motif, hstack, missmatch_level, start[1]) - #print start,stop if start and stop: - #print 'start + stop found' - if stop[0] - start[1] > len_min and stop[0] - start[1] < len_max: - #print 'match valid' - ret_list.append(hstack[start[1]:stop[0]]) + if stop[1] - start[0] >= len_min and stop[1] - start[0] <= len_max: + ret_list.append(hstack[start[0]:stop[1]]) i = start[0]+1 - #print i else: break + logger.debug(ret_list) return ret_list def find_motif(needle, hstack, missmatch_level, hs_start_pos = 0): @@ -85,22 +81,13 @@ def find_motif(needle, hstack, missmatch_level, hs_start_pos = 0): return hstack[r[0]:r[1]] def find_all_motifs(needle, hstack, missmatch_level, hs_start_pos = 0): - #print 'fuzzy.find_all_motifs' - #print needle - #print hstack - #print missmatch_level - #print hs_start_pos i = hs_start_pos ret_list = [] while i <= len(hstack): r = find_aprox_match_iter(needle, hstack, missmatch_level, i ) - #print r if r: - #print 'founded: ',r ret_list.append(hstack[r[0]:r[1]]) - #ret_list = [hstack[r[0]:r[1]]] i = r[0]+1 else: break - #print ret_list return ret_list \ No newline at end of file diff --git a/build/lib/fatool/sequence.py b/build/lib/fatool/sequence.py index 4e21fa6..1facb01 100644 --- a/build/lib/fatool/sequence.py +++ b/build/lib/fatool/sequence.py @@ -4,12 +4,13 @@ from collections import Counter import fuzzy import re +import logging class Sequence(object): def __init__(self, name, seq): if Sequence.validate_name_string(name): - self.name = name.lstrip('>') + self.name = name else: raise NameError('Sequence name have to start with ">"') self.seq = seq @@ -133,7 +134,7 @@ def detailed_validate_generic(seq, domain): log_info = [] # if not allowed chars found if m: - # it may be 60 xxxxxxxxxx xxx.... format + # it may be 61 xxxxxxxxxx xxx.... format if re.search('(\d+)', seq): seq_array = seq.split('\n') new_array = [] # array to store new sequence after cleaning and transformation @@ -146,7 +147,7 @@ def detailed_validate_generic(seq, domain): if end_of_seq_array > 1: line_length = int(new_array[1][0])-int(new_array[0][0]) - # validate ecah block (between " " [space]) of given sequence + # validate each block (between " " [space]) of given sequence i = 0 while i < end_of_seq_array: # digit on begining of line was not found - error @@ -192,9 +193,35 @@ def cut(self, length, step): contig_end = len(self.seq) # last position of contig contig_list = [] # contig list returning by function while i+length <= contig_end: - contig_list.append(Sequence('>'+self.name+'_frag_'+str(i + 1)+':'+str(i + length), str(self.seq[i:i+length]))) + contig_list.append(Sequence(self.name+'_frag_'+str(i + 1)+':'+str(i + length), str(self.seq[i:i+length]))) i = i+step return contig_list + + def cut_name(self, length, start = 0): + self.name = self.name[start:length] + print self.name + + def leave_name_after_marker(self, mark, length = 0, keep_marker = 1): + m = re.search(re.escape(mark), self.name) + logger = logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + logger.debug(m) + logger.debug(keep_marker) + if m: + # keep original marker or skip it + + if keep_marker == 1: + s = m.start() + else: + s = m.end() + # defined length or return string to end + if length > 0: + self.name = '>'+self.name[s:s+length].lstrip('>') + else: + self.name = '>'+self.name[s:].lstrip('>') + return 1 + return 0 + def reverse(self): ''' @@ -206,7 +233,7 @@ def reverse(self): rev = rev.translate(maketrans('ACTGactg', 'TGACtgac')) # creating 80 chars lines #rev = re.sub("(.{80})", '\\1\n', rev, 0) - return Sequence('>rev_'+self.name, rev) + return Sequence('>rev_'+self.name.lstrip('>'), rev) def normalize(self): @@ -251,7 +278,7 @@ def translate2protein_in_range_generic(seq, start, stop, tdict): frame2 = [] frame3 = [] - # creating pattern to find start codons + # creating pattern (from dict) to find start codons for r in start: p += r+'|' p = '('+p.rstrip('|')+')' @@ -259,13 +286,12 @@ def translate2protein_in_range_generic(seq, start, stop, tdict): # creating pattern to find stop codons for r in stop: p_stop += r+'|' - p_stop = '('+p.rstrip('|')+')' + p_stop = '('+p_stop.rstrip('|')+')' - # match for start contigs m = re.finditer(p, seq) # there will be stored latest string position for each frame - frame_iterator[0,0,0] + frame_iterator = [0,0,0] stop_pos = len(seq) # where to stop searching if no stopcodon found @@ -276,9 +302,10 @@ def translate2protein_in_range_generic(seq, start, stop, tdict): # set i for start position of current start contig i = r.start() ret = '' - while i+3 <= stop: + while i+3 <= stop_pos: ret += Sequence.translate(seq[i:i+3], tdict) if re.match(p_stop, seq[i:i+3]): + #print 'exiting on: '+seq[i:i+3] i = i+3 break else: @@ -294,18 +321,36 @@ def translate2protein_in_range_generic(seq, start, stop, tdict): return [frame1, frame2, frame3] + def translate2protein_in_range(self, start, stop, tdict): + tdict = { + 'GCA':'A','GCC':'A','GCG':'A','GCT':'A', 'TGC':'C','TGT':'C', 'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E', + 'TTC':'F', 'TTT':'F', 'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G', 'CAC':'H', 'CAT':'H', 'ATA':'I', 'ATC':'I', 'ATT':'I', + 'AAA':'K', 'AAG':'K', 'TTA':'L', 'TTG':'L', 'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L', 'ATG':'M', 'AAC':'N', 'AAT':'N', + 'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P', 'CAA':'Q', 'CAG':'Q', 'AGA':'R', 'AGG':'R', 'CGA':'R', 'CGC':'R', 'CGG':'R', + 'CGT':'R', 'AGC':'S', 'AGT':'S', 'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S', 'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T', + 'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V', 'TGG':'W', 'TAC':'Y', 'TAT':'Y', 'TAG': '*', 'TGA':'*', 'TAA':'*' + } + + f = Sequence.translate2protein_in_range_generic(self.seq, start, stop, tdict) + r = Sequence.translate2protein_in_range_generic(self.reverse().seq, start, stop, tdict) + + return {'fwd':f, 'rev':r} + + @staticmethod def translate2protein_generic(seq, tdict): # +5 to secure all frames f1 = '' f2 = '' f3 = '' + i = 0 while i+5 < len(seq): f1 += Sequence.translate(seq[i:i+3], tdict) f2 += Sequence.translate(seq[i+1:i+4], tdict) f3 += Sequence.translate(seq[i+2:i+5], tdict) + i = i + 3 - return [('',f1,seq[-2:]),(seq[0:1],f2,seq[-1:]),(seq[0:2],f2,)] + return [('',f1,seq[-2:]),(seq[0:1],f2,seq[-1:]),(seq[0:2],f2,'')] def translate2protein(self, tdict): tdict = { @@ -321,7 +366,7 @@ def translate2protein(self, tdict): return {'fwd':f, 'rev':r} @staticmethod - def translate(contig, tdict): + def translate(codon, tdict): if codon in tdict: return tdict[codon] else: @@ -331,37 +376,42 @@ def find_aprox_motif(self, motif, missmatch_level): self.normalize() return fuzzy.find_all_motifs(motif, self.seq, missmatch_level, hs_start_pos = 0) - def find_aprox_primers(self, start, stop, missmatch_level = 0, len_min = 50, len_max = 10000): + def find_primers(self, start, stop, mode, len_min = 50, len_max = 10000): + return self.find_aprox_primers(start, stop, mode, 0, len_min, len_max) + + + def find_aprox_primers(self, start, stop, mode, missmatch_level = 0, len_min = 50, len_max = 10000): #start 5'->3' # add missmatch_level condition if 50%> - rev = stop[::-1] - new_stop = rev.translate(maketrans('ACTGactg', 'TGACtgac')) + logger = logging.getLogger(__name__) + #logger.setLevel(logging.DEBUG) + logger.debug('given args: start:'+start+' stop: '+stop+' mode: '+mode+' mm level: '+str(missmatch_level)+' len_min: '+str(len_min)+' len_max: '+str(len_max)) + #logger.debug('sequence: '+self.seq) + if mode.upper() == 'FR': + rev = stop[::-1] + stop = rev.translate(maketrans('ACTGactg', 'TGACtgac')) + elif mode.upper() != 'FF': + raise ('Unexpected mode: '+str(mode)+' expected values [FR|FF]') + r_list = [] self.normalize() - #print '\nAfter normailzation' - #print self.seq res = fuzzy.find_all_motifs_in_aprox_range(start, stop, self.seq, missmatch_level, 0, len_min, len_max) if res: r_list.extend(res) - rev = start[::-1] - new_start = rev.translate(maketrans('ACTGactg', 'TGACtgac')) - #print 'new_seq in sequence\n' - #print new_seq.seq - res = fuzzy.find_all_motifs_in_aprox_range(new_start, stop, self.seq, missmatch_level, 0, len_min, len_max) + res = fuzzy.find_all_motifs_in_aprox_range(start, stop, self.reverse().seq, missmatch_level, 0, len_min, len_max) if res: r_list.extend(res) - print 'Sequence.find_aprox_primers', - for s in r_list: - print s+'\n' + + logger.debug(r_list) return r_list def __str__(self): ''' creates nicely outputed string ''' - return '>'+self.name+'\n'+re.sub("(.{80})", '\\1\n', self.seq, 0)+'\n' + return self.name+'\n'+re.sub("(.{80})", '\\1\n', self.seq, 0)+'\n' def __len__(self): diff --git a/build/scripts-2.7/cmdfatool.py b/build/scripts-2.7/cmdfatool.py index 2ec7a47..00554f7 100644 --- a/build/scripts-2.7/cmdfatool.py +++ b/build/scripts-2.7/cmdfatool.py @@ -6,15 +6,19 @@ import re import datetime from string import maketrans -# from fatool import Contig from fatool import * from decimal import * +import logging def main(): + logging.basicConfig(level=logging.INFO) + logger = logging.getLogger(__name__) + logger.setLevel(logging.INFO) + #logger.setLevel(logging.DEBUG) parser = argparse.ArgumentParser() #parser.add_argument('-f', '--fafile', help='file to be cut usualy *.fa', type=argparse.FileType('r'), required=True) - parser.add_argument('-v', '--version', help='display version number and exit', action='version', version='%(prog)s 0.2.1') + parser.add_argument('-v', '--version', help='display version number and exit', action='version', version='%(prog)s 0.3.1') subparsers = parser.add_subparsers(title='fatool commands', help='each has own params, for more details use: command -h') sub_cut = subparsers.add_parser('cut', help='split supplied sequence into smaller parts, according to given params') @@ -22,23 +26,23 @@ def main(): sub_cut.add_argument('-r', '--range', help='cutted sequence length', type=int, required=True) sub_cut.add_argument('-o', '--output', help='output file default: output.fa', type=argparse.FileType('w'), default='output.fa') sub_cut.add_argument('-s', '--step', help='step length default: 1', type=int, default=1) - sub_cut.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_cut.add_argument('--operator', help='user who have fired script it will be noted in log', type=str) + sub_cut.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_cut.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) sub_cut.set_defaults(func=cut_fa) sub_en = subparsers.add_parser('extractNames', help='extracting contigs names only') sub_en.add_argument('-f', '--fafile', help='file to be cut usualy *.fa', type=argparse.FileType('r'), required=True) sub_en.add_argument('-o', '--output', help='output file if not supplied stdout', type=argparse.FileType('w')) - sub_en.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_en.add_argument('--operator', help='user who have fired script it will be noted in log', type=str) + sub_en.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_en.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) sub_en.set_defaults(func=extract_names) sub_ec = subparsers.add_parser('extractContigs', help='extracting contigs specified in file (output in new file)') sub_ec.add_argument('-f', '--fafile', help='file to be cut usualy *.fa', type=argparse.FileType('r'), required=True) sub_ec.add_argument('--list', help='file containing list of contigs one contig per line', type=argparse.FileType('r'), required=True) sub_ec.add_argument('-o', '--output', help='output file; if --multifile is set output directory', type=str, required=True) - sub_ec.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_ec.add_argument('--operator', help='user who have fired script it will be noted in log', type=str) + sub_ec.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_ec.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) sub_ec.add_argument('--multifile', help='if this flag is set each contig will be saved in separate file', action='store_true') sub_ec.set_defaults(func=extract_contigs) @@ -46,8 +50,8 @@ def main(): sub_rc.add_argument('-f', '--fafile', help='file to be cut usualy *.fa', type=argparse.FileType('r'), required=True) sub_rc.add_argument('--list', help='file containing list of contigs one contig per line', type=argparse.FileType('r'), required=True) sub_rc.add_argument('-o', '--output', help='output file if not supplied stdout', type=str, required=True) - sub_rc.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_rc.add_argument('--operator', help='user who have fired script it will be noted in log', type=str) + sub_rc.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_rc.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) sub_rc.set_defaults(func=remove_contigs) sub_jc = subparsers.add_parser('join', help='joining two or more files, yet not verifing duplicates') @@ -55,22 +59,22 @@ def main(): sub_jc.add_argument('-o', '--output', help='output file if not supplied stdout', type=argparse.FileType('w'), required=True) sub_jc.add_argument('--files', help='files to be joined', nargs='*', type=argparse.FileType('r')) sub_jc.add_argument('--overwrite', help='if set owerwrites contigs with same name', action='store_true') - sub_jc.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_jc.add_argument('--operator', help='user who have fired script it will be noted in log', type=str) + sub_jc.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_jc.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) sub_jc.set_defaults(func=join) sub_sc = subparsers.add_parser('split', help='each cotig saved into separate file') sub_sc.add_argument('-f', '--fafile', help='file to be cut usualy *.fa', type=argparse.FileType('r'), required=True) sub_sc.add_argument('-d', '--outputDir', help='output directory where splited contigs will be saved', type=str, required=True) - sub_sc.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_sc.add_argument('--operator', help='user who have fired script it will be noted in log', type=str) + sub_sc.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_sc.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) sub_sc.set_defaults(func=split_contigs) sub_r = subparsers.add_parser('reverse', help='reverse all sequences in file') sub_r.add_argument('-f', '--fafile', help='file to be cut usualy *.fa', type=argparse.FileType('r'), required=True) sub_r.add_argument('-o', '--output', help='output file; if --multifile is set output directory', type=argparse.FileType('w'), required=True) - sub_r.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_r.add_argument('--operator', help='user who have fired script it will be noted in log', type=str) + sub_r.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_r.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) sub_r.set_defaults(func=reverse) sub_v = subparsers.add_parser('validate', help='validates fa file') @@ -81,31 +85,71 @@ def main(): sub_s = subparsers.add_parser('stats', help='show statistics of fa file') sub_s.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True) - sub_s.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_s.add_argument('--operator', help='user who have fired script it will be noted in log', nargs='*', type=str) + sub_s.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_s.add_argument('--operator', help='user who have fired script it will be noted in report', nargs='*', type=str) sub_s.set_defaults(func=statistics) - - sub_s = subparsers.add_parser('findMotif', help='show statistics of fa file') - sub_s.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True) - sub_s.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_s.add_argument('--operator', help='user who have fired script it will be noted in log', nargs='*', type=str) - sub_s.set_defaults(func=find_motif) - - sub_s = subparsers.add_parser('findPrimer', help='show statistics of fa file') - sub_s.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True) - sub_s.add_argument('--start', help='strat codon 5\'', type=str, required=True) - sub_s.add_argument('--stop', help='stop codon 3\'', type=str, required=True) - sub_s.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_s.add_argument('--operator', help='user who have fired script it will be noted in log', nargs='*', type=str) - sub_s.set_defaults(func=find_primers) - - #parser.add_argument('--operator', help='user who have fired script it will be noted in log', type=str) - #parser.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) + ''' + sub_fm = subparsers.add_parser('findMotif', help='display motifs position in contig') + sub_fm.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True) + sub_fm.add_argument('--mml', help='mismatch level number of allowed missmatches in primers (detfault 0)', type=str, default=0) + sub_fm.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_fm.add_argument('--operator', help='user who have fired script it will be noted in report', nargs='*', type=str) + sub_fm.set_defaults(func=find_motif) + ''' + sub_fp = subparsers.add_parser('findPrimer', help='display list of founded primers') + sub_fp.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True) + sub_fp.add_argument('--start', help='strat codon 5\'', type=str, required=True) + sub_fp.add_argument('--stop', help='stop codon 3\'', type=str, required=True) + sub_fp.add_argument('--mode', help='FF (start forward, stop forward) or FR (start 5\' stop 3\')', type=str, choices=['FF', 'FR'], default = 'FR', required=True) + sub_fp.add_argument('--minlen', help='minimum length (detfault 50bp)', type=int, default=50) + sub_fp.add_argument('--maxlen', help='max length (detfault 1000bp)', type=int, default=1000) + sub_fp.add_argument('--mml', help='mismatch level number of allowed missmatches in primers (detfault 0)', type=int, default=0) + sub_fp.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_fp.add_argument('--operator', help='user who have fired script it will be noted in report', nargs='*', type=str) + sub_fp.set_defaults(func=find_primers) + + sub_cn = subparsers.add_parser('cutName', help='cuts name from position to given length') + sub_cn.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True) + sub_cn.add_argument('--start', help='start of cut', type=int, required=True) + sub_cn.add_argument('-l', '--length', help='length of cut', type=int, required=True) + sub_cn.set_defaults(func=cut_name) + + sub_lnam = subparsers.add_parser('cutNameMarker', help='cuts name leaving defined number of chars after begining of marker') + sub_lnam.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True) + sub_lnam.add_argument('-m', '--marker', help='marker that indicates start of cut', type=str, required=True) + sub_lnam.add_argument('-l', '--length', help='length of cut', type=int, required=True) + sub_lnam.add_argument('--keepMarker', help='weather to keep marker or not default 1 (Yes)', type=int, required=True) + sub_lnam.add_argument('-o', '--output', help='output file default: output.fa', type=argparse.FileType('w'), default='output.fa') + #sub_lnam.add_argument('-d', '--outputDir', help='output directory where multiple contigs will be saved', type=str) + sub_lnam.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_lnam.add_argument('--operator', help='user who have fired script it will be noted in report', nargs='*', type=str) + sub_lnam.set_defaults(func=cut_name_pattern) + + sub_trn_d2p = subparsers.add_parser('translateDNA2Proteins', help='display translation to proteins') + sub_trn_d2p.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True) + sub_trn_d2p.add_argument('-o', '--output', help='output file default: output.fa', type=argparse.FileType('w'), default='output.fa') + sub_trn_d2p.add_argument('--startCodons', help='list of start codons separated by space bar', nargs='*', type=str) + sub_trn_d2p.add_argument('--stopCodons', help='list of stop codons separated by space bar', nargs='*', type=str) + sub_trn_d2p.add_argument('--nss', help='No Start Stop', action='store_true') + sub_trn_d2p.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_trn_d2p.add_argument('--operator', help='user who have fired script it will be noted in report', nargs='*', type=str) + sub_trn_d2p.set_defaults(func=translate_dna_to_protein) + ''' + sub_fap = subparsers.add_parser('findPrimer', help='show statistics of fa file') + sub_fap.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True) + sub_fap.add_argument('--start', help='strat codon 5\'', type=str, required=True) + sub_fap.add_argument('--stop', help='stop codon 3\'', type=str, required=True) + sub_fap.add_argument('--minlen', help='minimum length (detfault 50bp)', type=str, default=50) + sub_fap.add_argument('--maxlen', help='max length (detfault 1000bp)', type=str, default=1000 + sub_fap.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_fap.add_argument('--operator', help='user who have fired script it will be noted in report', nargs='*', type=str) + sub_fap.set_defaults(func=find_primers) + ''' + #parser.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) + #parser.add_argument('--report', help='log file if not supplied stdout', type=argparse.FileType('w')) args = parser.parse_args() - #if args.version: - # print version - # exit(0) + args.func(args) @@ -126,6 +170,13 @@ def make_log_header(cmd, op): def cut_fa(args): + #logging.basicConfig(level=logging.ERROR) + #logging.basicConfig(level=logging.DEBUG) + logger = logging.getLogger(__name__) + + logger.setLevel(logging.DEBUG) + logger.debug('debug mode started') + logger.info('command: cut starting') rep = str(make_log_header('cut', args.operator)) fafile = args.fafile @@ -134,19 +185,26 @@ def cut_fa(args): step = args.step f = Fa.load_from_file(fafile) + logger.info('file: '+fafile.name+' loaded') contig_list = [] for r in f.contigs: - contig_list.join(r.cut(split_range, step)) + contig_list += r.cut(split_range, step) + logger.info('cutted contigs added from conting: '+r.name) result_fa = Fa(contig_list, 'splited') + logger.info('trying to write file') result_fa.write(output) + logger.info('file written') rep += '\n\n------------------------------------------------------' rep += '\nFinished:\t'+str(datetime.datetime.now()) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(rep) def extract_names(args): + logger = logging.getLogger(__name__) + logger.setLevel(logging.info) + logger.info('command: extractNames starting') rep = str(make_log_header('extractNames', args.operator)) fafile = args.fafile output = args.output @@ -159,8 +217,8 @@ def extract_names(args): rep += 'Number of neames founded:\t' + str(len(names)) rep += '\n\n------------------------------------------------------' rep += '\nFinished:\t'+str(datetime.datetime.now()) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(rep) def extract_contigs(args): @@ -184,8 +242,8 @@ def extract_contigs(args): rep += '\Extracted contigs:\t'+str(len(result_ta.contigs)) rep += '\n\n------------------------------------------------------' rep += '\nFinished:\t'+str(datetime.datetime.now()) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(rep) else: print rep @@ -205,8 +263,8 @@ def remove_contigs(args): result_fa.write(args.output) rep += '\n\n------------------------------------------------------' rep += '\nFinished:\t'+str(datetime.datetime.now()) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(stats_rep) else: print stats_rep @@ -232,8 +290,8 @@ def join(args): fa.write(args.output) rep += '\n\n------------------------------------------------------' rep += '\nFinished:\t'+str(datetime.datetime.now()) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(stats_rep) else: print stats_rep @@ -245,8 +303,8 @@ def split_contigs(args): fa.write_multiple_files(args.output) rep += '\n\n------------------------------------------------------' rep += '\nFinished:\t'+str(datetime.datetime.now()) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(rep) else: print rep @@ -296,8 +354,8 @@ def statistics(args): stats_rep += '\nL90:\t'+str(stats['L90']) stats_rep += '\n\n------------------------------------------------------' stats_rep += '\nFinished:\t'+str(datetime.datetime.now()) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(stats_rep) else: print stats_rep @@ -321,8 +379,8 @@ def validate(args): rep += '\n\n------------------------------------------------------' rep += '\nFinished:\t'+str(datetime.datetime.now()) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(rep) else: print rep @@ -333,21 +391,120 @@ def reverse(args): fa = Fa.load_from_file(args.fafile) fa.reverse() fa.write(args.output) - if args.log: - with args.log as log_file: + rep += '\n\n------------------------------------------------------' + rep += '\nFinished:\t'+str(datetime.datetime.now()) + if args.report: + with args.report as log_file: log_file.write(rep) else: print rep + def find_motif(args): print 'not available yet' pass def find_primers(args): - print 'not available yet' - pass + rep = str(make_log_header('reverse', args.operator)) + fa = Fa.load_from_file(args.fafile) + logger = logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + logger.debug(args) + rep = '' + for r in fa.contigs: + rep += '\n================\n\t\t'+r.name+'\n' + for q in r.find_aprox_primers(args.start, args.stop, str(args.mode), int(args.mml), args.minlen, args.maxlen): + rep += q+'\n' + rep += '\n\n------------------------------------------------------' + rep += '\nFinished:\t'+str(datetime.datetime.now()) + if args.report: + with args.report as log_file: + log_file.write(rep) + else: + print rep + +def cut_name_pattern(args): + rep = str(make_log_header('cutNameMarker', args.operator)) + fa = Fa.load_from_file(args.fafile) + for r in fa.contigs: + r.leave_name_after_marker(args.marker, args.length, args.keepMarker) + fa.write(args.output) + +def translate_dna_to_protein(args): + rep = str(make_log_header('translate2protein', args.operator)) + fa = Fa.load_from_file(args.fafile) + r_dict = {} + otp = '' + if args.nss: + for r in fa.contigs: + r_dict = r.translate2protein({}) + otp += '\n=============================\n'+r.name+'\n=============================\n' + otp += '\nFORWARD\n' + i = 0 + for f in r_dict['fwd']: + otp += 'FRAME:\t'+str(i+1)+'\n' + otp += 'BEFORE:\t '+f[0] + otp += 'TRANSLATION:\n '+f[1] + otp += 'AFTER:\t '+f[2] + otp += '\n------------------------------------------------\n' + i+=1 + otp += '\nREVERS\n' + otp += '\n------------------------------------------------\n' + i = 0 + for f in r_dict['rev']: + otp += 'FRAME:\t'+str(i+1)+'\n' + otp += 'BEFORE:\t '+f[0] + otp += 'TRANSLATION:\n '+f[1] + otp += 'AFTER:\t '+f[2] + otp += '\n------------------------------------------------\n' + i+=1 + rep += otp + + else: + tdict = { + 'GCA':'A','GCC':'A','GCG':'A','GCT':'A', 'TGC':'C','TGT':'C', 'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E', + 'TTC':'F', 'TTT':'F', 'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G', 'CAC':'H', 'CAT':'H', 'ATA':'I', 'ATC':'I', 'ATT':'I', + 'AAA':'K', 'AAG':'K', 'TTA':'L', 'TTG':'L', 'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L', 'ATG':'M', 'AAC':'N', 'AAT':'N', + 'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P', 'CAA':'Q', 'CAG':'Q', 'AGA':'R', 'AGG':'R', 'CGA':'R', 'CGC':'R', 'CGG':'R', + 'CGT':'R', 'AGC':'S', 'AGT':'S', 'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S', 'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T', + 'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V', 'TGG':'W', 'TAC':'Y', 'TAT':'Y', 'TAG': '*', 'TGA':'*', 'TAA':'*' + } + for r in fa.contigs: + + r_dict = r.translate2protein_in_range(args.startCodons, args.stopCodons, tdict) + otp += '\n=============================\n'+r.name+'\n=============================\n' + otp += 'FORWARD\n' + i = 0 + + for f in r_dict['fwd']: + otp += 'FRAME:\t'+str(i+1)+'\n' + for k in f: + otp += '\n'+k[0]+' start: '+str(k[1]) + otp += '\n------------------------------------------------\n' + otp += '\n=================================================\n' + otp += 'REVERS\n' + i = 0 + for f in r_dict['rev']: + otp += 'FRAME:\t'+str(i+1)+'\n' + for k in f: + otp += '\n'+k[0]+' start: '+str(k[1]) + otp += '\n------------------------------------------------\n' + otp += '\n=================================================\n' + rep += otp + fa.write(args.output) + rep += '\n\n------------------------------------------------------' + rep += '\nFinished:\t'+str(datetime.datetime.now()) + if args.report: + with args.report as log_file: + log_file.write(rep) + else: + print rep +def cut_name(args): + pass + + if __name__ == '__main__': exit(main())