From c419afd8056735eb0b6d5b3db6a7ebc255b9931a Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 23 Jun 2016 00:04:47 +0200 Subject: [PATCH] Finding aproximate primers added modified: README.md modified: bin/cmdfatool.py modified: fatool/fa.py modified: fatool/fuzzy.py modified: fatool/sequence.py modified: fatool/tests/test_sequence.py --- README.md | 55 +++++------ bin/cmdfatool.py | 153 ++++++++++++++++++----------- fatool/fa.py | 15 ++- fatool/fuzzy.py | 31 ++---- fatool/sequence.py | 38 +++++--- fatool/tests/test_sequence.py | 220 +++++++++++++++++++++++++++++++++++++----- 6 files changed, 366 insertions(+), 146 deletions(-) diff --git a/README.md b/README.md index 7c4f5de..3e2ed0d 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,6 @@ NAME ==== fatool -DESCRIPTION -=========== - -Tool for analyze and manipulate fasta files VERSION ======= @@ -19,13 +15,18 @@ APACHE 2.0 Specified in LICENSE.md file INTRODUCTION ============ -Command line tool in python 2.7. It operates on fa/fasta/etc. files. version: 0.1.0 +Package and Command line tool in python 2.7. It operates on fa/fasta/etc. files. version: 0.2.1. To install package use setup.py install. PREREQUISITES ============= PYTHON 2.7 +USAGE +===== + + + COMMAND LINE ============ @@ -53,7 +54,7 @@ fatool commands: cut: usage: cmdfatool.py cut [-h] -f FAFILE -r RANGE [-o OUTPUT] [-s STEP] - [--log LOG] [--operator OPERATOR] + [--report REPORT] [--operator OPERATOR] optional arguments: -h, --help show this help message and exit @@ -61,51 +62,51 @@ optional arguments: -r RANGE, --range RANGE cutted sequence length -o OUTPUT, --output OUTPUT output file default: output.fa -s STEP, --step STEP step length default: 1 - --log LOG log file if not supplied stdout + --report REPORT log file if not supplied stdout --operator OPERATOR user who have fired script it will be noted in log extractNames: -usage: cmdfatool.py extractNames [-h] -f FAFILE [-o OUTPUT] [--log LOG] +usage: cmdfatool.py extractNames [-h] -f FAFILE [-o OUTPUT] [--report REPORT] [--operator OPERATOR] optional arguments: -h, --help show this help message and exit -f FAFILE, --fafile FAFILE file to be cut usualy *.fa -o OUTPUT, --output OUTPUT output file if not supplied stdout - --log LOG log file if not supplied stdout - --operator OPERATOR user who have fired script it will be noted in log + --report REPORT log file if not supplied stdout + --operator OPERATOR user who have fired script it will be noted in log extractContigs: usage: cmdfatool.py extractContigs [-h] -f FAFILE --list LIST -o OUTPUT - [--log LOG] [--operator OPERATOR] + [--report REPORT] [--operator OPERATOR] [--multifile] optional arguments: - -h, --help show this help message and exit + -h, --help show this help message and exit -f FAFILE, --fafile FAFILE file to be cut usualy *.fa - --list LIST file containing list of contigs one contig per line + --list LIST file containing list of contigs one contig per line -o OUTPUT, --output OUTPUT output file; if --multifile is set output directory - --log LOG log file if not supplied stdout - --operator OPERATOR user who have fired script it will be noted in log - --multifile if this flag is set each contig will be saved in - separate file + --report REPORT log file if not supplied stdout + --operator OPERATOR user who have fired script it will be noted in log + --multifile if this flag is set each contig will be saved in + separate file remContigs usage: cmdfatool.py remContigs [-h] -f FAFILE --list LIST -o OUTPUT - [--log LOG] [--operator OPERATOR] + [--report REPORT] [--operator OPERATOR] optional arguments: -h, --help show this help message and exit -f FAFILE, --fafile FAFILE file to be cut usualy *.fa --list LIST file containing list of contigs one contig per line -o OUTPUT, --output OUTPUT output file if not supplied stdout - --log LOG log file if not supplied stdout + --report REPORT log file if not supplied stdout --operator OPERATOR user who have fired script it will be noted in log @@ -113,7 +114,7 @@ optional arguments: usage: cmdfatool.py join [-h] -f FAFILE -o OUTPUT [--files [FILES [FILES ...]]] [--overwrite] - [--log LOG] [--operator OPERATOR] + [--report REPORT] [--operator OPERATOR] optional arguments: -h, --help show this help message and exit @@ -121,33 +122,33 @@ optional arguments: -o OUTPUT, --output OUTPUT output file if not supplied stdout --files [FILES [FILES ...]] files to be joined --overwrite if set owerwrites contigs with same name - --log LOG log file if not supplied stdout + --report REPORT log file if not supplied stdout --operator OPERATOR user who have fired script it will be noted in log split -usage: cmdfatool.py split [-h] -f FAFILE -d OUTPUTDIR [--log LOG] +usage: cmdfatool.py split [-h] -f FAFILE -d OUTPUTDIR [--report REPORT] [--operator OPERATOR] optional arguments: -h, --help show this help message and exit -f FAFILE, --fafile FAFILE file to be cut usualy *.fa -d OUTPUTDIR, --outputDir OUTPUTDIR output directory where splited contigs will be saved - --log LOG log file if not supplied stdout + --report REPORT log file if not supplied stdout --operator OPERATOR user who have fired script it will be noted in log reverse -usage: cmdfatool.py reverse [-h] -f FAFILE -o OUTPUT [--log LOG] +usage: cmdfatool.py reverse [-h] -f FAFILE -o OUTPUT [--report REPORT] [--operator OPERATOR] optional arguments: -h, --help show this help message and exit -f FAFILE, --fafile FAFILE file to be cut usualy *.fa -o OUTPUT, --output OUTPUT output file; if --multifile is set output directory - --log LOG log file if not supplied stdout + --report REPORT log file if not supplied stdout --operator OPERATOR user who have fired script it will be noted in log @@ -165,11 +166,11 @@ optional arguments: stats -usage: cmdfatool.py stats [-h] -f FAFILE [--log LOG] +usage: cmdfatool.py stats [-h] -f FAFILE [--report REPORT] [--operator [OPERATOR [OPERATOR ...]]] optional arguments: -h, --help show this help message and exit -f FAFILE, --fafile FAFILE file to show statistics usualy *.fa - --log LOG log file if not supplied stdout + --report REPORT log file if not supplied stdout --operator [OPERATOR [OPERATOR ...]] user who have fired script it will be noted in log diff --git a/bin/cmdfatool.py b/bin/cmdfatool.py index 2ec7a47..30abff2 100644 --- a/bin/cmdfatool.py +++ b/bin/cmdfatool.py @@ -6,12 +6,16 @@ import re import datetime from string import maketrans -# from fatool import Contig from fatool import * from decimal import * +import logging def main(): + logging.basicConfig(level=logging.INFO) + logger = logging.getLogger(__name__) + logger.setLevel(logging.INFO) + #logger.setLevel(logging.DEBUG) parser = argparse.ArgumentParser() #parser.add_argument('-f', '--fafile', help='file to be cut usualy *.fa', type=argparse.FileType('r'), required=True) parser.add_argument('-v', '--version', help='display version number and exit', action='version', version='%(prog)s 0.2.1') @@ -22,23 +26,23 @@ def main(): sub_cut.add_argument('-r', '--range', help='cutted sequence length', type=int, required=True) sub_cut.add_argument('-o', '--output', help='output file default: output.fa', type=argparse.FileType('w'), default='output.fa') sub_cut.add_argument('-s', '--step', help='step length default: 1', type=int, default=1) - sub_cut.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_cut.add_argument('--operator', help='user who have fired script it will be noted in log', type=str) + sub_cut.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_cut.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) sub_cut.set_defaults(func=cut_fa) sub_en = subparsers.add_parser('extractNames', help='extracting contigs names only') sub_en.add_argument('-f', '--fafile', help='file to be cut usualy *.fa', type=argparse.FileType('r'), required=True) sub_en.add_argument('-o', '--output', help='output file if not supplied stdout', type=argparse.FileType('w')) - sub_en.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_en.add_argument('--operator', help='user who have fired script it will be noted in log', type=str) + sub_en.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_en.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) sub_en.set_defaults(func=extract_names) sub_ec = subparsers.add_parser('extractContigs', help='extracting contigs specified in file (output in new file)') sub_ec.add_argument('-f', '--fafile', help='file to be cut usualy *.fa', type=argparse.FileType('r'), required=True) sub_ec.add_argument('--list', help='file containing list of contigs one contig per line', type=argparse.FileType('r'), required=True) sub_ec.add_argument('-o', '--output', help='output file; if --multifile is set output directory', type=str, required=True) - sub_ec.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_ec.add_argument('--operator', help='user who have fired script it will be noted in log', type=str) + sub_ec.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_ec.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) sub_ec.add_argument('--multifile', help='if this flag is set each contig will be saved in separate file', action='store_true') sub_ec.set_defaults(func=extract_contigs) @@ -46,8 +50,8 @@ def main(): sub_rc.add_argument('-f', '--fafile', help='file to be cut usualy *.fa', type=argparse.FileType('r'), required=True) sub_rc.add_argument('--list', help='file containing list of contigs one contig per line', type=argparse.FileType('r'), required=True) sub_rc.add_argument('-o', '--output', help='output file if not supplied stdout', type=str, required=True) - sub_rc.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_rc.add_argument('--operator', help='user who have fired script it will be noted in log', type=str) + sub_rc.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_rc.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) sub_rc.set_defaults(func=remove_contigs) sub_jc = subparsers.add_parser('join', help='joining two or more files, yet not verifing duplicates') @@ -55,22 +59,22 @@ def main(): sub_jc.add_argument('-o', '--output', help='output file if not supplied stdout', type=argparse.FileType('w'), required=True) sub_jc.add_argument('--files', help='files to be joined', nargs='*', type=argparse.FileType('r')) sub_jc.add_argument('--overwrite', help='if set owerwrites contigs with same name', action='store_true') - sub_jc.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_jc.add_argument('--operator', help='user who have fired script it will be noted in log', type=str) + sub_jc.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_jc.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) sub_jc.set_defaults(func=join) sub_sc = subparsers.add_parser('split', help='each cotig saved into separate file') sub_sc.add_argument('-f', '--fafile', help='file to be cut usualy *.fa', type=argparse.FileType('r'), required=True) sub_sc.add_argument('-d', '--outputDir', help='output directory where splited contigs will be saved', type=str, required=True) - sub_sc.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_sc.add_argument('--operator', help='user who have fired script it will be noted in log', type=str) + sub_sc.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_sc.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) sub_sc.set_defaults(func=split_contigs) sub_r = subparsers.add_parser('reverse', help='reverse all sequences in file') sub_r.add_argument('-f', '--fafile', help='file to be cut usualy *.fa', type=argparse.FileType('r'), required=True) sub_r.add_argument('-o', '--output', help='output file; if --multifile is set output directory', type=argparse.FileType('w'), required=True) - sub_r.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_r.add_argument('--operator', help='user who have fired script it will be noted in log', type=str) + sub_r.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_r.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) sub_r.set_defaults(func=reverse) sub_v = subparsers.add_parser('validate', help='validates fa file') @@ -81,26 +85,41 @@ def main(): sub_s = subparsers.add_parser('stats', help='show statistics of fa file') sub_s.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True) - sub_s.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_s.add_argument('--operator', help='user who have fired script it will be noted in log', nargs='*', type=str) + sub_s.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_s.add_argument('--operator', help='user who have fired script it will be noted in report', nargs='*', type=str) sub_s.set_defaults(func=statistics) - sub_s = subparsers.add_parser('findMotif', help='show statistics of fa file') - sub_s.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True) - sub_s.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_s.add_argument('--operator', help='user who have fired script it will be noted in log', nargs='*', type=str) - sub_s.set_defaults(func=find_motif) - - sub_s = subparsers.add_parser('findPrimer', help='show statistics of fa file') - sub_s.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True) - sub_s.add_argument('--start', help='strat codon 5\'', type=str, required=True) - sub_s.add_argument('--stop', help='stop codon 3\'', type=str, required=True) - sub_s.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) - sub_s.add_argument('--operator', help='user who have fired script it will be noted in log', nargs='*', type=str) - sub_s.set_defaults(func=find_primers) - - #parser.add_argument('--operator', help='user who have fired script it will be noted in log', type=str) - #parser.add_argument('--log', help='log file if not supplied stdout', type=argparse.FileType('w')) + sub_fm = subparsers.add_parser('findMotif', help='finding given motif; display motif and its position in contig') + sub_fm.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True) + sub_fm.add_argument('--mml', help='mismatch level number of allowed missmatches in primers (detfault 0)', type=str, default=0) + sub_fm.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_fm.add_argument('--operator', help='user who have fired script it will be noted in report', nargs='*', type=str) + sub_fm.set_defaults(func=find_motif) + + sub_fp = subparsers.add_parser('findPrimer', help='display list of founded primers') + sub_fp.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True) + sub_fp.add_argument('--start', help='strat codon 5\'', type=str, required=True) + sub_fp.add_argument('--stop', help='stop codon 3\'', type=str, required=True) + sub_fp.add_argument('--mode', help='FF (start forward, stop forward) or FR (start 5\' stop 3\')', type=str, choices=['FF', 'FR'], required=True) + sub_fp.add_argument('--minlen', help='minimum length (detfault 50bp)', type=int, default=50) + sub_fp.add_argument('--maxlen', help='max length (detfault 1000bp)', type=int, default=1000) + sub_fp.add_argument('--mml', help='mismatch level number of allowed missmatches in primers (detfault 0)', type=int, default=0) + sub_fp.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_fp.add_argument('--operator', help='user who have fired script it will be noted in report', nargs='*', type=str) + sub_fp.set_defaults(func=find_primers) + ''' + sub_fap = subparsers.add_parser('findPrimer', help='show statistics of fa file') + sub_fap.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True) + sub_fap.add_argument('--start', help='strat codon 5\'', type=str, required=True) + sub_fap.add_argument('--stop', help='stop codon 3\'', type=str, required=True) + sub_fap.add_argument('--minlen', help='minimum length (detfault 50bp)', type=str, default=50) + sub_fap.add_argument('--maxlen', help='max length (detfault 1000bp)', type=str, default=1000 + sub_fap.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) + sub_fap.add_argument('--operator', help='user who have fired script it will be noted in report', nargs='*', type=str) + sub_fap.set_defaults(func=find_primers) + ''' + #parser.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) + #parser.add_argument('--report', help='log file if not supplied stdout', type=argparse.FileType('w')) args = parser.parse_args() #if args.version: @@ -126,6 +145,13 @@ def make_log_header(cmd, op): def cut_fa(args): + #logging.basicConfig(level=logging.ERROR) + #logging.basicConfig(level=logging.DEBUG) + logger = logging.getLogger(__name__) + + logger.setLevel(logging.DEBUG) + logger.debug('debug mode started') + logger.info('command: cut starting') rep = str(make_log_header('cut', args.operator)) fafile = args.fafile @@ -134,19 +160,26 @@ def cut_fa(args): step = args.step f = Fa.load_from_file(fafile) + logger.info('file: '+fafile.name+' loaded') contig_list = [] for r in f.contigs: - contig_list.join(r.cut(split_range, step)) + contig_list += r.cut(split_range, step) + logger.info('cutted contigs added from conting: '+r.name) result_fa = Fa(contig_list, 'splited') + logger.info('trying to write file') result_fa.write(output) + logger.info('file written') rep += '\n\n------------------------------------------------------' rep += '\nFinished:\t'+str(datetime.datetime.now()) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(rep) def extract_names(args): + logger = logging.getLogger(__name__) + logger.setLevel(logging.info) + logger.info('command: extractNames starting') rep = str(make_log_header('extractNames', args.operator)) fafile = args.fafile output = args.output @@ -159,8 +192,8 @@ def extract_names(args): rep += 'Number of neames founded:\t' + str(len(names)) rep += '\n\n------------------------------------------------------' rep += '\nFinished:\t'+str(datetime.datetime.now()) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(rep) def extract_contigs(args): @@ -184,8 +217,8 @@ def extract_contigs(args): rep += '\Extracted contigs:\t'+str(len(result_ta.contigs)) rep += '\n\n------------------------------------------------------' rep += '\nFinished:\t'+str(datetime.datetime.now()) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(rep) else: print rep @@ -205,8 +238,8 @@ def remove_contigs(args): result_fa.write(args.output) rep += '\n\n------------------------------------------------------' rep += '\nFinished:\t'+str(datetime.datetime.now()) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(stats_rep) else: print stats_rep @@ -232,8 +265,8 @@ def join(args): fa.write(args.output) rep += '\n\n------------------------------------------------------' rep += '\nFinished:\t'+str(datetime.datetime.now()) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(stats_rep) else: print stats_rep @@ -245,8 +278,8 @@ def split_contigs(args): fa.write_multiple_files(args.output) rep += '\n\n------------------------------------------------------' rep += '\nFinished:\t'+str(datetime.datetime.now()) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(rep) else: print rep @@ -296,8 +329,8 @@ def statistics(args): stats_rep += '\nL90:\t'+str(stats['L90']) stats_rep += '\n\n------------------------------------------------------' stats_rep += '\nFinished:\t'+str(datetime.datetime.now()) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(stats_rep) else: print stats_rep @@ -321,8 +354,8 @@ def validate(args): rep += '\n\n------------------------------------------------------' rep += '\nFinished:\t'+str(datetime.datetime.now()) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(rep) else: print rep @@ -333,8 +366,8 @@ def reverse(args): fa = Fa.load_from_file(args.fafile) fa.reverse() fa.write(args.output) - if args.log: - with args.log as log_file: + if args.report: + with args.report as log_file: log_file.write(rep) else: print rep @@ -344,10 +377,18 @@ def find_motif(args): pass def find_primers(args): - print 'not available yet' - pass - - + fa = Fa.load_from_file(args.fafile) + logger = logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + logger.debug(args) + rep = '' + for r in fa.contigs: + rep += '\n================\n\t\t'+r.name+'\n' + for q in r.find_aprox_primers(args.start, args.stop, str(args.mode), int(args.mml), args.minlen, args.maxlen): + rep += q+'\n' + + print rep + if __name__ == '__main__': exit(main()) diff --git a/fatool/fa.py b/fatool/fa.py index 827df1b..1aaba5f 100644 --- a/fatool/fa.py +++ b/fatool/fa.py @@ -4,27 +4,35 @@ import re import math from fatool import Sequence +import logging class Fa(object): def __init__(self, contigs_list, name): - #print contigs_list - # do poprawki + logger = logging.getLogger(__name__) + + logger.debug('creating Fa object') + logger.info('but info works') self.name = name self.contigs = [] self.contigs_idx = {} for r in contigs_list: if not isinstance(r, Sequence): + logger.error('Supplied param is not Sequence object') raise TypeError('Wrong param supplied Sequence was expected') if not r.name in self.contigs_idx: if len(self.contigs) > 0: + logger.debug('appending contig: '+r.name) self.contigs.append(r) else: + logger.debug('adding first contig: '+r.name) self.contigs = [r] self.contigs_idx[r.name] = len(self.contigs) - 1 else: + logger.error('Sequence name: '+r.name+' already exists in file') raise NameError('Sequence name already exists: '+r.name) - # self.stats{'A':0,'C':0,'T':0,'G':0,'N':0, 'L':0, } + + @staticmethod def load_from_file(file): if isinstance(file, str): @@ -110,6 +118,7 @@ def validate(self): def nl_statistics(self, g, percent): ''' Counts statistics of N50, L50, N75 etc. + g array containing sorted contigs by length, from biggest to lowest ''' ncount = -1 # index & number of contigs with +1 nsum = 0 diff --git a/fatool/fuzzy.py b/fatool/fuzzy.py index 2177397..1e41376 100644 --- a/fatool/fuzzy.py +++ b/fatool/fuzzy.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- #import math +import logging def find_aprox_match_iter(needle, hstack, missmatch_level, hs_start_pos = 0): i = hs_start_pos # start iterate from start position @@ -11,8 +12,6 @@ def find_aprox_match_iter(needle, hstack, missmatch_level, hs_start_pos = 0): while i < len(hstack): if hstack[i] != needle[j]: mmatch_count += 1 - #print mmatch_count - #print 'j = '+str(j) if mmatch_count > missmatch_level: # if missmatch level oversized back to strat + 1 and start again i -= j @@ -41,7 +40,6 @@ def find_all_aprox_matches(needle, hstack, missmatch_level, hs_start_pos): i = r[0]+1 # match not found - no more maches in hstack else: - #print 'not found' break return ret_list @@ -49,10 +47,8 @@ def find_all_aprox_matches(needle, hstack, missmatch_level, hs_start_pos): def find_motif_in_aprox_range(start_motif, stop_motif, hstack, missmatch_level, hs_start_pos = 0): start = 0 stop = 0 - #print 'startm: '+start_motif+'\tstop_motif: '+stop_motif start = find_aprox_match_iter(start_motif, hstack, missmatch_level, hs_start_pos = 0) stop = find_aprox_match_iter(stop_motif, hstack, missmatch_level, start[1]) - #print start,stop if start and stop: return hstack[start[1]:stop[0]] @@ -61,21 +57,21 @@ def find_all_motifs_in_aprox_range(start_motif, stop_motif, hstack, missmatch_le start = 0 stop = 0 ret_list = [] - print 'hstack in fuzzy' - print hstack + logger = logging.getLogger(__name__) + #logger.setLevel(logging.DEBUG) + logger.debug([start_motif, stop_motif, hstack, missmatch_level, hs_start_pos, len_min, len_max]) + logger.debug(hstack) + while i <= len(hstack): start = find_aprox_match_iter(start_motif, hstack, missmatch_level, i) stop = find_aprox_match_iter(stop_motif, hstack, missmatch_level, start[1]) - #print start,stop if start and stop: - #print 'start + stop found' - if stop[0] - start[1] > len_min and stop[0] - start[1] < len_max: - #print 'match valid' - ret_list.append(hstack[start[1]:stop[0]]) + if stop[1] - start[0] >= len_min and stop[1] - start[0] <= len_max: + ret_list.append(hstack[start[0]:stop[1]]) i = start[0]+1 - #print i else: break + logger.debug(ret_list) return ret_list def find_motif(needle, hstack, missmatch_level, hs_start_pos = 0): @@ -85,22 +81,13 @@ def find_motif(needle, hstack, missmatch_level, hs_start_pos = 0): return hstack[r[0]:r[1]] def find_all_motifs(needle, hstack, missmatch_level, hs_start_pos = 0): - #print 'fuzzy.find_all_motifs' - #print needle - #print hstack - #print missmatch_level - #print hs_start_pos i = hs_start_pos ret_list = [] while i <= len(hstack): r = find_aprox_match_iter(needle, hstack, missmatch_level, i ) - #print r if r: - #print 'founded: ',r ret_list.append(hstack[r[0]:r[1]]) - #ret_list = [hstack[r[0]:r[1]]] i = r[0]+1 else: break - #print ret_list return ret_list \ No newline at end of file diff --git a/fatool/sequence.py b/fatool/sequence.py index 4e21fa6..3d2b11c 100644 --- a/fatool/sequence.py +++ b/fatool/sequence.py @@ -4,6 +4,7 @@ from collections import Counter import fuzzy import re +import logging class Sequence(object): @@ -133,7 +134,7 @@ def detailed_validate_generic(seq, domain): log_info = [] # if not allowed chars found if m: - # it may be 60 xxxxxxxxxx xxx.... format + # it may be 61 xxxxxxxxxx xxx.... format if re.search('(\d+)', seq): seq_array = seq.split('\n') new_array = [] # array to store new sequence after cleaning and transformation @@ -146,7 +147,7 @@ def detailed_validate_generic(seq, domain): if end_of_seq_array > 1: line_length = int(new_array[1][0])-int(new_array[0][0]) - # validate ecah block (between " " [space]) of given sequence + # validate each block (between " " [space]) of given sequence i = 0 while i < end_of_seq_array: # digit on begining of line was not found - error @@ -251,7 +252,7 @@ def translate2protein_in_range_generic(seq, start, stop, tdict): frame2 = [] frame3 = [] - # creating pattern to find start codons + # creating pattern (from dict) to find start codons for r in start: p += r+'|' p = '('+p.rstrip('|')+')' @@ -331,30 +332,35 @@ def find_aprox_motif(self, motif, missmatch_level): self.normalize() return fuzzy.find_all_motifs(motif, self.seq, missmatch_level, hs_start_pos = 0) - def find_aprox_primers(self, start, stop, missmatch_level = 0, len_min = 50, len_max = 10000): + def find_primers(self, start, stop, mode, len_min = 50, len_max = 10000): + return self.find_aprox_primers(start, stop, mode, 0, len_min, len_max) + + + def find_aprox_primers(self, start, stop, mode, missmatch_level = 0, len_min = 50, len_max = 10000): #start 5'->3' # add missmatch_level condition if 50%> - rev = stop[::-1] - new_stop = rev.translate(maketrans('ACTGactg', 'TGACtgac')) + logger = logging.getLogger(__name__) + #logger.setLevel(logging.DEBUG) + logger.debug('given args: start:'+start+' stop: '+stop+' mode: '+mode+' mm level: '+str(missmatch_level)+' len_min: '+str(len_min)+' len_max: '+str(len_max)) + #logger.debug('sequence: '+self.seq) + if mode.upper() == 'FR': + rev = stop[::-1] + stop = rev.translate(maketrans('ACTGactg', 'TGACtgac')) + elif mode.upper() != 'FF': + raise ('Unexpected mode: '+str(mode)+' expected values [FR|FF]') + r_list = [] self.normalize() - #print '\nAfter normailzation' - #print self.seq res = fuzzy.find_all_motifs_in_aprox_range(start, stop, self.seq, missmatch_level, 0, len_min, len_max) if res: r_list.extend(res) - rev = start[::-1] - new_start = rev.translate(maketrans('ACTGactg', 'TGACtgac')) - #print 'new_seq in sequence\n' - #print new_seq.seq - res = fuzzy.find_all_motifs_in_aprox_range(new_start, stop, self.seq, missmatch_level, 0, len_min, len_max) + res = fuzzy.find_all_motifs_in_aprox_range(start, stop, self.reverse().seq, missmatch_level, 0, len_min, len_max) if res: r_list.extend(res) - print 'Sequence.find_aprox_primers', - for s in r_list: - print s+'\n' + + logger.debug(r_list) return r_list def __str__(self): diff --git a/fatool/tests/test_sequence.py b/fatool/tests/test_sequence.py index a9ad4ec..251f808 100644 --- a/fatool/tests/test_sequence.py +++ b/fatool/tests/test_sequence.py @@ -1,6 +1,7 @@ import unittest import sys from fatool import Sequence +from string import maketrans @@ -170,6 +171,13 @@ def test_statistics(self): c = Sequence('>name', ' 1 ACTG NNNNNNNN\naaanan') self.assertEqual({'A':5, 'C':1, 'T':1, 'G':1, 'N':10, 'L':18}, c.statistics()) + def test_find_primers(self): + test = 'ATGGAATCGGCTTTTAATACTGCAGGGGCGTTAAGTTGGCATGAACTCACAACCAATAATACCGAAGAGGCCATGCGC' + c = Sequence('>test', test) + self.assertEqual(['GGAATCGGCTTTTAATACTGCAGGGG'],c.find_primers('GGAA', 'GGGG', 'FF')) + + self.assertEqual(['AATCGGCT','AATACT','AAGTTGGCATGAACT','AACT','AACGCCCCT'],c.find_primers('AA', 'CT', 'ff')) + def test_find_aprox_motif(self): test = 'ATGGAATCGGCTTTTAATACTGCAGGGGCGTTAAGTTGGCATGAACTCACAACCAATAATACCGAAGAGGCCATGCGC' c = Sequence('>test', test) @@ -180,32 +188,200 @@ def test_find_aprox_motif(self): self.assertEqual(['TGGAATCGGCT'], c.find_aprox_motif('TGGAATCGGCT',0)) + def test_find_primers(self): + test = 'ATGGAATCGGCTTTTAATACTGCAGGGGCGTTAAGTTGGCATGAACTCACAACCAATAATACCGAAGAGGCCATGCGCTTCTATGCTGAGATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGGGATCAGCATTGGCGGAATTACCGACAGTTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGGATCAAGTGGCTATCAGTGCTAAAAAACTCGGCGGTGACATTCTGTTTGGCCCTGAAGACATTCCAGAGGTGGGCCGTTTTTGTTGGATAAAAGACCCACAGGGCGCCATTATTGCGGCCATTAGCTATTTAAAACGTTGATGTAA' + c = Sequence('>test', test) + t_TTT_GGG_FF = [ + 'TTTTAATACTGCAGGG', + 'TTTAATACTGCAGGG', + 'TTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG', + 'TTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG', + 'TTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG', + 'TTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG', + 'TTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGGATCAAGTGGCTATCAGTGCTAAAAAACTCGGCGGTGACATTCTGTTTGGCCCTGAAGACATTCCAGAGGTGGG', + 'TTTGGCCCTGAAGACATTCCAGAGGTGGG', + 'TTTTTGTTGGATAAAAGACCCACAGGG', + 'TTTTGTTGGATAAAAGACCCACAGGG', + 'TTTGTTGGATAAAAGACCCACAGGG', + 'TTTTAAATAGCTAATGGCCGCAATAATGGCGCCCTGTGGG', + 'TTTAAATAGCTAATGGCCGCAATAATGGCGCCCTGTGGG', + 'TTTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG', + 'TTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG', + 'TTTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG', + 'TTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG', + 'TTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG', + 'TTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG', + 'TTTTCAATAATGTGATAGTGACCGTGGG', + 'TTTCAATAATGTGATAGTGACCGTGGG' + ] + + t_TTT_CCC_FR = [ + 'TTTTAATACTGCAGGG', + 'TTTAATACTGCAGGG', + 'TTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG', + 'TTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG', + 'TTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG', + 'TTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG', + 'TTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGGATCAAGTGGCTATCAGTGCTAAAAAACTCGGCGGTGACATTCTGTTTGGCCCTGAAGACATTCCAGAGGTGGG', + 'TTTGGCCCTGAAGACATTCCAGAGGTGGG', + 'TTTTTGTTGGATAAAAGACCCACAGGG', + 'TTTTGTTGGATAAAAGACCCACAGGG', + 'TTTGTTGGATAAAAGACCCACAGGG', + 'TTTTAAATAGCTAATGGCCGCAATAATGGCGCCCTGTGGG', + 'TTTAAATAGCTAATGGCCGCAATAATGGCGCCCTGTGGG', + 'TTTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG', + 'TTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG', + 'TTTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG', + 'TTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG', + 'TTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG', + 'TTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG', + 'TTTTCAATAATGTGATAGTGACCGTGGG', + 'TTTCAATAATGTGATAGTGACCGTGGG' + ] + + self.assertEqual(t_TTT_GGG_FF, c.find_primers('TTT', 'GGG', 'FF', 0,10000)) + self.assertEqual(t_TTT_CCC_FR, c.find_primers('TTT', 'CCC', 'FR', 0,10000)) + def test_find_aprox_primers(self): test = 'ATGGAATCGGCTTTTAATACTGCAGGGGCGTTAAGTTGGCATGAACTCACAACCAATAATACCGAAGAGGCCATGCGCTTCTATGCTGAGATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGGGATCAGCATTGGCGGAATTACCGACAGTTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGGATCAAGTGGCTATCAGTGCTAAAAAACTCGGCGGTGACATTCTGTTTGGCCCTGAAGACATTCCAGAGGTGGGCCGTTTTTGTTGGATAAAAGACCCACAGGGCGCCATTATTGCGGCCATTAGCTATTTAAAACGTTGATGTAA' c = Sequence('>test', test) - #print '\n===========reverse==============\n' - #print c.reverse() - #res = c.find_aprox_primers('TTT', 'GGG', 0,0,10000) - #REV 'TTT'<->'CCC' - #NOR 'AAA'<->'GGG' - #print '==== res ====' - #for r in res: - # print r+'\n' - tl = [ - 'TAATACTGCAGGGGCGTTAAGTTGGCATGAACTCACAACCAATAATACCGAAGAGGCCATGCGCTTCTATGCTGAGATTTTTGGCTGGCACTTTAAAACCGTCAAAATG', - 'AATACTGCAGGGGCGTTAAGTTGGCATGAACTCACAACCAATAATACCGAAGAGGCCATGCGCTTCTATGCTGAGATTTTTGGCTGGCACTTTAAAACCGTCAAAATG', - 'TTGGCTGGCACTTTAAAACCGTCAAAATG', - 'TGGCTGGCACTTTAAAACCGTCAAAATG', - 'GGCTGGCACTTTAAAACCGTCAAAATG', - 'AAAACCGTCAAAATG', - 'AAT', - 'GG', - 'TTGTTGGATAAAAGA', - 'TGTTGGATAAAAGA', - 'GTTGGATAAAAGA' + + t_TTT_GGG_FF = [ + 'TTTTAATACTGCAGGG', + 'TTTAATACTGCAGGG', + 'TTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG', + 'TTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG', + 'TTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG', + 'TTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG', + 'TTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGGATCAAGTGGCTATCAGTGCTAAAAAACTCGGCGGTGACATTCTGTTTGGCCCTGAAGACATTCCAGAGGTGGG', + 'TTTGGCCCTGAAGACATTCCAGAGGTGGG', + 'TTTTTGTTGGATAAAAGACCCACAGGG', + 'TTTTGTTGGATAAAAGACCCACAGGG', + 'TTTGTTGGATAAAAGACCCACAGGG', + 'TTTTAAATAGCTAATGGCCGCAATAATGGCGCCCTGTGGG', + 'TTTAAATAGCTAATGGCCGCAATAATGGCGCCCTGTGGG', + 'TTTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG', + 'TTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG', + 'TTTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG', + 'TTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG', + 'TTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG', + 'TTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG', + 'TTTTCAATAATGTGATAGTGACCGTGGG', + 'TTTCAATAATGTGATAGTGACCGTGGG' ] - - self.assertEqual(tl, c.find_aprox_primers('TTT', 'GGG', 0,0,10000)) + + t_TTT_CCC_FR = [ + 'TTTTAATACTGCAGGG', + 'TTTAATACTGCAGGG', + 'TTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG', + 'TTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG', + 'TTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG', + 'TTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG', + 'TTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGGATCAAGTGGCTATCAGTGCTAAAAAACTCGGCGGTGACATTCTGTTTGGCCCTGAAGACATTCCAGAGGTGGG', + 'TTTGGCCCTGAAGACATTCCAGAGGTGGG', + 'TTTTTGTTGGATAAAAGACCCACAGGG', + 'TTTTGTTGGATAAAAGACCCACAGGG', + 'TTTGTTGGATAAAAGACCCACAGGG', + 'TTTTAAATAGCTAATGGCCGCAATAATGGCGCCCTGTGGG', + 'TTTAAATAGCTAATGGCCGCAATAATGGCGCCCTGTGGG', + 'TTTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG', + 'TTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG', + 'TTTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG', + 'TTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG', + 'TTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG', + 'TTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG', + 'TTTTCAATAATGTGATAGTGACCGTGGG', + 'TTTCAATAATGTGATAGTGACCGTGGG' + ] + + self.assertEqual(t_TTT_GGG_FF, c.find_aprox_primers('TTT', 'GGG', 'FF', 0,0,10000)) + self.assertEqual(t_TTT_CCC_FR, c.find_aprox_primers('TTT', 'CCC', 'FR', 0,0,10000)) + + t_TTTT_GGGG_FF = [ + 'CTTTTAATACTGCAGGG', + 'TTTTAATACTGCAGGG', + 'TTTAATACTGCAGGG', + 'TTCTATGCTGAGATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG', + 'ATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG', + 'TTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG', + 'TTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG', + 'TTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG', + 'CTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG', + 'TTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG', + 'TTATTGAAAACGAGG', + 'TATTGAAAACGAGG', + 'GTTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGG', + 'TTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGG', + 'TATTACCGTTAACGATGTGG', + 'TTCTGTTTGGCCCTGAAGACATTCCAGAGG', + 'TGTTTGGCCCTGAAGACATTCCAGAGG', + 'GTTTGGCCCTGAAGACATTCCAGAGG', + 'TTTGGCCCTGAAGACATTCCAGAGG', + 'GTTTTTGTTGGATAAAAGACCCACAGGG', + 'TTTTTGTTGGATAAAAGACCCACAGGG', + 'TTTTGTTGGATAAAAGACCCACAGGG', + 'TTTGTTGGATAAAAGACCCACAGGG', + 'TTGTTGGATAAAAGACCCACAGGG', + 'TGTTGGATAAAAGACCCACAGGG', + 'TTATTGCGG', + 'TATTGCGG', + 'GTTTTAAATAGCTAATGGCCGCAATAATGGCG', + 'TTTTAAATAGCTAATGGCCGCAATAATGGCG', + 'TTTAAATAGCTAATGGCCGCAATAATGGCG', + 'TCTTTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG', + 'CTTTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG', + 'TTTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG', + 'TTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG', + 'TTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG', + 'TCTTCAGGG', + 'GTTTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG', + 'TTTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG', + 'TTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG', + 'TTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG', + 'TTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG', + 'GTTTTCAATAATGTGATAGTGACCGTGG', + 'TTTTCAATAATGTGATAGTGACCGTGG', + 'TTTCAATAATGTGATAGTGACCGTGG', + ] + + + + self.assertEqual(t_TTTT_GGGG_FF, c.find_aprox_primers('TTTT', 'GGGG', 'FF', 1,0,10000)) + self.assertEqual(t_TTTT_GGGG_FF, c.find_aprox_primers('TTTT', 'CCCC', 'FR', 1,0,10000)) + self.assertEqual(c.find_aprox_primers('TTTT', 'CCCC', 'fr', 1,0,10000), c.find_aprox_primers('TTTT', 'CCCC', 'FR', 1,0,10000)) + self.assertEqual(c.find_aprox_primers('TTTT', 'CCCC', 'Fr', 1,0,10000), c.find_aprox_primers('TTTT', 'CCCC', 'FR', 1,0,10000)) + self.assertEqual(c.find_aprox_primers('TTTT', 'CCCC', 'fR', 1,0,10000), c.find_aprox_primers('TTTT', 'CCCC', 'FR', 1,0,10000)) + + t_TTTT_GGGG_FF_60 = [ + 'TTCTATGCTGAGATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG', + 'ATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG', + 'TTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG', + 'TTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG', + 'TTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG', + 'GTTTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG', + 'TTTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG', + 'TTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG', + 'TTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG', + 'TTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG', + ] + + self.assertEqual(t_TTTT_GGGG_FF_60, c.find_aprox_primers('TTTT', 'CCCC', 'FR', 1,60,10000)) + + t_TTTT_GGGG_FF_60_65 = [ + 'TTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG', + 'TTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG', + 'TTTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG', + 'TTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG', + 'TTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG', + 'TTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG', + ] + + for r in c.find_aprox_primers('TTTT', 'GGGG', 'FF', 1,60,65): + print r + + self.assertEqual(t_TTTT_GGGG_FF_60_65, c.find_aprox_primers('TTTT', 'CCCC', 'FR', 1,60,65)) + + if __name__ == "__main__": unittest.main() \ No newline at end of file