From a2b810b550b3a7c01291bf258a41296ac7ee91bd Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 5 Aug 2016 00:17:28 +0200 Subject: [PATCH] Fixed double > in names, fixed \r\n at ends of names modified: .gitignore modified: README.md modified: bin/cmdfatool.py modified: fatool/fa.py modified: fatool/sequence.py modified: fatool/tests/test_sequence.py --- .gitignore | 4 +- README.md | 26 ++++++++++- bin/cmdfatool.py | 92 ++++++++++++++++++------------------ fatool/fa.py | 5 +- fatool/sequence.py | 105 ++++++++++++++++++------------------------ fatool/tests/test_sequence.py | 6 ++- 6 files changed, 125 insertions(+), 113 deletions(-) diff --git a/.gitignore b/.gitignore index 1029c25..41ac187 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,6 @@ # Python egg metadata, regenerated from source files by setuptools. /*.egg-info -/*.egg \ No newline at end of file +/*.egg + +build/ \ No newline at end of file diff --git a/README.md b/README.md index 715f31b..39528e6 100644 --- a/README.md +++ b/README.md @@ -197,7 +197,7 @@ optional arguments: user who have fired script it will be noted in report - cutNameMarker: + cutNameMarker usage: cmdfatool.py cutNameMarker [-h] -f FAFILE -m MARKER -l LENGTH @@ -210,3 +210,27 @@ optional arguments: -l LENGTH, --length LENGTH length of cut --keepMarker KEEPMARKER weather to keep marker or not default 1 (Yes) -o OUTPUT, --output OUTPUT output file default: output.fa + + translateDNA2Proteins + +usage: cmdfatool.py translateDNA2Proteins [-h] -f FAFILE [-o OUTPUT] + [--startCodons [STARTCODONS [STARTCODONS ...]]] + [--stopCodons [STOPCODONS [STOPCODONS ...]]] + [--tdict {STD,VMTO,YMTO,BAPP}] + [--nss] [--report REPORT] + [--operator [OPERATOR [OPERATOR ...]]] + +optional arguments: + -h, --help show this help message and exit + -f FAFILE, --fafile FAFILE file to show statistics usualy *.fa + -o OUTPUT, --output OUTPUT output file default: output.fa + --startCodons [STARTCODONS [STARTCODONS ...]] list of start codons separated by space bar 
+ --stopCodons [STOPCODONS [STOPCODONS ...]] list of stop codons separated by space bar + --tdict {STD,VMTO,YMTO,BAPP} + Which dictionary use for translation: STD - standard, + VMTO - Vertebrate Mitochondrial, YMTO - Yeast + Mitochondrial, BAPP - Bacterial Archaeal Plant and + Plastid + --nss No Start Stop + --report REPORT report results into file if not supplied stdout + --operator [OPERATOR [OPERATOR ...]] user who have fired script it will be noted in report \ No newline at end of file diff --git a/bin/cmdfatool.py b/bin/cmdfatool.py index 00554f7..bfe9311 100644 --- a/bin/cmdfatool.py +++ b/bin/cmdfatool.py @@ -130,23 +130,14 @@ def main(): sub_trn_d2p.add_argument('-o', '--output', help='output file default: output.fa', type=argparse.FileType('w'), default='output.fa') sub_trn_d2p.add_argument('--startCodons', help='list of start codons separated by space bar', nargs='*', type=str) sub_trn_d2p.add_argument('--stopCodons', help='list of stop codons separated by space bar', nargs='*', type=str) + sub_trn_d2p.add_argument( + '--tdict', help='Which dictionary use for translation: STD - standard, VMTO - Vertebrate Mitochondrial, YMTO - Yeast Mitochondrial, BAPP - Bacterial Archaeal Plant and Plastid', + type=str, choices=['STD', 'VMTO', 'YMTO', 'BAPP'], default = 'STD' + ) sub_trn_d2p.add_argument('--nss', help='No Start Stop', action='store_true') sub_trn_d2p.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) sub_trn_d2p.add_argument('--operator', help='user who have fired script it will be noted in report', nargs='*', type=str) sub_trn_d2p.set_defaults(func=translate_dna_to_protein) - ''' - sub_fap = subparsers.add_parser('findPrimer', help='show statistics of fa file') - sub_fap.add_argument('-f', '--fafile', help='file to show statistics usualy *.fa', type=argparse.FileType('r'), required=True) - sub_fap.add_argument('--start', help='strat codon 5\'', type=str, required=True) - 
sub_fap.add_argument('--stop', help='stop codon 3\'', type=str, required=True) - sub_fap.add_argument('--minlen', help='minimum length (detfault 50bp)', type=str, default=50) - sub_fap.add_argument('--maxlen', help='max length (detfault 1000bp)', type=str, default=1000 - sub_fap.add_argument('--report', help='report results into file if not supplied stdout', type=argparse.FileType('w')) - sub_fap.add_argument('--operator', help='user who have fired script it will be noted in report', nargs='*', type=str) - sub_fap.set_defaults(func=find_primers) - ''' - #parser.add_argument('--operator', help='user who have fired script it will be noted in report', type=str) - #parser.add_argument('--report', help='log file if not supplied stdout', type=argparse.FileType('w')) args = parser.parse_args() @@ -203,7 +194,7 @@ def cut_fa(args): def extract_names(args): logger = logging.getLogger(__name__) - logger.setLevel(logging.info) + logger.setLevel(logging.INFO) logger.info('command: extractNames starting') rep = str(make_log_header('extractNames', args.operator)) fafile = args.fafile @@ -213,7 +204,7 @@ def extract_names(args): names = fa.show_names() with output as o: for r in names: - o.write('>'+r) + o.write(r+'\n') rep += 'Number of neames founded:\t' + str(len(names)) rep += '\n\n------------------------------------------------------' rep += '\nFinished:\t'+str(datetime.datetime.now()) @@ -239,7 +230,7 @@ def extract_contigs(args): else: result_fa.write(args.output) rep += '\nContigs to remove:\t'+str(len(elist)) - rep += '\Extracted contigs:\t'+str(len(result_ta.contigs)) + rep += '\Extracted contigs:\t'+str(len(result_fa.contigs)) rep += '\n\n------------------------------------------------------' rep += '\nFinished:\t'+str(datetime.datetime.now()) if args.report: @@ -429,51 +420,55 @@ def cut_name_pattern(args): for r in fa.contigs: r.leave_name_after_marker(args.marker, args.length, args.keepMarker) fa.write(args.output) + +def print_frame_output(r_dict): + i = 0 + otp 
= '' + for f in r_dict: + otp += 'FRAME:\t'+str(i+1)+'\n' + otp += '\nBEFORE:\t '+f[0] + otp += '\nTRANSLATION:\n\n'+f[1] + otp += '\n\nAFTER:\t '+f[2] + otp += '\n------------------------------------------------\n' + i+=1 + return otp def translate_dna_to_protein(args): rep = str(make_log_header('translate2protein', args.operator)) fa = Fa.load_from_file(args.fafile) + if args.tdict == 'STD': + tdict = Sequence.tdict_standard + elif args.tdict == 'VMTO': + tdict = Sequence.tdict_vertebrate_mitochondrial + elif args.tdict == 'YMTO': + tdict = Sequence.tdict_yeast_mitochondrial + elif args.tdict == '????????': + tdict = Sequence.tdict_standard + elif args.tdict == 'BAPP': + tdict = Sequence.tdict_bacterial_archaeal_plant_plastid + else: + print 'applied dictionary is not valid!' + exit(1) + r_dict = {} otp = '' if args.nss: for r in fa.contigs: - r_dict = r.translate2protein({}) + r_dict = r.translate2protein(tdict) otp += '\n=============================\n'+r.name+'\n=============================\n' - otp += '\nFORWARD\n' - i = 0 - for f in r_dict['fwd']: - otp += 'FRAME:\t'+str(i+1)+'\n' - otp += 'BEFORE:\t '+f[0] - otp += 'TRANSLATION:\n '+f[1] - otp += 'AFTER:\t '+f[2] - otp += '\n------------------------------------------------\n' - i+=1 - otp += '\nREVERS\n' - otp += '\n------------------------------------------------\n' - i = 0 - for f in r_dict['rev']: - otp += 'FRAME:\t'+str(i+1)+'\n' - otp += 'BEFORE:\t '+f[0] - otp += 'TRANSLATION:\n '+f[1] - otp += 'AFTER:\t '+f[2] - otp += '\n------------------------------------------------\n' - i+=1 + otp += '\nFORWARD\n\n' + otp += print_frame_output(r_dict['fwd']) + otp += '\n'+'='*15+'\n' + otp += '\nREVERS\n\n' + otp += print_frame_output(r_dict['rev']) rep += otp else: - tdict = { - 'GCA':'A','GCC':'A','GCG':'A','GCT':'A', 'TGC':'C','TGT':'C', 'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E', - 'TTC':'F', 'TTT':'F', 'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G', 'CAC':'H', 'CAT':'H', 'ATA':'I', 'ATC':'I', 'ATT':'I', - 
'AAA':'K', 'AAG':'K', 'TTA':'L', 'TTG':'L', 'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L', 'ATG':'M', 'AAC':'N', 'AAT':'N', - 'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P', 'CAA':'Q', 'CAG':'Q', 'AGA':'R', 'AGG':'R', 'CGA':'R', 'CGC':'R', 'CGG':'R', - 'CGT':'R', 'AGC':'S', 'AGT':'S', 'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S', 'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T', - 'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V', 'TGG':'W', 'TAC':'Y', 'TAT':'Y', 'TAG': '*', 'TGA':'*', 'TAA':'*' - } for r in fa.contigs: r_dict = r.translate2protein_in_range(args.startCodons, args.stopCodons, tdict) otp += '\n=============================\n'+r.name+'\n=============================\n' - otp += 'FORWARD\n' + otp += 'FORWARD\n\n' i = 0 for f in r_dict['fwd']: @@ -481,15 +476,16 @@ def translate_dna_to_protein(args): for k in f: otp += '\n'+k[0]+' start: '+str(k[1]) otp += '\n------------------------------------------------\n' - otp += '\n=================================================\n' - otp += 'REVERS\n' + otp += '\n'+'='*15+'\n' + i += 1 + otp += 'REVERS\n\n' i = 0 for f in r_dict['rev']: otp += 'FRAME:\t'+str(i+1)+'\n' for k in f: otp += '\n'+k[0]+' start: '+str(k[1]) otp += '\n------------------------------------------------\n' - otp += '\n=================================================\n' + i += 1 rep += otp fa.write(args.output) diff --git a/fatool/fa.py b/fatool/fa.py index dfbad5a..d00304e 100644 --- a/fatool/fa.py +++ b/fatool/fa.py @@ -52,7 +52,7 @@ def load_content(content): nc = content.split('>') contigs_list = [] for r in nc[1:]: - contigs_list.append(Sequence('>'+r.split('\n', 1)[0], re.sub('^>.*\n', '', '>'+r.rstrip()))) + contigs_list.append(Sequence('>'+r.split('\n', 1)[0].rstrip(), re.sub('^>.*\n', '', '>'+r.rstrip()))) return contigs_list def write(self, fafile): @@ -97,11 +97,12 @@ def show_names(self): def extract(self, contigs_name_list): + print contigs_name_list new_contig_list = [] for r in contigs_name_list: if r in self.contigs_idx: 
new_contig_list.append(self.contigs[self.contigs_idx[r]]) - return Fa(new_contig_list, 'extr_'+self.name) + return Fa(new_contig_list, '>extr_'+self.name) def remove(self, contigs_name_list): new_contig_list = [] diff --git a/fatool/sequence.py b/fatool/sequence.py index 1facb01..d3d2118 100644 --- a/fatool/sequence.py +++ b/fatool/sequence.py @@ -8,6 +8,50 @@ class Sequence(object): + # 1 + tdict_standard = { + 'GCA':'A','GCC':'A','GCG':'A','GCT':'A', 'TGC':'C','TGT':'C', 'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E', + 'TTC':'F', 'TTT':'F', 'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G', 'CAC':'H', 'CAT':'H', 'ATA':'I', 'ATC':'I', 'ATT':'I', + 'AAA':'K', 'AAG':'K', 'TTA':'L', 'TTG':'L', 'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L', 'ATG':'M', 'AAC':'N', 'AAT':'N', + 'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P', 'CAA':'Q', 'CAG':'Q', 'AGA':'R', 'AGG':'R', 'CGA':'R', 'CGC':'R', 'CGG':'R', + 'CGT':'R', 'AGC':'S', 'AGT':'S', 'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S', 'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T', + 'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V', 'TGG':'W', 'TAC':'Y', 'TAT':'Y', 'TAG': '*', 'TGA':'*', 'TAA':'*' + } + + start_standard = ['ATG', 'TTG', 'CTG'] + + standard_stop = ['TAA', 'TAG', 'TGA'] + + # 2 + tdict_vertebrate_mitochondrial = { + 'GCA':'A','GCC':'A','GCG':'A','GCT':'A', 'TGC':'C','TGT':'C', 'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E', + 'TTC':'F', 'TTT':'F', 'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G', 'CAC':'H', 'CAT':'H', 'ATA':'M', 'ATC':'I', 'ATT':'I', + 'AAA':'K', 'AAG':'K', 'TTA':'L', 'TTG':'L', 'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L', 'ATG':'M', 'AAC':'N', 'AAT':'N', + 'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P', 'CAA':'Q', 'CAG':'Q', 'AGA':'*', 'AGG':'*', 'CGA':'R', 'CGC':'R', 'CGG':'R', + 'CGT':'R', 'AGC':'S', 'AGT':'S', 'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S', 'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T', + 'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V', 'TGG':'W', 'TAC':'Y', 'TAT':'Y', 'TAG': '*', 'TGA':'W', 'TAA':'*' + } + + # 3 + 
tdict_yeast_mitochondrial = { + 'GCA':'A','GCC':'A','GCG':'A','GCT':'A', 'TGC':'C','TGT':'C', 'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E', + 'TTC':'F', 'TTT':'F', 'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G', 'CAC':'H', 'CAT':'H', 'ATA':'M', 'ATC':'I', 'ATT':'I', + 'AAA':'K', 'AAG':'K', 'TTA':'L', 'TTG':'L', 'CTA':'T', 'CTC':'T', 'CTG':'T', 'CTT':'T', 'ATG':'M', 'AAC':'N', 'AAT':'N', + 'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P', 'CAA':'Q', 'CAG':'Q', 'AGA':'R', 'AGG':'R', 'CGA':'R', 'CGC':'R', 'CGG':'R', + 'CGT':'R', 'AGC':'S', 'AGT':'S', 'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S', 'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T', + 'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V', 'TGG':'W', 'TAC':'Y', 'TAT':'Y', 'TAG': '*', 'TGA':'W', 'TAA':'*' + } + + # 11 + tdict_bacterial_archaeal_plant_plastid = { + 'GCA':'A','GCC':'A','GCG':'A','GCT':'A', 'TGC':'C','TGT':'C', 'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E', + 'TTC':'F', 'TTT':'F', 'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G', 'CAC':'H', 'CAT':'H', 'ATA':'I', 'ATC':'I', 'ATT':'I', + 'AAA':'K', 'AAG':'K', 'TTA':'L', 'TTG':'L', 'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L', 'ATG':'M', 'AAC':'N', 'AAT':'N', + 'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P', 'CAA':'Q', 'CAG':'Q', 'AGA':'R', 'AGG':'R', 'CGA':'R', 'CGC':'R', 'CGG':'R', + 'CGT':'R', 'AGC':'S', 'AGT':'S', 'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S', 'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T', + 'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V', 'TGG':'W', 'TAC':'Y', 'TAT':'Y', 'TAG': '*', 'TGA':'*', 'TAA':'*' + } + def __init__(self, name, seq): if Sequence.validate_name_string(name): self.name = name @@ -30,47 +74,7 @@ def validate_seq(self): ''' validates general seqence not specified for DNA or others. ''' - # pattern to find not allowed chars. 
- pattern = re.compile('[^ACGNTUBDHKMRSVWY\-\nacgntubdhkmrsvwy]') - if pattern.search(self.seq): - if re.search('(\d+)', self.seq): - seq_array = self.seq.split('\n') - new_array = [] # array to store new sequence - for r in seq_array: - r = r.lstrip() # removing ' ' from beginings and ends - nr = r.split(' ') # split to array to catch all blocks aaaaaaaaaa aaaaaaaaaa - new_array.append(nr) - - end_of_seq_array = len(seq_array) - # if min. two lines calculate expected line length - if end_of_seq_array > 1: - line_length = int(new_array[1][0])-int(new_array[0][0]) - - # validate ecah block (between " ") of sequence () - i = 0 - while i < end_of_seq_array: - if not re.search('(\d+)', new_array[i][0]): - return 7 # line doesn't starts with digit - if not (len(new_array[i])-1)*10 == line_length and i != (end_of_seq_array-1): - return 0 # bad line length - for a, r in enumerate(new_array[i][1:]): # skip first elem which is digit - if len(r) != 10: # block not eq 10 - if len(r) < 10: # if less it can be ok if last elem of last line - if(i == end_of_seq_array - 1): - if a != len(new_array[i]) - 2: # if last -2 because enumerate is from first elem not 0 elem. 
- return 0 # not last elem of last line - else: - return 0 # not last line - else: - return 0 # block not eq 10 - if pattern.search(r): - return 0 - i += 1 - else: - return 0 # digit is not first char - # return pattern.search(self.seq) but nan error code returned before - return 1 - return 1 # valid + return Sequence.generic_validate(self.seq, '[^ACGNTUBDHKMRSVWY\-\nacgntubdhkmrsvwy]') @staticmethod def generic_validate(seq, domain): @@ -199,7 +203,6 @@ def cut(self, length, step): def cut_name(self, length, start = 0): self.name = self.name[start:length] - print self.name def leave_name_after_marker(self, mark, length = 0, keep_marker = 1): m = re.search(re.escape(mark), self.name) @@ -305,7 +308,6 @@ def translate2protein_in_range_generic(seq, start, stop, tdict): while i+3 <= stop_pos: ret += Sequence.translate(seq[i:i+3], tdict) if re.match(p_stop, seq[i:i+3]): - #print 'exiting on: '+seq[i:i+3] i = i+3 break else: @@ -322,14 +324,6 @@ def translate2protein_in_range_generic(seq, start, stop, tdict): return [frame1, frame2, frame3] def translate2protein_in_range(self, start, stop, tdict): - tdict = { - 'GCA':'A','GCC':'A','GCG':'A','GCT':'A', 'TGC':'C','TGT':'C', 'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E', - 'TTC':'F', 'TTT':'F', 'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G', 'CAC':'H', 'CAT':'H', 'ATA':'I', 'ATC':'I', 'ATT':'I', - 'AAA':'K', 'AAG':'K', 'TTA':'L', 'TTG':'L', 'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L', 'ATG':'M', 'AAC':'N', 'AAT':'N', - 'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P', 'CAA':'Q', 'CAG':'Q', 'AGA':'R', 'AGG':'R', 'CGA':'R', 'CGC':'R', 'CGG':'R', - 'CGT':'R', 'AGC':'S', 'AGT':'S', 'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S', 'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T', - 'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V', 'TGG':'W', 'TAC':'Y', 'TAT':'Y', 'TAG': '*', 'TGA':'*', 'TAA':'*' - } f = Sequence.translate2protein_in_range_generic(self.seq, start, stop, tdict) r = Sequence.translate2protein_in_range_generic(self.reverse().seq, start, stop, 
tdict) @@ -353,14 +347,7 @@ def translate2protein_generic(seq, tdict): return [('',f1,seq[-2:]),(seq[0:1],f2,seq[-1:]),(seq[0:2],f2,'')] def translate2protein(self, tdict): - tdict = { - 'GCA':'A','GCC':'A','GCG':'A','GCT':'A', 'TGC':'C','TGT':'C', 'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E', - 'TTC':'F', 'TTT':'F', 'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G', 'CAC':'H', 'CAT':'H', 'ATA':'I', 'ATC':'I', 'ATT':'I', - 'AAA':'K', 'AAG':'K', 'TTA':'L', 'TTG':'L', 'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L', 'ATG':'M', 'AAC':'N', 'AAT':'N', - 'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P', 'CAA':'Q', 'CAG':'Q', 'AGA':'R', 'AGG':'R', 'CGA':'R', 'CGC':'R', 'CGG':'R', - 'CGT':'R', 'AGC':'S', 'AGT':'S', 'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S', 'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T', - 'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V', 'TGG':'W', 'TAC':'Y', 'TAT':'Y', 'TAG': '*', 'TGA':'*', 'TAA':'*' - } + f = Sequence.translate2protein_generic(self.seq, tdict) r = Sequence.translate2protein_generic(self.reverse().seq, tdict) return {'fwd':f, 'rev':r} diff --git a/fatool/tests/test_sequence.py b/fatool/tests/test_sequence.py index cf6c1cb..84508fc 100644 --- a/fatool/tests/test_sequence.py +++ b/fatool/tests/test_sequence.py @@ -98,8 +98,10 @@ def translate2protein_generic(self): def test_translate2protein(self): - pass - + test = 'ATGGAATCGGCTTTTAATACTGCAGGGGCGTTAAGTTGGCATGAACTCACAACCAATAATACCGAAGAGGCCATGCGCTTCTATGCTGAGATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGGGATCAGCATTGGCGGAATTACCGACAGTTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGGATCAAGTGGCTATCAGTGCTAAAAAACTCGGCGGTGACATTCTGTTTGGCCCTGAAGACATTCCAGAGGTGGGCCGTTTTTGTTGGATAAAAGACCCACAGGGCGCCATTATTGCGGCCATTAGCTATTTAAAACGTTGATGTAA' + c = Sequence('>name', test) + + def test_validate_seq(self): c = Sequence('>name', 'ACTGactg') self.assertEqual(c.validate_seq(), 1)