Permalink
Please sign in to comment.
Browse files
Builds removed
deleted: build/lib/fatool/__init__.py deleted: build/lib/fatool/fa.py deleted: build/lib/fatool/fuzzy.py deleted: build/lib/fatool/sequence.py deleted: build/scripts-2.7/cmdfatool.py
- Loading branch information...
Showing
with
0 additions
and 1,252 deletions.
- +0 −3 build/lib/fatool/__init__.py
- +0 −216 build/lib/fatool/fa.py
- +0 −93 build/lib/fatool/fuzzy.py
- +0 −425 build/lib/fatool/sequence.py
- +0 −515 build/scripts-2.7/cmdfatool.py
| @@ -1,3 +0,0 @@ | ||
| -from .sequence import Sequence | ||
| -from .fa import Fa | ||
| -import fuzzy |
| @@ -1,216 +0,0 @@ | ||
| -# -*- coding: utf-8 -*- | ||
| - | ||
| - | ||
| -import re | ||
| -import math | ||
| -from fatool import Sequence | ||
| -import logging | ||
| - | ||
class Fa(object):
    '''
    Ordered collection of uniquely named contigs (Sequence objects).

    Attributes set by the constructor:
      name        -- label of the collection (the file name when loaded
                     through load_from_file)
      contigs     -- list of Sequence objects in insertion order
      contigs_idx -- dict mapping contig name -> index in ``contigs``

    ``stats`` is created only by statistics().
    '''

    def __init__(self, contigs_list, name):
        '''
        Build the collection from an iterable of Sequence objects.

        Raises TypeError when an item is not a Sequence and NameError when
        two items share the same name.
        '''
        logger = logging.getLogger(__name__)

        logger.debug('creating Fa object')
        self.name = name
        self.contigs = []
        self.contigs_idx = {}
        for r in contigs_list:
            if not isinstance(r, Sequence):
                logger.error('Supplied param is not Sequence object')
                raise TypeError('Wrong param supplied Sequence was expected')
            if r.name in self.contigs_idx:
                logger.error('Sequence name: '+r.name+' already exists in file')
                raise NameError('Sequence name already exists: '+r.name)

            if self.contigs:
                logger.debug('appending contig: '+r.name)
            else:
                logger.debug('adding first contig: '+r.name)
            self.contigs.append(r)
            self.contigs_idx[r.name] = len(self.contigs) - 1

    @staticmethod
    def load_from_file(file):
        '''
        Create an Fa from a path (str) or from an already opened file object.

        A file object must expose ``name`` and ``read()``; it is used as a
        context manager, so it is closed once its content has been read.
        '''
        if isinstance(file, str):
            with open(file, 'r') as f:
                contigs = Fa.load_content(f.read())
            name = file
        else:
            name = file.name
            with file as f:
                contigs = Fa.load_content(f.read())

        return Fa(contigs, name)

    @staticmethod
    def load_content(content):
        '''
        Split raw text on '>' and return a list of Sequence objects.

        Anything before the first '>' is ignored. Each Sequence receives
        '>' + the record's first line as its first argument and the rest of
        the record, right-stripped, as its second.
        '''
        contigs_list = []
        for record in content.split('>')[1:]:
            # partition() yields an empty body for a header-only record;
            # the previous regex needed a newline to strip the header and
            # otherwise handed the header itself over as the sequence.
            header, _, body = record.partition('\n')
            contigs_list.append(Sequence('>'+header, body.rstrip()))
        return contigs_list

    def write(self, fafile):
        '''
        Write str(self) to ``fafile`` (a path or an open file object).

        A file object is used as a context manager and is therefore closed
        after the write.
        '''
        if isinstance(fafile, str):
            with open(fafile, 'w') as f:
                f.write(str(self))
        else:
            with fafile as f:
                f.write(str(self))

    def write_multiple_files(self, dir):
        '''
        Write every contig to its own file ``<dir>/<contig name>.fa``.

        Trailing '/' and '\\' are removed from ``dir`` first; when nothing
        is left the files go to the current working directory.
        '''
        dir = dir.rstrip('/')
        dir = dir.rstrip('\\')
        if len(dir) > 0:
            dir = dir+'/'
        for r in self.contigs:
            with open(dir+r.name+'.fa', 'w') as w:
                w.write(str(r))

    def add_contigs(self, contig_list, owrite=0):
        '''Add every contig of ``contig_list`` through add_contig().'''
        for r in contig_list:
            self.add_contig(r, owrite)

    def add_contig(self, contig, owrite = 0):
        '''
        Add one Sequence to the collection.

        When the name already exists the contig is ignored, unless
        ``owrite == 1``: then the old contig is removed, the new one is
        appended at the end and the name index is rebuilt.

        Raises TypeError when ``contig`` is not a Sequence.
        '''
        if not isinstance(contig, Sequence):
            raise TypeError('Wrong param supplied contig was expected')
        if contig.name in self.contigs_idx:
            if owrite == 1:
                # drop the old item, append the new one and re-number,
                # because every index after the removed one has shifted
                del self.contigs[self.contigs_idx[contig.name]]
                self.contigs.append(contig)
                for a, r in enumerate(self.contigs):
                    self.contigs_idx[r.name] = a
        else:
            self.contigs.append(contig)
            self.contigs_idx[contig.name] = len(self.contigs) - 1

    def show_names(self):
        '''Return contig names ordered by their position in the collection.'''
        return sorted(self.contigs_idx, key=self.contigs_idx.get)

    def extract(self, contigs_name_list):
        '''
        Return a new Fa named 'extr_<name>' holding the requested contigs,
        in the order of ``contigs_name_list``. Unknown names are skipped.
        '''
        new_contig_list = [
            self.contigs[self.contigs_idx[r]]
            for r in contigs_name_list
            if r in self.contigs_idx
        ]
        return Fa(new_contig_list, 'extr_'+self.name)

    def remove(self, contigs_name_list):
        '''
        Return a new Fa named 'rem_<name>' without the contigs whose names
        are in ``contigs_name_list``.
        '''
        new_contig_list = [
            r for r in self.contigs if r.name not in contigs_name_list
        ]
        return Fa(new_contig_list, 'rem_'+self.name)

    def validate(self):
        '''
        Placeholder - no validation is implemented; returns None.
        '''

    def nl_statistics(self, g, percent):
        '''
        Counts statistics of N50, L50, N75 etc.

        g       -- contig lengths sorted from biggest to lowest
        percent -- threshold, e.g. 50 for N50/L50

        Reads self.stats['L'] (total length) and stores the results under
        self.stats['N<percent>'] and self.stats['L<percent>']. Both are 0
        for an empty ``g``.
        '''
        n_key = 'N'+str(percent)
        l_key = 'L'+str(percent)
        if not g:
            self.stats[n_key] = 0
            self.stats[l_key] = 0
            return

        stop = math.floor(self.stats['L']*(percent/100.00))
        nsum = 0
        ncount = 0
        # Always consume at least one contig: when ``stop`` floors to 0 the
        # old "while nsum < stop" loop never ran and g[-1] (the smallest
        # contig) was reported together with an L value of 0.
        for ncount, length in enumerate(g):
            nsum += length
            if nsum >= stop:
                break

        self.stats[n_key] = g[ncount]
        self.stats[l_key] = ncount + 1

    def bp_stats(self, length):
        '''
        Register one contig of ``length`` in the size buckets of self.stats.

        'totalc' is always incremented. The contig is then counted in the
        single highest bucket it exceeds (50000, 25000, 10000, 5000, 1000):
        'nbp<limit>' gets +1 and 'lbp<limit>' gets +length. Contigs of
        length <= 1000 land in no bucket.
        '''
        self.stats['totalc'] += 1
        if length > 50000:
            self.stats['nbp50000'] += 1
            self.stats['lbp50000'] += length
        elif length > 25000:
            self.stats['nbp25000'] += 1
            self.stats['lbp25000'] += length
        elif length > 10000:
            self.stats['nbp10000'] += 1
            self.stats['lbp10000'] += length
        elif length > 5000:
            self.stats['nbp5000'] += 1
            self.stats['lbp5000'] += length
        elif length > 1000:
            self.stats['nbp1000'] += 1
            self.stats['lbp1000'] += length

    def statistics(self):
        '''
        Compute, store in self.stats and return the collection statistics.

        Sums the 'A', 'C', 'T', 'G', 'N' and 'L' entries returned by each
        contig's statistics(), fills the size buckets (see bp_stats), and
        adds 'longest' plus N/L values for 50, 75 and 90 percent.
        For an empty collection 'longest' and all N/L values are 0.
        '''
        self.stats = {
            'A': 0, 'C': 0, 'T': 0, 'G': 0, 'N': 0, 'L': 0,
            'nbp1000': 0, 'nbp5000': 0, 'nbp10000': 0, 'nbp25000': 0, 'nbp50000': 0,
            'lbp1000': 0, 'lbp5000': 0, 'lbp10000': 0, 'lbp25000': 0, 'lbp50000': 0,
            'totalc':0
        }
        nstat_list = []
        for r in self.contigs:
            temp = r.statistics()
            for key in ('A', 'C', 'T', 'G', 'N', 'L'):
                self.stats[key] += temp[key]
            nstat_list.append(temp['L'])
            self.bp_stats(temp['L'])

        # max() raises on an empty list, so guard the empty collection
        self.stats['longest'] = max(nstat_list) if nstat_list else 0
        nstat_list.sort(reverse=True)

        for percent in (50, 75, 90):
            self.nl_statistics(nstat_list, percent)

        return self.stats

    def sort(self, mono):
        '''
        Return a new Fa named 'sorted_<name>' with contigs ordered by length.

        mono == -1 gives longest first, any other value shortest first.
        '''
        temp = {}  # name -> len(contig)
        for r in self.contigs:
            temp[r.name] = len(r)

        ordered_names = sorted(temp, key=temp.get)
        if mono == -1:
            # plain reversal (not reverse=True) keeps the original tie order
            ordered_names = ordered_names[::-1]

        contig_list = [self.contigs[self.contigs_idx[r]] for r in ordered_names]
        return Fa(contig_list, 'sorted_'+self.name)

    def reverse(self):
        '''
        Return a new Fa named 'rev_<name>' built from each contig's reverse.
        '''
        cl = []
        for r in self.contigs:
            rev = r.reverse
            # NOTE(review): Sequence is not visible here; ``reverse`` is
            # presumably a method, so call it. A plain attribute/property
            # value is used as is. Confirm against Sequence.
            if callable(rev):
                rev = rev()
            cl.append(rev)
        return Fa(cl, 'rev_'+self.name)

    def join(self, fa_list, owrite = 0):
        '''
        Add the contigs of every Fa in ``fa_list`` to this collection.

        ``owrite`` is forwarded to add_contig(). Raises TypeError when an
        item of ``fa_list`` is not an Fa; items before it are already merged.
        '''
        for fa in fa_list:
            if not isinstance(fa, Fa):
                raise TypeError('Wrong param supplied Fa was expected')
            self.add_contigs(fa.contigs, owrite)

    def count_contigs(self):
        '''Return the number of contigs in the collection.'''
        return len(self.contigs)

    def __str__(self):
        '''Return the concatenation of str() of every contig, in order.'''
        return ''.join(str(r) for r in self.contigs)
| @@ -1,93 +0,0 @@ | ||
| -# -*- coding: utf-8 -*- | ||
| - | ||
| -#import math | ||
| -import logging | ||
| - | ||
def find_aprox_match_iter(needle, hstack, missmatch_level, hs_start_pos = 0):
    '''
    Find the first approximate occurrence of ``needle`` in ``hstack``.

    A window of len(needle) characters matches when it differs from
    ``needle`` in at most ``missmatch_level`` positions. Windows are tried
    from ``hs_start_pos`` onwards, left to right.

    Returns a tuple (start, end, mismatches) where hstack[start:end] is the
    matching window, or None when nothing matches. An empty ``needle``
    never matches (it previously raised IndexError).
    '''
    needle_len = len(needle)
    if needle_len == 0:
        return None

    start = hs_start_pos
    last_start = len(hstack) - needle_len  # last position a full window fits
    while start <= last_start:
        mmatch_count = 0
        for offset in range(needle_len):
            if hstack[start + offset] != needle[offset]:
                mmatch_count += 1
                if mmatch_count > missmatch_level:
                    # too many mismatches - give up on this window
                    break
        else:
            # whole needle compared without exceeding the mismatch level
            return (start, start + needle_len, mmatch_count)
        start += 1
    return None
| - | ||
def find_all_aprox_matches(needle, hstack, missmatch_level, hs_start_pos = 0):
    '''
    Collect every approximate occurrence of ``needle`` in ``hstack``.

    Returns a list of the tuples produced by find_aprox_match_iter, in
    order of their start position. After each hit the search resumes one
    character past that hit's start, so overlapping matches are reported.
    ``hs_start_pos`` defaults to 0 like in the other search functions.
    '''
    matches = []
    pos = hs_start_pos
    needle_len = len(needle)
    # stop as soon as a full needle no longer fits behind ``pos``
    while pos + needle_len <= len(hstack):
        found = find_aprox_match_iter(needle, hstack, missmatch_level, pos)
        if not found:
            # no more matches in hstack
            break
        matches.append(found)
        pos = found[0] + 1
    return matches
| - | ||
def find_motif_in_aprox_range(start_motif, stop_motif, hstack, missmatch_level, hs_start_pos = 0):
    '''
    Return the text lying between two approximately matched motifs.

    ``start_motif`` is searched from ``hs_start_pos``; ``stop_motif`` is
    searched from the end of the start match. The result is the slice
    between the two matches (both motifs excluded), or None when either
    motif is not found.
    '''
    # honour the caller's start position (it used to be overridden with 0)
    start = find_aprox_match_iter(start_motif, hstack, missmatch_level, hs_start_pos)
    if not start:
        # without a start match there is nothing to anchor the stop search on
        return None
    stop = find_aprox_match_iter(stop_motif, hstack, missmatch_level, start[1])
    if not stop:
        return None
    return hstack[start[1]:stop[0]]
| - | ||
def find_all_motifs_in_aprox_range(start_motif, stop_motif, hstack, missmatch_level, hs_start_pos = 0, len_min = 0, len_max = float('inf')):
    '''
    Collect every region delimited by approximate start and stop motifs.

    For each approximate match of ``start_motif`` (searched from
    ``hs_start_pos``, then from one character past the previous start) the
    first following match of ``stop_motif`` is located. The region from
    the beginning of the start match to the end of the stop match - both
    motifs included - is kept when its length lies within
    [len_min, len_max].

    Returns the list of kept regions; the search ends as soon as either
    motif can no longer be found.
    '''
    i = hs_start_pos
    ret_list = []
    logger = logging.getLogger(__name__)
    logger.debug([start_motif, stop_motif, hstack, missmatch_level, hs_start_pos, len_min, len_max])
    logger.debug(hstack)

    while i <= len(hstack):
        start = find_aprox_match_iter(start_motif, hstack, missmatch_level, i)
        if not start:
            # no further start motif - previously this raised TypeError
            # when start[1] was read from None
            break
        stop = find_aprox_match_iter(stop_motif, hstack, missmatch_level, start[1])
        if not stop:
            break
        region_len = stop[1] - start[0]
        if len_min <= region_len <= len_max:
            ret_list.append(hstack[start[0]:stop[1]])
        i = start[0]+1
    logger.debug(ret_list)
    return ret_list
| - | ||
def find_motif(needle, hstack, missmatch_level, hs_start_pos = 0):
    '''
    Return the first substring of ``hstack`` approximately matching
    ``needle`` (at most ``missmatch_level`` differing characters), searched
    from ``hs_start_pos``. Returns None when there is no match.
    '''
    # honour the caller's start position (it used to be overridden with 0)
    r = find_aprox_match_iter(needle, hstack, missmatch_level, hs_start_pos)
    if r:
        return hstack[r[0]:r[1]]
    return None
| - | ||
def find_all_motifs(needle, hstack, missmatch_level, hs_start_pos = 0):
    '''
    Return the list of all substrings of ``hstack`` that approximately
    match ``needle``, searched from ``hs_start_pos``.

    After each hit the search restarts one character past that hit's
    start, so overlapping matches are included.
    '''
    found_motifs = []
    pos = hs_start_pos
    while pos <= len(hstack):
        hit = find_aprox_match_iter(needle, hstack, missmatch_level, pos)
        if not hit:
            # nothing left to find
            break
        found_motifs.append(hstack[hit[0]:hit[1]])
        pos = hit[0] + 1
    return found_motifs
Oops, something went wrong.
0 comments on commit
0ff0ed9