Skip to content

Commit

Permalink
fix bug introduced in writeinfo bugfix
Browse files Browse the repository at this point in the history
  • Loading branch information
snacktavish committed Jul 23, 2020
1 parent c0ee604 commit 226b3d9
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 15 deletions.
25 changes: 12 additions & 13 deletions physcraper/scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,6 @@ def run_local_blast_cmd(self, query, taxon_label, fn_path):
proc = subprocess.check_output(blastcmd, cwd=self.config.blastdb)
except KeyboardInterrupt:
if os.stat(abs_outfile).st_size == 0:
print("deleting {}".format(abs_outfile))
os.remove(abs_outfile)
sys.stderr.write("KeyboardInterrupt")
sys.exit()
Expand Down Expand Up @@ -465,26 +464,25 @@ def read_webbased_blast_query(self, fn_path):
for blast_record in blast_records:
for alignment in blast_record.alignments:
for hsp in alignment.hsps:
gb_id = alignment.title.split("|")[3] # 1 is for gi
if float(hsp.expect) < float(self.config.e_value_thresh):
gb_id = alignment.title.split("|")[3] # 1 is for gi
if len(gb_id.split(".")) == 1:
sys.stdout.write("skipping acc {}, incorrect format\n".format(gb_id))
elif gb_id not in self.data.gb_dict: # skip ones we already have
taxid,taxname, seq = self.ids.get_tax_seq_acc(gb_id)
gi_id = alignment.title.split('|')[1]
gb_acc = alignment.accession
stitle = alignment.title
hsps = alignment.hsps
for hsp in hsps:
match = hsp.sbjct.lower().replace('-','')
if match not in seq.lower():
seq = self.check_complement(match, seq, gb_id)
match = hsp.sbjct.lower().replace('-','')
if match not in seq.lower():
seq = self.check_complement(match, seq, gb_id)
length = alignment.length
query_dict = {'^ncbi:gi': gi_id, 'accession': gb_acc, 'title': stitle,
'length': length, 'hsps': hsps}
'length': length, 'hsp': hsp}
self.data.gb_dict[gb_id] = query_dict
self.new_seqs[gb_id] = seq

else:
pass
else:
fi = open("{}/below_eval_thresh.txt".format(self.rundir), 'a')
fi.write("{}, {}\n".format(gb_id, hsp.expect))
Expand Down Expand Up @@ -685,7 +683,8 @@ def remove_identical_seqs(self):
# debug("len new seqs otu dict after remove identical{}".format(len(self.new_seqs_otu_id)))
sys.stdout.write("**** Found {} new sequences****\n".format(len(self.new_seqs_otu_id)))
if len(self.new_seqs_otu_id)==0:
sys.exit()
pass
# sys.exit()
with open(self.logfile, "a") as log:
log.write("{} new sequences added from Genbank after removing identical seq, "
"of {} before filtering\n".format(len(self.new_seqs_otu_id), len(self.new_seqs)))
Expand Down Expand Up @@ -733,9 +732,9 @@ def filter_seqs(self, tmp_dict, type="random", threshold=None):



def make_sp_dict(self, otu_list=[]):
"""Mkaes dict of OT_ids by species"""
if otu_list == []:
def make_sp_dict(self, otu_list=None):
"""Makes dict of OT_ids by species"""
if otu_list is None:
otu_list = self.new_seqs_otu_id.keys()
debug("make sp_dict")
sp_d = {}
Expand Down
3 changes: 1 addition & 2 deletions tests/test_fromfile.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

import sys
from physcraper import generate_ATT_from_files, generate_ATT_from_run

from physcraper import generate_ATT_from_files, generate_ATT_from_run

def test_generate_ATT_from_files():

Expand Down Expand Up @@ -31,4 +31,3 @@ def test_generate_ATT_from_run():

sys.stdout.write("\nTesting 'generate_ATT_from_run (fromfile.py)'\n")
data_obj = generate_ATT_from_run(workdir=workdir)

0 comments on commit 226b3d9

Please sign in to comment.