Skip to content

Commit

Permalink
Merge pull request #91 from McTavishLab/filenames
Browse files: browse the repository at this point in the history
Filenames
  • Loading branch information
snacktavish committed May 27, 2020
2 parents b99484c + 27bd6e1 commit fe68fb6
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 26 deletions.
2 changes: 1 addition & 1 deletion docs/examples/example.config
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,4 @@ min_length = 0.8
max_length = 1.5

#You should not need to change any of these!
taxonomy_path = taxonomy
#taxonomy_path = taxonomy
8 changes: 4 additions & 4 deletions docs/examples/minimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,14 @@
scraper.read_blast_wrapper(blast_dir=blast_dir)
sys.stdout.write("Running write_aln()...\n")
aln_path1 = scraper.data.write_aln()
aln_path_alt = scraper.data.write_aln(filename="already_aligned_seqs.fas")
unaln_path = scraper.write_new_seqs(filename='unaligned.fas')
#aln_path_alt = scraper.data.write_aln(filename="already_aligned_seqs.fas")
#unaln_path = scraper.write_new_seqs(filename='unaligned.fas')

sys.stdout.write("Running align_query_seqs()...\n")
scraper.align_new_seqs()
scraper.est_full_tree()
scraper.data.write_labelled(label="^ot:ottTaxonName", norepeats=False)
scraper.data.write_labelled(label="^ncbi:taxon", filename="ncbi", norepeats=False)
scraper.data.write_labelled(label="^ot:ottTaxonName", filename="updated_taxon_name", norepeats=True)
scraper.data.write_labelled(label="^ncbi:taxon", filename="updated_ncbi_id", norepeats=False)


# sys.stdout.write("estimating tree...")
Expand Down
10 changes: 6 additions & 4 deletions physcraper/aligntreetax.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,10 +678,12 @@ def write_otus(self, filename = "otu_info", schema="table"):
with open("{}/{}_{}.json".format(self.workdir, filename, self.tag), "w") as outfile:
json.dump(self.otu_dict, outfile)
if schema == "table":
all_keys = set()
for otu in self.otu_dict:
all_keys.update(self.otu_dict[otu].keys())
keys = list(all_keys)
#all_keys = set()
#for otu in self.otu_dict:
# all_keys.update(self.otu_dict[otu].keys())
#keys = list(all_keys)
#keys.sort()
keys = ['^ot:ottTaxonName','^ot:ottId','^ncbi:taxon','^ncbi:accession','^ncbi:gi','^physcraper:last_blasted','^physcraper:status','^ot:originalLabel','^ncbi:title']
header = ["otu_id"] + keys
with open("{}/{}_{}.csv".format(self.workdir, filename, self.tag), "w") as outfile:
outfile.write("\t".join(header)+"\n")
Expand Down
31 changes: 15 additions & 16 deletions physcraper/scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ def run_blast_wrapper(self): # TODO Should this be happening elsewhere?
"""
delay = self.config.delay
today = str(datetime.date.today()).replace("-", "/")
debug("Today's date is {}".format(today))
debug("run_blast_wrapper")
debug(self.blast_subdir)
self._blast_read = 0
Expand Down Expand Up @@ -281,11 +282,6 @@ def run_blast_wrapper(self): # TODO Should this be happening elsewhere?
else:
equery = "txid{}[orgn]".format(self.mrca_ncbi)
debug(equery)
# tmpfile.write("\nequery\n")
# tmpfile.write(equery)
# tmpfile.write("\nquery\n")
# tmpfile.write(query)
# tmpfile.close()
self.run_web_blast_query(query, equery, fn_path)
self.data.otu_dict[otu_id]['^physcraper:last_blasted'] = today
else:
Expand All @@ -294,8 +290,8 @@ def run_blast_wrapper(self): # TODO Should this be happening elsewhere?
"delete file to force\n".format(fn_path))
else:
if _VERBOSE:
sys.stdout.write("otu {} was last blasted {} days ago and is not being re-blasted. "
"Use run_blast_wrapper(delay = 0) to force a search.\n".format(otu_id, last_blast))
sys.stdout.write("otu {} was last blasted on {}, {} days ago and is not being re-blasted. "
"Use run_blast_wrapper(delay = 0) to force a search.\n".format(otu_id, last_blast, time_passed))
#except KeyboardInterrupt:
# sys.exit()
self._blasted = 1
Expand Down Expand Up @@ -939,9 +935,13 @@ def replace_tre(self, filename, schema = 'newick'):



def run_muscle(self, input_aln_path = None, new_seqs_path = None, outname = 'muscle_aln.fas'):
def run_muscle(self, input_aln_path = None, new_seqs_path = None, outname = 'all_align'):
outpath_ALL = "{}/{}_{}.fas".format(self.workdir, outname, self.data.tag)
if os.path.exists(outpath_ALL):
self.replace_aln(outpath_ALL)
return(outpath_ALL)
if input_aln_path == None:
aln_filename = "before_physcraper_{}.fas".format(self.data.tag)
aln_filename = "original_{}.fas".format(self.data.tag)
aln_path = "{}/{}".format(self.workdir, aln_filename)
if os.path.exists(aln_path):
input_aln_path = aln_path
Expand All @@ -950,17 +950,16 @@ def run_muscle(self, input_aln_path = None, new_seqs_path = None, outname = 'mus
else:
assert(os.path.exists(input_aln_path))
if new_seqs_path == None:
new_filename = "NEW{}_{}.fasta".format(self.date, self.data.tag)
new_filename = "new_seqs_UNaligned_{}_{}.fas".format(self.date, self.data.tag)
tmp_new_seqs_path = "{}/{}".format(self.workdir, new_filename)
if os.path.exists(tmp_new_seqs_path):
new_seqs_path = tmp_new_seqs_path
else:
new_seqs_path = self.write_new_seqs(filename = new_filename)
else:
assert(os.path.exists(new_seqs_path))
outpath_NEW = "{}/muscle_NEW.fas".format(self.workdir)
outpath_ALL = "{}/{}".format(self.workdir, outname)
f = open('{}/muscle_NEW.log'.format(self.workdir), 'w')
outpath_NEW = "new_seqs_aligned_{}_{}.fas".format(self.date, self.data.tag)
f = open('{}/muscle.log'.format(self.workdir), 'a')
try:
subprocess.check_call(["muscle",
"-in", new_seqs_path,
Expand All @@ -969,18 +968,18 @@ def run_muscle(self, input_aln_path = None, new_seqs_path = None, outname = 'mus
sys.stdout.write("Muscle NEW done.\n")
except subprocess.CalledProcessError as grepexc:
sys.stderr.write("error code {}, {}".format(grepexc.returncode, grepexc.output))

f2 = open('{}/muscle_ALL.log'.format(self.workdir), 'w')
f = open('{}/muscle.log'.format(self.workdir), 'a')
try:
subprocess.check_call(["muscle", "-profile",
"-in1", input_aln_path,
"-in2", outpath_NEW,
"-out", outpath_ALL], stdout=f2, stderr=subprocess.STDOUT)
"-out", outpath_ALL], stdout=f, stderr=subprocess.STDOUT)
if _VERBOSE:
sys.stdout.write("Muscle ALL done.\n")
except subprocess.CalledProcessError as grepexc:
sys.stderr.write("error code {}, {}".format(grepexc.returncode, grepexc.output))
self.replace_aln(outpath_ALL)
return(outpath_ALL)


def run_papara(self, papara_runname="extended"):
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
argparse
biopython
biopython==1.76
configparser
coverage
DendroPy
Expand Down

0 comments on commit fe68fb6

Please sign in to comment.