Merge pull request #91 from McTavishLab/filenames

Filenames
McTavishLab · May 27, 2020 · fe68fb6 · fe68fb6
2 parents b99484c + 27bd6e1
commit fe68fb6
Show file tree

Hide file tree

Showing 5 changed files with 27 additions and 26 deletions.
diff --git a/docs/examples/example.config b/docs/examples/example.config
@@ -48,4 +48,4 @@ min_length = 0.8
 max_length = 1.5
 
 #You should not need to change any of these!
-taxonomy_path = taxonomy
+#taxonomy_path = taxonomy
diff --git a/docs/examples/minimal.py b/docs/examples/minimal.py
@@ -55,14 +55,14 @@
 scraper.read_blast_wrapper(blast_dir=blast_dir)
 sys.stdout.write("Running write_aln()...\n")
 aln_path1 = scraper.data.write_aln()
-aln_path_alt = scraper.data.write_aln(filename="already_aligned_seqs.fas")
-unaln_path = scraper.write_new_seqs(filename='unaligned.fas')
+#aln_path_alt = scraper.data.write_aln(filename="already_aligned_seqs.fas")
+#unaln_path = scraper.write_new_seqs(filename='unaligned.fas')
 
 sys.stdout.write("Running align_query_seqs()...\n")
 scraper.align_new_seqs()
 scraper.est_full_tree()
-scraper.data.write_labelled(label="^ot:ottTaxonName", norepeats=False)
-scraper.data.write_labelled(label="^ncbi:taxon", filename="ncbi", norepeats=False)
+scraper.data.write_labelled(label="^ot:ottTaxonName", filename="updated_taxon_name", norepeats=True)
+scraper.data.write_labelled(label="^ncbi:taxon", filename="updated_ncbi_id", norepeats=False)
 
 
 # sys.stdout.write("estimating tree...")

diff --git a/physcraper/aligntreetax.py b/physcraper/aligntreetax.py
@@ -678,10 +678,12 @@ def write_otus(self, filename = "otu_info", schema="table"):
             with open("{}/{}_{}.json".format(self.workdir, filename, self.tag), "w") as outfile:
                 json.dump(self.otu_dict, outfile)
         if schema == "table":
-            all_keys =  set()
-            for otu in self.otu_dict:
-                all_keys.update(self.otu_dict[otu].keys())
-            keys = list(all_keys) 
+            #all_keys =  set()
+            #for otu in self.otu_dict:
+            #    all_keys.update(self.otu_dict[otu].keys())
+            #keys = list(all_keys)
+            #keys.sort()
+            keys = ['^ot:ottTaxonName','^ot:ottId','^ncbi:taxon','^ncbi:accession','^ncbi:gi','^physcraper:last_blasted','^physcraper:status','^ot:originalLabel','^ncbi:title']
             header = ["otu_id"] + keys
             with open("{}/{}_{}.csv".format(self.workdir, filename, self.tag), "w") as outfile:
                 outfile.write("\t".join(header)+"\n")

diff --git a/physcraper/scrape.py b/physcraper/scrape.py
@@ -243,6 +243,7 @@ def run_blast_wrapper(self):  # TODO Should this be happening elsewhere?
         """
         delay = self.config.delay
         today = str(datetime.date.today()).replace("-", "/")
+        debug("Today's date is {}".format(today))
         debug("run_blast_wrapper")
         debug(self.blast_subdir)
         self._blast_read = 0
@@ -281,11 +282,6 @@ def run_blast_wrapper(self):  # TODO Should this be happening elsewhere?
                         else:
                             equery = "txid{}[orgn]".format(self.mrca_ncbi)
                         debug(equery)
-               #         tmpfile.write("\nequery\n")
-               #         tmpfile.write(equery)
-               #         tmpfile.write("\nquery\n")
-               #         tmpfile.write(query)
-               #         tmpfile.close()
                         self.run_web_blast_query(query, equery, fn_path)
                     self.data.otu_dict[otu_id]['^physcraper:last_blasted'] = today
                 else:
@@ -294,8 +290,8 @@ def run_blast_wrapper(self):  # TODO Should this be happening elsewhere?
                                          "delete file to force\n".format(fn_path))
             else:
                 if _VERBOSE:
-                    sys.stdout.write("otu {} was last blasted {} days ago and is not being re-blasted. "
-                                     "Use run_blast_wrapper(delay = 0) to force a search.\n".format(otu_id, last_blast))
+                    sys.stdout.write("otu {} was last blasted on {}, {} days ago and is not being re-blasted. "
+                                     "Use run_blast_wrapper(delay = 0) to force a search.\n".format(otu_id, last_blast, time_passed))
     #except KeyboardInterrupt:
            # sys.exit()
         self._blasted = 1
@@ -939,9 +935,13 @@ def replace_tre(self, filename, schema = 'newick'):
 
 
 
-    def run_muscle(self, input_aln_path = None, new_seqs_path = None, outname = 'muscle_aln.fas'):
+    def run_muscle(self, input_aln_path = None, new_seqs_path = None, outname = 'all_align'):
+        outpath_ALL = "{}/{}_{}.fas".format(self.workdir, outname, self.data.tag)
+        if os.path.exists(outpath_ALL):
+            self.replace_aln(outpath_ALL)
+            return(outpath_ALL)
         if input_aln_path == None:
-            aln_filename = "before_physcraper_{}.fas".format(self.data.tag)
+            aln_filename = "original_{}.fas".format(self.data.tag)
             aln_path = "{}/{}".format(self.workdir, aln_filename)
             if os.path.exists(aln_path):
                 input_aln_path = aln_path
@@ -950,17 +950,16 @@ def run_muscle(self, input_aln_path = None, new_seqs_path = None, outname = 'mus
         else:
             assert(os.path.exists(input_aln_path))
         if new_seqs_path == None:
-            new_filename = "NEW{}_{}.fasta".format(self.date, self.data.tag)
+            new_filename = "new_seqs_UNaligned_{}_{}.fas".format(self.date, self.data.tag)
             tmp_new_seqs_path = "{}/{}".format(self.workdir, new_filename)
             if os.path.exists(tmp_new_seqs_path):
                 new_seqs_path = tmp_new_seqs_path
             else:
                 new_seqs_path = self.write_new_seqs(filename = new_filename)
         else:
             assert(os.path.exists(new_seqs_path))
-        outpath_NEW = "{}/muscle_NEW.fas".format(self.workdir)
-        outpath_ALL = "{}/{}".format(self.workdir, outname)
-        f = open('{}/muscle_NEW.log'.format(self.workdir), 'w')
+        outpath_NEW = "new_seqs_aligned_{}_{}.fas".format(self.date, self.data.tag)
+        f = open('{}/muscle.log'.format(self.workdir), 'a')
         try:
             subprocess.check_call(["muscle",
                                    "-in", new_seqs_path,
@@ -969,18 +968,18 @@ def run_muscle(self, input_aln_path = None, new_seqs_path = None, outname = 'mus
                 sys.stdout.write("Muscle NEW done.\n")
         except subprocess.CalledProcessError as grepexc:
             sys.stderr.write("error code {}, {}".format(grepexc.returncode, grepexc.output))
-
-        f2 = open('{}/muscle_ALL.log'.format(self.workdir), 'w')
+        f = open('{}/muscle.log'.format(self.workdir), 'a')
         try:
             subprocess.check_call(["muscle", "-profile",
                                    "-in1", input_aln_path,
                                    "-in2", outpath_NEW,
-                                   "-out", outpath_ALL], stdout=f2, stderr=subprocess.STDOUT)
+                                   "-out", outpath_ALL], stdout=f, stderr=subprocess.STDOUT)
             if _VERBOSE:
                 sys.stdout.write("Muscle ALL done.\n")
         except subprocess.CalledProcessError as grepexc:
             sys.stderr.write("error code {}, {}".format(grepexc.returncode, grepexc.output))
         self.replace_aln(outpath_ALL)
+        return(outpath_ALL)
 
 
     def run_papara(self, papara_runname="extended"):

diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,5 @@
 argparse
-biopython
+biopython==1.76
 configparser
 coverage
 DendroPy