Skip to content

Commit

Permalink
Merge pull request #82 from McTavishLab/muscle
Browse files Browse the repository at this point in the history
Muscle profile alignment
  • Loading branch information
snacktavish committed May 19, 2020
2 parents 07855b9 + f07cfd1 commit 0df4ad7
Show file tree
Hide file tree
Showing 8 changed files with 207 additions and 98 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,7 @@ dist/
.DS_Store
ott3.1*
*.tsv
physcraper_example_minimal/
scrape_ot_350_compact/
physcraper_example_ot_350/
taxonomy/full_seqs/
14 changes: 7 additions & 7 deletions docs/examples/data_scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,21 @@
# Inputs for the ot_350 example: an OpenTree study/tree pair plus the
# matching alignment file shipped with the documentation.
configfi = "docs/examples/example.config"
study_id = "ot_350"
tree_id = "Tr53297"
workdir = "physcraper_example_ot_350"
aln_fi = "docs/examples/{}{}.aln".format(study_id, tree_id)


# Create a 'scraper' object from the OpenTree study/tree and the alignment.
scraper = scraper_from_opentree(study_id=study_id,
                                tree_id=tree_id,
                                alnfile=aln_fi,
                                aln_schema="nexus",
                                configfile=configfi,
                                workdir=workdir)

sys.stdout.write("{} taxa in alignment and tree\n".format(len(scraper.data.aln)))


#scraper.read_blast_wrapper()
# Estimate the tree, then write the outputs labelled with OTT taxon names.
scraper.est_full_tree()
scraper.data.write_labelled(label='^ot:ottTaxonName')
70 changes: 70 additions & 0 deletions docs/examples/minimal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
""" This is a minimal example taking precooked data from the tests
"""
import os
import sys
import json
import physcraper
from physcraper import OtuJsonDict, generate_ATT_from_files, ConfigObj, IdDicts, PhyscraperScrape
from physcraper import opentree_helpers
from physcraper.opentree_helpers import scraper_from_opentree

# Input locations: everything comes from the test data shipped with the
# repository, including pre-computed ("precooked") BLAST results.
configfi = "tests/data/test.config"
workdir = "physcraper_example_minimal"
aln_fi = "tests/data/tiny_test_example/test.fas"
blast_dir = "tests/data/precooked/fixed/tte_blast_files"
tre_fi = "tests/data/tiny_test_example/test.tre"
otu_jsonfi = "{}/otu_dict.json".format(workdir)
id_to_spn = "tests/data/tiny_test_example/test_nicespl.csv"

# The OTU dictionary is written into workdir, so it has to exist first.
if not os.path.exists(workdir):
    os.makedirs(workdir)

conf_base = ConfigObj(configfi)

ids = IdDicts(configfi)

# Build the OTU dictionary from the id-to-species-name table and save it
# where generate_ATT_from_files() will read it back.
otu_json = OtuJsonDict(id_to_spn, ids)
with open(otu_jsonfi, "w") as outfile:
    json.dump(otu_json, outfile)

ottids = [otu_json[ite]['^ot:ottId'] for ite in otu_json]

mrca = opentree_helpers.get_mrca_ott(ottids)

# Create a 'scraper' object to get data from NCBI

data_obj = generate_ATT_from_files(alnfile=aln_fi,
                                   aln_schema="fasta",
                                   workdir=workdir,
                                   configfile=configfi,
                                   treefile=tre_fi,
                                   tree_schema="newick",
                                   otu_json=otu_jsonfi,
                                   ingroup_mrca=mrca)

data_obj.tag = "minEx"

scraper = PhyscraperScrape(data_obj, ids)

sys.stdout.write("{} taxa in alignment and tree\n".format(len(scraper.data.aln)))

scraper._blasted = 1  # this tricks PhyScraper into not blasting anything new
sys.stdout.write("Running read_blast_wrapper()...\n")
scraper.read_blast_wrapper(blast_dir=blast_dir)
sys.stdout.write("Running write_aln()...\n")
aln_path1 = scraper.data.write_aln()
aln_path_alt = scraper.data.write_aln(filename="already_aligned_seqs.fas")
unaln_path = scraper.write_new_seqs(filename='unaligned.fas')

# The progress message names the method that is actually called below.
sys.stdout.write("Running align_new_seqs()...\n")
scraper.align_new_seqs()
scraper.est_full_tree()
scraper.data.write_labelled(label="^ot:ottTaxonName", norepeats=False)
scraper.data.write_labelled(label="^ncbi:taxon", filename="ncbi", norepeats=False)


# sys.stdout.write("estimating tree...")
# scraper.est_full_tree()
# scraper.data.write_labelled(label='^ot:ottTaxonName')
20 changes: 17 additions & 3 deletions mds/examples.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
# Example runs and datasets
# Example runs

There is a full example python script with comments in `docs/example.py`.
Some more example files can be found in `docs/example_scripts/`.
<!-- There is a full example python script with comments in `docs/example.py`.
Some more example files can be found in `docs/example_scripts/`. -->

## Example ot_350: blasting remotely

Run physcraper in an interactive Python session with the command:

```
python -i docs/examples/data_scrape.py
```

Or directly from the command line with:

```
python docs/examples/data_scrape.py
```
9 changes: 9 additions & 0 deletions mds/testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,12 @@ To run these tests locally (in your own computer), do
sh tests/run_test.sh
sh ws-tests/run_ws-tests.sh
```

If you want to see each step of the physcraper algorithm in action, you can also
run part of the tests in a python session.

For example, you can do:

```
python tests/test_multi.py
```
25 changes: 15 additions & 10 deletions physcraper/aligntreetax.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,26 +573,31 @@ def write_random_resolve_tre(self, treefilename='random_resolve.tre'):
fi.close()
return treepath

def write_aln(self, filename=None, alnschema="fasta"):
    """Write the current alignment into the working directory.

    :param filename: name of the output file, relative to ``self.workdir``;
        defaults to ``physcraper_<tag>.fas``
    :param alnschema: schema name passed through to ``self.aln.write``
    :return: absolute path of the file that was written
    """
    if filename is None:
        filename = "physcraper_{}.fas".format(self.tag)
    alnpath = "{}/{}".format(self.workdir, filename)
    self.aln.write(path=alnpath,
                   schema=alnschema)
    return os.path.abspath(alnpath)

def write_files(self, treefilename=None, treeschema="newick", alnfilename=None, alnschema="fasta"):
    """Outputs both the streaming files, labeled with OTU ids.
    Can be mapped to original labels using otu_dict.json or otu_seq_info.csv

    :param treefilename: tree file name, relative to ``self.workdir``;
        defaults to ``physcraper_<tag>.tre``
    :param treeschema: schema name passed through to ``self.tre.write``
    :param alnfilename: alignment file name, relative to ``self.workdir``;
        defaults to ``physcraper_<tag>.fas``
    :param alnschema: schema name passed through to ``self.aln.write``
    :return: tuple ``(treepath, alnpath)`` of the paths that were written
    """
    #debug("write_files")
    if treefilename is None:
        treefilename = "physcraper_{}.tre".format(self.tag)
    if alnfilename is None:
        alnfilename = "physcraper_{}.fas".format(self.tag)
    # Join with workdir for default names too, so the default output does
    # not end up in the current directory.
    treepath = "{}/{}".format(self.workdir, treefilename)
    alnpath = "{}/{}".format(self.workdir, alnfilename)
    self.tre.write(path=treepath,
                   schema=treeschema, unquoted_underscores=True)
    self.aln.write(path=alnpath,
                   schema=alnschema)
    return (treepath, alnpath)


def write_labelled(self, label, filename = "labelled", direc='workdir', norepeats=True, add_gb_id=False):
Expand Down

0 comments on commit 0df4ad7

Please sign in to comment.