Skip to content

Commit

Permalink
tips subcommand for starting mini tree workflow, unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
JTFouquier committed Feb 10, 2015
1 parent f6b7bc5 commit 0bf9423
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 1 deletion.
Empty file added ghosttree/tips/__init__.py
Empty file.
17 changes: 17 additions & 0 deletions ghosttree/tips/cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import skbio
from skbio import BiologicalSequence


""" takes 97_OTUs (at "species" level) and clusters them at a lower level of
similarity using swarm.
"""


def preprocess_tip_sequences(species_level_otus_f):
    """Yield the FASTA records of a species-level OTU file with adjusted IDs.

    Records are read from ``species_level_otus_f`` with ``skbio.read`` in
    FASTA format. A record whose ID contains no "-" is replaced by a new
    ``BiologicalSequence`` holding the same sequence and the ID with "_1"
    appended; any other record is yielded as read.

    This is a generator, so nothing is read until it is iterated.
    """
    for record in skbio.read(species_level_otus_f, format="fasta"):
        if "-" in record.id:
            # ID already contains "-": pass the record through untouched.
            yield record
            continue

        # NOTE(review): the replacement is built from the sequence and the new
        # ID only; confirm nothing else on the record needs to be carried over.
        suffixed_id = str(record.id) + "_1"
        yield BiologicalSequence(record.sequence, id=suffixed_id)

# swarmdir = "/Users/jenniferfouquier/dev/ghost-tree/swarm"
Empty file.
25 changes: 25 additions & 0 deletions ghosttree/tips/tests/test_cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import unittest
from StringIO import StringIO

from skbio import BiologicalSequence

from ghosttree.tips.cluster import preprocess_tip_sequences


# Tests for fixing input OTU-level sequence files (unnecessary characters and
# other requirements set forth by the SWARM software).
class TestClusterTipSequences(unittest.TestCase):
    """Unit tests for ``preprocess_tip_sequences``."""

    def setUp(self):
        # Wrap the module-level FASTA text in a fresh file-like object so each
        # test reads it from the beginning.
        self.tips_with_returns = StringIO(tips_with_returns)

    def test_tip_sequences_with_returns(self):
        # The single input record has ID "SSS456" (no "-"), so the expected
        # output is the same sequence with "_1" appended to the ID.
        expected = [BiologicalSequence("ATC", id="SSS456_1")]
        observed = list(preprocess_tip_sequences(self.tips_with_returns))
        self.assertEqual(observed, expected)

# Minimal FASTA fixture used by TestClusterTipSequences.setUp: a single record
# with ID "SSS456" (no "-" in the ID) and sequence "ATC".
tips_with_returns = """>SSS456
ATC
"""

# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
15 changes: 14 additions & 1 deletion scripts/ghost-tree
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import skbio
# library code imports
from ghosttree.silva.filter import fungi_from_fasta
from ghosttree.util import compare_tip_to_tip_distances

from ghosttree.tips.cluster import preprocess_tip_sequences

@click.group()
def cli():
Expand Down Expand Up @@ -43,5 +43,18 @@ def extract_fungi(fasta_file, accession_file, taxonomy_file, output_file):
skbio.write(fungi_from_fasta(fasta_file, accession_file, taxonomy_file), into=output_file, format="fasta")


@cli.group()
def tips():
    """Commands for preparing tip (species-level OTU) sequences."""
    # The docstring above doubles as the group's --help text; the group itself
    # does no work and only hosts its subcommands.

# NOTE(review): file mode "U" (universal newlines) is a Python 2-era flag that
# Python 3.11+ rejects in open(); revisit if this script is ported.
@tips.command("group-tips")
@click.argument("species-level-otus-file", type=click.File("U"))
@click.argument("fixed-species-level-otus-file", type=click.File("w"))
def group_tips(species_level_otus_file, fixed_species_level_otus_file):
    """Preprocess a FASTA file of species-level OTUs.

    Reads SPECIES_LEVEL_OTUS_FILE, passes its records through
    preprocess_tip_sequences, and writes the result in FASTA format to
    FIXED_SPECIES_LEVEL_OTUS_FILE.
    """
    # preprocess_tip_sequences is handed the open input file and its result is
    # passed straight to skbio.write.
    fixed_sequences = preprocess_tip_sequences(species_level_otus_file)
    skbio.write(fixed_sequences, into=fixed_species_level_otus_file,
                format="fasta")




# Invoke the click command group when this file is executed as a script.
if __name__ == "__main__":
    cli()

0 comments on commit 0bf9423

Please sign in to comment.