Skip to content

Commit

Permalink
add transformer; rna to dna; plugin setup fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
JTFouquier committed Aug 28, 2018
1 parent bb9464d commit e8657a5
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 3 deletions.
67 changes: 67 additions & 0 deletions q2_ghost_tree/_transformer.py
@@ -0,0 +1,67 @@
from io import StringIO
from q2_ghost_tree.plugin_setup import plugin

from q2_types.feature_data import AlignedDNAFASTAFormat, \
AlignedDNASequencesDirectoryFormat
from ._aligned_rna_sequences import AlignedRNAFASTAFormat
from q2_types.feature_data import FeatureData, Sequence, AlignedSequence


def parse_fasta(f, trim_desc=False):
    """Parse a FASTA format file.

    Parameters
    ----------
    f : File object or iterator returning lines in FASTA format.
    trim_desc : bool, optional
        If True, keep only the first whitespace-delimited token of each
        description line (an empty string if the description is blank).

    Returns
    -------
    An iterator of tuples containing two strings
        First string is the sequence description (the header line without
        its leading character), second is the sequence.

    Notes
    -----
    This function strips surrounding whitespace from every line, removes
    spaces inside the sequence and translates "U" to "T", in order to
    accommodate FASTA files downloaded from SILVA and the Living Tree
    Project.

    An empty input yields no records.
    """
    # TODO this is from Kyle B. Cite properly before release

    def _description(header):
        # Drop the leading ">" and optionally keep only the first token.
        desc = header[1:]
        if trim_desc:
            tokens = desc.split()
            # A bare ">" header has no tokens; fall back to an empty
            # description instead of raising IndexError.
            desc = tokens[0] if tokens else ""
        return desc

    lines = iter(f)
    try:
        first = next(lines)
    except StopIteration:
        # Empty input: finish cleanly. Letting StopIteration escape from a
        # generator body would surface as RuntimeError (PEP 479).
        return

    desc = _description(first.strip())
    chunks = []
    for line in lines:
        line = line.strip()
        if line.startswith(">"):
            # A new header closes the record collected so far.
            yield desc, "".join(chunks)
            desc = _description(line)
            chunks = []
        else:
            chunks.append(line.replace(" ", "").replace("U", "T"))
    # Emit the final record, which has no following header to trigger it.
    yield desc, "".join(chunks)



def write_fasta(f, seqs):
    """Write sequence records to ``f`` in FASTA format.

    Parameters
    ----------
    f : Object with a ``write`` method accepting a string.
    seqs : Iterable of ``(description, sequence)`` pairs.

    Notes
    -----
    Each record is written as a ">" header line followed by the sequence on
    a single line.
    """
    emit = f.write
    for record in seqs:
        header, sequence = record
        emit(">{0}\n{1}\n".format(header, sequence))


# TODO change silly function name
# NOTE: the format objects are not file handles themselves; they have to be
# opened before parse_fasta / write_fasta can read from or write to them.
@plugin.register_transformer
def _my_great_transformer(ff: AlignedRNAFASTAFormat) -> \
        AlignedDNAFASTAFormat:
    """Convert an aligned RNA FASTA file into an aligned DNA FASTA file.

    Parameters
    ----------
    ff : AlignedRNAFASTAFormat
        The aligned RNA sequences to convert.

    Returns
    -------
    AlignedDNAFASTAFormat
        A new format instance holding the same records, with "U" translated
        to "T" by ``parse_fasta``.

    Notes
    -----
    Assumes both formats provide ``open()`` returning a text file handle, as
    QIIME 2 text file formats do -- TODO confirm for AlignedRNAFASTAFormat.
    """
    ff2 = AlignedDNAFASTAFormat()
    # parse_fasta is lazy, so the records must be consumed by write_fasta
    # while both handles are still open.
    with ff.open() as rna_fh, ff2.open() as dna_fh:
        write_fasta(dna_fh, parse_fasta(rna_fh))
    return ff2
12 changes: 9 additions & 3 deletions q2_ghost_tree/plugin_setup.py
@@ -1,7 +1,7 @@
import qiime2.plugin

from q2_types.feature_data import FeatureData, Sequence, AlignedSequence, \
Taxonomy
Taxonomy, AlignedDNASequencesDirectoryFormat
from q2_types.tree import Phylogeny, Rooted, Unrooted

import q2_ghost_tree
Expand Down Expand Up @@ -214,23 +214,29 @@
plugin.register_semantic_type_to_format(
SilvaTaxonomy, artifact_format=SilvaTaxonomyDirectoryFormat)

# TODO: Change Silva-dependent functions to only require DNA.
# Declare the AlignedRNASequences semantic type and register the aligned RNA
# FASTA file/directory formats with the plugin.
AlignedRNASequences = qiime2.plugin.SemanticType('AlignedRNASequences')
plugin.register_formats(AlignedRNAFASTAFormat, AlignedRNAFASTADirectoryFormat)
plugin.register_semantic_types(AlignedRNASequences)
plugin.register_semantic_type_to_format(
AlignedRNASequences, artifact_format=AlignedRNAFASTADirectoryFormat)
# TODO(review): this additionally maps FeatureData[AlignedSequence] onto the
# RNA directory format. Confirm this does not conflict with the format that
# q2-types registers for the same semantic type.
plugin.register_semantic_type_to_format(
FeatureData[AlignedSequence],
artifact_format=AlignedRNAFASTADirectoryFormat)

plugin.methods.register_function(
function=extract_fungi,
inputs={
'aligned_silva_file': AlignedRNASequences,
'aligned_silva_file': FeatureData[AlignedSequence],
'accession_file': SilvaAccession, # Silva semantic type
'taxonomy_file': SilvaTaxonomy, # Silva semantic type
},
parameters={
},
outputs=[
('aligned_seqs', AlignedRNASequences),
('aligned_seqs', FeatureData[AlignedSequence]),
],
input_descriptions={
'aligned_silva_file': 'TODO',
Expand Down

0 comments on commit e8657a5

Please sign in to comment.