-
Notifications
You must be signed in to change notification settings - Fork 27
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #13 from Merck/master
Apply master changes to develop
- Loading branch information
Showing
7 changed files
with
210 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
__version__ = '0.1.7-dev' | ||
__version__ = '0.1.8' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
from deepbgc.output.writer import OutputWriter | ||
from deepbgc import util | ||
from deepbgc import __version__ | ||
import json | ||
import collections | ||
|
||
# NOTE(review): appears unused in this module — presumably the maximum label
# length antiSMASH accepts for sideloaded subregions (the 'Putative BGC' label
# used below fits within it); confirm against the antiSMASH sideloading spec.
ANTISMASH_SUBREGION_LABEL_MAX_LENGTH = 20
|
||
|
||
class AntismashJSONWriter(OutputWriter):
    """Accumulates DeepBGC cluster predictions per record and writes them
    as a single antiSMASH sideloading JSON file when closed.

    Records are buffered in parallel lists (one entry per call to write())
    and serialized together in close().
    """

    def __init__(self, out_path):
        super(AntismashJSONWriter, self).__init__(out_path)
        # Parallel per-record buffers: ids, subregion dicts, protocluster dicts.
        self.record_ids = []
        self.record_subregions = []
        self.record_protoclusters = []
        # Detector metadata flattened to '<detector_label>_<key>' -> str(value).
        self.tool_meta = collections.OrderedDict()

    @classmethod
    def get_description(cls):
        return 'AntiSMASH JSON file for sideloading.'

    @classmethod
    def get_name(cls):
        return 'antismash-json'

    def write(self, record):
        """Buffer one record's cluster features as antiSMASH subregion dicts."""
        classifier_names = util.get_record_classifier_names(record)
        subregions = [
            self._create_cluster_json(cluster_feature, classifier_names=classifier_names)
            for cluster_feature in util.get_cluster_features(record)
        ]
        # TODO add protocluster?
        protoclusters = []

        self.record_ids.append(record.id)
        self.record_subregions.append(subregions)
        self.record_protoclusters.append(protoclusters)
        # Flatten every detector's metadata into the shared tool configuration.
        for detector_label, meta in util.get_record_detector_meta(record).items():
            for key, value in meta.items():
                self.tool_meta['{}_{}'.format(detector_label, key)] = str(value)

    def _get_cluster_classes_str(self, cluster, classifier_name):
        """Return the first classification value for the given classifier,
        or a fallback string when no confident classification is present."""
        column = util.format_classification_column(classifier_name)
        values = cluster.qualifiers.get(column)
        if not values:
            return 'no confident class'
        return values[0]

    def _create_cluster_json(self, cluster, classifier_names):
        """Convert one cluster SeqFeature into an antiSMASH subregion dict."""
        details = {
            'detector': cluster.qualifiers.get(
                'detector',
                cluster.qualifiers.get('tool', ['unspecified']))[0],
        }
        for cls_name in classifier_names:
            details[cls_name] = self._get_cluster_classes_str(cluster, cls_name)
        score_column = util.format_bgc_score_column(details['detector'])
        score_values = cluster.qualifiers.get(score_column)
        if score_values:
            details['score'] = score_values[0]
        return {
            'start': int(cluster.location.start),
            'end': int(cluster.location.end),
            'label': 'Putative BGC',
            'details': details
        }

    def _create_record_json(self, name, subregions, protoclusters):
        """Assemble the per-record JSON structure."""
        return {
            "name": name,
            "subregions": subregions,
            "protoclusters": protoclusters
        }

    def close(self):
        """Serialize all buffered records to self.out_path as sideload JSON."""
        records = [
            self._create_record_json(record_id, subregions, protoclusters)
            for record_id, subregions, protoclusters in zip(
                self.record_ids, self.record_subregions, self.record_protoclusters)
        ]
        data = {
            "tool": {
                "name": "DeepBGC",
                "version": __version__,
                "description": "Putative BGCs predicted using DeepBGC",
                "configuration": self.tool_meta
            },
            "records": records
        }
        with open(self.out_path, 'w') as f:
            json.dump(data, f, indent=2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
from deepbgc.output.bgc_genbank import BGCGenbankWriter | ||
from deepbgc.output.evaluation.pr_plot import PrecisionRecallPlotWriter | ||
from deepbgc.output.evaluation.roc_plot import ROCPlotWriter | ||
from deepbgc.output.genbank import GenbankWriter | ||
from deepbgc.output.evaluation.bgc_region_plot import BGCRegionPlotWriter | ||
from deepbgc.output.cluster_tsv import ClusterTSVWriter | ||
from deepbgc.output.evaluation.pfam_score_plot import PfamScorePlotWriter | ||
from deepbgc.output.pfam_tsv import PfamTSVWriter | ||
from deepbgc.output.antismash_json import AntismashJSONWriter | ||
from deepbgc import util | ||
from deepbgc.data import PFAM_DB_VERSION | ||
import collections | ||
from Bio.Seq import Seq | ||
from Bio.SeqRecord import SeqRecord | ||
from Bio.SeqFeature import SeqFeature, FeatureLocation | ||
from Bio.Alphabet import generic_dna | ||
import os | ||
import pytest | ||
|
||
|
||
class WriterTest:
    """Pairs a writer class with an output path; stringifies to the class
    so parametrized test IDs stay readable."""

    def __init__(self, cls, path):
        self.cls = cls
        self.path = path

    def __str__(self):
        return str(self.cls)

    # repr intentionally mirrors str for identical test-ID rendering
    __repr__ = __str__
|
||
|
||
# All output writer implementations under test; each is constructed with a
# single out_path keyword and exercised by every parametrized test below.
WRITERS = [
    BGCGenbankWriter,
    PrecisionRecallPlotWriter,
    ROCPlotWriter,
    GenbankWriter,
    BGCRegionPlotWriter,
    ClusterTSVWriter,
    PfamScorePlotWriter,
    PfamTSVWriter,
    AntismashJSONWriter
]
|
||
@pytest.fixture
def processed_record(detector_name='deepbgc', detector_label='deepbgc', score_threshold=0.5):
    """Build a small SeqRecord carrying detector metadata plus CDS, Pfam
    and BGC cluster features, as produced by a DeepBGC detection run."""
    meta_key = util.format_detector_meta_key(detector_label)
    record = SeqRecord(Seq('ACTGCTCGACTGATT', alphabet=generic_dna))
    record.annotations['structured_comment'] = collections.OrderedDict()
    record.annotations['structured_comment'][meta_key] = collections.OrderedDict(
        name=detector_name,
        label=detector_label,
        score_threshold=score_threshold
    )
    score_column = util.format_bgc_score_column(detector_name)
    # Three CDS features covering the record
    for start, end, locus in [(0, 2, 'A'), (2, 5, 'B'), (5, 8, 'C')]:
        record.features.append(
            SeqFeature(FeatureLocation(start, end), type='CDS',
                       qualifiers={'locus_tag': [locus]}))
    # One Pfam domain per CDS, with a per-domain detector score
    pfam_specs = [
        (0, 2, 0.4, 'PF00001', 'A'),
        (2, 5, 0.7, 'PF00002', 'B'),
        (5, 8, 0.6, 'PF00003', 'C'),
    ]
    for start, end, score, pfam_id, locus in pfam_specs:
        qualifiers = {score_column: [score], 'db_xref': [pfam_id],
                      'locus_tag': [locus], 'database': [PFAM_DB_VERSION]}
        record.features.append(
            SeqFeature(FeatureLocation(start, end), type=util.PFAM_FEATURE,
                       qualifiers=qualifiers))
    # Two cluster features: one detected by DeepBGC, one manually annotated
    record.features.append(
        SeqFeature(FeatureLocation(0, 5), type='cluster', qualifiers={
            score_column: ['0.6'],
            'detector': [detector_name],
            'detector_label': [detector_label]}))
    record.features.append(
        SeqFeature(FeatureLocation(2, 8), type='cluster', qualifiers={
            'detector': ['annotated'],
            'detector_label': ['annotated']}))
    return record
|
||
|
||
@pytest.mark.parametrize("writer_cls", WRITERS)
def test_unit_writer_full_record(tmpdir, writer_cls, processed_record):
    """Each writer accepts a fully annotated record and produces its file."""
    output_file = os.path.join(str(tmpdir), 'file.png')
    writer = writer_cls(out_path=output_file)
    writer.write(processed_record)
    writer.close()
    assert os.path.exists(output_file)
|
||
|
||
@pytest.mark.parametrize("writer_cls", WRITERS)
def test_unit_writer_no_record(tmpdir, writer_cls):
    """Closing a writer without writing any record must not raise."""
    output_file = os.path.join(str(tmpdir), 'file.png')
    writer_cls(out_path=output_file).close()
|
||
|
||
@pytest.mark.parametrize("writer_cls", WRITERS)
def test_unit_writer_no_features(tmpdir, writer_cls, processed_record):
    """Writers must tolerate a record stripped of all features."""
    output_file = os.path.join(str(tmpdir), 'file.png')
    processed_record.features = []
    writer = writer_cls(out_path=output_file)
    writer.write(processed_record)
    writer.close()
|
||
|
||
@pytest.mark.parametrize("writer_cls", WRITERS)
def test_unit_writer_single_feature(tmpdir, writer_cls, processed_record):
    """Writers must cope with a record trimmed to one feature of each kind."""
    output_file = os.path.join(str(tmpdir), 'file.png')
    # Keep just the first CDS, Pfam and cluster feature (selected before
    # the features list is replaced).
    processed_record.features = (
        util.get_protein_features(processed_record)[:1]
        + util.get_pfam_features(processed_record)[:1]
        + util.get_cluster_features(processed_record)[:1]
    )
    writer = writer_cls(out_path=output_file)
    writer.write(processed_record)
    writer.close()