Skip to content

Commit

Permalink
Merge pull request #1309 from teharrison/develop
Browse files Browse the repository at this point in the history
repo cleanup, bug fixes, function lookup
  • Loading branch information
teharrison committed Dec 20, 2017
2 parents 6b681ce + 8626fba commit 2830180
Show file tree
Hide file tree
Showing 9 changed files with 263 additions and 533 deletions.
175 changes: 0 additions & 175 deletions src/MGRAST/bin/clean_taxonomy.py

This file was deleted.

65 changes: 65 additions & 0 deletions src/MGRAST/bin/delete_job_and_files.pl
@@ -0,0 +1,65 @@
#!/usr/bin/env perl

###### cleanup tool ######
# Given an AWE job ID:
# 1. delete the job from AWE
# 2. delete all of the job's output files (nodes) in Shock
#
# Exits 1 on usage error or if the job document cannot be retrieved
# (without it we cannot discover which Shock nodes to delete).

use lib "/MG-RAST/conf";

use strict;
use warnings;
no warnings('once');

use Conf;
use JSON;
use LWP::UserAgent;
use HTTP::Request::Common qw(DELETE);

my $shock_url  = $Conf::shock_url;
my $awe_url    = $Conf::awe_url;
my $auth_token = $Conf::pipeline_token;

my $aweid = shift @ARGV;

unless ($aweid) {
    print STDERR "Usage: \tdelete_job_and_files.pl <awe_id> \n";
    exit 1;
}

# set handles
my $agent = LWP::UserAgent->new();
$agent->timeout(3600);
my $json = JSON->new;
$json = $json->utf8();
$json->max_size(0);
$json->allow_nonref;

# get job document first - we need the task/output list before deleting the job
my $response = undef;
my $job_doc  = undef;
eval {
    # BUG FIX: original called $self->agent / $self->json, but this is a
    # plain script with no $self object - use the lexical handles directly.
    my $get = $agent->get($awe_url.'/job/'.$aweid, 'Authorization', $auth_token);
    $response = $json->decode( $get->content );
};
if ($@ || (! ref($response))) {
    print STDERR "ERROR: unable to connect to AWE server\n";
    # BUG FIX: original fell through and later dereferenced undef $job_doc
    exit 1;
} elsif (exists($response->{error}) && $response->{error}) {
    print STDERR "ERROR: ".$response->{error}[0]."\n";
    exit 1;
} else {
    $job_doc = $response->{data};
}

# delete job (?full=1 also purges job history on the AWE side)
# SECURITY FIX: use in-process LWP DELETE instead of shelling out to curl
# with the auth token interpolated into a shell command line.
my $jdel = $agent->request(DELETE($awe_url.'/job/'.$aweid.'?full=1', 'Authorization', $auth_token));
print $jdel->content."\n";

# delete output nodes in shock; '-' marks a placeholder / absent node
foreach my $task (@{$job_doc->{tasks} || []}) {
    foreach my $out (@{$task->{outputs} || []}) {
        if ($out->{node} && ($out->{node} ne '-')) {
            my $ndel = $agent->request(DELETE($shock_url.'/node/'.$out->{node}, 'Authorization', $auth_token));
            print $ndel->content."\n";
        }
    }
}

exit 0;
29 changes: 22 additions & 7 deletions src/MGRAST/bin/parse_obo.py
Expand Up @@ -10,7 +10,6 @@
# declare a blank dictionary, keys are the term_ids
terms = {}
quote = re.compile(r'\"(.+?)\"')
term = re.compile(r'^[A-Z]+:\d+$')
rank = re.compile(r'^has_rank NCBITaxon:(.+)$')
# to check for circular recursion
ascSeen = set()
Expand Down Expand Up @@ -115,7 +114,7 @@ def getParents(tid, full=False):
def getTop(full=False):
top = {} if full else []
for t, info in terms.iteritems():
if (len(info['parentNodes']) == 0) and (len(info['childNodes']) > 0) and term.match(t):
if (len(info['parentNodes']) == 0) and (len(info['childNodes']) > 0):
if full:
top[t] = terms[t]
else:
Expand All @@ -136,7 +135,7 @@ def outputTab(data, ofile):
print out_str

def main(args):
global terms, addRank
global terms
parser = OptionParser(usage="usage: %prog [options] -i <input file> -o <output file>")
parser.add_option("-i", "--input", dest="input", default=None, help="input .obo file")
parser.add_option("-o", "--output", dest="output", default=None, help="output: .json file or stdout, default is stdout")
Expand All @@ -150,16 +149,16 @@ def main(args):
parser.add_option("", "--rank", dest="rank", action="store_true", default=False, help="return output with 'rank' field, only for --full")
parser.add_option("", "--common", dest="common", action="store_true", default=False, help="use only common name synonyms (--full / NCBI taxonomy)")
parser.add_option("", "--no_id", dest="no_id", action="store_true", default=False, help="remove 'id' from struct to reduce size, only for --full")
parser.add_option("", "--strip_prefix", dest="strip_prefix", action="store_true", default=False, help="remove prefix from 'id' to reduce size")
parser.add_option("", "--no_parents", dest="no_parents", action="store_true", default=False, help="remove 'parentNodes' from struct to reduce size, only for --full")
parser.add_option("", "--no_description", dest="no_description", action="store_true", default=False, help="remove 'description' from struct to reduce size, only for --full")
(opts, args) = parser.parse_args()
if not (opts.input and os.path.isfile(opts.input)):
parser.error("missing input")
if not opts.relations:
parser.error("missing relations")
if (not opts.term_id) and (opts.get != 'top'):
opts.get = 'all'
if opts.rank:
addRank = True

oboFile = open(opts.input, 'r')
relations = opts.relations.split(',')
Expand All @@ -174,6 +173,8 @@ def main(args):
term = parseTagValue(getTerm(oboFile), opts.common)
if (len(term) != 0) and ('name' in term) and (len(term['name']) > 0):
termID = term['id'][0]
if opts.strip_prefix:
termID = termID.split(":")[1]
termName = term['name'][0]
if 'def' in term:
termDesc = term['def'][0]
Expand All @@ -187,7 +188,11 @@ def main(args):
termParents = []
for rel in relations:
if rel in term:
termParents.extend([p.split()[0] for p in term[rel]])
for p in term[rel]:
if opts.strip_prefix:
termParents.append(p.split()[0].split(":")[1])
else:
termParents.append(p.split()[0])

# each ID will have two arrays of parents and children
if termID not in terms:
Expand All @@ -201,6 +206,8 @@ def main(args):
rval = rank.match(term['property_value'][0])
if rval:
terms[termID]['rank'] = rval.group(1)
if terms[termID]['rank'] == 'superkingdom':
terms[termID]['rank'] = 'domain'

# append parents of the current term
terms[termID]['parentNodes'] = termParents
Expand Down Expand Up @@ -241,6 +248,9 @@ def main(args):
if opts.no_parents:
for v in data.itervalues():
del v['parentNodes']
if opts.no_description:
for v in data.itervalues():
del v['description']

# have global info
if opts.full and opts.metadata:
Expand All @@ -249,7 +259,12 @@ def main(args):
mdata['nodes'] = data
outputJson(mdata, opts.output)
except:
outputJson(data, opts.output)
# default action
mdata = {
'nodes': data,
'rootNode': opts.term_id
}
outputJson(mdata, opts.output)
# tabbed list output
elif opts.tab and (not opts.full):
outputTab(data, opts.output)
Expand Down

0 comments on commit 2830180

Please sign in to comment.