Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

minor changes: api and db schema #171

Merged
merged 6 commits into from

2 participants

Commits on Nov 27, 2012
  1. changed notebook cloning process

    Travis Harrison authored
  2. added function to POST to shock node

    Travis Harrison authored
Commits on Nov 29, 2012
  1. more optimized schema changes

    Travis Harrison authored
This page is out of date. Refresh to see the latest.
View
6 src/Babel/bin/create_pg_indexes.sql
@@ -1,3 +1,4 @@
+CREATE UNIQUE INDEX md5s_md5 ON md5s (md5);
CREATE INDEX md5_protein_id ON md5_protein (id);
CREATE INDEX md5_protein_md5 ON md5_protein (md5);
@@ -17,7 +18,8 @@ CREATE INDEX md5_rna_organism ON md5_rna (organism);
CREATE INDEX md5_rna_tax_rank ON md5_rna (tax_rank);
CREATE INDEX md5_rna_source ON md5_rna (source);
-CREATE INDEX md5_organism_unique_key ON md5_organism_unique (md5, source);
+CREATE INDEX md5_organism_unique_md5 ON md5_organism_unique (md5);
+CREATE INDEX md5_organism_unique_source ON md5_organism_unique (source);
CREATE INDEX aliases_protein_id on aliases_protein (id);
@@ -39,7 +41,7 @@ CREATE INDEX ontology_eggnog_id ON ontology_eggnog (id);
CREATE INDEX ontology_eggnog_type ON ontology_eggnog (type);
CREATE INDEX ontologies_id ON ontologies (id);
-CREATE INDEX ontologies_type ON ontologies (type);
+CREATE INDEX ontologies_type ON ontologies (source);
CREATE INDEX sources_name ON sources (name);
CREATE INDEX sources_type ON sources (type);
View
14 src/Babel/bin/create_pg_tables.sql
@@ -1,3 +1,9 @@
+DROP TABLE IF EXISTS md5s;
+CREATE TABLE md5s (
+_id SERIAL PRIMARY KEY,
+md5 char(32) NOT NULL,
+is_protein boolean
+);
DROP TABLE IF EXISTS md5_protein;
CREATE TABLE md5_protein (
@@ -45,9 +51,9 @@ level integer
DROP TABLE IF EXISTS md5_organism_unique;
CREATE TABLE md5_organism_unique (
-md5 char(32) NOT NULL,
-organism text,
-source text
+md5 integer NOT NULL,
+organism integer NOT NULL,
+source integer NOT NULL
);
DROP TABLE IF EXISTS aliases_protein;
@@ -137,7 +143,7 @@ CREATE TABLE ontologies (
level3 text,
level4 text,
id text,
- type text
+ source integer
);
DROP TABLE IF EXISTS sources;
View
5 src/Babel/bin/load_memcache.pl
@@ -46,7 +46,7 @@
unless ($dbh) { print STDERR "Error: " . $DBI::errstr . "\n"; exit 1; }
my @types = ('source', 'organism', 'function', 'ontology');
-my @md5s = ('md5_protein', 'md5_rna', 'md5_ontology', 'md5_lca');
+my @md5s = ('md5', 'md5_protein', 'md5_rna', 'md5_ontology', 'md5_lca');
# get lca table
my $has_lca = $dbh->selectcol_arrayref("SELECT COUNT(relname) FROM pg_class WHERE relname = 'md5_lca'");
@@ -65,7 +65,8 @@
$dbh->do("COPY (SELECT _id, name, type FROM sources) TO '$tmpdir/source_map' WITH NULL AS ''");
$dbh->do("COPY (SELECT _id, name, ncbi_tax_id FROM organisms_ncbi) TO '$tmpdir/organism_map' WITH NULL AS ''");
$dbh->do("COPY (SELECT _id, name FROM functions) TO '$tmpdir/function_map' WITH NULL AS ''");
-$dbh->do("COPY (SELECT _id, id, type FROM ontologies) TO '$tmpdir/ontology_map' WITH NULL AS ''");
+$dbh->do("COPY (SELECT _id, id, source FROM ontologies) TO '$tmpdir/ontology_map' WITH NULL AS ''");
+$dbh->do("COPY (SELECT _id, md5 FROM md5s) TO '$tmpdir/md5_map' WITH NULL AS ''");
$dbh->do("COPY (SELECT DISTINCT md5, source, function, organism FROM md5_protein ORDER BY md5) TO '$tmpdir/md5_protein_map' WITH NULL AS ''");
$dbh->do("COPY (SELECT DISTINCT md5, source, function, organism FROM md5_rna ORDER BY md5) TO '$tmpdir/md5_rna_map' WITH NULL AS ''");
$dbh->do("COPY (SELECT DISTINCT m.md5, m.source, m.function, o._id FROM md5_ontology m, ontologies o WHERE m.id=o.id ORDER BY m.md5) TO '$tmpdir/md5_ontology_map' WITH NULL AS ''");
View
28 src/Babel/bin/md52memcache.pl
@@ -18,6 +18,7 @@
md5_protein => 1,
md5_rna => 1,
md5_lca => 1,
+ md5 => 1,
ontology => 1,
function => 1,
organism => 1,
@@ -29,6 +30,7 @@
$usage .= "md5_protein file (sorted md5s):\t\tmd5, source, function, organism\n";
$usage .= "md5_rna file (sorted md5s):\t\tmd5, source, function, organism\n";
$usage .= "md5_lca file (unique md5s):\t\tmd5, domain, phylum, class, order, family, genus, species, name, level\n";
+$usage .= "md5 file (unique md5s):\t\tinteger id, md5\n";
$usage .= "annotation file:\t\tinteger id, text name, optional\n";
if ( (@ARGV > 0) && ($ARGV[0] =~ /-h/) ) { print STDERR $usage; exit 1; }
@@ -52,7 +54,20 @@
my $mem_cache = new Cache::Memcached {'servers' => [$memhost], 'debug' => 0, 'compress_threshold' => 10_000};
unless ($mem_cache && ref($mem_cache)) { print STDERR "Unable to connect to memcache:\n$usage"; exit 1; }
-if ($select eq 'md5_lca') {
+if ($select eq 'md5') {
+ print STDERR "Parsing md5 file / adding to memcache ... " if ($verbose);
+ open(MAPF, "<$mapf") || die "Can't open file $mapf: $!\n";
+ while (my $line = <MAPF>) {
+ chomp $line;
+ $num += 1;
+ my ($id, $md5) = split(/\t/, $line);
+ $mem_cache->set($md5.$memkey, { id => $id }, undef); # no experiation
+ }
+ close MAPF;
+ print STDERR "Done parsing / adding $num md5s\n" if ($verbose);
+
+}
+elsif ($select eq 'md5_lca') {
print STDERR "Parsing md5_lca file / adding to memcache ... " if ($verbose);
open(LCAF, "<$mapf") || die "Can't open file $mapf: $!\n";
while (my $line = <LCAF>) {
@@ -60,7 +75,9 @@
$num += 1;
my ($md5, @taxa) = split(/\t/, $line);
my $rank = pop @taxa;
- $mem_cache->set($md5.$memkey, { lca => \@taxa }, undef); # no experiation
+ my $data = $mem_cache->get($md5.$memkey) || {};
+ $data->{lca} = [ @taxa ];
+ $mem_cache->set($md5.$memkey, $data, undef); # no experiation
}
close LCAF;
print STDERR "Done parsing / adding $num md5s\n" if ($verbose);
@@ -91,12 +108,13 @@
}
# add data
$data->{is_aa} = ($select eq 'md5_rna') ? 0 : 1;
- push @{ $data->{$sid}->{$fid}->{$mtype} }, $oid;
+ $data->{ann} = {};
+ push @{ $data->{ann}->{$sid}->{$fid}->{$mtype} }, $oid;
}
close MD5F;
# add last
if (scalar(keys %$data) > 0) {
- $mem_cache->set($curr.$memkey, $data, undef); # no experiation
+ $mem_cache->set($curr.$memkey, $data, undef); # no experiation
}
print STDERR "Done parsing / adding $num md5s\n" if ($verbose);
}
@@ -112,7 +130,7 @@
$other =~ s/\\N//;
$map_data->{$id} = [ $name, $other ];
} elsif ($select eq 'ontology') {
- $map_data->{$other}{$id} = $name;
+ $map_data->{$other}{$id} = $name;
} else {
$map_data->{$id} = $name;
}
View
17 src/Babel/bin/md52unique_organism.pl
@@ -40,14 +40,14 @@
my ($type, $src_map) = @$src_set;
print STDERR "Processing $type md5s.\n" if ($verbose);
while ( my ($sid, $sname) = each %$src_map) {
- print STDERR "\t$sname ... " if ($verbose);
+ print STDERR "\t$sname ($sid)... " if ($verbose);
my $only_one = 0;
my $has_max = 0;
my $random = 0;
my $no_taxid = 0;
my $org_num = {};
my $md5_org = {};
- my $query = "select distinct md5, organism from md5_$type".(($sname =~ /^M5(NR|RNA)$/) ? "" : " where source = $sid");
+ my $query = "select distinct m._id, t.organism from md5_$type t, md5s m where t.md5=m.md5".(($sname =~ /^M5(NR|RNA)$/) ? "" : " and t.source = $sid");
my $db_rows = $dbh->prepare($query);
$db_rows->execute();
@@ -61,7 +61,7 @@
if (scalar($orgs) == 1) {
#my $oname = exists($ncbi_org->{$orgs->[0]}) ? $ncbi_org->{$orgs->[0]} : $other_org->{$orgs->[0]};
#print OUTF join("\t", ($md5, $oname, $sname))."\n";
- print OUTF join("\t", ($md5, $orgs->[0], $sname))."\n";
+ print OUTF join("\t", ($md5, $orgs->[0], $sid))."\n";
$only_one += 1;
}
# get ncbi set or other set sorted by abundance
@@ -79,13 +79,13 @@
my @top = map { $_->[0] } grep { $_->[1] == $max } @org_set;
# if we have a top one, use
if (@top == 1) {
- print OUTF join("\t", ($md5, $top[0], $sname))."\n";
+ print OUTF join("\t", ($md5, $top[0], $sid))."\n";
$has_max += 1;
}
# randomly choose
else {
my $rand_index = int( rand(scalar(@top)) );
- print OUTF join("\t", ($md5, $top[$rand_index], $sname))."\n";
+ print OUTF join("\t", ($md5, $top[$rand_index], $sid))."\n";
$random += 1;
}
}
@@ -99,7 +99,7 @@
if ($load_db) {
print STDERR "Creating table md5_organism_unique ... " if ($verbose);
$dbh->do("DROP TABLE IF EXISTS md5_organism_unique");
- $dbh->do("CREATE TABLE md5_organism_unique (md5 char(32) NOT NULL, organism integer, source text);");
+ $dbh->do("CREATE TABLE md5_organism_unique (md5 integer NOT NULL, organism integer, source integer);");
$dbh->commit;
print STDERR "Done.\n" if ($verbose);
@@ -109,7 +109,8 @@
print STDERR "Done.\n" if ($verbose);
print STDERR "Creating indexes for md5_organism_unique ... " if ($verbose);
- $dbh->do("CREATE INDEX md5_organism_unique_key ON md5_organism_unique (md5, source);");
+ $dbh->do("CREATE INDEX md5_organism_unique_md5 ON md5_organism_unique (md5);");
+ $dbh->do("CREATE INDEX md5_organism_unique_source ON md5_organism_unique (source);");
$dbh->commit;
print STDERR "Done.\n" if ($verbose);
}
@@ -141,7 +142,5 @@ sub get_source_maps {
elsif ($r->[2] eq 'rna') { $rnas->{$r->[0]} = $r->[1]; }
}
}
- $prots->{M5NR} = 'M5NR';
- $rnas->{M5RNA} = 'M5RNA';
return ($prots, $rnas);
}
View
47 src/MGRAST/Schema/mgrast_analysis_v3.sql
@@ -28,7 +28,7 @@ DROP TABLE IF EXISTS job_md5s;
CREATE TABLE job_md5s (
version smallint NOT NULL,
job integer NOT NULL,
- md5 char(32) NOT NULL,
+ md5 integer NOT NULL,
abundance integer NOT NULL,
evals integer[5],
exp_avg real,
@@ -59,8 +59,8 @@ CREATE TABLE job_functions (
len_stdv real,
ident_avg real,
ident_stdv real,
- md5s char(32)[],
- source text NOT NULL
+ md5s integer[],
+ source integer NOT NULL
);
-- COPY job_functions (version,job,id,abundance,exp_avg,exp_stdv,len_avg,len_stdv,ident_avg,ident_stdv,md5s,source) FROM 'FILE' WITH NULL AS '';
CREATE INDEX job_functions_vj ON job_functions (version, job);
@@ -80,8 +80,8 @@ CREATE TABLE job_organisms (
len_stdv real,
ident_avg real,
ident_stdv real,
- md5s char(32)[],
- source text NOT NULL
+ md5s integer[],
+ source integer NOT NULL
);
-- COPY job_organisms (version,job,id,abundance,exp_avg,exp_stdv,len_avg,len_stdv,ident_avg,ident_stdv,md5s,source) FROM 'FILE' WITH NULL AS '';
CREATE INDEX job_organisms_vj ON job_organisms (version, job);
@@ -101,8 +101,8 @@ CREATE TABLE job_rep_organisms (
len_stdv real,
ident_avg real,
ident_stdv real,
- md5s char(32)[],
- source text NOT NULL
+ md5s integer[],
+ source integer NOT NULL
);
CREATE INDEX job_rep_organisms_vj ON job_rep_organisms (version, job);
CREATE INDEX job_rep_organisms_id ON job_rep_organisms (id);
@@ -121,8 +121,8 @@ CREATE TABLE job_ontologies (
len_stdv real,
ident_avg real,
ident_stdv real,
- md5s char(32)[],
- source text NOT NULL
+ md5s integer[],
+ source integer NOT NULL
);
-- COPY job_ontologies (version,job,id,abundance,exp_avg,exp_stdv,len_avg,len_stdv,ident_avg,ident_stdv,md5s,source) FROM 'FILE' WITH NULL AS '';
CREATE INDEX job_ontologies_vj ON job_ontologies (version, job);
@@ -176,11 +176,12 @@ CREATE INDEX organisms_ncbi_tax_id ON organisms_ncbi (ncbi_tax_id);
DROP TABLE IF EXISTS md5_organism_unique;
CREATE TABLE md5_organism_unique (
-md5 char(32) NOT NULL,
+md5 integer NOT NULL,
organism integer NOT NULL,
-source text
+source integer NOT NULL
);
-CREATE INDEX md5_organism_unique_key ON md5_organism_unique (md5, source);
+CREATE INDEX md5_organism_unique_md5 ON md5_organism_unique (md5);
+CREATE INDEX md5_organism_unique_source ON md5_organism_unique (source);
DROP TABLE IF EXISTS ontologies;
CREATE TABLE ontologies (
@@ -190,7 +191,25 @@ CREATE TABLE ontologies (
level3 text,
level4 text,
name text,
- type text
+ source integer
);
CREATE INDEX ontologies_name ON ontologies (name);
-CREATE INDEX ontologies_type ON ontologies (type);
+CREATE INDEX ontologies_source ON ontologies (source);
+
+DROP TABLE IF EXISTS sources;
+CREATE TABLE sources (
+_id integer PRIMARY KEY,
+name text NOT NULL,
+type text NOT NULL
+);
+CREATE INDEX sources_name ON sources (name);
+CREATE INDEX sources_type ON sources (type);
+
+-- Lookup table that assigns a serial integer _id to each md5 checksum.
+DROP TABLE IF EXISTS md5s;
+CREATE TABLE md5s (
+_id SERIAL PRIMARY KEY,
+md5 char(32) NOT NULL,
+is_protein boolean
+);
+CREATE UNIQUE INDEX md5s_md5 ON md5s (md5);
+-- Deliberately not UNIQUE: a unique index on a boolean column would permit
+-- at most one TRUE row and one FALSE row in the whole table.
+CREATE INDEX md5s_protein ON md5s (is_protein);
View
9 src/MGRAST/lib/resources2/metagenome.pm
@@ -110,11 +110,12 @@ sub instance {
my $master = $self->connect_to_datasource();
# get data
- my $job = $master->Job->init( {metagenome_id => $id} );
- unless ($job && ref($job)) {
- $self->return_data( {"ERROR" => "id $id does not exists"}, 404 );
+ my $job = $master->Job->get_objects( {metagenome_id => $id} );
+ unless ($job && @$job) {
+ $self->return_data( {"ERROR" => "id $id does not exist"}, 404 );
}
-
+ $job = $job->[0];
+
# check rights
unless ($job->{public} || exists($self->rights->{$id})) {
$self->return_data( {"ERROR" => "insufficient permissions to view this data"}, 401 );
View
12 src/MGRAST/lib/resources2/notebook.pm
@@ -114,15 +114,17 @@ sub instance {
$self->return_data( {"ERROR" => "insufficient permissions to view this data"}, 401 );
}
- # clone node if requested
+ # clone node if requested (update shock attributes and ipynb metadata)
if (@{$self->rest} > 1) {
- my $attr = { type => $node->{attributes}{type},
- name => $node->{attributes}{name},
+ my $file = $self->json->decode( $self->get_shock_file($node->{id}) );
+ my $attr = { type => $node->{attributes}{type} || 'ipynb',
+ name => $node->{attributes}{name} || '',
user => $uname || 'public',
uuid => $self->rest->[1],
created => strftime("%Y-%m-%dT%H:%M:%S", localtime)
};
- my $clone = $self->create_virtual_shock_node($node->{id}, $attr);
+ $file->['metadata'] = $attr;
+ my $clone = $self->set_shock_node($node->{id}.'ipynb', $file, $attr);
$data = $self->prepare_data( [$clone] );
} else {
$data = $self->prepare_data( [$node] );
@@ -169,7 +171,7 @@ sub prepare_data {
my $url = $self->cgi->url;
my $obj = {};
$obj->{id} = $node->{id};
- $obj->{name} = $node->{attributes}{name};
+ $obj->{name} = $node->{attributes}{name} || '';
$obj->{uuid} = $node->{attributes}{uuid};
$obj->{created} = $node->{attributes}{created};
$obj->{version} = 1;
View
17 src/MGRAST/lib/resources2/resource.pm
@@ -355,6 +355,23 @@ sub create_virtual_shock_node {
}
}
+# POST a new node to $Conf::shock_url/node as multipart form-data.
+#   $name - used as the file name of the "upload" part; the "attributes"
+#           part is named "$name.json"
+#   $file - data structure, JSON-encoded and sent as the "upload" content
+#   $attr - data structure, JSON-encoded and sent as the "attributes" content
+# Returns the "D" member of the decoded JSON reply, or undef when the request
+# or decoding dies, the reply is not a JSON object, or its "E" member is true.
+# NOTE(review): the [undef, filename, Content => ...] form presumably relies on
+# HTTP::Request::Common form-data semantics via $self->agent - confirm.
+sub set_shock_node {
+ my ($self, $name, $file, $attr) = @_;
+
+ my $attr_str = $self->json->encode($attr);
+ my $file_str = $self->json->encode($file);
+ my $content = [ attributes => [undef, "$name.json", Content => $attr_str], upload => [undef, $name, Content => $file_str] ];
+ my $response = undef;
+ eval {
+ $response = $self->json->decode( $self->agent->post($Conf::shock_url.'/node', $content, Content_Type => 'form-data')->content );
+ };
+ # Require a HASH ref: any other reference (e.g. a decoded JSON array) would
+ # make the ->{E} lookup die here, outside the protection of the eval above.
+ if ($@ || (ref($response) ne 'HASH') || $response->{E}) {
+ return undef;
+ } else {
+ return $response->{D};
+ }
+}
+
sub get_shock_node {
my ($self, $id) = @_;
Something went wrong with that request. Please try again.