Skip to content

Commit

Permalink
new scaffold for a Gadfly adaptor layer
Browse files Browse the repository at this point in the history
  • Loading branch information
lstein committed Jul 29, 2002
1 parent ca4d23f commit 2dba83e
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 12 deletions.
36 changes: 35 additions & 1 deletion lib/Bio/DB/Gadfly/Adaptor.pm
Expand Up @@ -113,6 +113,29 @@ sub segment {
return;
}

# things that user could ask for (ss = "search string")
# WARNING WARNING WARNING: gadfly uses interbase coordinates
# 1. gene
# name ("CG" name)
# symbol (fruitfly nomenclature official name)
# synonym (may return multiple results)
# SOLUTION: fetch object via lget_Gene with constraint name|symbol|synonym=>ss
# get start, end, and seq_object and throw out rest
# 2. primary sequence
# BUT, what user really wants is alignment of this primary sequence
# to genome (via a resultset)
# SOLUTION: fetch object via lget_ResultSet with constraint subject_seq=>ss
# get start, end and seq_object and throw out rest
# 3. chromosome arm
# SOLUTION: fetch primary seq from database
# get start, end and seq_object and throw out rest
#
# 4. cytological range
# just feed into CytoRange object
# SOLUTION: if a range is given, construct a CytoRange object (BioModel::CytoRange->new(-range_string=>$string)),
# then call start_band() and end_band() to get the band objects.
# get_CytoBand({name=>$range->start_band()->name}) will then give the coordinates

=head2 @classes = $db->classes;
Return the namespaces known to the database.
Expand All @@ -121,9 +144,20 @@ Return the namespaces known to the database.

# hard-coded classes
sub classes {
    # Returns the hard-coded list of namespaces (classes) known to the database.
    # There is exactly one return statement: a preceding
    # "return qw(name symbol);" made this extended list unreachable.
    return qw(name symbol synonym sequence arm);
}

# this is not very useful and shouldn't be using SQL
# useful:

# object constraint
# ------ ----------
# gene comment
# transcript_comment
# term
# dbxref
# primary_seq description

=head2 @result = $db->search_notes('search string')
Perform a text search on database. Result is an array of [$name,$description,$score].
Expand Down
59 changes: 48 additions & 11 deletions lib/Bio/DB/Gadfly/Segment.pm
Expand Up @@ -30,6 +30,9 @@ BioModel::AnnotatedSeq. We evaluate in a lazy way.
=cut

# TODO: as an alternative to lazy evaluation, offer an option in which new() calls GET_ANNOTATEDSEQ(),
# stores the result, and then greps it for features of certain types

sub new {
my $class = shift;
my ($name,$start,$end,$gx,$other) = rearrange([qw(NAME START END GX)],@_);
Expand Down Expand Up @@ -67,13 +70,26 @@ Return the end coordinate using bioperl conventions

sub end {
    # Accessor: hand back whatever is stored under the object's "end" key.
    my $self = shift;
    return $self->{end};
}

=head2 @features = $db->features(@args);
=head2 @features = $db->overlapping_features(@args);
Return a list of Bio::SeqFeatureI objects that overlap this segment.
=cut

sub features {
# gadfly types
# annotation type space
# biological names like "gene", "snRNA", etc
# annotation:gene, annotation:snRNA
# analysis type space
# "blastx against nonfly"...
# analysis:blastx_against_nonfly
# scaffold type space
# scaffold(:nothing)

# chris will implement a to_bioperl_feature() method in the
# genes, analyses and scaffolds

sub overlapping_features {
my $self = shift;
my ($types,$rangetype,$attributes,$iterator);

Expand All @@ -95,7 +111,7 @@ sub features {
# everything else is handled as an analysis
my %types = map {$_=>1} @$types;
my @gene_types = grep {/^transcript(:|$ )/x} keys %types;
my @segment_types = grep {/^segment(:|$ )/x} keys %types;
my @scaffold_types = grep {/^scaffold(:|$ )/x} keys %types;
my %special_types = map {$_=>1} (@gene_types,@segment_types);
my @analysis_types = grep {!$special_types{$_}} keys %types;

Expand All @@ -104,30 +120,38 @@ sub features {
my @features;

if ($all_types or @gene_types) {
push @features,$self->_get_genes($segment,\@gene_types);
push @features,map {$_->to_bioperl_feature} $self->_get_genes($segment,\@gene_types);
}
if ($all_types or @segment_types) {
push @features,$self->_get_segments($segment,\@segment_types);
if ($all_types or @scaffold_types) {
push @features,map {$_->to_bioperl_feature} $self->_get_scaffolds($segment,\@scaffold_types);
}
if ($all_types or @analysis_types) {
push @features,$self->_get_analyses($segment,\@analysis_types);
push @features,map {$_->to_bioperl_feature} $self->_get_analyses($segment,\@analysis_types);
}

# iteration doesn't really help us with gadfly, but we support it because
# it's part of the interface.
if ($iterator) {
return Bio::DB::Gadfly::SegmentIterator->new(\@analysis_types);
return Bio::DB::Gadfly::SegmentIterator->new(\@features);
}

elsif ($callback) {
my $continue = 1;
map { $continue &&= $callback->($_)} @features;
foreach (@features) {
my $result = $callback->($_);
last unless $result;
}
return scalar @features;
}

else {
return @features;
}
}

#NOTE: the Bio::SeqIO::game class should be able to dump out GAME/XML
# from these objects by using the gx() method and the start/end position
# information.

=head2 $gx_adaptor = $db->gx([$new_adaptor])
Get or set the underlying GxAdapter (note spelling difference).
Expand All @@ -144,12 +168,25 @@ sub gx {
sub _get_genes {
    # Stub for fetching the gene objects belonging to the given segment(s).
    #   $segments - segment(s) to search, as supplied by the caller
    #   $types    - presumably an array ref of gene type strings (verify against caller)
    # Returns an empty list (undef in scalar context) until implemented.
    my ($self, $segments, $types) = @_;

    # TODO: implement with one of
    #   $gx->lget_Genes()   for lazy evaluation
    #   $as->gene_list()    for non-lazy evaluation

    # Explicit return: without it the sub's value is that of the argument
    # unpacking above, which hands the caller's own arguments back as "genes".
    return;
}

sub _get_segments {
sub _get_scaffolds {
    # Stub: nothing is fetched yet.  Two candidate implementations:
    #
    #   lazy evaluation
    #     @scaffolds = $gx->lget_ResultSet({constraints...,analysis_program=>'tiling_path'});
    #
    #   non-lazy evaluation
    #     $analysis  = $as->get_analysis('tiling_path')
    #     @scaffolds = $analysis->result_set_list()
    #
    # In both cases @scaffolds are result sets.
    return;
}

sub _get_analyses {
    # Stub: nothing is fetched yet.  Two candidate implementations:
    #
    #   lazy evaluation
    #     @analyses = $gx->lget_ResultSet({constraints...,analysis_types=>['types'...]})
    #
    #   non-lazy evaluation
    #     $all_analyses = $as->analysis_list()
    #     foreach ($all_analyses) { next unless $_->get_property('type'); do something }
    return;
}

sub _tosegment {
Expand Down

0 comments on commit 2dba83e

Please sign in to comment.