Permalink
Browse files

Chado adaptor: desc(), species(), translation seq added; keep peptide

  • Loading branch information...
dongilbert committed Mar 27, 2007
1 parent 708a4d2 commit 89b0eafc5111ef2c817101eefbeb616e9fb320ae
Showing with 179 additions and 25 deletions.
  1. +125 −15 lib/Bio/DB/Das/Chado.pm
  2. +41 −8 lib/Bio/DB/Das/Chado/Segment.pm
  3. +13 −2 lib/Bio/DB/Das/Chado/Segment/Feature.pm
View
@@ -1,4 +1,4 @@
# $Id: Chado.pm,v 1.68.4.9.2.12.2.2 2007-03-23 21:02:38 briano Exp $
# $Id: Chado.pm,v 1.68.4.9.2.12.2.3 2007-03-27 23:21:39 dongilbert Exp $
=head1 NAME
@@ -95,6 +95,7 @@ use Carp qw(longmess);
use vars qw($VERSION @ISA);
use constant SEGCLASS => 'Bio::DB::Das::Chado::Segment';
use constant MAP_REFERENCE_TYPE => 'MapReferenceType'; #dgg
use constant DEBUG => 0;
$VERSION = 0.11;
@@ -141,7 +142,7 @@ sub new {
# get the cvterm relationships here and save for later use
my $cvterm_query="select ct.cvterm_id,ct.name
my $cvterm_query="select ct.cvterm_id,ct.name as name, c.name as cvname
from cvterm ct, cv c
where ct.cv_id=c.cv_id and
(c.name IN (
@@ -161,13 +162,18 @@ sub new {
# my $cvname = {};
my(%term2name,%name2term) = ({},{});
my %termcv=();
while (my $hashref = $sth->fetchrow_hashref) {
$term2name{ $hashref->{cvterm_id} } = $hashref->{name};
$termcv{ $hashref->{cvterm_id} } = $hashref->{cvname}; # dgg
#this addresses a bug in gmod_load_gff3 (Scott!), which creates a 'part_of'
#term in addition to the OBO_REL one that already exists! this will also
#help with names that exist in both GO and SO, like 'protein'.
# dgg: but this array is bad for callers of name2term() who expect scalar result
# mostly want only sofa terms
if(defined($name2term{ $hashref->{name} })){ #already seen this name
if(ref($name2term{ $hashref->{name} }) ne 'ARRAY'){ #already array-converted
@@ -186,7 +192,7 @@ sub new {
}
$self->term2name(\%term2name);
$self->name2term(\%name2term);
$self->name2term(\%name2term, \%termcv);
#Recursive Mapping
$self->recursivMapping($arg{-recursivMapping} ? $arg{-recursivMapping} : 0);
@@ -389,11 +395,23 @@ Note: Should be replaced by Bio::GMOD::Util->name2term
sub name2term {
my $self = shift;
my $arg = shift;
my $cvnames = shift;
if(ref($cvnames) eq 'HASH'){ $self->{'termcvs'} = $cvnames; }
if(ref($arg) eq 'HASH'){
return $self->{'name2term'} = $arg;
} elsif($arg) {
return $self->{'name2term'}{$arg};
my $val= $self->{'name2term'}{$arg};
if(ref($val)) {
#? use $cvnames scalar here to pick which cv?
my @val= @$val;
foreach $val (@val) {
my $cv= $self->{'termcvs'}{$val};
return $val if($cv =~ /^(SO|sequence)/i); # want sofa_id
}
return $val[0]; #? 1st is best guess
}
return $val;
} else {
return $self->{'name2term'};
}
@@ -447,13 +465,13 @@ sub segment {
my $self = shift;
my ($name,$base_start,$stop,$end,$class,$version,$db_id,$feature_id)
= $self->_rearrange([qw(NAME
START
STOP
END
CLASS
VERSION
DB_ID
FEATURE_ID )],@_);
START
STOP
END
CLASS
VERSION
DB_ID
FEATURE_ID )],@_);
# lets the Segment class handle all the lifting.
$end ||= $stop;
@@ -1182,10 +1200,10 @@ attributes depend on implementation) and returns a list of
$description is a human-readable description such as a locus line, and
$score is the match strength.
search_notes is the sub to support keyword wildcard searching
=cut
=head2 ** NOT YET ACTIVE: search_notes IS IN TESTING STAGE **
sub search_notes {
my $self = shift;
my ($search_string,$limit) = @_;
@@ -1286,6 +1304,31 @@ sub attributes {
my @array = @$arrayref;
return () if scalar @array == 0;
## dgg; ugly patch to copy polypeptide/protein residues into 'translation' attribute
# need to add to gfffeatureatts ..
if (!defined $tag || $tag eq 'translation') {
$sth = $self->dbh->prepare("select type_id from feature where feature_id = ?");
$sth->execute($feature_id); # or $self->throw("failed to get feature_id in attributes");
$hashref = $sth->fetchrow_hashref;
my $type_id = $$hashref{'type_id'};
## warn("DEBUG: dgg ugly prot. patch; type=$type_id for ftid=$feature_id\n");
if( $type_id == $self->name2term('polypeptide')
|| $type_id == $self->name2term('protein')
) {
$sth = $self->dbh->prepare("select residues from feature where feature_id = ?");
$sth->execute($feature_id); # or $self->throw("failed to get feature_id in attributes");
$hashref = $sth->fetchrow_hashref;
my $aa = $$hashref{'residues'};
if($aa) {
## warn("DEBUG: dgg ugly prot. patch; aalen=",length($aa),"\n");
## this wasn't working till I added in a featureprop 'translation=dummy' .. why?
if($tag) { push( @array, [ $aa]); }
else { push( @array, ['translation', $aa]); }
}
}
}
my @result;
foreach my $lineref (@array) {
my @la = @$lineref;
@@ -1325,12 +1368,79 @@ sub default_class {
my $self = shift;
#dgg
unless( $self->{'reference_class'} || @_ ) {
$self->{'reference_class'} = $self->chado_reference_class();
}
if(@_) {
my $checkref = $self->check_chado_reference_class(@_);
unless($checkref) {
$self->throw("unable to find reference_class '$_[0]' feature in the database");
}
}
$self->{'reference_class'} = shift || 'Sequence' if(@_);
return $self->{'reference_class'};
}
sub check_chado_reference_class {
my $self = shift;
if(@_) {
my $refclass= shift;
my $type_id = $self->name2term($refclass);
my $query = "select feature_id from feature where type_id = ?";
my $sth = $self->dbh->prepare($query);
$sth->execute($type_id) or $self->throw("trying to find chado_reference_class");
my $data = $sth->fetchrow_hashref();
my $refid= $$data{'feature_id'};
## warn("check_chado_reference_class: $refclass = $type_id -> $refid"); # DEBUG
return $refid;
}
}
=head2 chado_reference_class
Title : chado_reference_class
Usage : $obj->chado_reference_class()
Function: get or return the ID to use for Gbrowse map reference class
using cvtermprop table, value = MAP_REFERENCE_TYPE
Returns : the cvterm.name
Args : to return the id, none; to determine the id, 1
See also: default_class, refclass_feature_id
Optionally test that user/config supplied ref class is indeed a proper
chado feature type.
=cut
sub chado_reference_class {
my $self = shift;
return $self->{'chado_reference_class'} if($self->{'chado_reference_class'});
my $chado_reference_class='Sequence'; # default ?
my $query = "select cvterm_id from cvtermprop where value = ?";
my $sth = $self->dbh->prepare($query);
$sth->execute(MAP_REFERENCE_TYPE) or $self->throw("trying to find chado_reference_class");
my $data = $sth->fetchrow_hashref(); #? FIXME: could be many values *?
my $ref_cvtermid = $$data{'cvterm_id'};
if($ref_cvtermid) {
$query = "select name from cvterm where cvterm_id = ?";
$sth = $self->dbh->prepare($query);
$sth->execute($ref_cvtermid) or $self->throw("trying to find chado_reference_class");
$data = $sth->fetchrow_hashref();
$chado_reference_class = $$data{'name'} if ($$data{'name'});
# warn("chado_reference_class: $chado_reference_class = $ref_cvtermid"); # DEBUG
}
return $self->{'chado_reference_class'} = $chado_reference_class;
}
=head2 refclass_feature_id
Title : refclass_feature_id
@@ -1,4 +1,4 @@
# $Id: Segment.pm,v 1.84.4.9.2.19.2.2 2007-03-22 02:57:02 scottcain Exp $
# $Id: Segment.pm,v 1.84.4.9.2.19.2.3 2007-03-27 23:21:39 dongilbert Exp $
=head1 NAME
@@ -90,10 +90,14 @@ package Bio::DB::Das::Chado::Segment;
use strict;
use Carp qw(carp croak cluck);
use Bio::Root::Root;
use Bio::SeqI;
use Bio::Das::SegmentI;
use Bio::DB::Das::Chado;
use Bio::DB::Das::Chado::Segment::Feature;
use Bio::DB::GFF::Typename;
use Data::Dumper;
#dgg;not working# use Bio::Species;
use constant DEBUG => 0;
use vars '@ISA','$VERSION';
@@ -115,7 +119,7 @@ sub new {
my $strand;
warn "$name, $factory\n" if DEBUG;
warn "na:$name, id:$db_id, $factory\n" if DEBUG;
warn "base_start = $base_start, stop = $stop\n" if DEBUG;
# clicking on the help in gbrowse calls this constructor without a
# name. return to avoid performances issues
@@ -376,18 +380,18 @@ sub _search_by_name {
my ($factory,$quoted_name,$db_id,$feature_id) = @_;
my $sth;
if ($feature_id) {
if ($feature_id) {
$sth = $factory->dbh->prepare("
select name,feature_id,seqlen from feature
where feature_id = $feature_id");
}
elsif ($db_id) {
}
elsif ($db_id) {
$sth = $factory->dbh->prepare ("
select name,feature_id,seqlen from feature
where uniquename = \'$db_id\' ");
}
else {
}
else {
$sth = $factory->dbh->prepare ("
select name,feature_id,seqlen from feature
where lower(name) = $quoted_name ");
@@ -1466,7 +1470,36 @@ just giving back the name.
=cut
*display_id = *display_name = *accession_number = *desc = \&name;
*display_id = *display_name = *accession_number = \&name;
# *desc =
#dgg patch for SeqI.desc -- use ref segment Note property for description
sub desc {
my $self= shift;
return $self->{'desc'} if defined $self->{'desc'};
my $sth = $self->factory->dbh->prepare( "select value from featureprop
where feature_id = ? and type_id = (select cvterm_id from cvterm where name = 'Note') ");
$sth->execute( $self->srcfeature_id );
my $hashref = $sth->fetchrow_hashref();
return $self->{'desc'}= $hashref->{value};
}
#dgg patch for SeqI -- Bio::SeqI::species
sub species {
my $self= shift;
return $self->{'species'} if defined $self->{'species'};
my $sth = $self->factory->dbh->prepare( "select genus,species from organism
where organism_id = (select organism_id from feature where feature_id = ?) ");
$sth->execute( $self->srcfeature_id );
my $hashref = $sth->fetchrow_hashref();
## this is dying; why? dgg
# my $spp= Bio::Species->new( -classification => [ $hashref->{species}, $hashref->{genus} ] );
my $spp= $hashref->{genus}.' '.$hashref->{species}; # works for display uses
return $self->{'species'}= $spp;
}
=head2 get_feature_stream
@@ -1101,13 +1101,24 @@ sub sub_SeqFeature {
warn @ok_feats if DEBUG;
return @ok_feats;
}
elsif ($inferCDS) {
=item Argh...! DONT DROP THE PROTEIN FEATURE
dgg: polypeptide or protein is a most important feature, don't drop it!
This is the part of a gene that has lots of attached critical info:
protein ID, translation, GO terms, Dbxrefs to other proteins)
While this exclusion fixes a display bug, e.g. Glyph/processed_transcript
it is much less problematic to patch the glyph displayers.
elsif ( 0 && $inferCDS) {
#just remove polypeptide features
my @ok_feats = grep {$_->type->method ne 'polypeptide'} @features;
warn @ok_feats if DEBUG;
return @ok_feats;
}
=cut
return @features;
}

0 comments on commit 89b0eaf

Please sign in to comment.