Skip to content

Commit

Permalink
Merge 3d64a02 into 5308e4c
Browse files Browse the repository at this point in the history
  • Loading branch information
tgrego committed Feb 18, 2019
2 parents 5308e4c + 3d64a02 commit 01a61ff
Show file tree
Hide file tree
Showing 62 changed files with 244 additions and 65 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ a.out
*.obj
*.class
MultiTestDB.conf
MultiTestDB.*.conf
*.frozen.conf
bioperl*.zip
/modules/t/CLEAN.t
Expand Down
27 changes: 25 additions & 2 deletions modules/Bio/EnsEMBL/Biotype.pm
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ use parent qw(Bio::EnsEMBL::Storable);
string - the name of the biotype group (for ensembl)
Arg [-SO_ACC] :
string - the Sequence Ontology accession of this biotype
Arg [-SO_TERM] :
string - the Sequence Ontology term for the SO accession of this biotype
Arg [-DESCRIPTION] :
string - the biotype description
Arg [-DB_TYPE] :
Expand All @@ -107,14 +109,15 @@ sub new {

my $self = $class->SUPER::new();

my($dbID, $name, $object_type, $biotype_group, $so_acc, $description, $db_type, $attrib_type_id) =
rearrange([qw(BIOTYPE_ID NAME OBJECT_TYPE BIOTYPE_GROUP SO_ACC DESCRIPTION DB_TYPE ATTRIB_TYPE_ID)], @args);
my($dbID, $name, $object_type, $biotype_group, $so_acc, $so_term, $description, $db_type, $attrib_type_id) =
rearrange([qw(BIOTYPE_ID NAME OBJECT_TYPE BIOTYPE_GROUP SO_ACC SO_TERM DESCRIPTION DB_TYPE ATTRIB_TYPE_ID)], @args);

$self->{'dbID'} = $dbID;
$self->{'name'} = $name;
$self->{'object_type'} = $object_type;
$self->{'biotype_group'} = $biotype_group;
$self->{'so_acc'} = $so_acc;
$self->{'so_term'} = $so_term;
$self->{'description'} = $description;
$self->{'db_type'} = $db_type;
$self->{'attrib_type_id'} = $attrib_type_id;
Expand Down Expand Up @@ -215,6 +218,26 @@ sub so_acc {
return $self->{'so_acc'};
}

=head2 so_term
Arg [1] : (optional) string $so_term
Example : $biotype->so_term();
Description: Getter/Setter for the Sequence Ontology term of this biotype.
Returntype : string
Exceptions : none
=cut

# Combined getter/setter for the Sequence Ontology term stored on this object.
# With a defined argument the stored value is replaced first; the (possibly
# updated) stored value is always returned.
sub so_term {
  my $self = shift;

  # Only a defined argument acts as a setter, so passing undef (or nothing)
  # leaves the current value untouched.
  $self->{'so_term'} = $_[0] if defined $_[0];

  return $self->{'so_term'};
}

=head2 object_type
Arg [1] : (optional) string $object_type
Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/CDS.pm
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,10 @@ use Scalar::Util qw(weaken isweak);

@ISA = qw(Bio::EnsEMBL::Feature);

use constant SO_ACC => 'SO:0000316';
# Sequence Ontology (SO) accession and matching term for this feature class.
use constant SEQUENCE_ONTOLOGY => {
  'term' => 'CDS',
  'acc'  => 'SO:0000316',
};

=head2 new
Expand Down
7 changes: 4 additions & 3 deletions modules/Bio/EnsEMBL/DBSQL/BiotypeAdaptor.pm
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ sub _tables {
sub _columns {
my $self = shift;

return ('b.biotype_id', 'b.name', 'b.object_type', 'b.db_type', 'b.attrib_type_id', 'b.description', 'b.biotype_group', 'b.so_acc');
return ('b.biotype_id', 'b.name', 'b.object_type', 'b.db_type', 'b.attrib_type_id', 'b.description', 'b.biotype_group', 'b.so_acc', 'b.so_term');
}

=head2 _objs_from_sth
Expand All @@ -107,9 +107,9 @@ sub _columns {
sub _objs_from_sth {
my ($self, $sth) = @_;

my ($dbID, $name, $object_type, $db_type, $attrib_type_id, $description, $biotype_group, $so_acc);
my ($dbID, $name, $object_type, $db_type, $attrib_type_id, $description, $biotype_group, $so_acc, $so_term);

$sth->bind_columns(\$dbID, \$name, \$object_type, \$db_type, \$attrib_type_id, \$description, \$biotype_group, \$so_acc);
$sth->bind_columns(\$dbID, \$name, \$object_type, \$db_type, \$attrib_type_id, \$description, \$biotype_group, \$so_acc, \$so_term);

my @biotypes;

Expand All @@ -124,6 +124,7 @@ sub _objs_from_sth {
'description' => $description,
'biotype_group' => $biotype_group,
'so_acc' => $so_acc,
'so_term' => $so_term,
} )
);
}
Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/DnaDnaAlignFeature.pm
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ use Bio::EnsEMBL::Utils::Exception qw(throw);

@ISA = qw( Bio::EnsEMBL::BaseAlignFeature );

use constant SO_ACC => 'SO:0000347';
# Sequence Ontology (SO) accession and matching term for this feature class.
use constant SEQUENCE_ONTOLOGY => {
  'term' => 'nucleotide_match',
  'acc'  => 'SO:0000347',
};

=head2 new
Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/DnaPepAlignFeature.pm
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,10 @@ use vars qw(@ISA);

@ISA = qw( Bio::EnsEMBL::BaseAlignFeature );

use constant SO_ACC => 'SO:0000349';
# Sequence Ontology (SO) accession and matching term for this feature class.
use constant SEQUENCE_ONTOLOGY => {
  'term' => 'protein_match',
  'acc'  => 'SO:0000349',
};

=head2 _hit_unit
Expand Down
6 changes: 4 additions & 2 deletions modules/Bio/EnsEMBL/Exon.pm
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ use Bio::EnsEMBL::DBSQL::SupportingFeatureAdaptor;
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Feature);

use constant SO_ACC => 'SO:0000147';

# Sequence Ontology (SO) accession and matching term for this feature class.
use constant SEQUENCE_ONTOLOGY => {
  'term' => 'exon',
  'acc'  => 'SO:0000147',
};

=head2 new
Expand Down
39 changes: 33 additions & 6 deletions modules/Bio/EnsEMBL/Feature.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1449,9 +1449,9 @@ sub get_nearest_Gene {
=head2 feature_so_acc
Description: This method returns a string containing the SO accession number of the feature
Define constant SO_ACC in classes that require it, or override it for multiple possible values for a class.
Define constant SEQUENCE_ONTOLOGY in classes that require it, or override it for multiple possible values for a class.
Returntype : String (Sequence Ontology accession number)
Exceptions : Thrown if caller SO_ACC is undefined and is not a Bio::EnsEMBL::Feature
Exceptions : Thrown if caller SEQUENCE_ONTOLOGY is undefined and is not a Bio::EnsEMBL::Feature
=cut

Expand All @@ -1463,16 +1463,43 @@ sub feature_so_acc {

# Get the caller class SO acc
try {
$so_acc = $ref->SO_ACC;
$so_acc = $ref->SEQUENCE_ONTOLOGY->{'acc'};
};
unless ($so_acc || $ref eq 'Bio::EnsEMBL::Feature' ) {
throw( "constant SO_ACC in ${ref} is not defined");

if (!$so_acc && $ref ne 'Bio::EnsEMBL::Feature' ) {
throw( "constant SEQUENCE_ONTOLOGY in ${ref} is not defined");
}

return $so_acc // 'SO:0000001';
}

=head2 feature_so_term
Description: This method returns a string containing the SO term of the feature
Define constant SEQUENCE_ONTOLOGY in classes that require it, or override it for multiple possible values for a class.
Returntype : String (Sequence Ontology term)
Exceptions : Thrown if caller SEQUENCE_ONTOLOGY is undefined and is not a Bio::EnsEMBL::Feature
=cut

# Returns the Sequence Ontology term declared by the invocant's class through
# its SEQUENCE_ONTOLOGY constant. The base class Bio::EnsEMBL::Feature falls
# back to 'region'; any other class without a usable term raises an exception.
sub feature_so_term {
  my $self = shift;

  my $class = ref $self;
  my $term;

  # Read the SO term from the caller's class. No catch block is attached, so
  # a failed lookup is expected to leave $term undefined
  # (NOTE(review): presumably Try::Tiny's try, imported outside this view).
  try {
    $term = $class->SEQUENCE_ONTOLOGY->{'term'};
  };

  # A true term is returned as is. The base class may also return without one:
  # a defined-but-false term is passed through, only undef becomes 'region'.
  return $term // 'region' if $term || $class eq 'Bio::EnsEMBL::Feature';

  throw( "constant SEQUENCE_ONTOLOGY in ${class} is not defined");
}

=head2 summary_as_hash
Example : $feature_summary = $feature->summary_as_hash();
Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/Gene.pm
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,10 @@ use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);

use parent qw(Bio::EnsEMBL::Feature);

use constant SO_ACC => 'SO:0000704';
# Sequence Ontology (SO) accession and matching term for this feature class.
use constant SEQUENCE_ONTOLOGY => {
  'term' => 'gene',
  'acc'  => 'SO:0000704',
};

=head2 new
Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/KaryotypeBand.pm
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,10 @@ use Bio::EnsEMBL::Utils::Exception qw(warning);

@ISA = qw(Bio::EnsEMBL::Feature);

use constant SO_ACC => 'SO:0000341';
# Sequence Ontology (SO) accession and matching term for this feature class.
use constant SEQUENCE_ONTOLOGY => {
  'term' => 'chromosome_band',
  'acc'  => 'SO:0000341',
};

=head2 new
Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/MiscFeature.pm
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,10 @@ use vars qw(@ISA);

@ISA = qw(Bio::EnsEMBL::Feature);

use constant SO_ACC => 'SO:0001411';
# Sequence Ontology (SO) accession and matching term for this feature class.
use constant SEQUENCE_ONTOLOGY => {
  'term' => 'biological_region',
  'acc'  => 'SO:0001411',
};

=head2 new
Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/RepeatFeature.pm
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,10 @@ use Bio::EnsEMBL::Utils::Argument qw(rearrange);

use base qw/Bio::EnsEMBL::Feature/;

use constant SO_ACC => 'SO:0000657';
# Sequence Ontology (SO) accession and matching term for this feature class.
use constant SEQUENCE_ONTOLOGY => {
  'term' => 'repeat_region',
  'acc'  => 'SO:0000657',
};

=head2 new
Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/SimpleFeature.pm
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ use Scalar::Util qw(weaken isweak);

@ISA = qw(Bio::EnsEMBL::Feature);

use constant SO_ACC => 'SO:0001411';
# Sequence Ontology (SO) accession and matching term for this feature class.
use constant SEQUENCE_ONTOLOGY => {
  'term' => 'biological_region',
  'acc'  => 'SO:0001411',
};

=head2 new
Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/Transcript.pm
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@ use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );

use parent qw(Bio::EnsEMBL::Feature);

use constant SO_ACC => 'SO:0000673';
# Sequence Ontology (SO) accession and matching term for this feature class.
use constant SEQUENCE_ONTOLOGY => {
  'term' => 'transcript',
  'acc'  => 'SO:0000673',
};

=head2 new
Expand Down
28 changes: 25 additions & 3 deletions modules/Bio/EnsEMBL/UTR.pm
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,14 @@ sub type {

# package variable to minimize duplication
my %utr_type_so_mapping = (
'five_prime_utr' => 'SO:0000204',
'three_prime_utr' => 'SO:0000205'
'five_prime_utr' => {
acc => 'SO:0000204',
term => 'five_prime_UTR',
},
'three_prime_utr' => {
acc => 'SO:0000205',
term => 'three_prime_UTR',
}
);

=head2 feature_so_acc
Expand All @@ -240,7 +246,23 @@ sub feature_so_acc {
my $self = shift;

# return UTR type SO acc, or UTR acc
return $utr_type_so_mapping{$self->type} // 'SO:0000203';
return $utr_type_so_mapping{$self->type}->{'acc'} // 'SO:0000203';
}

=head2 feature_so_term
Example : print $utr->feature_so_term;
Description: This method returns a string containing the SO term of the UTR, based on type.
Overrides Bio::EnsEMBL::Feature::feature_so_term
Returntype : string (Sequence Ontology term)
=cut

# Returns the Sequence Ontology term for this UTR, selected by the value of
# $self->type. Falls back to the generic 'UTR' term when the type has no
# entry (or no 'term') in %utr_type_so_mapping.
sub feature_so_term {
  my $self = shift;

  my $type = $self->type;

  # Look the entry up in two steps. Dereferencing
  # $utr_type_so_mapping{$type}->{'term'} in one expression would autovivify
  # an empty hash in the shared mapping for every unmapped type, and would
  # warn about an uninitialized hash key when the type is undef.
  my $so_entry = defined $type ? $utr_type_so_mapping{$type} : undef;
  my $so_term  = $so_entry ? $so_entry->{'term'} : undef;

  # return UTR type SO term, or the generic UTR term
  return $so_term // 'UTR';
}

=head2 summary_as_hash
Expand Down
8 changes: 6 additions & 2 deletions modules/t/biotype.t
Original file line number Diff line number Diff line change
Expand Up @@ -52,19 +52,21 @@ is($biotype1->object_type, 'gene', 'Biotype is from Gene object');
is($biotype1->name, 'protein_coding', 'Biotype name is protein_coding');
is($biotype1->biotype_group, 'coding', 'Biotype group is coding');
is($biotype1->so_acc, 'SO:0001217', 'Biotype protein_coding refers to SO:0001217');
is($biotype1->so_term, 'protein_coding_gene', 'Biotype protein_coding refers to SO term protein_coding_gene');
throws_ok { $biotype1->so_acc('test') } qr/so_acc must be a Sequence Ontology accession/, 'so_acc() requires a SO acc like string';
throws_ok { $biotype1->object_type('test') } qr/object_type must be gene or transcript/, 'object_type() must be gene or transcript';

# test transcript biotype object
my $transcript = $gene->canonical_transcript;
debug("transcript biotype");
is($transcript->biotype, 'protein_coding', "Trancript biotype is protein_coding");
is($transcript->biotype, 'protein_coding', "Transcript biotype is protein_coding");
my $biotype2 = $transcript->get_Biotype;
ok($biotype2->isa("Bio::EnsEMBL::Biotype"), "Biotype object retrieved successfully");
is($biotype2->object_type, 'transcript', 'Biotype is from Transcript object');
is($biotype2->name, 'protein_coding', 'Biotype name is protein_coding');
is($biotype2->biotype_group, 'coding', 'Biotype group is coding');
is($biotype2->so_acc, 'SO:0000234', 'Biotype protein_coding refers to SO:0000234');
is($biotype2->so_term, 'mRNA', 'Biotype protein_coding refers to SO term mRNA');
ok($transcript->set_Biotype('new_biotype'), "Can successfully set new_biotype");
throws_ok { $gene->set_Biotype() } qr/No argument provided/, 'set_Biotype() requires an argument';

Expand All @@ -77,6 +79,7 @@ is($biotype3->object_type, 'gene', 'Biotype is from Gene object');
is($biotype3->name, 'tRNA', 'Biotype name is tRNA');
is($biotype3->biotype_group, 'snoncoding', 'Biotype group is snoncoding');
is($biotype3->so_acc, 'SO:0001263', 'Biotype tRNA refers to SO:0001263');
# The biotype under test here is tRNA, so the description names it accordingly.
is($biotype3->so_term, 'ncRNA_gene', 'Biotype tRNA refers to SO term ncRNA_gene');

# set biotype with term not in database
debug("set biotype with term not in db");
Expand All @@ -87,6 +90,7 @@ is($biotype4->object_type, 'gene', 'Biotype is from Gene object');
is($biotype4->name, 'dummy', 'Biotype name is dummy');
is($biotype4->biotype_group, undef, 'Biotype group is not set');
is($biotype4->so_acc, undef, 'Biotype SO acc is not set');
is($biotype4->so_term, undef, 'Biotype SO term is not set');
throws_ok { $gene->set_Biotype() } qr/No argument provided/, 'set_Biotype() requires an argument';

# test fetch biotypes of object_type gene
Expand All @@ -95,7 +99,7 @@ my $biotypes1 = $biotype_adaptor->fetch_all_by_object_type('gene');
is(ref $biotypes1, 'ARRAY', 'Got an array');
is(scalar @{$biotypes1}, '2', 'of size 2');
is_deeply($biotypes1, [$biotype1, $biotype3], 'with the correct objects');
my $warning1 = warning {
my $warning1 = warning {
$biotypes1 = $biotype_adaptor->fetch_all_by_object_type('none') };
like( $warning1,
qr/No objects retrieved. Check if object_type 'none' is correct./,
Expand Down
8 changes: 4 additions & 4 deletions modules/t/cds.t
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,10 @@ ok($db);

my $stable_id = 'ENST00000217347';
my $transcript_adaptor = $db->get_TranscriptAdaptor();
my $transcript =
$transcript_adaptor->fetch_by_stable_id($stable_id);
my $transcript = $transcript_adaptor->fetch_by_stable_id($stable_id);


my @cds = @{ $transcript->get_all_CDS() };
my @cds = @{ $transcript->get_all_CDS() };
my @exons = @{ $transcript->get_all_translateable_Exons() };
my $n = scalar(@cds);

Expand All @@ -50,6 +49,7 @@ for (my $i = 0; $i < $n; $i++) {

is($cds[0]->start, $transcript->coding_region_start, "First cds is coding start");
is($cds[$n-1]->end, $transcript->coding_region_end, "Last cds is coding end");
is($cds[0]->feature_so_acc, 'SO:0000316', 'CDS feature SO acc is correct (CDS)');;
is($cds[0]->feature_so_acc, 'SO:0000316', 'CDS feature SO acc is correct (CDS)');
# The first CDS must report the SO term declared for the CDS feature class.
is($cds[0]->feature_so_term, 'CDS', 'CDS feature SO term is correct (CDS)');

done_testing();
1 change: 1 addition & 0 deletions modules/t/dnaDnaAlignFeature.t
Original file line number Diff line number Diff line change
Expand Up @@ -159,5 +159,6 @@ ok($f);


is($dnaf->feature_so_acc, 'SO:0000347', 'DnaDnaAlignFeature feature SO acc is correct (nucleotide_match)');
is($dnaf->feature_so_term, 'nucleotide_match', 'DnaDnaAlignFeature feature SO term is correct (nucleotide_match)');

done_testing();
1 change: 1 addition & 0 deletions modules/t/dnaPepAlignFeature.t
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ ok($dnaf->end == 16);
ok( scalar($dnaf->ungapped_features) == 2);

is($dnaf->feature_so_acc, 'SO:0000349', 'dnaPepAlignFeature feature SO acc is correct (protein_match)');
is($dnaf->feature_so_term, 'protein_match', 'dnaPepAlignFeature feature SO term is correct (protein_match)');

#
# 12 Test retrieval from database
Expand Down
Loading

0 comments on commit 01a61ff

Please sign in to comment.