Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Biotype so_term #364

Merged
merged 11 commits into from
Feb 19, 2019
27 changes: 25 additions & 2 deletions modules/Bio/EnsEMBL/Biotype.pm
Expand Up @@ -86,6 +86,8 @@ use parent qw(Bio::EnsEMBL::Storable);
string - the name of the biotype group (for ensembl)
Arg [-SO_ACC] :
string - the Sequence Ontology accession of this biotype
Arg [-SO_TERM] :
string - the Sequence Ontology term for the SO accession of this biotype
Arg [-DESCRIPTION] :
string - the biotype description
Arg [-DB_TYPE] :
Expand All @@ -107,14 +109,15 @@ sub new {

my $self = $class->SUPER::new();

my($dbID, $name, $object_type, $biotype_group, $so_acc, $description, $db_type, $attrib_type_id) =
rearrange([qw(BIOTYPE_ID NAME OBJECT_TYPE BIOTYPE_GROUP SO_ACC DESCRIPTION DB_TYPE ATTRIB_TYPE_ID)], @args);
my($dbID, $name, $object_type, $biotype_group, $so_acc, $so_term, $description, $db_type, $attrib_type_id) =
rearrange([qw(BIOTYPE_ID NAME OBJECT_TYPE BIOTYPE_GROUP SO_ACC SO_TERM DESCRIPTION DB_TYPE ATTRIB_TYPE_ID)], @args);

$self->{'dbID'} = $dbID;
$self->{'name'} = $name;
$self->{'object_type'} = $object_type;
$self->{'biotype_group'} = $biotype_group;
$self->{'so_acc'} = $so_acc;
$self->{'so_term'} = $so_term;
$self->{'description'} = $description;
$self->{'db_type'} = $db_type;
$self->{'attrib_type_id'} = $attrib_type_id;
Expand Down Expand Up @@ -215,6 +218,26 @@ sub so_acc {
return $self->{'so_acc'};
}

=head2 so_term

  Arg [1]    : (optional) string $so_term
  Example    : my $term = $biotype->so_term();
  Description: Getter/Setter for the Sequence Ontology term of this biotype.
               A defined argument replaces the stored term; calling without
               an argument (or with undef) leaves the stored term untouched.
  Returntype : string (undef if no term has been stored)
  Exceptions : none

=cut

sub so_term {
  my $self    = shift;
  my $so_term = shift;

  # Only a defined value overwrites what is already stored.
  $self->{'so_term'} = $so_term if defined $so_term;

  return $self->{'so_term'};
}

=head2 object_type

Arg [1] : (optional) string $object_type
Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/CDS.pm
Expand Up @@ -66,7 +66,10 @@ use Scalar::Util qw(weaken isweak);

@ISA = qw(Bio::EnsEMBL::Feature);

use constant SO_ACC => 'SO:0000316';
use constant SEQUENCE_ONTOLOGY => {
acc => 'SO:0000316',
term => 'CDS',
};

=head2 new

Expand Down
7 changes: 4 additions & 3 deletions modules/Bio/EnsEMBL/DBSQL/BiotypeAdaptor.pm
Expand Up @@ -90,7 +90,7 @@ sub _tables {
sub _columns {
my $self = shift;

return ('b.biotype_id', 'b.name', 'b.object_type', 'b.db_type', 'b.attrib_type_id', 'b.description', 'b.biotype_group', 'b.so_acc');
return ('b.biotype_id', 'b.name', 'b.object_type', 'b.db_type', 'b.attrib_type_id', 'b.description', 'b.biotype_group', 'b.so_acc', 'b.so_term');
}

=head2 _objs_from_sth
Expand All @@ -107,9 +107,9 @@ sub _columns {
sub _objs_from_sth {
my ($self, $sth) = @_;

my ($dbID, $name, $object_type, $db_type, $attrib_type_id, $description, $biotype_group, $so_acc);
my ($dbID, $name, $object_type, $db_type, $attrib_type_id, $description, $biotype_group, $so_acc, $so_term);

$sth->bind_columns(\$dbID, \$name, \$object_type, \$db_type, \$attrib_type_id, \$description, \$biotype_group, \$so_acc);
$sth->bind_columns(\$dbID, \$name, \$object_type, \$db_type, \$attrib_type_id, \$description, \$biotype_group, \$so_acc, \$so_term);

my @biotypes;

Expand All @@ -124,6 +124,7 @@ sub _objs_from_sth {
'description' => $description,
'biotype_group' => $biotype_group,
'so_acc' => $so_acc,
'so_term' => $so_term,
} )
);
}
Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/DnaDnaAlignFeature.pm
Expand Up @@ -54,7 +54,10 @@ use Bio::EnsEMBL::Utils::Exception qw(throw);

@ISA = qw( Bio::EnsEMBL::BaseAlignFeature );

use constant SO_ACC => 'SO:0000347';
use constant SEQUENCE_ONTOLOGY => {
acc => 'SO:0000347',
term => 'nucleotide_match',
};

=head2 new

Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/DnaPepAlignFeature.pm
Expand Up @@ -51,7 +51,10 @@ use vars qw(@ISA);

@ISA = qw( Bio::EnsEMBL::BaseAlignFeature );

use constant SO_ACC => 'SO:0000349';
use constant SEQUENCE_ONTOLOGY => {
acc => 'SO:0000349',
term => 'protein_match',
};

=head2 _hit_unit

Expand Down
6 changes: 4 additions & 2 deletions modules/Bio/EnsEMBL/Exon.pm
Expand Up @@ -80,8 +80,10 @@ use Bio::EnsEMBL::DBSQL::SupportingFeatureAdaptor;
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Feature);

use constant SO_ACC => 'SO:0000147';

use constant SEQUENCE_ONTOLOGY => {
acc => 'SO:0000147',
term => 'exon',
};

=head2 new

Expand Down
37 changes: 32 additions & 5 deletions modules/Bio/EnsEMBL/Feature.pm
Expand Up @@ -1449,9 +1449,9 @@ sub get_nearest_Gene {
=head2 feature_so_acc

Description: This method returns a string containing the SO accession number of the feature
Define constant SO_ACC in classes that require it, or override it for multiple possible values for a class.
Define constant SEQUENCE_ONTOLOGY in classes that require it, or override it for multiple possible values for a class.
Returntype : String (Sequence Ontology accession number)
Exceptions : Thrown if caller SO_ACC is undefined and is not a Bio::EnsEMBL::Feature
Exceptions : Thrown if caller SEQUENCE_ONTOLOGY is undefined and is not a Bio::EnsEMBL::Feature

=cut

Expand All @@ -1463,16 +1463,43 @@ sub feature_so_acc {

# Get the caller class SO acc
try {
$so_acc = $ref->SO_ACC;
$so_acc = $ref->SEQUENCE_ONTOLOGY->{'acc'};
};

unless ($so_acc || $ref eq 'Bio::EnsEMBL::Feature' ) {
throw( "constant SO_ACC in ${ref} is not defined");
throw( "constant SEQUENCE_ONTOLOGY in ${ref} is not defined");
}

return $so_acc // 'SO:0000001';
}

=head2 feature_so_term

Description: This method returns a string containing the SO term of the feature
Define constant SEQUENCE_ONTOLOGY in classes that require it, or override it for multiple possible values for a class.
Returntype : String (Sequence Ontology term)
Exceptions : Thrown if caller SEQUENCE_ONTOLOGY is undefined and is not a Bio::EnsEMBL::Feature

=cut

sub feature_so_term {
my ($self) = @_;

my $ref = ref $self;
my $so_term;

# Get the caller class SO term
try {
$so_term = $ref->SEQUENCE_ONTOLOGY->{'term'};
};

unless ($so_term || $ref eq 'Bio::EnsEMBL::Feature' ) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not like using unless for complex conditions, I end up mentally substituting it with if not and propagating the negation to individual conditions. Maybe it's just me, though.

throw( "constant SEQUENCE_ONTOLOGY in ${ref} is not defined");
}

return $so_term // 'region';
}

=head2 summary_as_hash

Example : $feature_summary = $feature->summary_as_hash();
Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/Gene.pm
Expand Up @@ -73,7 +73,10 @@ use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);

use parent qw(Bio::EnsEMBL::Feature);

use constant SO_ACC => 'SO:0000704';
use constant SEQUENCE_ONTOLOGY => {
acc => 'SO:0000704',
term => 'gene',
};

=head2 new

Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/KaryotypeBand.pm
Expand Up @@ -82,7 +82,10 @@ use Bio::EnsEMBL::Utils::Exception qw(warning);

@ISA = qw(Bio::EnsEMBL::Feature);

use constant SO_ACC => 'SO:0000341';
use constant SEQUENCE_ONTOLOGY => {
acc => 'SO:0000341',
term => 'chromosome_band',
};

=head2 new

Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/MiscFeature.pm
Expand Up @@ -125,7 +125,10 @@ use vars qw(@ISA);

@ISA = qw(Bio::EnsEMBL::Feature);

use constant SO_ACC => 'SO:0001411';
use constant SEQUENCE_ONTOLOGY => {
acc => 'SO:0001411',
term => 'biological_region',
};

=head2 new

Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/RepeatFeature.pm
Expand Up @@ -82,7 +82,10 @@ use Bio::EnsEMBL::Utils::Argument qw(rearrange);

use base qw/Bio::EnsEMBL::Feature/;

use constant SO_ACC => 'SO:0000657';
use constant SEQUENCE_ONTOLOGY => {
acc => 'SO:0000657',
term => 'repeat_region',
};

=head2 new

Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/SimpleFeature.pm
Expand Up @@ -69,7 +69,10 @@ use Scalar::Util qw(weaken isweak);

@ISA = qw(Bio::EnsEMBL::Feature);

use constant SO_ACC => 'SO:0001411';
use constant SEQUENCE_ONTOLOGY => {
acc => 'SO:0001411',
term => 'biological_region',
};

=head2 new

Expand Down
5 changes: 4 additions & 1 deletion modules/Bio/EnsEMBL/Transcript.pm
Expand Up @@ -77,7 +77,10 @@ use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );

use parent qw(Bio::EnsEMBL::Feature);

use constant SO_ACC => 'SO:0000673';
use constant SEQUENCE_ONTOLOGY => {
acc => 'SO:0000673',
term => 'transcript',
};

=head2 new

Expand Down
9 changes: 9 additions & 0 deletions modules/t/MultiTestDB.mysql.conf
@@ -0,0 +1,9 @@

# Connection settings hash using the 'mysql' driver on localhost:3306.
# NOTE(review): presumably consumed by the test harness (file lives under modules/t) — confirm.
# NOTE(review): 'user'/'pass' look like placeholder credentials — verify no real secrets are committed.
{
'port' => '3306',
'driver' => 'mysql',
'user' => 'test_user',
'db_version' => 93,
'pass' => 'passwd',
'host' => 'localhost'
}
5 changes: 5 additions & 0 deletions modules/t/MultiTestDB.sqlite.conf
@@ -0,0 +1,5 @@
# Settings hash using the 'SQLite' driver.
# NOTE(review): 'dbdir' is an absolute path inside one developer's home directory;
# presumably it has to be adjusted per environment — confirm before relying on this file.
{
'driver' => 'SQLite',
'dbdir' => '/home/tgrego/temp/',
'user' => 'test_user',
}
8 changes: 6 additions & 2 deletions modules/t/biotype.t
Expand Up @@ -52,19 +52,21 @@ is($biotype1->object_type, 'gene', 'Biotype is from Gene object');
is($biotype1->name, 'protein_coding', 'Biotype name is protein_coding');
is($biotype1->biotype_group, 'coding', 'Biotype group is coding');
is($biotype1->so_acc, 'SO:0001217', 'Biotype protein_coding refers to SO:0001217');
is($biotype1->so_term, 'protein_coding_gene', 'Biotype protein_coding refers to SO term protein_coding_gene');
throws_ok { $biotype1->so_acc('test') } qr/so_acc must be a Sequence Ontology accession/, 'so_acc() requires a SO acc like string';
throws_ok { $biotype1->object_type('test') } qr/object_type must be gene or transcript/, 'object_type() must be gene or transcript';

# test transcript biotype object
my $transcript = $gene->canonical_transcript;
debug("transcript biotype");
is($transcript->biotype, 'protein_coding', "Trancript biotype is protein_coding");
is($transcript->biotype, 'protein_coding', "Transcript biotype is protein_coding");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would say this and the trailing-whitespace fixes in line 102 of this file + in line 41 of cds.t should go into a separate commit, those are a different logical change from the so_term thing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't want to edit commits just for this tiny thing; in the future I will commit such small fixes separately when I spot them.

my $biotype2 = $transcript->get_Biotype;
ok($biotype2->isa("Bio::EnsEMBL::Biotype"), "Biotype object retrieved successfully");
is($biotype2->object_type, 'transcript', 'Biotype is from Transcript object');
is($biotype2->name, 'protein_coding', 'Biotype name is protein_coding');
is($biotype2->biotype_group, 'coding', 'Biotype group is coding');
is($biotype2->so_acc, 'SO:0000234', 'Biotype protein_coding refers to SO:0000234');
is($biotype2->so_term, 'mRNA', 'Biotype protein_coding refers to SO term mRNA');
ok($transcript->set_Biotype('new_biotype'), "Can successfully set new_biotype");
throws_ok { $gene->set_Biotype() } qr/No argument provided/, 'set_Biotype() requires an argument';

Expand All @@ -77,6 +79,7 @@ is($biotype3->object_type, 'gene', 'Biotype is from Gene object');
# Each description names the biotype under test (tRNA) so a failing line is easy to trace.
is($biotype3->name, 'tRNA', 'Biotype name is tRNA');
is($biotype3->biotype_group, 'snoncoding', 'Biotype group is snoncoding');
is($biotype3->so_acc, 'SO:0001263', 'Biotype tRNA refers to SO:0001263');
is($biotype3->so_term, 'ncRNA_gene', 'Biotype tRNA refers to SO term ncRNA_gene');

# set biotype with term not in database
debug("set biotype with term not in db");
Expand All @@ -87,6 +90,7 @@ is($biotype4->object_type, 'gene', 'Biotype is from Gene object');
is($biotype4->name, 'dummy', 'Biotype name is dummy');
is($biotype4->biotype_group, undef, 'Biotype group is not set');
is($biotype4->so_acc, undef, 'Biotype SO acc is not set');
is($biotype4->so_term, undef, 'Biotype SO term is not set');
throws_ok { $gene->set_Biotype() } qr/No argument provided/, 'set_Biotype() requires an argument';

# test fetch biotypes of object_type gene
Expand All @@ -95,7 +99,7 @@ my $biotypes1 = $biotype_adaptor->fetch_all_by_object_type('gene');
is(ref $biotypes1, 'ARRAY', 'Got an array');
is(scalar @{$biotypes1}, '2', 'of size 2');
is_deeply($biotypes1, [$biotype1, $biotype3], 'with the correct objects');
my $warning1 = warning {
my $warning1 = warning {
$biotypes1 = $biotype_adaptor->fetch_all_by_object_type('none') };
like( $warning1,
qr/No objects retrieved. Check if object_type 'none' is correct./,
Expand Down
8 changes: 4 additions & 4 deletions modules/t/cds.t
Expand Up @@ -35,11 +35,10 @@ ok($db);

my $stable_id = 'ENST00000217347';
my $transcript_adaptor = $db->get_TranscriptAdaptor();
my $transcript =
$transcript_adaptor->fetch_by_stable_id($stable_id);
my $transcript = $transcript_adaptor->fetch_by_stable_id($stable_id);


my @cds = @{ $transcript->get_all_CDS() };
my @cds = @{ $transcript->get_all_CDS() };
my @exons = @{ $transcript->get_all_translateable_Exons() };
my $n = scalar(@cds);

Expand All @@ -50,6 +49,7 @@ for (my $i = 0; $i < $n; $i++) {

is($cds[0]->start, $transcript->coding_region_start, "First cds is coding start");
is($cds[$n-1]->end, $transcript->coding_region_end, "Last cds is coding end");
is($cds[0]->feature_so_acc, 'SO:0000316', 'CDS feature SO acc is correct (CDS)');;
is($cds[0]->feature_so_acc, 'SO:0000316', 'CDS feature SO acc is correct (CDS)');
# SO term counterpart of the accession check on the previous line.
is($cds[0]->feature_so_term, 'CDS', 'CDS feature SO term is correct (CDS)');

done_testing();
1 change: 1 addition & 0 deletions modules/t/dnaDnaAlignFeature.t
Expand Up @@ -159,5 +159,6 @@ ok($f);


is($dnaf->feature_so_acc, 'SO:0000347', 'DnaDnaAlignFeature feature SO acc is correct (nucleotide_match)');
is($dnaf->feature_so_term, 'nucleotide_match', 'DnaDnaAlignFeature feature SO term is correct (nucleotide_match)');

done_testing();
1 change: 1 addition & 0 deletions modules/t/dnaPepAlignFeature.t
Expand Up @@ -116,6 +116,7 @@ ok($dnaf->end == 16);
ok( scalar($dnaf->ungapped_features) == 2);

is($dnaf->feature_so_acc, 'SO:0000349', 'dnaPepAlignFeature feature SO acc is correct (protein_match)');
is($dnaf->feature_so_term, 'protein_match', 'dnaPepAlignFeature feature SO term is correct (protein_match)');

#
# 12 Test retrieval from database
Expand Down
1 change: 1 addition & 0 deletions modules/t/exon.t
Expand Up @@ -114,6 +114,7 @@ allow_warnings(0) if $db->dbc->driver() eq 'SQLite';
ok($exon->dbID() && $exon->adaptor == $exonad);

is($exon->feature_so_acc, 'SO:0000147', 'Exon feature SO acc is correct (exon)');
is($exon->feature_so_term, 'exon', 'Exon feature SO term is correct (exon)');

# now test fetch_by_dbID

Expand Down
2 changes: 2 additions & 0 deletions modules/t/feature.t
Expand Up @@ -68,6 +68,8 @@ ok($feature->strand == $strand);
ok($feature->analysis == $analysis);
ok($feature->slice == $slice);
is($feature->feature_so_acc, 'SO:0000001', 'Feature feature SO acc is correct (feature)');
is($feature->feature_so_term, 'region', 'Feature feature SO term is correct (feature)');


#
# Test setters
Expand Down
1 change: 1 addition & 0 deletions modules/t/gene.t
Expand Up @@ -99,6 +99,7 @@ debug("Gene display xref id: " . $gene->display_xref->dbID);
ok($gene->display_xref->dbID() == 128324);

is($gene->feature_so_acc, 'SO:0000704', 'Gene feature SO acc is correct (gene)');
is($gene->feature_so_term, 'gene', 'Gene feature SO term is correct (gene)');

# test the getters and setters
ok(test_getter_setter($gene, "external_name", "banana"));
Expand Down
1 change: 1 addition & 0 deletions modules/t/karyotypeBand.t
Expand Up @@ -58,6 +58,7 @@ ok($kb->name() eq $name);
ok($kb->slice == $slice);
ok($kb->display_id eq $name);
is($kb->feature_so_acc, 'SO:0000341', 'KaryotypeBand feature SO acc is correct (chromosome_band)');
is($kb->feature_so_term, 'chromosome_band', 'KaryotypeBand feature SO term is correct (chromosome_band)');

#
# test getter/setters
Expand Down