Skip to content

Commit

Permalink
Merge branch 'release/102' into hotfix/MetaKeyFormat
Browse files Browse the repository at this point in the history
  • Loading branch information
leannehaggerty committed Sep 17, 2020
2 parents 245dd79 + 07e3098 commit e6378c8
Show file tree
Hide file tree
Showing 56 changed files with 1,831 additions and 319 deletions.
44 changes: 41 additions & 3 deletions lib/Bio/EnsEMBL/DataCheck/Checks/CheckConstrainedElementTable.pm
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use strict;
use Moose;
use Test::More;
use Bio::EnsEMBL::DataCheck::Test::DataCheck;
use Bio::EnsEMBL::Utils::SqlHelper;

extends 'Bio::EnsEMBL::DataCheck::DbCheck';

Expand All @@ -33,17 +34,54 @@ use constant {
GROUPS => ['compara', 'compara_genome_alignments'],
DATACHECK_TYPE => 'critical',
DB_TYPES => ['compara'],
TABLES => ['constrained_elements']
TABLES => ['constrained_elements', 'method_link_species_set']
};

# Decide whether this datacheck has anything to examine in the current
# database.
#
# Asks the MethodLinkSpeciesSetAdaptor for every MLSS whose method link type
# is GERP_CONSTRAINED_ELEMENT.
#
# Returns:
#   (1, $reason)  when no such MLSS exists;
#   empty list    otherwise (false in both list and scalar context).
# NOTE(review): the caller is outside this file; it is presumed to treat a
# true first element as "skip" - confirm against the DbCheck base class.
sub skip_tests {
  my ($self) = @_;

  my $mlss_adap = $self->dba->get_MethodLinkSpeciesSetAdaptor;
  my $mlsss     = $mlss_adap->fetch_all_by_method_link_type('GERP_CONSTRAINED_ELEMENT');

  if ( scalar(@$mlsss) == 0 ) {
    # The database name is only needed to build the skip message.
    my $db_name = $self->dba->dbc->dbname;
    return ( 1, "There are no GERP_CONSTRAINED_ELEMENT MLSS in $db_name" );
  }

  # Return explicitly instead of falling off the end of the sub, which would
  # leak the value of the comparison above as the result.
  return;
}

# Run the constrained_element checks.
#
# Three kinds of assertion are made, in this order:
#   1. is_one_to_many() on constrained_element.constrained_element_id;
#   2. for every GERP_CONSTRAINED_ELEMENT MLSS, the constrained_element
#      table has at least one row for that method_link_species_set_id;
#   3. the total row count of constrained_element equals the sum of the
#      per-MLSS counts from step 2, i.e. no row belongs to any other MLSS.
#
# The assertion helpers (is_one_to_many, is_rows_nonzero, is_rows) are
# defined outside this file.
sub tests {
  my ($self) = @_;
  my $dba    = $self->dba;
  my $helper = $dba->dbc->sql_helper;

  my $desc = "All the rows in constrained_element have a one-to-many relationship for constrained_element_id";
  is_one_to_many($dba->dbc, "constrained_element", "constrained_element_id", $desc);

  my $mlsss = $dba->get_MethodLinkSpeciesSetAdaptor->fetch_all_by_method_link_type('GERP_CONSTRAINED_ELEMENT');

  # Start at 0 rather than undef so that is_rows() below always receives a
  # number, even if the loop body never runs.
  my $expected_ce_count = 0;

  foreach my $mlss ( @$mlsss ) {
    my $mlss_id   = $mlss->dbID;
    my $mlss_name = $mlss->name;

    my $sql = qq/
      SELECT COUNT(*)
      FROM constrained_element
      WHERE method_link_species_set_id = $mlss_id
    /;

    # Accumulate the rows that are accounted for by an expected MLSS.
    $expected_ce_count += $helper->execute_single_result(-SQL => $sql);

    my $desc_1 = "The constrained elements for $mlss_id ($mlss_name) are present as expected";
    is_rows_nonzero($dba, $sql, $desc_1);
  }

  # Any surplus row in the table belongs to an MLSS that should not have
  # constrained elements.
  my $desc_2 = "All the constrained elements with corresponding method_link_species_sets are expected";
  my $row_count_sql = "SELECT COUNT(*) FROM constrained_element";
  is_rows($dba, $row_count_sql, $expected_ce_count, $desc_2);

  return;
}

1;
Expand Down
84 changes: 84 additions & 0 deletions lib/Bio/EnsEMBL/DataCheck/Checks/CheckFamiliesMLSS.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
=head1 LICENSE
Copyright [2018-2020] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the 'License');
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an 'AS IS' BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=cut

package Bio::EnsEMBL::DataCheck::Checks::CheckFamiliesMLSS;

use warnings;
use strict;

use Moose;
use Test::More;
use Bio::EnsEMBL::DataCheck::Test::DataCheck;
use Bio::EnsEMBL::Utils::SqlHelper;
use Data::Dumper;

extends 'Bio::EnsEMBL::DataCheck::DbCheck';

# Static metadata describing this datacheck.
# NOTE(review): presumably read by the Bio::EnsEMBL::DataCheck::DbCheck base
# class to register, group and schedule the check - confirm there.
use constant {
  NAME           => 'CheckFamiliesMLSS',
  DESCRIPTION    => 'The expected number of families MLSSs are present',
  GROUPS         => [ qw(compara compara_gene_trees) ],
  DATACHECK_TYPE => 'critical',
  DB_TYPES       => [ qw(compara) ],
  TABLES         => [ qw(method_link_species_set family) ],
};

# Decide whether this datacheck has anything to examine in the current
# database.
#
# Asks the MethodLinkSpeciesSetAdaptor for every MLSS whose method link type
# is FAMILY.
#
# Returns:
#   (1, $reason)  when no such MLSS exists;
#   empty list    otherwise (false in both list and scalar context).
# NOTE(review): the caller is outside this file; it is presumed to treat a
# true first element as "skip" - confirm against the DbCheck base class.
sub skip_tests {
  my ($self) = @_;

  my $mlss_adap = $self->dba->get_MethodLinkSpeciesSetAdaptor;
  my $mlsss     = $mlss_adap->fetch_all_by_method_link_type('FAMILY');

  if ( scalar(@$mlsss) == 0 ) {
    # The database name is only needed to build the skip message.
    my $db_name = $self->dba->dbc->dbname;
    return ( 1, "There are no family MLSS in $db_name" );
  }

  # Return explicitly instead of falling off the end of the sub, which would
  # leak the value of the comparison above as the result.
  return;
}

# Run the family checks.
#
# For every MLSS of type FAMILY:
#   - assert that the family table has at least one row for that
#     method_link_species_set_id;
#   - add that row count to a running total.
# Finally assert that the total row count of family equals the running
# total, i.e. no family row belongs to any other MLSS.
#
# The assertion helpers (is_rows_nonzero, is_rows) are defined outside this
# file.
sub tests {
  my ($self) = @_;
  my $dba    = $self->dba;
  my $helper = $dba->dbc->sql_helper;

  # Start at 0 rather than undef so that is_rows() below always receives a
  # number, even if the loop body never runs.
  my $expected_family_count = 0;

  my $mlsss = $dba->get_MethodLinkSpeciesSetAdaptor->fetch_all_by_method_link_type('FAMILY');

  foreach my $mlss ( @$mlsss ) {
    my $mlss_id   = $mlss->dbID;
    my $mlss_name = $mlss->name;

    my $sql = qq/
      SELECT COUNT(*)
      FROM family
      WHERE method_link_species_set_id = $mlss_id
    /;

    # Accumulate the rows that are accounted for by an expected MLSS.
    $expected_family_count += $helper->execute_single_result(-SQL => $sql);

    my $desc_1 = "The family for $mlss_id ($mlss_name) has rows as expected";
    is_rows_nonzero($dba, $sql, $desc_1);
  }

  # Check that all the families correspond to a method_link_species_set that
  # should have families.
  my $desc_2 = "All the family rows with corresponding method_link_species_sets are expected";
  my $row_count_sql = "SELECT COUNT(*) FROM family";
  is_rows($dba, $row_count_sql, $expected_family_count, $desc_2);

  return;
}

1;
79 changes: 79 additions & 0 deletions lib/Bio/EnsEMBL/DataCheck/Checks/CheckGeneTreeRootMLSS.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
=head1 LICENSE
Copyright [2018-2020] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the 'License');
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an 'AS IS' BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=cut

package Bio::EnsEMBL::DataCheck::Checks::CheckGeneTreeRootMLSS;

use warnings;
use strict;

use Moose;
use Test::More;
use Bio::EnsEMBL::DataCheck::Test::DataCheck;
use Bio::EnsEMBL::Utils::SqlHelper;
use Data::Dumper;

extends 'Bio::EnsEMBL::DataCheck::DbCheck';

# Static metadata describing this datacheck.
# NOTE(review): presumably read by the Bio::EnsEMBL::DataCheck::DbCheck base
# class to register, group and schedule the check - confirm there.
use constant {
  NAME           => 'CheckGeneTreeRootMLSS',
  DESCRIPTION    => 'The expected number of gene_tree_root MLSSs are present',
  GROUPS         => [ qw(compara compara_gene_trees) ],
  DATACHECK_TYPE => 'critical',
  DB_TYPES       => [ qw(compara) ],
  TABLES         => [ qw(method_link_species_set gene_tree_root) ],
};

# Run the gene_tree_root checks.
#
# For every MLSS of type NC_TREES or PROTEIN_TREES:
#   - assert that gene_tree_root has at least one row for that
#     method_link_species_set_id;
#   - add that row count to a running total.
# Finally assert that the total row count of gene_tree_root equals the
# running total, i.e. no gene_tree_root row belongs to any other MLSS.
#
# The assertion helpers (is_rows_nonzero, is_rows) are defined outside this
# file.
sub tests {
  my ($self) = @_;
  my $dba    = $self->dba;
  my $helper = $dba->dbc->sql_helper;
  my @method_links = qw(NC_TREES PROTEIN_TREES);

  my $mlss_adap = $dba->get_MethodLinkSpeciesSetAdaptor;

  # This module defines no skip_tests(), so the database may contain no
  # gene-tree MLSS at all. Start at 0 so that the final is_rows() is given a
  # number rather than undef in that case.
  my $expected_gene_count = 0;

  foreach my $method_link_type ( @method_links ) {

    # Method links without any MLSS simply contribute nothing here; whether
    # the set of MLSSs itself is correct is left to other datachecks.
    my $mlsss = $mlss_adap->fetch_all_by_method_link_type($method_link_type);

    foreach my $mlss ( @$mlsss ) {
      my $mlss_id   = $mlss->dbID;
      my $mlss_name = $mlss->name;

      my $sql = qq/
        SELECT COUNT(*)
        FROM gene_tree_root
        WHERE method_link_species_set_id = $mlss_id
      /;

      # Accumulate the rows that are accounted for by an expected MLSS.
      $expected_gene_count += $helper->execute_single_result(-SQL => $sql);

      my $desc_1 = "The gene_tree_root for $mlss_id ($mlss_name) has rows as expected";
      is_rows_nonzero($dba, $sql, $desc_1);
    }
  }

  # Check that all the gene_tree_roots correspond to a
  # method_link_species_set that should have gene_tree_roots.
  my $desc_2 = "All the gene_tree_root rows with corresponding method_link_species_sets are expected";
  my $row_count_sql = "SELECT COUNT(*) FROM gene_tree_root";
  is_rows($dba, $row_count_sql, $expected_gene_count, $desc_2);

  return;
}

1;
98 changes: 98 additions & 0 deletions lib/Bio/EnsEMBL/DataCheck/Checks/CheckGenomicAlignments.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
=head1 LICENSE
Copyright [2018-2020] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the 'License');
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an 'AS IS' BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=cut

package Bio::EnsEMBL::DataCheck::Checks::CheckGenomicAlignments;

use warnings;
use strict;

use Moose;
use Test::More;
use Bio::EnsEMBL::DataCheck::Test::DataCheck;
use Bio::EnsEMBL::Utils::SqlHelper;

extends 'Bio::EnsEMBL::DataCheck::DbCheck';

# Static metadata describing this datacheck.
# NOTE(review): presumably read by the Bio::EnsEMBL::DataCheck::DbCheck base
# class to register, group and schedule the check - confirm there.
use constant {
  NAME           => 'CheckGenomicAlignments',
  DESCRIPTION    => 'The expected number of genomic alignments have been merged',
  GROUPS         => [ qw(compara compara_genome_alignments) ],
  DATACHECK_TYPE => 'critical',
  DB_TYPES       => [ qw(compara) ],
  TABLES         => [ qw(method_link_species_set genomic_align genomic_align_block) ],
};

# Decide whether this datacheck has anything to examine in the current
# database.
#
# Collects, through the MethodLinkSpeciesSetAdaptor, every MLSS whose method
# link type is one of LASTZ_NET, LASTZ_PATCH, EPO, EPO_EXTENDED, PECAN or
# POLYPLOID.
#
# Returns:
#   (1, $reason)  when none of those method links has an MLSS;
#   empty list    otherwise (false in both list and scalar context).
# NOTE(review): the caller is outside this file; it is presumed to treat a
# true first element as "skip" - confirm against the DbCheck base class.
sub skip_tests {
  my ($self) = @_;
  my $mlss_adap = $self->dba->get_MethodLinkSpeciesSetAdaptor;

  my @method_links = qw(LASTZ_NET LASTZ_PATCH EPO EPO_EXTENDED PECAN POLYPLOID);

  # Flatten the per-method array refs into a single list of MLSS objects.
  my @mlsss = map { @{ $mlss_adap->fetch_all_by_method_link_type($_) } } @method_links;

  if ( scalar(@mlsss) == 0 ) {
    # The database name is only needed to build the skip message.
    my $db_name = $self->dba->dbc->dbname;
    return ( 1, "There are no genomic alignment MLSS in $db_name" );
  }

  # Return explicitly instead of falling off the end of the sub, which would
  # leak the value of the comparison above as the result.
  return;
}

# Run the genomic alignment checks.
#
# For each of the tables genomic_align and genomic_align_block, and for
# every MLSS of type LASTZ_NET, LASTZ_PATCH, EPO, EPO_EXTENDED, PECAN or
# POLYPLOID, assert that the table has at least one row for that
# method_link_species_set_id.
#
# The genomic_align counts are also summed, and the total row count of
# genomic_align is asserted to equal that sum, i.e. no genomic_align row
# belongs to any other MLSS.
#
# The assertion helpers (is_rows_nonzero, is_rows) are defined outside this
# file.
sub tests {
  my ($self) = @_;
  my $dba    = $self->dba;
  my $helper = $dba->dbc->sql_helper;
  my @method_links = qw(LASTZ_NET LASTZ_PATCH EPO EPO_EXTENDED PECAN POLYPLOID);
  my @tables       = qw(genomic_align genomic_align_block);

  # The MLSS list does not depend on the table being checked, so fetch it
  # once, in method-link order, instead of once per table. Method links
  # without any MLSS contribute nothing; whether the set of MLSSs itself is
  # correct is left to other datachecks.
  my $mlss_adap = $dba->get_MethodLinkSpeciesSetAdaptor;
  my @mlsss = map { @{ $mlss_adap->fetch_all_by_method_link_type($_) } } @method_links;

  # Start at 0 rather than undef so that is_rows() below always receives a
  # number, even if no MLSS is found.
  my $expected_align_count = 0;

  foreach my $table (@tables) {
    foreach my $mlss (@mlsss) {
      my $mlss_id   = $mlss->dbID;
      my $mlss_name = $mlss->name;

      my $sql = qq/
        SELECT COUNT(*)
        FROM $table
        WHERE method_link_species_set_id = $mlss_id
      /;

      # Only genomic_align rows feed the grand total checked at the end.
      $expected_align_count += $helper->execute_single_result(-SQL => $sql) if $table eq "genomic_align";

      my $desc_1 = "The $table for $mlss_id ($mlss_name) has rows as expected";
      is_rows_nonzero($dba, $sql, $desc_1);
    }
  }

  # Check that all the genomic_aligns correspond to a
  # method_link_species_set that should have an alignment.
  my $desc_2 = "All the genomic_align rows with corresponding method_link_species_sets are expected";
  my $row_count_sql = "SELECT COUNT(*) FROM genomic_align";
  is_rows($dba, $row_count_sql, $expected_align_count, $desc_2);

  return;
}

1;

0 comments on commit e6378c8

Please sign in to comment.