-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
56 changed files
with
1,739 additions
and
334 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
=head1 LICENSE | ||
Copyright [2018-2020] EMBL-European Bioinformatics Institute | ||
Licensed under the Apache License, Version 2.0 (the 'License'); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an 'AS IS' BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
=cut | ||
|
||
package Bio::EnsEMBL::DataCheck::Checks::CheckFamiliesMLSS; | ||
|
||
use warnings; | ||
use strict; | ||
|
||
use Moose; | ||
use Test::More; | ||
use Bio::EnsEMBL::DataCheck::Test::DataCheck; | ||
use Bio::EnsEMBL::Utils::SqlHelper; | ||
use Data::Dumper; | ||
|
||
extends 'Bio::EnsEMBL::DataCheck::DbCheck'; | ||
|
||
# Metadata constants for this datacheck: name, description, groups, | ||
# datacheck type, database types and the tables it lists. | ||
# NOTE(review): presumably read by the DbCheck parent class - confirm. | ||
use constant { | ||
NAME => 'CheckFamiliesMLSS', | ||
DESCRIPTION => 'The expected number of families MLSSs are present', | ||
GROUPS => ['compara', 'compara_gene_trees'], | ||
DATACHECK_TYPE => 'critical', | ||
DB_TYPES => ['compara'], | ||
TABLES => ['method_link_species_set', 'family'] | ||
}; | ||
|
||
# Decide whether this datacheck should be skipped for the current database.
#
# Fetches every method_link_species_set (MLSS) of type 'FAMILY' through the
# MethodLinkSpeciesSetAdaptor obtained from $self->dba.
#
# Returns:
#   (1, $reason)  when no FAMILY MLSS exists, i.e. there is nothing to test;
#   0             otherwise. The explicit return replaces the previous
#                 behaviour of falling off the end of the sub, which leaked
#                 the value of the final conditional to the caller.
sub skip_tests {
    my ($self) = @_;

    my $dba       = $self->dba;
    my $mlss_adap = $dba->get_MethodLinkSpeciesSetAdaptor;
    my $mlsss     = $mlss_adap->fetch_all_by_method_link_type('FAMILY');

    if ( scalar @$mlsss == 0 ) {
        my $db_name = $dba->dbc->dbname;
        return ( 1, "There are no family MLSS in $db_name" );
    }

    return 0;
}
|
||
# Check that the family table is consistent with the FAMILY MLSSs.
#
# For every method_link_species_set of type 'FAMILY':
#   * asserts (is_rows_nonzero) that the family table has at least one row
#     for that MLSS;
#   * adds that row count to a running total.
# Finally asserts (is_rows) that the total number of rows in family equals
# the running total, i.e. no family row belongs to an unexpected MLSS.
sub tests {
    my ($self) = @_;

    my $dba    = $self->dba;
    my $helper = $dba->dbc->sql_helper;

    # Start at 0 rather than undef so the final comparison always receives
    # a number, even if the loop below never runs.
    my $expected_family_count = 0;

    my $mlsss = $dba->get_MethodLinkSpeciesSetAdaptor->fetch_all_by_method_link_type('FAMILY');

    foreach my $mlss ( @$mlsss ) {
        my $mlss_id   = $mlss->dbID;
        my $mlss_name = $mlss->name;

        my $sql = qq/
            SELECT COUNT(*)
            FROM family
            WHERE method_link_species_set_id = $mlss_id
        /;

        $expected_family_count += $helper->execute_single_result(-SQL => $sql);

        my $desc_1 = "The family for $mlss_id ($mlss_name) has rows as expected";
        is_rows_nonzero($dba, $sql, $desc_1);
    }

    # Check that all the families correspond to a method_link_species_set
    # that should have families.
    my $desc_2 = "All the family rows with corresponding method_link_species_sets are expected";
    my $row_count_sql = "SELECT COUNT(*) FROM family";
    is_rows($dba, $row_count_sql, $expected_family_count, $desc_2);

    return;
}
|
||
1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
=head1 LICENSE | ||
Copyright [2018-2020] EMBL-European Bioinformatics Institute | ||
Licensed under the Apache License, Version 2.0 (the 'License'); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an 'AS IS' BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
=cut | ||
|
||
package Bio::EnsEMBL::DataCheck::Checks::CheckGeneTreeRootMLSS; | ||
|
||
use warnings; | ||
use strict; | ||
|
||
use Moose; | ||
use Test::More; | ||
use Bio::EnsEMBL::DataCheck::Test::DataCheck; | ||
use Bio::EnsEMBL::Utils::SqlHelper; | ||
use Data::Dumper; | ||
|
||
extends 'Bio::EnsEMBL::DataCheck::DbCheck'; | ||
|
||
# Metadata constants for this datacheck: name, description, groups, | ||
# datacheck type, database types and the tables it lists. | ||
# NOTE(review): presumably read by the DbCheck parent class - confirm. | ||
use constant { | ||
NAME => 'CheckGeneTreeRootMLSS', | ||
DESCRIPTION => 'The expected number of gene_tree_root MLSSs are present', | ||
GROUPS => ['compara', 'compara_gene_trees'], | ||
DATACHECK_TYPE => 'critical', | ||
DB_TYPES => ['compara'], | ||
TABLES => ['method_link_species_set', 'gene_tree_root'] | ||
}; | ||
|
||
# Check that gene_tree_root is consistent with the gene-tree MLSSs.
#
# For every method_link_species_set of type NC_TREES or PROTEIN_TREES:
#   * asserts (is_rows_nonzero) that gene_tree_root has at least one row
#     for that MLSS;
#   * adds that row count to a running total.
# Finally asserts (is_rows) that the total number of rows in gene_tree_root
# equals the running total, i.e. no root belongs to an unexpected MLSS.
sub tests {
    my ($self) = @_;

    my $dba          = $self->dba;
    my $helper       = $dba->dbc->sql_helper;
    my $mlss_adap    = $dba->get_MethodLinkSpeciesSetAdaptor;
    my @method_links = qw(NC_TREES PROTEIN_TREES);

    # Start at 0 rather than undef: this check has no skip_tests, so when the
    # database holds no matching MLSS the loops below never add anything and
    # the final comparison must still receive a number (expecting no rows).
    my $expected_gene_count = 0;

    foreach my $method_link_type ( @method_links ) {

        # Method links without any MLSS contribute nothing here; per the
        # original note, other datachecks verify that the MLSSs themselves
        # are correct.
        my $mlsss = $mlss_adap->fetch_all_by_method_link_type($method_link_type);

        foreach my $mlss ( @$mlsss ) {
            my $mlss_id   = $mlss->dbID;
            my $mlss_name = $mlss->name;

            my $sql = qq/
                SELECT COUNT(*)
                FROM gene_tree_root
                WHERE method_link_species_set_id = $mlss_id
            /;

            $expected_gene_count += $helper->execute_single_result(-SQL => $sql);

            my $desc_1 = "The gene_tree_root for $mlss_id ($mlss_name) has rows as expected";
            is_rows_nonzero($dba, $sql, $desc_1);
        }
    }

    # Check that all the gene_tree_roots correspond to a
    # method_link_species_set that should have gene_tree_roots.
    my $desc_2 = "All the gene_tree_root rows with corresponding method_link_species_sets are expected";
    my $row_count_sql = "SELECT COUNT(*) FROM gene_tree_root";
    is_rows($dba, $row_count_sql, $expected_gene_count, $desc_2);

    return;
}
|
||
1; |
98 changes: 98 additions & 0 deletions
98
lib/Bio/EnsEMBL/DataCheck/Checks/CheckGenomicAlignments.pm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
=head1 LICENSE | ||
Copyright [2018-2020] EMBL-European Bioinformatics Institute | ||
Licensed under the Apache License, Version 2.0 (the 'License'); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an 'AS IS' BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
=cut | ||
|
||
package Bio::EnsEMBL::DataCheck::Checks::CheckGenomicAlignments; | ||
|
||
use warnings; | ||
use strict; | ||
|
||
use Moose; | ||
use Test::More; | ||
use Bio::EnsEMBL::DataCheck::Test::DataCheck; | ||
use Bio::EnsEMBL::Utils::SqlHelper; | ||
|
||
extends 'Bio::EnsEMBL::DataCheck::DbCheck'; | ||
|
||
# Metadata constants for this datacheck: name, description, groups, | ||
# datacheck type, database types and the tables it lists. | ||
# NOTE(review): presumably read by the DbCheck parent class - confirm. | ||
use constant { | ||
NAME => 'CheckGenomicAlignments', | ||
DESCRIPTION => 'The expected number of genomic alignments have been merged', | ||
GROUPS => ['compara', 'compara_genome_alignments'], | ||
DATACHECK_TYPE => 'critical', | ||
DB_TYPES => ['compara'], | ||
TABLES => ['method_link_species_set', 'genomic_align', 'genomic_align_block'] | ||
}; | ||
|
||
# Decide whether this datacheck should be skipped for the current database.
#
# Collects the method_link_species_sets (MLSSs) of every genomic-alignment
# method link type listed below, using the MethodLinkSpeciesSetAdaptor
# obtained from $self->dba.
#
# Returns:
#   (1, $reason)  when none of those method links has an MLSS;
#   0             otherwise. The explicit return replaces the previous
#                 behaviour of falling off the end of the sub, which leaked
#                 the value of the final conditional to the caller.
sub skip_tests {
    my ($self) = @_;

    my $dba       = $self->dba;
    my $mlss_adap = $dba->get_MethodLinkSpeciesSetAdaptor;

    my @method_links = qw(LASTZ_NET LASTZ_PATCH EPO EPO_EXTENDED PECAN POLYPLOID);

    # Flatten the per-method array refs into one list of MLSS objects.
    my @mlsss = map { @{ $mlss_adap->fetch_all_by_method_link_type($_) } } @method_links;

    if ( scalar(@mlsss) == 0 ) {
        my $db_name = $dba->dbc->dbname;
        return ( 1, "There are no genomic alignment MLSS in $db_name" );
    }

    return 0;
}
|
||
# Check that the genomic alignment tables are consistent with the
# genomic-alignment MLSSs.
#
# For each of genomic_align and genomic_align_block, and for every
# method_link_species_set of the method link types listed below:
#   * asserts (is_rows_nonzero) that the table has at least one row for
#     that MLSS;
#   * for genomic_align only, adds that row count to a running total.
# Finally asserts (is_rows) that the total number of rows in genomic_align
# equals the running total, i.e. no alignment belongs to an unexpected MLSS.
sub tests {
    my ($self) = @_;

    my $dba          = $self->dba;
    my $helper       = $dba->dbc->sql_helper;
    my $mlss_adap    = $dba->get_MethodLinkSpeciesSetAdaptor;
    my @method_links = qw(LASTZ_NET LASTZ_PATCH EPO EPO_EXTENDED PECAN POLYPLOID);
    my @tables       = qw(genomic_align genomic_align_block);

    # Start at 0 rather than undef so the final comparison always receives
    # a number, even if nothing is ever added below.
    my $expected_align_count = 0;

    # The MLSS lookup does not depend on the table, so do it once up front
    # instead of once per table. Method links without any MLSS simply
    # contribute nothing; per the original note, other datachecks verify
    # that the MLSSs themselves are correct. The list keeps method-link
    # order, so the per-table test order is unchanged.
    my @mlsss = map { @{ $mlss_adap->fetch_all_by_method_link_type($_) } } @method_links;

    foreach my $table (@tables) {
        foreach my $mlss (@mlsss) {
            my $mlss_id   = $mlss->dbID;
            my $mlss_name = $mlss->name;

            my $sql = qq/
                SELECT COUNT(*)
                FROM $table
                WHERE method_link_species_set_id = $mlss_id
            /;

            # Only genomic_align feeds the overall total checked below.
            if ( $table eq "genomic_align" ) {
                $expected_align_count += $helper->execute_single_result(-SQL => $sql);
            }

            my $desc_1 = "The $table for $mlss_id ($mlss_name) has rows as expected";
            is_rows_nonzero($dba, $sql, $desc_1);
        }
    }

    # Check that all the genomic_aligns correspond to a
    # method_link_species_set that should have an alignment.
    my $desc_2 = "All the genomic_align rows with corresponding method_link_species_sets are expected";
    my $row_count_sql = "SELECT COUNT(*) FROM genomic_align";
    is_rows($dba, $row_count_sql, $expected_align_count, $desc_2);

    return;
}
|
||
1; |
Oops, something went wrong.