-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
23 changed files
with
1,703 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
=head1 LICENSE | ||
Copyright [2018-2019] EMBL-European Bioinformatics Institute | ||
Licensed under the Apache License, Version 2.0 (the 'License'); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an 'AS IS' BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
=cut | ||
|
||
package Bio::EnsEMBL::DataCheck::Checks::AlignmentCoordinates; | ||
|
||
use warnings; | ||
use strict; | ||
|
||
use Moose; | ||
use Test::More; | ||
use Bio::EnsEMBL::DataCheck::Test::DataCheck; | ||
|
||
extends 'Bio::EnsEMBL::DataCheck::DbCheck'; | ||
|
||
# Metadata constants for this check (name, description, type, groups,
# database types and tables). Presumably consumed by the DbCheck parent
# class -- confirm against Bio::EnsEMBL::DataCheck::DbCheck.
use constant {
  NAME           => 'AlignmentCoordinates',
  DESCRIPTION    => 'Alignment coordinates are within the length of their dnafrag',
  DATACHECK_TYPE => 'critical',
  GROUPS         => [qw(compara compara_pairwise_alignments compara_multiple_alignments)],
  DB_TYPES       => [qw(compara)],
  TABLES         => [qw(dnafrag genomic_align)],
};
|
||
# Runs two row-count checks against genomic_align. Each query selects the
# offending rows, and is_rows_zero is given the query together with a
# description of the expectation.
sub tests {
  my $self = shift;
  my $dba  = $self->dba;

  # Check 1: rows whose start coordinate is below 1.
  my $start_desc = "All dnafrag_starts are >= 1";
  my $start_sql  = q{
    SELECT *
      FROM genomic_align
    WHERE dnafrag_start < 1
  };
  is_rows_zero($dba, $start_sql, $start_desc);

  # Check 2: rows whose end coordinate exceeds the joined dnafrag's length.
  my $end_desc = "Alignment coordinates are within the length of their dnafrag";
  my $end_sql  = q{
    SELECT *
      FROM genomic_align ga
      JOIN dnafrag df
        USING (dnafrag_id)
    WHERE ga.dnafrag_end > length
  };
  return is_rows_zero($dba, $end_sql, $end_desc);
}
|
||
1; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
=head1 LICENSE | ||
Copyright [2018-2019] EMBL-European Bioinformatics Institute | ||
Licensed under the Apache License, Version 2.0 (the 'License'); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an 'AS IS' BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
=cut | ||
|
||
package Bio::EnsEMBL::DataCheck::Checks::CheckCAFETable; | ||
|
||
use warnings; | ||
use strict; | ||
|
||
use Moose; | ||
use Test::More; | ||
use Bio::EnsEMBL::DataCheck::Test::DataCheck; | ||
|
||
extends 'Bio::EnsEMBL::DataCheck::DbCheck'; | ||
|
||
# Metadata constants for this check. Presumably consumed by the DbCheck
# parent class -- confirm against Bio::EnsEMBL::DataCheck::DbCheck.
use constant {
  NAME           => 'CheckCAFETable',
  DESCRIPTION    => 'Each row should show a one-to-many relationship',
  GROUPS         => ['compara', 'compara_protein_trees'],
  DATACHECK_TYPE => 'critical',
  DB_TYPES       => ['compara'],
  # Spelled exactly as the table is referenced in tests(); table names can
  # be case-sensitive in MySQL depending on the server configuration.
  TABLES         => ['CAFE_species_gene']
};
|
||
# Runs a single is_one_to_many check on the cafe_gene_family_id column of
# the CAFE_species_gene table, using the check's database connection.
sub tests {
  my $self = shift;

  my $table  = "CAFE_species_gene";
  my $column = "cafe_gene_family_id";
  my $desc   = "All the rows in CAFE_species_gene have a one-to-many relationship for cafe_gene_family_id";

  return is_one_to_many($self->dba->dbc, $table, $column, $desc);
}
|
||
1; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
=head1 LICENSE | ||
Copyright [2018-2019] EMBL-European Bioinformatics Institute | ||
Licensed under the Apache License, Version 2.0 (the 'License'); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an 'AS IS' BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
=cut | ||
|
||
package Bio::EnsEMBL::DataCheck::Checks::CheckComparaStableIDs; | ||
|
||
use warnings; | ||
use strict; | ||
|
||
use Moose; | ||
use Test::More; | ||
use Bio::EnsEMBL::DataCheck::Test::DataCheck; | ||
|
||
extends 'Bio::EnsEMBL::DataCheck::DbCheck'; | ||
|
||
# Metadata constants for this check. Presumably consumed by the DbCheck
# parent class -- confirm against Bio::EnsEMBL::DataCheck::DbCheck.
use constant {
  NAME           => 'CheckComparaStableIDs',
  DESCRIPTION    => 'gene trees in gene_tree_root and family all have stable_ids generated',
  GROUPS         => ['compara', 'compara_families', 'compara_protein_trees'],
  DATACHECK_TYPE => 'critical',
  # Declared for consistency with the other compara checks; the tables
  # queried by tests() (family, gene_tree_root) are compara tables.
  DB_TYPES       => ['compara'],
  TABLES         => ['family', 'gene_tree_root']
};
|
||
# Runs two row-count checks for missing stable IDs: one over the family
# table, one over the default protein gene trees in gene_tree_root. Each
# query selects rows with a NULL stable_id and is handed to is_rows_zero.
sub tests {
  my $self = shift;
  my $dba  = $self->dba;

  # Families with no stable_id.
  my $family_desc = "There are no NULL stable_ids in family";
  my $family_sql  = q{
    SELECT *
      FROM family
    WHERE stable_id IS NULL
  };
  is_rows_zero($dba, $family_sql, $family_desc);

  # Default-clusterset protein trees with no stable_id.
  my $tree_desc = "There are no NULL stable_ids for gene trees in gene_tree_root";
  my $tree_sql  = q{
    SELECT * FROM gene_tree_root
    WHERE member_type = 'protein'
      AND tree_type = 'tree'
      AND clusterset_id='default'
      AND stable_id IS NULL
  };
  return is_rows_zero($dba, $tree_sql, $tree_desc);
}
|
||
1; | ||
|
73 changes: 73 additions & 0 deletions
73
lib/Bio/EnsEMBL/DataCheck/Checks/CheckConservationScore.pm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
=head1 LICENSE | ||
Copyright [2018-2019] EMBL-European Bioinformatics Institute | ||
Licensed under the Apache License, Version 2.0 (the 'License'); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an 'AS IS' BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
=cut | ||
|
||
package Bio::EnsEMBL::DataCheck::Checks::CheckConservationScore; | ||
|
||
use warnings; | ||
use strict; | ||
|
||
use Moose; | ||
use Test::More; | ||
use Bio::EnsEMBL::DataCheck::Test::DataCheck; | ||
|
||
extends 'Bio::EnsEMBL::DataCheck::DbCheck'; | ||
|
||
# Metadata constants for this check. Presumably consumed by the DbCheck
# parent class -- confirm against Bio::EnsEMBL::DataCheck::DbCheck.
# NOTE(review): tests() describes its targets as multiple-alignment MLSSs,
# yet the only alignment group listed here is compara_pairwise_alignments;
# confirm that grouping is intended.
use constant {
  NAME           => 'CheckConservationScore',
  DESCRIPTION    => 'The MLSS for GERP_CONSERVATION_SCORE should have conservation score entries',
  GROUPS         => [qw(compara compara_pairwise_alignments)],
  DATACHECK_TYPE => 'critical',
  DB_TYPES       => [qw(compara)],
  TABLES         => [qw(
    conservation_score
    genomic_align_block
    method_link
    method_link_species_set
    method_link_species_set_tag
  )],
};
|
||
# For every method_link_species_set of type GERP_CONSERVATION_SCORE:
#   1. assert that an msa_mlss_id tag exists for it, and
#   2. assert that conservation_score rows exist for the genomic align
#      blocks of the MLSS that tag points to.
# A missing tag is reported as a failed test (and the dependent check is
# skipped for that MLSS) rather than aborting the whole datacheck.
sub tests {
  my ($self) = @_;
  my $dba    = $self->dba;
  my $helper = $dba->dbc->sql_helper;

  my $mlss_adaptor = $dba->get_MethodLinkSpeciesSetAdaptor;
  my $gerp_mlsses  = $mlss_adaptor->fetch_all_by_method_link_type("GERP_CONSERVATION_SCORE");

  # The MLSS id is bound as a parameter instead of being interpolated.
  my $tag_sql = q/
    SELECT value
      FROM method_link_species_set_tag
    WHERE tag = 'msa_mlss_id'
      AND method_link_species_set_id = ?
  /;

  foreach my $mlss ( @$gerp_mlsses ) {
    my $mlss_name = $mlss->name;
    my $mlss_id   = $mlss->dbID;

    my $desc_1 = "There is an msa_mlss_id tag for $mlss_name";
    # -NO_ERROR makes the helper return undef instead of throwing when the
    # query does not yield a result, so the absence can be reported below.
    my $msa_mlss_id = $helper->execute_single_result(
      -SQL      => $tag_sql,
      -PARAMS   => [$mlss_id],
      -NO_ERROR => 1,
    );
    ok(defined $msa_mlss_id, $desc_1);

    # Without the tag there is no alignment MLSS to look scores up for.
    next unless defined $msa_mlss_id;

    my $sql_2 = qq/
      SELECT COUNT(*)
        FROM genomic_align_block
        JOIN conservation_score
          USING (genomic_align_block_id)
      WHERE method_link_species_set_id = $msa_mlss_id
    /;

    my $desc_2 = "There are conservation scores for multiple alignment mlss_id $msa_mlss_id in $mlss_name";
    is_rows_nonzero($dba, $sql_2, $desc_2);
  }
}
|
||
1; | ||
|
50 changes: 50 additions & 0 deletions
50
lib/Bio/EnsEMBL/DataCheck/Checks/CheckConstrainedElementTable.pm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
=head1 LICENSE | ||
Copyright [2018-2019] EMBL-European Bioinformatics Institute | ||
Licensed under the Apache License, Version 2.0 (the 'License'); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an 'AS IS' BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
=cut | ||
|
||
package Bio::EnsEMBL::DataCheck::Checks::CheckConstrainedElementTable; | ||
|
||
use warnings; | ||
use strict; | ||
|
||
use Moose; | ||
use Test::More; | ||
use Bio::EnsEMBL::DataCheck::Test::DataCheck; | ||
|
||
extends 'Bio::EnsEMBL::DataCheck::DbCheck'; | ||
|
||
# Metadata constants for this check. Presumably consumed by the DbCheck
# parent class -- confirm against Bio::EnsEMBL::DataCheck::DbCheck.
use constant {
  NAME           => 'CheckConstrainedElementTable',
  DESCRIPTION    => 'Each row should show a one-to-many relationship',
  GROUPS         => ['compara', 'compara_multiple_alignments'],
  DATACHECK_TYPE => 'critical',
  DB_TYPES       => ['compara'],
  # Singular, matching the table name that tests() actually queries.
  TABLES         => ['constrained_element']
};
|
||
# Runs a single is_one_to_many check on the constrained_element_id column
# of the constrained_element table, using the check's database connection.
sub tests {
  my $self = shift;

  my $table  = "constrained_element";
  my $column = "constrained_element_id";
  my $desc   = "All the rows in constrained_element have a one-to-many relationship for constrained_element_id";

  return is_one_to_many($self->dba->dbc, $table, $column, $desc);
}
|
||
1; | ||
|
57 changes: 57 additions & 0 deletions
57
lib/Bio/EnsEMBL/DataCheck/Checks/CheckDuplicatedTaxaNames.pm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
=head1 LICENSE | ||
Copyright [2018-2019] EMBL-European Bioinformatics Institute | ||
Licensed under the Apache License, Version 2.0 (the 'License'); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an 'AS IS' BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
=cut | ||
|
||
package Bio::EnsEMBL::DataCheck::Checks::CheckDuplicatedTaxaNames; | ||
|
||
use warnings; | ||
use strict; | ||
|
||
use Moose; | ||
use Test::More; | ||
use Bio::EnsEMBL::DataCheck::Test::DataCheck; | ||
|
||
extends 'Bio::EnsEMBL::DataCheck::DbCheck'; | ||
|
||
# Metadata constants for this check. Presumably consumed by the DbCheck
# parent class -- confirm against Bio::EnsEMBL::DataCheck::DbCheck.
use constant {
  NAME           => 'CheckDuplicatedTaxaNames',
  DESCRIPTION    => 'Check that the ncbi_taxa_name contains only unique rows',
  GROUPS         => [qw(compara)],
  DATACHECK_TYPE => 'critical',
  DB_TYPES       => [qw(compara)],
  TABLES         => [qw(ncbi_taxa_name)],
};
|
||
# Runs a single row-count check on ncbi_taxa_name: the query groups rows by
# (taxon_id, name, name_class) and selects every group that occurs more than
# once, then is_rows_zero is given the query and its description.
sub tests {
  my ($self) = @_;
  my $dbc = $self->dba->dbc;

  # Non-interpolating quote (nothing is interpolated here) and no trailing
  # statement terminator, in line with the SQL used by the sibling checks.
  my $sql = q/
    SELECT taxon_id, name, name_class, count(*)
      FROM ncbi_taxa_name
    GROUP BY taxon_id, name, name_class
    HAVING count(*) > 1
  /;

  my $desc = "All the rows in ncbi_taxa_name are unique";

  is_rows_zero($dbc, $sql, $desc);
}
|
||
1; | ||
|
Oops, something went wrong.