Skip to content

Commit

Permalink
Merge 0ae92a1 into 783236e
Browse files Browse the repository at this point in the history
  • Loading branch information
CristiGuijarro committed Jan 21, 2020
2 parents 783236e + 0ae92a1 commit b2c8643
Show file tree
Hide file tree
Showing 23 changed files with 1,703 additions and 1 deletion.
62 changes: 62 additions & 0 deletions lib/Bio/EnsEMBL/DataCheck/Checks/AlignmentCoordinates.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
=head1 LICENSE
Copyright [2018-2019] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the 'License');
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an 'AS IS' BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=cut

package Bio::EnsEMBL::DataCheck::Checks::AlignmentCoordinates;

use warnings;
use strict;

use Moose;
use Test::More;
use Bio::EnsEMBL::DataCheck::Test::DataCheck;

extends 'Bio::EnsEMBL::DataCheck::DbCheck';

# Metadata constants describing this datacheck.
use constant {
  NAME           => 'AlignmentCoordinates',
  DESCRIPTION    => 'Alignment coordinates are within the length of their dnafrag',
  GROUPS         => [qw(compara compara_pairwise_alignments compara_multiple_alignments)],
  DATACHECK_TYPE => 'critical',
  DB_TYPES       => [qw(compara)],
  TABLES         => [qw(dnafrag genomic_align)],
};

# Runs two row-count assertions on genomic_align via is_rows_zero:
#   1. no alignment has a dnafrag_start below 1;
#   2. no alignment has a dnafrag_end beyond the length of its dnafrag.
sub tests {
  my ($self) = @_;

  # Alignments whose start coordinate is below 1.
  my $bad_start_sql = q{
SELECT *
FROM genomic_align
WHERE dnafrag_start < 1
};
  my $bad_start_desc = "All dnafrag_starts are >= 1";
  is_rows_zero($self->dba, $bad_start_sql, $bad_start_desc);

  # Alignments whose end coordinate exceeds the joined dnafrag's length.
  my $bad_end_sql = q{
SELECT *
FROM genomic_align ga
JOIN dnafrag df
USING (dnafrag_id)
WHERE ga.dnafrag_end > length
};
  my $bad_end_desc = "Alignment coordinates are within the length of their dnafrag";
  is_rows_zero($self->dba, $bad_end_sql, $bad_end_desc);
}

1;

49 changes: 49 additions & 0 deletions lib/Bio/EnsEMBL/DataCheck/Checks/CheckCAFETable.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
=head1 LICENSE
Copyright [2018-2019] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the 'License');
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an 'AS IS' BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=cut

package Bio::EnsEMBL::DataCheck::Checks::CheckCAFETable;

use warnings;
use strict;

use Moose;
use Test::More;
use Bio::EnsEMBL::DataCheck::Test::DataCheck;

extends 'Bio::EnsEMBL::DataCheck::DbCheck';

# Metadata constants describing this datacheck.
use constant {
  NAME           => 'CheckCAFETable',
  DESCRIPTION    => 'Each row should show a one-to-many relationship',
  GROUPS         => ['compara', 'compara_protein_trees'],
  DATACHECK_TYPE => 'critical',
  DB_TYPES       => ['compara'],
  # Spelled exactly as the table name passed to is_one_to_many in tests();
  # MySQL table names are case-sensitive on most platforms, so the
  # lower-cased 'cafe_species_gene' would not refer to the same table.
  TABLES         => ['CAFE_species_gene']
};

# Asserts, via is_one_to_many, the one-to-many relationship on
# cafe_gene_family_id in the CAFE_species_gene table.
sub tests {
  my ($self) = @_;

  my $dbc       = $self->dba->dbc;
  my $test_name = "All the rows in CAFE_species_gene have a one-to-many relationship for cafe_gene_family_id";

  is_one_to_many($dbc, "CAFE_species_gene", "cafe_gene_family_id", $test_name);
}

1;

61 changes: 61 additions & 0 deletions lib/Bio/EnsEMBL/DataCheck/Checks/CheckComparaStableIDs.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
=head1 LICENSE
Copyright [2018-2019] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the 'License');
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an 'AS IS' BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=cut

package Bio::EnsEMBL::DataCheck::Checks::CheckComparaStableIDs;

use warnings;
use strict;

use Moose;
use Test::More;
use Bio::EnsEMBL::DataCheck::Test::DataCheck;

extends 'Bio::EnsEMBL::DataCheck::DbCheck';

# Metadata constants describing this datacheck.
use constant {
  NAME           => 'CheckComparaStableIDs',
  DESCRIPTION    => 'gene trees in gene_tree_root and family all have stable_ids generated',
  GROUPS         => ['compara', 'compara_families', 'compara_protein_trees'],
  DATACHECK_TYPE => 'critical',
  # Restrict the check to compara databases, in line with its compara-only
  # GROUPS and the tables it queries (family, gene_tree_root).
  DB_TYPES       => ['compara'],
  TABLES         => ['family', 'gene_tree_root']
};

# Runs two row-count assertions via is_rows_zero:
#   1. no row in family has a NULL stable_id;
#   2. no default-clusterset protein tree in gene_tree_root has a NULL
#      stable_id.
sub tests {
  my ($self) = @_;

  # Families without a stable_id.
  my $family_sql = q{
SELECT *
FROM family
WHERE stable_id IS NULL
};
  my $family_desc = "There are no NULL stable_ids in family";
  is_rows_zero($self->dba, $family_sql, $family_desc);

  # Default protein trees without a stable_id.
  my $tree_sql = q{
SELECT * FROM gene_tree_root
WHERE member_type = 'protein'
AND tree_type = 'tree'
AND clusterset_id='default'
AND stable_id IS NULL
};
  my $tree_desc = "There are no NULL stable_ids for gene trees in gene_tree_root";
  is_rows_zero($self->dba, $tree_sql, $tree_desc);
}

1;

73 changes: 73 additions & 0 deletions lib/Bio/EnsEMBL/DataCheck/Checks/CheckConservationScore.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
=head1 LICENSE
Copyright [2018-2019] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the 'License');
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an 'AS IS' BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=cut

package Bio::EnsEMBL::DataCheck::Checks::CheckConservationScore;

use warnings;
use strict;

use Moose;
use Test::More;
use Bio::EnsEMBL::DataCheck::Test::DataCheck;

extends 'Bio::EnsEMBL::DataCheck::DbCheck';

# Metadata constants describing this datacheck.
# NOTE(review): tests() looks up scores through the msa_mlss_id (multiple
# alignment) tag, so 'compara_multiple_alignments' may be the intended group
# rather than 'compara_pairwise_alignments' - confirm before changing.
use constant {
  NAME           => 'CheckConservationScore',
  DESCRIPTION    => 'The MLSS for GERP_CONSERVATION_SCORE should have conservation score entries',
  DATACHECK_TYPE => 'critical',
  GROUPS         => [qw(compara compara_pairwise_alignments)],
  DB_TYPES       => [qw(compara)],
  TABLES         => [
    qw(
      conservation_score
      genomic_align_block
      method_link
      method_link_species_set
      method_link_species_set_tag
    )
  ],
};

# For every GERP_CONSERVATION_SCORE MLSS returned by the MLSS adaptor:
#   1. assert that it carries an msa_mlss_id tag in
#      method_link_species_set_tag;
#   2. assert that conservation_score has rows for genomic_align_blocks of
#      the MLSS id stored in that tag.
# When the tag is missing, test 1 fails and test 2 is skipped for that MLSS,
# so one broken MLSS does not abort the checks for the others.
sub tests {
  my ($self) = @_;

  my $dba       = $self->dba;
  my $helper    = $dba->dbc->sql_helper;
  my $mlss_adap = $dba->get_MethodLinkSpeciesSetAdaptor;
  my $mlsses    = $mlss_adap->fetch_all_by_method_link_type("GERP_CONSERVATION_SCORE");

  # The MLSS id is bound as a parameter rather than interpolated.
  my $tag_sql = q/
    SELECT value
    FROM method_link_species_set_tag
    WHERE tag = "msa_mlss_id"
    AND method_link_species_set_id = ?
  /;

  foreach my $mlss ( @$mlsses ) {
    my $mlss_name = $mlss->name;
    my $mlss_id   = $mlss->dbID;

    # -NO_ERROR stops the helper from throwing when the tag row is absent;
    # the absence is reported as a test failure below instead.
    my $msa_mlss_id = $helper->execute_single_result(
      -SQL      => $tag_sql,
      -PARAMS   => [$mlss_id],
      -NO_ERROR => 1,
    );

    my $desc_1  = "There is an msa_mlss_id tag for $mlss_name";
    my $has_tag = ok(defined $msa_mlss_id, $desc_1);

    # Without the tag there is no alignment MLSS id to build the next query.
    next unless $has_tag;

    my $sql_2 = qq/
      SELECT COUNT(*)
      FROM genomic_align_block
      JOIN conservation_score
      USING (genomic_align_block_id)
      WHERE method_link_species_set_id = $msa_mlss_id
    /;

    my $desc_2 = "There are conservation scores for multiple alignment mlss_id $msa_mlss_id in $mlss_name";
    is_rows_nonzero($dba, $sql_2, $desc_2);
  }
}

1;

50 changes: 50 additions & 0 deletions lib/Bio/EnsEMBL/DataCheck/Checks/CheckConstrainedElementTable.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
=head1 LICENSE
Copyright [2018-2019] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the 'License');
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an 'AS IS' BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=cut

package Bio::EnsEMBL::DataCheck::Checks::CheckConstrainedElementTable;

use warnings;
use strict;

use Moose;
use Test::More;
use Bio::EnsEMBL::DataCheck::Test::DataCheck;

extends 'Bio::EnsEMBL::DataCheck::DbCheck';

# Metadata constants describing this datacheck.
use constant {
  NAME           => 'CheckConstrainedElementTable',
  DESCRIPTION    => 'Each row should show a one-to-many relationship',
  GROUPS         => ['compara', 'compara_multiple_alignments'],
  DATACHECK_TYPE => 'critical',
  DB_TYPES       => ['compara'],
  # Singular, matching the table name passed to is_one_to_many in tests();
  # the previous value 'constrained_elements' did not match it.
  TABLES         => ['constrained_element']
};

# Asserts, via is_one_to_many, the one-to-many relationship on
# constrained_element_id in the constrained_element table.
sub tests {
  my ($self) = @_;

  my $dbc       = $self->dba->dbc;
  my $test_name = "All the rows in constrained_element have a one-to-many relationship for constrained_element_id";

  is_one_to_many($dbc, "constrained_element", "constrained_element_id", $test_name);
}

1;

57 changes: 57 additions & 0 deletions lib/Bio/EnsEMBL/DataCheck/Checks/CheckDuplicatedTaxaNames.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
=head1 LICENSE
Copyright [2018-2019] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the 'License');
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an 'AS IS' BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=cut

package Bio::EnsEMBL::DataCheck::Checks::CheckDuplicatedTaxaNames;

use warnings;
use strict;

use Moose;
use Test::More;
use Bio::EnsEMBL::DataCheck::Test::DataCheck;

extends 'Bio::EnsEMBL::DataCheck::DbCheck';

# Metadata constants describing this datacheck.
use constant {
  NAME           => 'CheckDuplicatedTaxaNames',
  DESCRIPTION    => 'Check that the ncbi_taxa_name contains only unique rows',
  DATACHECK_TYPE => 'critical',
  GROUPS         => [qw(compara)],
  DB_TYPES       => [qw(compara)],
  TABLES         => [qw(ncbi_taxa_name)],
};

# Asserts, via is_rows_zero, that no (taxon_id, name, name_class)
# combination occurs more than once in ncbi_taxa_name.
sub tests {
  my ($self) = @_;

  my $connection = $self->dba->dbc;
  my $test_name  = "All the rows in ncbi_taxa_name are unique";

  # Groups with a count above 1 are duplicated rows.
  my $duplicates_sql = q{
SELECT taxon_id, name, name_class, count(*)
FROM ncbi_taxa_name
GROUP BY taxon_id, name, name_class
HAVING count(*) > 1;
};

  is_rows_zero($connection, $duplicates_sql, $test_name);
}

1;

0 comments on commit b2c8643

Please sign in to comment.