Skip to content

Commit

Permalink
Merge pull request #272 from Ensembl/feature/pipeline_xref_datacheck
Browse files Browse the repository at this point in the history
Create new datacheck groups for xref-related pipelines.
  • Loading branch information
james-monkeyshines committed Jul 10, 2020
2 parents 8326bae + ea3cc84 commit 291776e
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 17 deletions.
2 changes: 1 addition & 1 deletion lib/Bio/EnsEMBL/DataCheck/Checks/CompareGOXref.pm
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ extends 'Bio::EnsEMBL::DataCheck::DbCheck';
use constant {
NAME => 'CompareGOXref',
DESCRIPTION => 'Compare GO xref counts between two databases, categorised by source',
GROUPS => ['compare_core', 'xref'],
GROUPS => ['compare_core', 'xref', 'xref_go_projection'],
DATACHECK_TYPE => 'advisory',
DB_TYPES => ['core']
};
Expand Down
4 changes: 2 additions & 2 deletions lib/Bio/EnsEMBL/DataCheck/Checks/CompareProjectedGOXrefs.pm
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ extends 'Bio::EnsEMBL::DataCheck::DbCheck';
use constant {
NAME => 'CompareProjectedGOXrefs',
DESCRIPTION => 'Compare GO xref counts between two databases, categorised by source coming from the info_type',
GROUPS => ['compare_core', 'xref'],
GROUPS => ['compare_core', 'xref', 'xref_go_projection'],
DATACHECK_TYPE => 'advisory',
DB_TYPES => ['core'],
TABLES => ['xref']
Expand All @@ -52,7 +52,7 @@ sub go_xref_counts {
my ($self, $old_dba) = @_;

my $minimum_count = 500;
my $threshold = 0.80;
my $threshold = 0.66;

my $desc = "Consistent GO xref counts between ".
$self->dba->dbc->dbname.
Expand Down
4 changes: 2 additions & 2 deletions lib/Bio/EnsEMBL/DataCheck/Checks/CompareProjectedGeneNames.pm
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ extends 'Bio::EnsEMBL::DataCheck::DbCheck';
use constant {
NAME => 'CompareProjectedGeneNames',
DESCRIPTION => 'Compare Projected Gene Name counts between two databases',
GROUPS => ['compare_core', 'xref'],
GROUPS => ['compare_core', 'xref', 'xref_name_projection'],
DATACHECK_TYPE => 'advisory',
DB_TYPES => ['core'],
TABLES => ['xref','gene','object_xref','seq_region','coord_system']
Expand All @@ -51,7 +51,7 @@ sub tests {
sub projected_gene_name_counts {
my ($self, $old_dba) = @_;

my $threshold = 0.80;
my $threshold = 0.66;

my $desc = "Checking Projected Gene Names between ".
$self->dba->dbc->dbname.
Expand Down
6 changes: 3 additions & 3 deletions lib/Bio/EnsEMBL/DataCheck/Checks/CompareProjectedSynonyms.pm
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ extends 'Bio::EnsEMBL::DataCheck::DbCheck';
use constant {
NAME => 'CompareProjectedSynonyms',
DESCRIPTION => 'Compare Projected Synonyms counts between two databases, categorised by db_name coming from the external_db',
GROUPS => ['compare_core', 'xref'],
GROUPS => ['compare_core', 'xref', 'xref_name_projection'],
DATACHECK_TYPE => 'advisory',
DB_TYPES => ['core'],
TABLES => ['xref','external_db','external_synonym','object_xref']
Expand All @@ -51,7 +51,7 @@ sub tests {
sub projected_synonyms_counts {
my ($self, $old_dba) = @_;

my $threshold = 0.80;
my $threshold = 0.66;

my $desc = "Checking Projected Synonyms between ".
$self->dba->dbc->dbname.
Expand All @@ -77,4 +77,4 @@ sub projected_synonyms_counts {
my $sql2 = sprintf($sql, $old_dba->species_id);
row_subtotals($self->dba, $old_dba, $sql1, $sql2, $threshold, $desc);
}
1;
1;
2 changes: 1 addition & 1 deletion lib/Bio/EnsEMBL/DataCheck/Checks/CompareXref.pm
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ sub xref_counts {
/;
my $sql1 = sprintf($sql, $self->dba->species_id);
my $sql2 = sprintf($sql, $old_dba->species_id);
row_subtotals($self->dba, $old_dba, $sql1, $sql2, 0.70, $desc);
row_subtotals($self->dba, $old_dba, $sql1, $sql2, 0.66, $desc);
}

1;
10 changes: 9 additions & 1 deletion lib/Bio/EnsEMBL/DataCheck/Checks/DisplayXrefExists.pm
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,19 @@ extends 'Bio::EnsEMBL::DataCheck::DbCheck';
use constant {
NAME => 'DisplayXrefExists',
DESCRIPTION => 'At least one gene name exists',
GROUPS => ['core', 'xref'],
GROUPS => ['core', 'xref', 'xref_name_projection'],
DATACHECK_TYPE => 'advisory',
TABLES => ['coord_system', 'gene', 'seq_region', 'transcript', 'xref'],
};

# Decide whether this datacheck should be skipped for the current database.
#
# Returns the list (1, $reason) when the division reported by
# $self->dba->get_division is anything other than 'vertebrates'.
# Returns an empty list (false in boolean and list-assignment use) otherwise,
# meaning the tests should run.
# NOTE(review): presumably the caller unpacks this as ($skip, $reason);
# confirm against the base class before relying on scalar-context use.
sub skip_tests {
my ($self) = @_;

if ( $self->dba->get_division ne 'vertebrates' ) {
return( 1, "Display xrefs are not typically expected for non-vertebrates" );
}

# Explicit "do not skip" result, rather than relying on the implicit value
# left behind when the if-condition above is false.
return;
}

sub tests {
my ($self) = @_;

Expand Down
2 changes: 1 addition & 1 deletion lib/Bio/EnsEMBL/DataCheck/Checks/HGNCMultipleGenes.pm
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ extends 'Bio::EnsEMBL::DataCheck::DbCheck';
use constant {
NAME => 'HGNCMultipleGenes',
DESCRIPTION => 'HGNC-derived gene names are not given to multiple genes',
GROUPS => ['core', 'xref'],
GROUPS => ['core', 'xref', 'xref_name_projection'],
DATACHECK_TYPE => 'advisory',
TABLES => ['external_db', 'gene', 'xref']
};
Expand Down
18 changes: 12 additions & 6 deletions lib/Bio/EnsEMBL/DataCheck/index.json
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,8 @@
"description" : "Compare GO xref counts between two databases, categorised by source",
"groups" : [
"compare_core",
"xref"
"xref",
"xref_go_projection"
],
"name" : "CompareGOXref",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::CompareGOXref"
Expand Down Expand Up @@ -658,7 +659,8 @@
"description" : "Compare GO xref counts between two databases, categorised by source coming from the info_type",
"groups" : [
"compare_core",
"xref"
"xref",
"xref_go_projection"
],
"name" : "CompareProjectedGOXrefs",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::CompareProjectedGOXrefs"
Expand All @@ -668,7 +670,8 @@
"description" : "Compare Projected Gene Name counts between two databases",
"groups" : [
"compare_core",
"xref"
"xref",
"xref_name_projection"
],
"name" : "CompareProjectedGeneNames",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::CompareProjectedGeneNames"
Expand All @@ -678,7 +681,8 @@
"description" : "Compare Projected Synonyms counts between two databases, categorised by db_name coming from the external_db",
"groups" : [
"compare_core",
"xref"
"xref",
"xref_name_projection"
],
"name" : "CompareProjectedSynonyms",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::CompareProjectedSynonyms"
Expand Down Expand Up @@ -1002,7 +1006,8 @@
"description" : "At least one gene name exists",
"groups" : [
"core",
"xref"
"xref",
"xref_name_projection"
],
"name" : "DisplayXrefExists",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::DisplayXrefExists"
Expand Down Expand Up @@ -1339,7 +1344,8 @@
"description" : "HGNC-derived gene names are not given to multiple genes",
"groups" : [
"core",
"xref"
"xref",
"xref_name_projection"
],
"name" : "HGNCMultipleGenes",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::HGNCMultipleGenes"
Expand Down

0 comments on commit 291776e

Please sign in to comment.