Skip to content

Commit

Permalink
Merge pull request #305 from Ensembl/feature/non_core_dc_pipeline
Browse files (browse the repository at this point in the history)
Handful of fixes and new "core_sync" group
  • Loading branch information
james-monkeyshines committed Sep 18, 2020
2 parents 5b1ce5a + 51acb40 commit 0714850
Show file tree
Hide file tree
Showing 10 changed files with 43 additions and 16 deletions.
2 changes: 2 additions & 0 deletions lib/Bio/EnsEMBL/DataCheck/Checks/CompareMetaKeys.pm
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ sub tests {
my $desc = "Meta key '$geneset_meta_key' is different between $compare_dbs";
isnt($geneset, $old_geneset, $desc);
}

$old_dba->dbc->disconnect_if_idle();
}
}

Expand Down
2 changes: 1 addition & 1 deletion lib/Bio/EnsEMBL/DataCheck/Checks/CoreTables.pm
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ extends 'Bio::EnsEMBL::DataCheck::DbCheck';
use constant {
NAME => 'CoreTables',
DESCRIPTION => 'Requisite core-like tables are identical to those in the core database',
GROUPS => ['corelike'],
GROUPS => ['core_sync', 'corelike'],
DB_TYPES => ['cdna', 'otherfeatures', 'rnaseq'],
TABLES => ['assembly', 'coord_system', 'seq_region'],
FORCE => 1
Expand Down
2 changes: 1 addition & 1 deletion lib/Bio/EnsEMBL/DataCheck/Checks/DNAFragCore.pm
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ extends 'Bio::EnsEMBL::DataCheck::DbCheck';
use constant {
NAME => 'DNAFragCore',
DESCRIPTION => 'Top-level sequences in the core database match dnafrags in compara database',
GROUPS => ['compara', 'compara_genome_alignments', 'compara_gene_trees', 'compara_syntenies', 'compara_master'],
GROUPS => ['compara', 'compara_gene_trees', 'compara_genome_alignments', 'compara_master', 'compara_syntenies', 'core_sync'],
DATACHECK_TYPE => 'critical',
DB_TYPES => ['compara'],
TABLES => ['dnafrag', 'genome_db']
Expand Down
2 changes: 1 addition & 1 deletion lib/Bio/EnsEMBL/DataCheck/Checks/ForeignKeysMultiDB.pm
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ extends 'Bio::EnsEMBL::DataCheck::DbCheck';
use constant {
NAME => 'ForeignKeysMultiDB',
DESCRIPTION => 'Foreign key relationships between tables from different databases are not violated',
GROUPS => ['funcgen', 'schema', 'variation'],
GROUPS => ['core_sync', 'funcgen', 'schema', 'variation'],
DB_TYPES => ['funcgen', 'variation'],
FORCE => 1
};
Expand Down
2 changes: 1 addition & 1 deletion lib/Bio/EnsEMBL/DataCheck/Checks/GenomeDBCore.pm
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ extends 'Bio::EnsEMBL::DataCheck::DbCheck';
use constant {
NAME => 'GenomeDBCore',
DESCRIPTION => 'Species, assembly, and geneset metadata are the same in core and compara databases',
GROUPS => ['compara', 'compara_genome_alignments', 'compara_gene_trees', 'compara_syntenies', 'compara_master'],
GROUPS => ['compara', 'compara_gene_trees', 'compara_genome_alignments', 'compara_master', 'compara_syntenies', 'core_sync'],
DATACHECK_TYPE => 'critical',
DB_TYPES => ['compara'],
TABLES => ['genome_db'],
Expand Down
2 changes: 1 addition & 1 deletion lib/Bio/EnsEMBL/DataCheck/Checks/MetaCoord.pm
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ extends 'Bio::EnsEMBL::DataCheck::DbCheck';
use constant {
NAME => 'MetaCoord',
DESCRIPTION => 'The meta_coord table is correctly populated',
GROUPS => ['annotation', 'core', 'brc4_core', 'corelike', 'funcgen', 'geneset', 'protein_features', 'variation'],
GROUPS => ['annotation', 'brc4_core', 'core', 'core_sync', 'corelike', 'funcgen', 'geneset', 'protein_features', 'variation'],
DB_TYPES => ['cdna', 'core', 'funcgen', 'otherfeatures', 'rnaseq', 'variation'],
FORCE => 1
};
Expand Down
2 changes: 1 addition & 1 deletion lib/Bio/EnsEMBL/DataCheck/Checks/MetaKeyConsistent.pm
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ extends 'Bio::EnsEMBL::DataCheck::DbCheck';
use constant {
NAME => 'MetaKeyConsistent',
DESCRIPTION => 'Assembly and species meta keys are identical between core and core-like databases',
GROUPS => ['corelike', 'meta'],
GROUPS => ['core_sync', 'corelike', 'meta'],
DB_TYPES => ['cdna', 'otherfeatures', 'rnaseq'],
TABLES => ['meta'],
FORCE => 1
Expand Down
24 changes: 20 additions & 4 deletions lib/Bio/EnsEMBL/DataCheck/Manager.pm
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,11 @@ sub load_config {
die "Config file does not exist" unless -e $self->config_file;

my $json = path($self->config_file)->slurp;
my %config = %{ JSON->new->decode($json) };
my %config;
eval {
%config = %{ JSON->new->decode($json) };
};
die $self->config_file . " is not a valid json file:\n$@" if $@;

foreach my $key (keys %{$config{'datacheck_params'}}) {
if (!exists $params{$key}) {
Expand Down Expand Up @@ -364,9 +368,21 @@ sub read_history {
my %history = ();

if (-s $self->history_file) {
# slurp gets an exclusive lock on the file before reading it.
my $json = path($self->history_file)->slurp;
%history = %{ JSON->new->decode($json) };
# 'slurp' takes a lock on the file before reading it (a shared lock,
# according to the Path::Tiny documentation). Even so, we sometimes
# get flock problems if a bunch of datachecks all complete very
# quickly, so pause briefly before reading to reduce contention.
# That doesn't always work, so retry once if the first attempt fails.
eval {
sleep(2);
my $json = path($self->history_file)->slurp;
%history = %{ JSON->new->decode($json) };
};
if ($@) {
sleep(2);
my $json = path($self->history_file)->slurp;
%history = %{ JSON->new->decode($json) };
}

foreach (@$datachecks) {
my $name = $_->name;
Expand Down
5 changes: 4 additions & 1 deletion lib/Bio/EnsEMBL/DataCheck/Pipeline/DbDataChecks_conf.pm
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,10 @@ sub pipeline_analyses {
ELSE
['RunDataChecks']
),
'A->1' => ['DataCheckResults'],
'A->1' =>
WHEN('scalar @{#all_dbs#}' =>
['DataCheckResults']
),

},
-rc_name => 'default',
Expand Down
16 changes: 11 additions & 5 deletions lib/Bio/EnsEMBL/DataCheck/index.json
Original file line number Diff line number Diff line change
Expand Up @@ -972,6 +972,7 @@
"datacheck_type" : "critical",
"description" : "Requisite core-like tables are identical to those in the core database",
"groups" : [
"core_sync",
"corelike"
],
"name" : "CoreTables",
Expand Down Expand Up @@ -1001,10 +1002,11 @@
"description" : "Top-level sequences in the core database match dnafrags in compara database",
"groups" : [
"compara",
"compara_genome_alignments",
"compara_gene_trees",
"compara_genome_alignments",
"compara_master",
"compara_syntenies",
"compara_master"
"core_sync"
],
"name" : "DNAFragCore",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::DNAFragCore"
Expand Down Expand Up @@ -1289,6 +1291,7 @@
"datacheck_type" : "critical",
"description" : "Foreign key relationships between tables from different databases are not violated",
"groups" : [
"core_sync",
"funcgen",
"schema",
"variation"
Expand Down Expand Up @@ -1408,10 +1411,11 @@
"description" : "Species, assembly, and geneset metadata are the same in core and compara databases",
"groups" : [
"compara",
"compara_genome_alignments",
"compara_gene_trees",
"compara_genome_alignments",
"compara_master",
"compara_syntenies",
"compara_master"
"core_sync"
],
"name" : "GenomeDBCore",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::GenomeDBCore"
Expand Down Expand Up @@ -1610,8 +1614,9 @@
"description" : "The meta_coord table is correctly populated",
"groups" : [
"annotation",
"core",
"brc4_core",
"core",
"core_sync",
"corelike",
"funcgen",
"geneset",
Expand Down Expand Up @@ -1670,6 +1675,7 @@
"datacheck_type" : "critical",
"description" : "Assembly and species meta keys are identical between core and core-like databases",
"groups" : [
"core_sync",
"corelike",
"meta"
],
Expand Down

0 comments on commit 0714850

Please sign in to comment.