-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
423 additions
and
113 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
=head1 LICENSE | ||
Copyright [2018-2019] EMBL-European Bioinformatics Institute | ||
Licensed under the Apache License, Version 2.0 (the 'License'); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an 'AS IS' BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
=cut | ||
|
||
package Bio::EnsEMBL::DataCheck::Checks::AnalysisDescription; | ||
|
||
use warnings; | ||
use strict; | ||
|
||
use Moose; | ||
use Test::More; | ||
use Bio::EnsEMBL::DataCheck::Test::DataCheck; | ||
|
||
extends 'Bio::EnsEMBL::DataCheck::DbCheck'; | ||
|
||
# Metadata constants for the AnalysisDescription datacheck.
# NOTE(review): presumably read by the Bio::EnsEMBL::DataCheck::DbCheck
# parent class to identify and filter this check - confirm there.
use constant {
    NAME        => 'AnalysisDescription',
    DESCRIPTION => 'Check that certain analyses have descriptions',
    GROUPS      => [qw/core/],
    DB_TYPES    => [qw/core/],
    TABLES      => [
        qw/analysis analysis_description gene prediction_transcript transcript/
    ],
};
|
||
# Runs one is_rows_zero() test per feature table (gene, transcript,
# prediction_transcript). Each query counts the rows of that table, for the
# current species_id, that have no matching analysis_description row.
sub tests {
    my ($self) = @_;

    my $species_id = $self->dba->species_id;

    for my $feature_table (qw/gene transcript prediction_transcript/) {
        my $test_name = "Analysis descriptions for all ${feature_table}s";

        # After the LEFT OUTER JOIN, rows without a description have a NULL
        # ad.analysis_id. The seq_region and coord_system joins are what make
        # the species_id filter available.
        my $query = qq/
            SELECT COUNT(*) FROM
              $feature_table LEFT OUTER JOIN
              analysis_description ad USING (analysis_id) INNER JOIN
              seq_region USING (seq_region_id) INNER JOIN
              coord_system USING (coord_system_id)
            WHERE
              ad.analysis_id IS NULL AND
              species_id = $species_id
        /;

        is_rows_zero($self->dba, $query, $test_name);
    }
}
|
||
1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
=head1 LICENSE | ||
Copyright [2018-2019] EMBL-European Bioinformatics Institute | ||
Licensed under the Apache License, Version 2.0 (the 'License'); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an 'AS IS' BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
=cut | ||
|
||
package Bio::EnsEMBL::DataCheck::Checks::DataFilesExist; | ||
|
||
use warnings; | ||
use strict; | ||
|
||
use File::Spec::Functions qw/catdir/; | ||
use Moose; | ||
use Test::More; | ||
use Bio::EnsEMBL::DataCheck::Test::DataCheck; | ||
|
||
extends 'Bio::EnsEMBL::DataCheck::DbCheck'; | ||
|
||
# Metadata constants for the DataFilesExist datacheck.
# NOTE(review): presumably read by the Bio::EnsEMBL::DataCheck::DbCheck
# parent class; the meaning of FORCE is not visible here - confirm there.
use constant {
    NAME        => 'DataFilesExist',
    DESCRIPTION => 'Check that data files are defined in the database, and that the files exist',
    GROUPS      => [qw/funcgen/],
    DB_TYPES    => [qw/funcgen/],
    FORCE       => 1,
};
|
||
# Entry point for this datacheck: runs the three sub-checks in a fixed order.
# The two "is the file defined in the database" checks come first, followed
# by the on-disk existence check.
sub tests {
    my $self = shift;

    $self->alignment_has_bigwig;
    $self->segmentation_file_has_bigbed;
    $self->data_files_exist;
}
|
||
# Checks that every alignment used by a peak_calling row (as either the
# signal or the control alignment) has a bigwig_file_id pointing at a
# data_file row with table_name 'alignment' and file_type 'BIGWIG'.
# Returns whatever is_rows_zero() returns.
sub alignment_has_bigwig {
    my ($self) = @_;

    my $test_name  = 'Peak-calling alignment files are defined';
    my $diagnostic = 'Missing BIGWIG file';

    # The LEFT OUTER JOIN leaves df.data_file_id NULL for alignments with no
    # matching BIGWIG data_file; those are the rows reported.
    my $query = q{
        SELECT
          a.alignment_id,
          a.name
        FROM
          alignment a INNER JOIN
          peak_calling pc ON (
            pc.signal_alignment_id = a.alignment_id OR
            pc.control_alignment_id = a.alignment_id
          ) LEFT OUTER JOIN
          (
            SELECT data_file_id FROM data_file
            WHERE
              table_name = 'alignment' AND
              file_type = 'BIGWIG'
          ) df ON a.bigwig_file_id = df.data_file_id
        WHERE
          df.data_file_id IS NULL
    };

    return is_rows_zero($self->dba, $query, $test_name, $diagnostic);
}
|
||
# Checks that every segmentation_file belonging to a current regulatory
# build (rb.is_current = 1) has a data_file row with table_name
# 'segmentation_file' and file_type 'BIGBED'.
# Returns whatever is_rows_zero() returns.
sub segmentation_file_has_bigbed {
    my ($self) = @_;

    my $test_name  = 'Segmentation files are defined';
    my $diagnostic = 'Missing BIGBED file';

    # The LEFT OUTER JOIN leaves df.table_id NULL for segmentation files
    # with no matching BIGBED data_file; those are the rows reported.
    my $query = q{
        SELECT
          sf.segmentation_file_id,
          sf.name
        FROM
          segmentation_file sf INNER JOIN
          regulatory_build rb USING (regulatory_build_id) LEFT OUTER JOIN
          (
            SELECT table_id FROM data_file
            WHERE
              table_name = 'segmentation_file' AND
              file_type = 'BIGBED'
          ) df ON sf.segmentation_file_id = df.table_id
        WHERE
          rb.is_current = 1 AND
          df.table_id IS NULL
    };

    return is_rows_zero($self->dba, $query, $test_name, $diagnostic);
}
|
||
# Checks that every BIGWIG/BIGBED file listed in the data_file table exists
# on disk beneath the species/assembly directory. One ok() test is emitted
# per distinct data_file.table_name; it fails if any file recorded for that
# table name is missing.
sub data_files_exist {
    my ($self) = @_;

    # This path needs to be un-hardcoded from here and put into a config file...
    my $root_dir = '/nfs/panda/ensembl/production/ensemblftp/data_files/';
    my $base_dir = $self->species_assembly_path($root_dir);

    my $listing_sql = q/
        SELECT table_name, path FROM data_file
        WHERE file_type IN ('BIGWIG', 'BIGBED')
    /;
    my $rows = $self->dba->dbc->sql_helper->execute(-SQL => $listing_sql);

    my %seen_tables;
    my %missing_by_table;
    for my $row (@{$rows}) {
        # Don't need to check for an undef path value, db schema doesn't allow it.
        my ($table_name, $relative_path) = @{$row};
        $seen_tables{$table_name}++;

        my $full_path = catdir($base_dir, $relative_path);
        next if -e $full_path;

        push @{ $missing_by_table{$table_name} }, $full_path;
    }

    for my $table_name (keys %seen_tables) {
        my $test_name = "All $table_name data files exist";

        # The missing paths are collected but deliberately not printed; the
        # original diagnostic was left disabled:
        #   || diag explain $missing_by_table{$table_name};
        ok(!exists($missing_by_table{$table_name}), $test_name);
    }
}
|
||
# Builds the directory path <data_file_path>/<species>/<assembly.default>.
#   $data_file_path - root directory to which the species and assembly
#                     components are appended.
# The species comes from $self->species; the assembly name is the
# 'assembly.default' meta value read through the DNA database adaptor.
# Returns the joined path as produced by catdir().
sub species_assembly_path {
    my ($self, $data_file_path) = @_;

    my $species_name  = $self->species;
    my $assembly_name = $self->get_dna_dba
                             ->get_MetaContainer
                             ->single_value_by_key('assembly.default');

    return catdir($data_file_path, $species_name, $assembly_name);
}
|
||
1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
65 changes: 65 additions & 0 deletions
65
lib/Bio/EnsEMBL/DataCheck/Checks/FuncgenAnalysisDescription.pm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
=head1 LICENSE | ||
Copyright [2018-2019] EMBL-European Bioinformatics Institute | ||
Licensed under the Apache License, Version 2.0 (the 'License'); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an 'AS IS' BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
=cut | ||
|
||
package Bio::EnsEMBL::DataCheck::Checks::FuncgenAnalysisDescription; | ||
|
||
use warnings; | ||
use strict; | ||
|
||
use Moose; | ||
use Test::More; | ||
use Bio::EnsEMBL::DataCheck::Test::DataCheck; | ||
|
||
extends 'Bio::EnsEMBL::DataCheck::DbCheck'; | ||
|
||
# Metadata constants for the FuncgenAnalysisDescription datacheck.
# NOTE(review): presumably read by the Bio::EnsEMBL::DataCheck::DbCheck
# parent class to identify and filter this check - confirm there.
use constant {
    NAME        => 'FuncgenAnalysisDescription',
    DESCRIPTION => 'Check that certain features have descriptions and are displayable',
    GROUPS      => [qw/funcgen/],
    DB_TYPES    => [qw/funcgen/],
    TABLES      => [qw/analysis analysis_description feature_set probe_feature/],
};
|
||
# For each funcgen feature table (feature_set, probe_feature) this runs two
# is_rows_zero() tests:
#   1. no row refers to an analysis_id that lacks an analysis_description row;
#   2. no row refers to an analysis whose analysis_description row has
#      displayable = 0.
# Takes no arguments besides $self; the database adaptor is $self->dba.
sub tests {
    my ($self) = @_;

    my @tables = qw/feature_set probe_feature/;
    foreach my $table (@tables) {
        # After the LEFT OUTER JOIN, rows without a description have a NULL
        # ad.analysis_id.
        my $desc_1 = "Analysis descriptions for all ${table}s";
        my $sql_1  = qq/
            SELECT COUNT(*) FROM
              $table LEFT OUTER JOIN
              analysis_description ad USING (analysis_id)
            WHERE
              ad.analysis_id IS NULL
        /;
        is_rows_zero($self->dba, $sql_1, $desc_1);

        # The filter column is referenced as alias.column. The earlier form
        # "ad.analysis_id.displayable" is a three-part name, which is not a
        # valid reference to the displayable column of the aliased table.
        my $desc_2 = "Displayable analysis for all ${table}s";
        my $sql_2  = qq/
            SELECT COUNT(*) FROM
              $table INNER JOIN
              analysis_description ad USING (analysis_id)
            WHERE
              ad.displayable = 0
        /;
        is_rows_zero($self->dba, $sql_2, $desc_2);
    }
}
|
||
1; |
Oops, something went wrong.