Skip to content

Commit

Permalink
Health check DescriptionNewlines converted to datacheck
Browse files Browse the repository at this point in the history
  • Loading branch information
Vinay Kaikala committed Mar 17, 2020
1 parent 63d968e commit 10da1c5
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 8 deletions.
52 changes: 52 additions & 0 deletions lib/Bio/EnsEMBL/DataCheck/Checks/DescriptionNewlines.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
=head1 LICENSE

Copyright [2018-2020] EMBL-European Bioinformatics Institute

Licensed under the Apache License, Version 2.0 (the 'License');
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an 'AS IS' BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=cut

package Bio::EnsEMBL::DataCheck::Checks::DescriptionNewlines;

use warnings;
use strict;

use Moose;
use Test::More;
use Bio::EnsEMBL::DataCheck::Test::DataCheck;

extends 'Bio::EnsEMBL::DataCheck::DbCheck';

# Metadata constants describing this datacheck: its name, a one-line
# description, the groups it is listed under, its datacheck type and the
# table it reads.
# NOTE(review): presumably consumed by the Bio::EnsEMBL::DataCheck base
# classes and mirrored in index.json - confirm before changing any value.
use constant {
  NAME           => 'DescriptionNewlines',
  DESCRIPTION    => 'Check for newlines and tabs in gene descriptions',
  GROUPS         => [qw(xref)],
  DATACHECK_TYPE => 'critical',
  TABLES         => [qw(gene)],
};

# Runs the single test of this datacheck: counts the rows of the gene
# table whose description contains a newline or a tab character, and hands
# that query to is_rows_zero together with the test label.
# NOTE(review): is_rows_zero presumably comes from
# Bio::EnsEMBL::DataCheck::Test::DataCheck and passes when the count is
# zero - confirm against that module.
sub tests {
  my $self = shift;

  my $test_name = 'gene description does not contain newlines and/or tabs';

  # The WHERE line is double-quoted on purpose: Perl expands \n and \t, so
  # the statement sent to the server carries the literal control characters
  # inside the SQL string literals. The empty first and last elements
  # reproduce the leading and trailing line breaks of the query text.
  my $count_sql = join "\n",
    '',
    'SELECT COUNT(*)',
    'FROM gene',
    "WHERE (LOCATE('\n', description) > 0 OR LOCATE('\t', description) > 0)",
    '';

  is_rows_zero($self->dba, $count_sql, $test_name);
}

1;

33 changes: 25 additions & 8 deletions lib/Bio/EnsEMBL/DataCheck/index.json
Original file line number Diff line number Diff line change
Expand Up @@ -897,6 +897,15 @@
"name" : "DensitySNPs",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::DensitySNPs"
},
"DescriptionNewlines" : {
"datacheck_type" : "critical",
"description" : "Check for newlines and tabs in gene descriptions",
"groups" : [
"xref"
],
"name" : "DescriptionNewlines",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::DescriptionNewlines"
},
"DisplayableGenes" : {
"datacheck_type" : "advisory",
"description" : "Genes are displayable and have web_data attached to their analysis",
Expand Down Expand Up @@ -944,7 +953,8 @@
"datacheck_type" : "critical",
"description" : "Xrefs have been added twice with different descriptions or versions",
"groups" : [
"xref"
"xref",
"core"
],
"name" : "DuplicateXref",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::DuplicateXref"
Expand Down Expand Up @@ -1090,7 +1100,8 @@
"datacheck_type" : "critical",
"description" : "All GO xrefs have an evidence",
"groups" : [
"xref"
"xref",
"core"
],
"name" : "GOXrefEvidence",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::GOXrefEvidence"
Expand Down Expand Up @@ -1671,7 +1682,8 @@
"datacheck_type" : "critical",
"description" : "Protein coding gene/transcript display xrefs are not shared between species inside a collection. This can lead to species-specific synonyms being applied to the wrong species",
"groups" : [
"xref"
"xref",
"core"
],
"name" : "SharedDisplayXref",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::SharedDisplayXref"
Expand Down Expand Up @@ -1747,7 +1759,8 @@
"datacheck_type" : "critical",
"description" : "Genes/Transcript display_xref does not have display_label set as stable_id",
"groups" : [
"xref"
"xref",
"core"
],
"name" : "StableIdDisplayXref",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::StableIdDisplayXref"
Expand Down Expand Up @@ -1786,7 +1799,8 @@
"datacheck_type" : "critical",
"description" : "Transcripts do not have a display xref with a -20* suffix. These are created by the non-vert Xref pipeline unless a flag is enabled: http://www.ebi.ac.uk/seqdb/confluence/display/EnsGen/Xref+mapping#Xrefmapping-CustomisingXrefMapping(DisplayXrefs)",
"groups" : [
"xref"
"xref",
"core"
],
"name" : "TranscriptDisplayXrefSuffix",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::TranscriptDisplayXrefSuffix"
Expand All @@ -1804,7 +1818,8 @@
"datacheck_type" : "critical",
"description" : "Gene display xrefs are only attached to UniProtKB Gene Names (Uniprot_gn)",
"groups" : [
"xref"
"xref",
"core"
],
"name" : "UniProtDisplayXref",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::UniProtDisplayXref"
Expand All @@ -1824,7 +1839,8 @@
"datacheck_type" : "critical",
"description" : "Uniprot xrefs do not have Unreviewed as their primary DB accession",
"groups" : [
"xref"
"xref",
"core"
],
"name" : "UnreviewedXrefs",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::UnreviewedXrefs"
Expand Down Expand Up @@ -1946,7 +1962,8 @@
"datacheck_type" : "critical",
"description" : "Xrefs do not have HTML markup, non-printing characters, or blank values",
"groups" : [
"xref"
"xref",
"core"
],
"name" : "XrefFormat",
"package_name" : "Bio::EnsEMBL::DataCheck::Checks::XrefFormat"
Expand Down

0 comments on commit 10da1c5

Please sign in to comment.