Skip to content

Commit

Permalink
Some changes in the validator
Browse files Browse the repository at this point in the history
  • Loading branch information
ypriverol committed Sep 29, 2016
1 parent 5b0a581 commit 55fb32b
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 18 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,6 @@ file, the command should be refined to:

## Support

For any issue or support questions, please contact pride-support@ebi.ac.uk
For any issue or support questions, please contact omicsdi-support@ebi.ac.uk


32 changes: 17 additions & 15 deletions src/main/java/uk/ac/ebi/ddi/xml/validator/cli/validatorCLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,8 @@ public static void main(String[] args) throws Exception {

Map<File, List<Tuple>> errors = new HashMap<>();
for(File file: files){

List<Tuple> error = OmicsXMLFile.validateSchema(file);
if(checkValue.equalsIgnoreCase(Utils.WARN)){
error.addAll(OmicsXMLFile.validateSemantic(file));
}
error.addAll(OmicsXMLFile.validateSemantic(file));
if(errors.containsKey(file)){
error.addAll(errors.get(file));
}
Expand All @@ -103,18 +100,20 @@ public static void main(String[] args) throws Exception {
if(!errors.isEmpty()){
if(reportName != null){
PrintStream reportFile = new PrintStream(new File(reportName));
generateSummaryReport(errors,reportFile);
for(File file: errors.keySet()){
generateSummaryReport(errors,reportFile, checkValue);
for(File file: errors.keySet())
for (Tuple error: errors.get(file))
reportFile.println(file.getAbsolutePath() + "\t" + error.getKey() + "\t" + error.getValue());
}
if(checkValue == Utils.WARN || (error.getValue() == Utils.ERROR))
reportFile.println(file.getAbsolutePath() + "\t" + error.getKey() + "\t" + error.getValue());

reportFile.close();
}else{
for(File file: errors.keySet()){
PrintStream reportFile = new PrintStream(new File(file.getAbsolutePath() + ".error.csv"));
generateSummaryReport(errors,reportFile);
generateSummaryReport(errors,reportFile, checkValue);
for (Tuple error: errors.get(file))
reportFile.println(file.getName() + "\t" + error.getKey() + "\t" + error.getValue());
if(checkValue == Utils.WARN || (error.getValue() == Utils.ERROR))
reportFile.println(file.getName() + "\t" + error.getKey() + "\t" + error.getValue());
reportFile.close();
}
}
Expand All @@ -123,7 +122,7 @@ public static void main(String[] args) throws Exception {
}
}

public static void generateSummaryReport(Map<File, List<Tuple>> errors, PrintStream reportFile){
public static void generateSummaryReport(Map<File, List<Tuple>> errors, PrintStream reportFile, String errorLevel){
int numberErrors = 0;
int numberWars = 0;
Map<Field, Integer> fields = new HashMap<>();
Expand All @@ -150,10 +149,13 @@ public static void generateSummaryReport(Map<File, List<Tuple>> errors, PrintStr
Field field = (Field) entry.getKey();
Integer errorNumber = (Integer) entry.getValue();
String error = (field.getType() == FieldType.MANDATORY)? Utils.ERROR:Utils.WARN;
if(field.getCategory() == FieldCategory.DATE)
reportFile.println(error + " The number of datasets without or outdated " + field.getFullName() + " is " + errorNumber);
else
reportFile.println(error + " The number of datasets without " + field.getFullName() + " is " + errorNumber);
if(errorLevel == Utils.WARN || (error == Utils.ERROR)){
if(field.getCategory() == FieldCategory.DATE)
reportFile.println(error + " The number of datasets without or outdated " + field.getFullName() + " is " + errorNumber);
else
reportFile.println(error + " The number of datasets without " + field.getFullName() + " is " + errorNumber);

}
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/main/java/uk/ac/ebi/ddi/xml/validator/utils/Field.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ public enum Field {
SECONDARY_ACCESSION("additional_accession", FieldType.OPTIONAL,FieldCategory.ADDITIONAL, "Secondary Accession"),
ENSEMBL_EXPRESSION_ATLAS("ensembl", FieldType.UNKNOWN, FieldCategory.CROSSREF, "Gene reference to ENSEMBL"),

SUBMITTER_KEYWORDS("submitter_keywords", FieldType.UNKNOWN, FieldCategory.ADDITIONAL, "MODEL");
SUBMITTER_KEYWORDS("submitter_keywords", FieldType.UNKNOWN, FieldCategory.ADDITIONAL, "Submitter Keywords"),
// Display name must differ from SUBMITTER_KEYWORDS so the two fields can be told apart in reports.
CURATOR_KEYWORDS("curator_keywords", FieldType.UNKNOWN, FieldCategory.ADDITIONAL, "Curator Keywords");



Expand Down
1 change: 0 additions & 1 deletion src/main/java/uk/ac/ebi/ddi/xml/validator/utils/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import org.apache.log4j.Logger;
import uk.ac.ebi.ddi.xml.validator.parser.model.*;
import uk.ac.ebi.ddi.xml.validator.parser.model.Date;
import uk.ac.ebi.ddi.xml.validator.parser.unmarshaller.OmicsUnmarshallerFactory;

import java.text.ParseException;
import java.text.SimpleDateFormat;
Expand Down
74 changes: 74 additions & 0 deletions src/test/resources/BIOMODELS.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
<database>
<name>BioModels Database</name>
<description>BioModels Database is a repository of computational models of biological processes. Models described
from literature are manually curated and enriched with cross-references.
</description>
<release>1</release>
<release_date>2016-08-26</release_date>
<entry_count>1</entry_count>
<entries>
<entry id="MODEL00002243.1"> <!-- uniqueID -->
<name>Thiele2013 - Cerebellum cells in molecular layer</name>
<description>
Thiele2013 - Cerebellum cells in molecular layer The model of cerebellum cells in molecular layer
metabolism is derived from the community-driven global reconstruction of human metabolism
(version 2.02, MODEL1109130000 ).
</description>
<dates>
<date type="submission" value="2016-08-19"/>
<date type="publication" value="2016-08-20"/>
<date type="last_modification" value="2016-08-19"/>
</dates>
<additional_fields>
<field name="submitter">Tung Nguyen</field>
<field name="submitter_mail">tnguyen@ebi.ac.uk</field>
<field name="submitter_affiliation">EMBL - European Bioinformatics Institute</field>
<field name="repository">BioModels</field>
<field name="full_dataset_link">http://wwwdev.ebi.ac.uk/biomodels/jummp-biomodels/model/MODEL00002243.1</field>
<!--<field name="publication">Pub Title + Pub Authors + Pub Abstract + Pub Year</field>-->
<field name="publication">A community-driven global reconstruction of human metabolism,
Thiele I, et al . (2013)
Multiple models of human metabolism have been reconstructed, but each represents only a subset of
our knowledge. Here we describe Recon 2, a community-driven, consensus 'metabolic reconstruction',
which is the most comprehensive representation of human metabolism that is applicable to
computational modeling. Compared with its predecessors, the reconstruction has improved topological
and functional features, including ~2x more reactions and ~1.7x more unique metabolites. Using
Recon 2 we predicted changes in metabolite biomarkers for 49 inborn errors of metabolism with 77%
accuracy when compared to experimental data. Mapping metabolomic data and drug information onto
Recon 2 demonstrates its potential for integrating and analyzing diverse data types. Using protein
expression data, we automatically generated a compendium of 65 cell type-specific models, providing
a basis for manual curation or investigation of cell-specific metabolic properties. Recon 2 will
facilitate many future biomedical studies and is freely available at http://humanmetabolism.org/.
</field>
<field name="disease">Disease name</field>
<!--<field name="omics_type">Models</field>-->
<field name="data_protocol">extracted from Curation comments</field> <!-- Software used -->
<field name="sample_protocol">extracted from Curation comments</field> <!-- Software used -->
<field name="technology_type">Software used</field> <!-- Software used -->

<!-- The additional fields we define ourselves -->
<field name="modelFormat">SBML</field>
<field name="submissionId">UNCHECKED</field>
<field name="publicationId">UNCHECKED</field>
<field name="levelVersion">UNCHECKED</field>
<field name="validationStatus">UNCHECKED</field>
<field name="certificationComment">This model is not certified</field>
<field name="elementName">Element Name</field>
<field name="elementID">Element ID</field>
<field name="elementDescription">Element Description</field>
<field name="public">The model is public</field>
<field name="deleted">The model is deleted</field>
<field name="certified">The model is certified</field>
<field name="sbmlSBOTerm">SBML SBO Term</field>
<field name="curators">Curators</field>
<field name="authors">Authors</field>
<field name="derivations">Derivations</field>
<field name="pharmml**">Fields related to PharmML</field>
</additional_fields>
<cross_references>
<ref dbkey="CHEBI:16551" dbname="ChEBI"/>
<ref dbkey="MTBLC30031" dbname="MetaboLights"/>
</cross_references>
</entry>
</entries>
</database>
19 changes: 19 additions & 0 deletions src/test/resources/BIOMODELS.xml.error.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
The number of Errors in the files: 1
The number of Warnings in the files: 7
Warn The number of datasets without Dataset NCBI TAXONOMY is 1
Warn The number of datasets without Dataset Submitter email is 1
Warn The number of datasets without MODEL is 8
Warn The number of datasets without Instrument Platform is 1
Warn The number of datasets without Gene reference to ENSEMBL is 1
Warn The number of datasets without Dataset Pubmed Id is 1
Warn The number of datasets without Secondary Accession is 1
Error The number of datasets without Dataset Omics Type is 1
Warn The number of datasets without MEDLINE Reference is 1
BIOMODELS.xml Warn Entry: MODEL00002243.1 The entry do not contain: Dataset NCBI TAXONOMY
BIOMODELS.xml Warn Entry: MODEL00002243.1 The entry do not contain: Dataset Pubmed Id
BIOMODELS.xml Warn Entry: MODEL00002243.1 The entry do not contain: MEDLINE Reference
BIOMODELS.xml Warn Entry: MODEL00002243.1 The entry do not contain: Gene reference to ENSEMBL
BIOMODELS.xml Error Entry: MODEL00002243.1 The entry do not contain: Dataset Omics Type
BIOMODELS.xml Warn Entry: MODEL00002243.1 The entry do not contain: Dataset Submitter email
BIOMODELS.xml Warn Entry: MODEL00002243.1 The entry do not contain: Instrument Platform
BIOMODELS.xml Warn Entry: MODEL00002243.1 The entry do not contain: Secondary Accession

0 comments on commit 55fb32b

Please sign in to comment.