In [5]:
import jsonschema
import json

def _load_json(path):
    """Parse and return the JSON document stored at ``path``."""
    # JSON documents are UTF-8; do not depend on the platform's default encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def validate_json_file(common_schema_file, object_schema_file, json_data_file):
    """
    Validate a JSON data file against an object schema that may reference a
        common-definitions schema (all three given as paths to JSON files).
        For further details, go to:
            https://github.com/EbiEga/ega-metadata-schema/tree/main/schemas

    The outcome is printed to stdout: a confirmation line on success, or the
        file name plus the message of the raised validation error on failure.

    Args:
        common_schema_file: path to the common-definitions JSON schema.
        object_schema_file: path to the object-specific JSON schema.
        json_data_file: path to the JSON document to validate.

    Returns:
        True if the document validated, False if a validation error was found.

    Raises:
        OSError: if any of the three files cannot be opened.
        json.JSONDecodeError: if any of the three files is not valid JSON.
        KeyError: if either schema has no top-level '$id'.
    """
    common_schema = _load_json(common_schema_file)
    object_schema = _load_json(object_schema_file)
    data_json = _load_json(json_data_file)

    # We create the reference dictionary linking $id and $ref between files
    schema_store = {
        common_schema['$id']: common_schema,
        object_schema['$id']: object_schema,
    }
    # The resolver is anchored on the schema actually being validated, so that
    #     its local and relative $ref values resolve against its own $id; the
    #     common definitions stay reachable through the store.
    resolver = jsonschema.RefResolver.from_schema(object_schema, store=schema_store)
    validator = jsonschema.Draft7Validator(object_schema, resolver=resolver)

    # We try to validate the given JSON data file against the given schemas
    try:
        validator.validate(data_json)
    except jsonschema.exceptions.ValidationError as error_message:
        print(f"-/- Validation error found at file: {json_data_file}")
        print(f"\tCause: {error_message.message}")
        return False

    print(f"- Correctly validated file: {json_data_file}\n")
    return True
        
        
import os

# We iterate over all json files in the current directory (in sorted order, so the
#     report is reproducible) and use the above defined function to validate them.
for testing_file in sorted(os.listdir("./")):
    if not testing_file.endswith(".json"):
        continue
    # The metadata object is the file-name prefix before the first "-"
    #     (e.g. "ArrayAssay-invalid-1.json" -> "ArrayAssay"); it selects the object schema.
    object_of_file, dash, _ = testing_file.partition("-")
    if not dash:
        # Without a "-" there is no object prefix to derive the schema name from;
        #     slicing with find()'s -1 would silently build a bogus schema path.
        print(f"-/- Skipping file without an '<object>-' prefix: {testing_file}")
        continue
    validate_json_file(common_schema_file="../EGA.common-definitions.json",
                       object_schema_file=f"../EGA.{object_of_file}.json",
                       json_data_file=testing_file)

-/- Validation error found at file: ArrayAssay-invalid-1.json
	Cause: 23 is not of type 'string'
-/- Validation error found at file: ArrayAssay-invalid-2.json
	Cause: 'c01b39c7a35ccc3b081a3e83d2c71fa9a767ebfeb45c69f08e17dfe3ef375c7b' does not match '^[0-9a-z](?:-?[0-9a-z]){31}$'
-/- Validation error found at file: ArrayAssay-invalid-3.json
	Cause: 'EGAN00000000001' does not match '^EGAA[0-9]{11}$'
-/- Validation error found at file: ArrayAssay-invalid-4.json
	Cause: 'NCIT:C171275' is not one of ['NCIT:C171276', 'NCIT:C80226']
-/- Validation error found at file: ArrayAssay-invalid-5.json
	Cause: '2221-06-24' does not match '^(19|20)[0-9]{2}-(0[0-9]|1[0-2])-([012][0-9]|3[01])$'
-/- Validation error found at file: ArrayAssay-invalid-6.json
	Cause: 'made up archive' is not one of ['ensembl', 'ena', 'pubmed', 'protein', 'nuccore', 'ipg', 'nucleotide', 'structure', 'genome', 'annotinfo', 'assembly', 'bioproject', 'biosample', 'blastdbinfo', 'books', 'cdd', 'clinvar', 'gap', 'gapplus', 'grasp