# Test base/niagads/validate_metadata

## Test running the metadata validator tool script directly  

> `../../bases/niagads/metadata_validator_tool/core.py`

In [25]:
# imports / helpers

import json
import subprocess
import sys

def pretty_print(resultStr):
    """Parse the captured script output as JSON and print it indented.

    Before parsing, every occurrence of the 'running' banner line and of
    backslash-escaped double quotes is removed from `resultStr`; the
    remaining text must be valid JSON. The parsed value is printed with a
    4-space indent.
    """
    cleaned = resultStr
    # same two substitutions, applied in the same order: banner first, then \"
    for token in ('running\n', '\\"'):
        cleaned = cleaned.replace(token, '')
    parsed = json.loads(cleaned)
    print(json.dumps(parsed, indent=4))
    
# Base command for invoking the validator script in a subprocess.
# Use the interpreter that is running this notebook instead of whatever
# `python` resolves to on PATH, which may be a different installation.
CMD_ROOT = [sys.executable, '../../bases/niagads/metadata_validator_tool/core.py']

In [26]:
# show the script's --help text (stderr first, then stdout)
result = subprocess.run(
    [*CMD_ROOT, '--help'],
    capture_output=True,
    text=True,
    check=False,  # explicit default: a non-zero exit does not raise
)
print(result.stderr)
print(result.stdout)


running
usage: core.py [-h] [--template TEMPLATE] --metadataFileType
               {BIOSOURCE_PROPERTIES,FILE_MANIFEST} [--log] [--failOnError]
               [--schemaDir SCHEMADIR]
               [--metadataFilePrefix METADATAFILEPREFIX]
               [--metadataFile METADATAFILE] [--schemaFile SCHEMAFILE]
               [--idField IDFIELD]

This script allows the user validate a sample or file manifest metadata file
arranged in tabular format (field names in columns, values in rows) against a
JSON-Schema file. Results are piped to STDOUT unless `--log` option is
specified. This tool accepts tab separated value files (.tab) as well as excel
(.xls, .xlsx) files. This file can also be imported as a module and contains
the following functions / tyes: * MetadataValidatorType - enum of types of
expected metadata files * initialize_validator - returns an initialized
BiosourcePropertiesValidator or FileManifestValidator *
get_templated_schema_file - builds schema file name and verifies t

In [27]:
# validate a file manifest against an explicitly named schema file

schemaFile = './schemas/file_manifest.json'
metadataFile = './examples/test_file_manifest.tab'
args = [
    '--metadataFileType', 'file_manifest',
    '--schemaFile', schemaFile,
    '--metadataFile', metadataFile,
]
result = subprocess.run(
    [*CMD_ROOT, *args],
    capture_output=True,
    text=True,
    check=False,  # explicit default: a non-zero exit does not raise
)
print(result.stderr)
print(result.stdout)


running
{"errors": []}



In [28]:
# validate a file manifest that is expected to produce validation errors

schemaFile = './schemas/file_manifest.json'
metadataFile = './examples/test_file_manifest_with_errors.tab'
args = [
    '--metadataFileType', 'file_manifest',
    '--schemaFile', schemaFile,
    '--metadataFile', metadataFile,
]
result = subprocess.run(
    [*CMD_ROOT, *args],
    capture_output=True,
    text=True,
    check=False,  # explicit default: a non-zero exit does not raise
)
print(result.stderr)
pretty_print(result.stdout)


{
    "errors": [
        {
            "1": [
                "'c6779ec2960296ed9a8d67f64422' does not match '^[a-fA-F0-9]{32}$'"
            ]
        },
        {
            "6": [
                "required field `sample_id` cannot be empty / null"
            ]
        }
    ]
}


In [29]:
# file manifest again, this time passing a template name, schema directory
# and metadata file prefix instead of explicit file paths

schemaDir = 'schemas'
pattern = 'examples/test_'
args = [
    '--metadataFileType', 'file_manifest',
    '--template', 'file_manifest',
    '--schemaDir', schemaDir,
    '--metadataFilePrefix', pattern,
]
result = subprocess.run(
    [*CMD_ROOT, *args],
    capture_output=True,
    text=True,
    check=False,  # explicit default: a non-zero exit does not raise
)
print(result.stderr)
pretty_print(result.stdout)


{
    "errors": []
}


In [30]:
# participant info file via template name, schema directory and file prefix;
# biosource files additionally pass the name of the ID field

schemaDir = 'schemas'
pattern = 'examples/test_'
idField = 'participant_id'
args = [
    '--metadataFileType', 'biosource_properties',
    '--template', 'participant_info',
    '--schemaDir', schemaDir,
    '--metadataFilePrefix', pattern,
    '--idField', idField,
]
result = subprocess.run(
    [*CMD_ROOT, *args],
    capture_output=True,
    text=True,
    check=False,  # explicit default: a non-zero exit does not raise
)
print(result.stderr)
pretty_print(result.stdout)


{
    "errors": []
}


## Test as an imported package

```python
import niagads.metadata_validator_tool.core
```

In [31]:
import niagads.metadata_validator_tool.core as vm

schemaDir = 'schemas'
pattern = 'examples/test_'
idField = 'participant_id'
template = 'participant_info'

# get files from template and path variables
schemaFile = vm.get_templated_schema_file(schemaDir, template)
print(f'Schema File: {schemaFile}')

metadataFile = vm.get_templated_metadata_file(pattern, template)
print(f'Metadata File: {metadataFile}')

# straight run: validate in a single call and get the result back
validationResult = vm.run(metadataFile, schemaFile, 'biosource_properties', idField)
print(f'Straight Run result: {validationResult}')

# get an initialized validator object and inspect it before validating
validator = vm.initialize_validator(metadataFile, schemaFile, 'biosource_properties', idField)
print(f'Validator type: {type(validator)}')
print(f'Schema: {validator.get_schema()}')
print(f'Parsed Metadata: {json.dumps(validator.get_metadata(), indent=4)}')
print(f'Biosource IDs: {validator.get_biosource_ids()}')
# outer quotes are double so the nested 'race' literal parses on Python < 3.12
# (reusing the same quote type inside an f-string is only valid from 3.12 on)
print(f"Race: {validator.get_field_values('race')}")
validationResult = validator.run()
print(f'Validation Result: {validationResult}')

Schema File: schemas/participant_info.json
Metadata File: examples/test_participant_info.tab
Straight Run result: {'errors': []}
Validator type: <class 'niagads.metadata_validator.core.BiosourcePropertiesValidator'>
Schema: schemas/participant_info.json
Parsed Metadata: [
    {
        "participant_id": "DONOR1",
        "cohort": "KNIGHT-ADRC",
        "consent": null,
        "sex": "Male",
        "race": "Asian",
        "ethnicity": "Hispanic or Latino",
        "diagnosis": null,
        "disease": "AD",
        "APOE": null,
        "comment": "clinical diagnosis"
    },
    {
        "participant_id": "DONOR2",
        "cohort": "ROSMAP",
        "consent": null,
        "sex": "Female",
        "race": "White",
        "ethnicity": "Not Hispanic or Latino",
        "diagnosis": null,
        "disease": "AD",
        "APOE": null,
        "comment": null
    },
    {
        "participant_id": "DONOR3",
        "cohort": "KNIGHT-ADRC",
        "consent": null,
        "sex": "Ma

## Test dss-metadata-validator script
`/scripts/dss_metadata_validator.py`

In [None]:
# dss validation: run the DSS wrapper script over every templated metadata
# file found under the given prefix
schemaDir = './schemas'
filePrefix = './examples/test_'
# Use the interpreter that is running this notebook instead of whatever
# `python` resolves to on PATH, which may be a different installation.
cmd = [sys.executable, './scripts/dss_metadata_validator.py']
args = ['--schemaDir', schemaDir, '--metadataFilePrefix', filePrefix]
result = subprocess.run(cmd + args, capture_output=True, text=True)
print(result.stderr)
pretty_print(result.stdout)


{
    "participant_info": {
        "errors": []
    },
    "sample_info": {
        "errors": [
            {
                "invalid_PARTICIPANT_ID": [
                    "DONOR7"
                ]
            },
            {
                "missing_PARTICIPANT_ID": [
                    "DONOR5"
                ]
            }
        ]
    },
    "file_manifest": {
        "errors": []
    }
}
