Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include schemas in the cidc_schemas package #95

Merged
merged 11 commits into from Jul 10, 2019
2 changes: 2 additions & 0 deletions MANIFEST.in
Expand Up @@ -4,6 +4,8 @@ include HISTORY.rst
include LICENSE
include README.rst

graft cidc_schemas/schemas

recursive-include tests *
recursive-exclude * __pycache__
recursive-exclude * *.py[co]
Expand Down
12 changes: 6 additions & 6 deletions README.md
Expand Up @@ -25,11 +25,11 @@ pip install git+https://github.com/cimac-cidc/cidc-schemas
### Project Structure

- **`cidc_schemas/`** - a Python module for generating, validating, and reading manifest and assay templates.
- **`schemas/`** - json specifications defining the CIDC metadata model.
- `templates/` - schemas for generating and validating manifest and assay templates.
- `assays/` - schemas defining assay data models
- **`docs/`** - the most recent build of the data model documentation, along with templates and scripts for re-generating the documentation.
- **`template_examples/`** - example populated Excel files for template specifications in `schemas/templates`.
- **`schemas/`** - json specifications defining the CIDC metadata model.
- `templates/` - schemas for generating and validating manifest and assay templates.
- `assays/` - schemas defining assay data models
- **`tests/`** - tests for the `cidc_schemas` module.

### Running tests
Expand Down Expand Up @@ -85,23 +85,23 @@ python3 -m cidc_schemas.cli [args]
Create a template for a given template configuration.

```bash
cidc_schemas generate_template -m schemas/templates/pbmc.json -s schemas -o pbmc.xlsx
cidc_schemas generate_template -m templates/pbmc_template.json -o pbmc.xlsx
```

### Validate filled-out templates

Check that a populated template file is valid with respect to a template specification.

```bash
cidc_schemas validate_template -m schemas/templates/pbmc.json -s schemas -x template_examples/pbmc_template.xlsx
cidc_schemas validate_template -m templates/pbmc_template.json -x template_examples/pbmc_template.xlsx
```

### Validate JSON schemas

Check that a JSON schema conforms to the JSON Schema specifications.

```bash
cidc_schemas validate_schema -s schemas -f schemas/shipping_core.json
cidc_schemas validate_schema -f shipping_core.json
```

### Convert between yaml and json
Expand Down
26 changes: 21 additions & 5 deletions cidc_schemas/cli.py
@@ -1,10 +1,12 @@
import os
import glob
import argparse
from typing import List

from . import util
from .template import Template
from .json_validation import load_and_validate_schema
from .constants import SCHEMA_DIR


def main():
Expand All @@ -19,13 +21,18 @@ def interface() -> argparse.Namespace:
# Print out usage if no subcommands provided
parser.set_defaults(func=lambda _: parser.print_usage(None))

# Option to list available schemas
list_parser = subparsers.add_parser(
'list', help='List all available schemas')
list_parser.set_defaults(func=lambda _: list_schemas())

# Parser for template generation
generate_parser = subparsers.add_parser(
'generate_template', help='Create shipping manifest excel template from manifest configuration file')
generate_parser.add_argument('-m', '--manifest_file',
help='Path to yaml file containing template configuration', required=True)
generate_parser.add_argument('-s', '--schemas_dir',
help='Path to directory containing data entity schemas', required=True)
help='Path to directory containing data entity schemas', required=False)
generate_parser.add_argument(
'-o', '--out_file', help='Where to write the resulting excel template', required=True)
generate_parser.set_defaults(func=generate_template)
Expand All @@ -36,15 +43,15 @@ def interface() -> argparse.Namespace:
template_parser.add_argument('-m', '--manifest_file',
help='Path to yaml file containing template configuration', required=True)
template_parser.add_argument('-s', '--schemas_dir',
help='Path to directory containing data entity schemas', required=True)
help='Path to directory containing data entity schemas')
template_parser.add_argument('-x', '--xlsx_file', )
template_parser.set_defaults(func=validate_template)

# Parser for validating a JSON schema
schema_parser = subparsers.add_parser(
'validate_schema', help='Validate a JSON schema.')
schema_parser.add_argument('-s', '--schemas_dir',
help='Path to the directory containing data entity schemas', required=True)
help='Path to the directory containing data entity schemas')
schema_parser.add_argument('-f', '--schema_file',
help='Path to the schema file to validate', required=True)
schema_parser.set_defaults(func=validate_schema)
Expand All @@ -61,8 +68,17 @@ def interface() -> argparse.Namespace:
return parser.parse_args()


def list_schemas(schema_dir=None):
    """
    Print the path of every JSON schema found under the schema directory.

    Each path is printed on its own line, relative to the schema directory,
    in sorted order so the listing is stable between runs.

    Args:
        schema_dir: directory to search recursively for `*.json` files.
            Defaults to the packaged `SCHEMA_DIR` when omitted.
    """
    root = SCHEMA_DIR if schema_dir is None else schema_dir

    # Escape the root so glob metacharacters in the install path
    # (e.g. '[' or '*') are matched literally rather than as patterns.
    pattern = os.path.join(glob.escape(root), '**', '*.json')

    for path in sorted(glob.glob(pattern, recursive=True)):
        # relpath strips only the leading root and is separator-agnostic,
        # unlike str.replace(root + '/', ''), which removes every occurrence.
        print(os.path.relpath(path, root))


def get_schemas_dir(schemas_dir) -> str:
    """
    Resolve the schema directory a CLI command should use.

    Returns the absolute form of `schemas_dir` when one is supplied;
    otherwise falls back to the module-level `SCHEMA_DIR`.
    """
    # No directory given (None or empty string): use the default location.
    if not schemas_dir:
        return SCHEMA_DIR

    return os.path.abspath(schemas_dir)


def build_manifest(args: argparse.Namespace) -> Template:
schemas_dir = os.path.abspath(args.schemas_dir)
schemas_dir = get_schemas_dir(args.schemas_dir)
return Template.from_json(args.manifest_file, schemas_dir)


Expand All @@ -79,7 +95,7 @@ def validate_template(args: argparse.Namespace):


def validate_schema(args: argparse.Namespace):
abs_schemas_dir = os.path.abspath(args.schemas_dir)
abs_schemas_dir = get_schemas_dir(args.schemas_dir)
success = load_and_validate_schema(args.schema_file, abs_schemas_dir)
if success:
print(f'{args.schema_file} is valid')
Expand Down
3 changes: 3 additions & 0 deletions cidc_schemas/constants.py
@@ -0,0 +1,3 @@
import os

# Absolute path to the `schemas` directory located next to this module.
SCHEMA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "schemas")
7 changes: 3 additions & 4 deletions cidc_schemas/json_validation.py
Expand Up @@ -10,11 +10,9 @@
import dateparser
import jsonschema

SCHEMA_ROOT = os.path.join(os.path.dirname(
os.path.abspath(__file__)), '..', 'schemas')
from .constants import SCHEMA_DIR


def load_and_validate_schema(schema_path: str, schema_root: str = SCHEMA_ROOT, on_refs: Optional[Callable] = None) -> dict:
def load_and_validate_schema(schema_path: str, schema_root: str = SCHEMA_DIR, on_refs: Optional[Callable] = None) -> dict:
"""
Try to load a valid schema at `schema_path`. If an `on_refs` function is supplied,
call that on all refs in the schema, rather than resolving the refs.
Expand All @@ -23,6 +21,7 @@ def load_and_validate_schema(schema_path: str, schema_root: str = SCHEMA_ROOT, o
schema_root), "schema_root must be an absolute path"

# Load schema with resolved $refs
schema_path = os.path.join(schema_root, schema_path)
with open(schema_path) as schema_file:
base_uri = f'file://{schema_root}/'
json_spec = json.load(schema_file)
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
1 change: 0 additions & 1 deletion cidc_schemas/template_reader.py
Expand Up @@ -150,7 +150,6 @@ def validate(self, template: Template) -> bool:
invalid_messages = []

required = template.template_schema.get('required', [])
logger.warning(required)

for name, schema in template.worksheets.items():
errors = self._validate_worksheet(name, schema, required)
Expand Down
2 changes: 1 addition & 1 deletion cidc_schemas/util.py
@@ -1,7 +1,7 @@
import os
import json
import yaml


def yaml_to_json(yaml_path: str) -> str:
"""
Given a path to a yaml file, write the equivalent json
Expand Down
9 changes: 5 additions & 4 deletions docs/generate_docs.py
Expand Up @@ -5,11 +5,12 @@
import jinja2

from cidc_schemas.json_validation import load_and_validate_schema
from cidc_schemas.constants import SCHEMA_DIR


DOCS_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.join(DOCS_DIR, '..')
TEMPLATES_DIR = os.path.join(DOCS_DIR, 'templates')
SCHEMAS_DIR = os.path.join(ROOT_DIR, "schemas")
HTML_DIR = os.path.join(DOCS_DIR, "docs")
PATH_PREFIX = "schemas"

Expand All @@ -21,7 +22,7 @@ def load_schemas() -> dict:
names to loaded and validated entity schemas.
"""
schemas = {}
for root, _, paths in os.walk(SCHEMAS_DIR):
for root, _, paths in os.walk(SCHEMA_DIR):
root_schemas = {}
for path in paths:
schema_path = os.path.join(root, path)
Expand All @@ -35,7 +36,7 @@ def json_to_html(ref):
return {'url': url}

schema = load_and_validate_schema(
schema_path, SCHEMAS_DIR, on_refs=json_to_html)
schema_path, SCHEMA_DIR, on_refs=json_to_html)

schema_path = path.replace(".json", ".html").replace("/", ".")
root_schemas[schema_path] = schema
Expand All @@ -47,7 +48,7 @@ def json_to_html(ref):

def generate_docs(out_directory: str = HTML_DIR):
"""
Generate documentation based on the schemas found in `SCHEMAS_DIR`.
Generate documentation based on the schemas found in `SCHEMA_DIR`.
"""

templateLoader = jinja2.FileSystemLoader(TEMPLATES_DIR)
Expand Down
16 changes: 5 additions & 11 deletions setup.py
Expand Up @@ -3,6 +3,8 @@

"""The setup script."""

import os
import glob
from setuptools import setup, find_packages

with open('README.md') as readme_file:
Expand All @@ -11,11 +13,8 @@
with open('HISTORY.rst') as history_file:
history = history_file.read()

requirements = []

setup_requirements = []

test_requirements = []
with open("requirements.txt") as f:
requirements = f.read().splitlines()

setup(
author="James Lindsay",
Expand All @@ -25,25 +24,20 @@
'Intended Audience :: Developers',
'License :: OSI Approved :: MIT License',
'Natural Language :: English',
"Programming Language :: Python :: 2",
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
],
description="library for parsing manifest data",
python_requires='>=3.5',
install_requires=requirements,
license="MIT license",
long_description=readme + '\n\n' + history,
include_package_data=True,
keywords='cidc_schemas',
name='cidc_schemas',
packages=find_packages(include=['cidc_schemas']),
setup_requires=setup_requirements,
test_suite='tests',
tests_require=test_requirements,
url='https://github.com/CIMAC-CIDC/schemas',
version='0.1.0',
zip_safe=False,
Expand Down
3 changes: 2 additions & 1 deletion tests/constants.py
@@ -1,8 +1,9 @@
import os

from cidc_schemas.constants import SCHEMA_DIR

TESTS_DIR = os.path.abspath(os.path.dirname(os.path.join(__file__)))
ROOT_DIR = os.path.join(TESTS_DIR, '..')
SCHEMA_DIR = os.path.join(ROOT_DIR, 'schemas')
TEST_DATA_DIR = os.path.join(TESTS_DIR, 'data')
TEST_SCHEMA_DIR = os.path.join(TEST_DATA_DIR, "schemas")
TEMPLATE_EXAMPLES_DIR = os.path.join(ROOT_DIR, 'template_examples')
4 changes: 2 additions & 2 deletions tests/test_generate_docs.py
Expand Up @@ -8,7 +8,7 @@
import pytest
from docs import generate_docs

from .constants import ROOT_DIR
from cidc_schemas.constants import SCHEMA_DIR


def count_files(directory):
Expand All @@ -21,7 +21,7 @@ def test_generate_docs(tmpdir):
generate_docs.generate_docs(tmpdir)

# Count all schemas
num_schemas = count_files(os.path.join(ROOT_DIR, 'schemas'))
num_schemas = count_files(SCHEMA_DIR)

# Count all documentation files
num_docs = count_files(tmpdir)
Expand Down