Skip to content

Commit

Permalink
Convert data and manifest models from yaml to json
Browse files Browse the repository at this point in the history
  • Loading branch information
jacoblurye committed May 9, 2019
1 parent 7838b59 commit 887b870
Show file tree
Hide file tree
Showing 38 changed files with 1,569 additions and 907 deletions.
52 changes: 38 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,38 +5,62 @@ This repository contains formal definitions of the CIDC metadata model using [jso
## View documentation at https://cimac-cidc.github.io/cidc-schemas/

# Running tests
This repository has unit tests in the *tests* folder. After installing dependencies

This repository has unit tests in the _tests_ folder. After installing dependencies
the tests can be run via the command

```bash
py.tests --cache-clear tests
py.test --cache-clear tests
```

# Building documentation

The documentation can be built by running the following command

```bash
python bin/generate_docs.py
```
This will create the html documents in /docs. If the changes are comitted and pushed

This will create the html documents in /docs. If the changes are committed and pushed
to master, they will be viewable at https://cimac-cidc.github.io/cidc-schemas/

# Create template for manifest
# Using the Command-Line Interface

1) clone repository
`git clone THIS-REPO`
## Install the CLI

Clone the repository and cd into it

2) create virtual environment
```bash
virtualenv ENV-NAME
. ENV-NAME/bin/activate
cd schemas
git clone git@github.com:CIMAC-CIDC/cidc-schemas.git
cd cidc-schemas
```

3) install dependencies
Install the `cidc_schemas` package (this adds the `cidc_schemas` CLI to your console)

```bash
python setup.py install
```
pip install -r requirements.txt

Run `cidc_schemas --help` to see available options.

If you're making changes to the module and want those changes to be reflected in the CLI without reinstalling the `cidc_schemas` module every time, run

```bash
python3 -m cidc_schemas.cli [args]
```

3) Run the script
## Generate templates

Create a template for a given manifest configuration

```bash
cidc_schemas generate_template -m manifests/pbmc.json -s schemas -o pbmc.xlsx
```
python bin/create_template.py -y manifest/pbmc.yaml -o $PWD

## Convert between yaml and json

The CLI comes with a little utility for converting between yaml and json files.

```bash
cidc_schemas convert --to_json <some_yaml_file>
```
50 changes: 25 additions & 25 deletions bin/generate_docs.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import yaml
import json
import jinja2

DOC_DIR = "docs"

# Get the Specified YAML File
def get_yaml(file_name):
# Get the Specified JSON File
def get_json(file_name):
with open(file_name, 'r') as stream:
try:
yaml_doc = yaml.safe_load(stream)
return (yaml_doc)
except yaml.YAMLError as exc:
json_doc = json.load(stream)
return (json_doc)
except json.JSONDecodeError as exc:
print(exc)

# Extract Properties
Expand Down Expand Up @@ -47,44 +47,44 @@ def extract_properties(properties, property_dict, required):

# Create HTML for the Specified Entity
def processEntity(entity_name, template_env, property_dict):
file_name = "schemas/%s.yaml" % entity_name
current_yaml = get_yaml(file_name)
file_name = "schemas/%s.json" % entity_name
current_json = get_json(file_name)

# find required properties
req_props = {}
if 'required' in current_yaml:
req_props = current_yaml['required']
if 'required' in current_json:
req_props = current_json['required']

properties = current_yaml["properties"]
properties = current_json["properties"]
extract_properties(properties, property_dict, req_props)
sorted_property_list = sorted(properties)

template = template_env.get_template("entity.html")
output_text = template.render(current_yaml=current_yaml,
output_text = template.render(schema=current_json,
properties=properties,
sorted_property_list=sorted_property_list,
property_dict=property_dict)
print ("Creating: %s.html" % entity_name)
fd = open("%s/%s.html" % (DOC_DIR, entity_name), "w")
fd.write(output_text)
fd.close()
return current_yaml
return current_json

# Create HTML for the Specified Manifest
def processManifest(manifest_name, entity_yaml_set, property_dict, column_descriptions, template_env):
file_name = "manifests/%s.yaml" % manifest_name
current_yaml = get_yaml(file_name)
def processManifest(manifest_name, entity_json_set, property_dict, column_descriptions, template_env):
file_name = "manifests/%s.json" % manifest_name
current_json = get_json(file_name)

template = template_env.get_template("manifest.html")
output_text = template.render(current_yaml=current_yaml,
entity_yaml_set=entity_yaml_set,
output_text = template.render(schema=current_json,
entity_json_set=entity_json_set,
property_dict=property_dict,
column_descriptions=column_descriptions)
print ("Creating: %s.html" % manifest_name)
fd = open("%s/%s.html" % (DOC_DIR, manifest_name), "w")
fd.write(output_text)
fd.close()
return current_yaml
return current_json

def generate_docs():

Expand All @@ -102,9 +102,9 @@ def generate_docs():
entity_list.append("artifact")
entity_list.append("wes_artifact")
entity_list.append("shipping_core")
entity_yaml_set = {}
entity_json_set = {}
for entity in entity_list:
entity_yaml_set[entity] = (processEntity(entity, templateEnv, property_dict))
entity_json_set[entity] = (processEntity(entity, templateEnv, property_dict))

# Create HTML Pages for Each Manifest
column_descriptions = {}
Expand All @@ -114,16 +114,16 @@ def generate_docs():

manifest_list = []
manifest_list.append("pbmc")
manifest_yaml_set = {}
manifest_json_set = {}
for manifest in manifest_list:
manifest_yaml_set[manifest] = processManifest(manifest, entity_yaml_set,
manifest_json_set[manifest] = processManifest(manifest, entity_json_set,
property_dict, column_descriptions, templateEnv)

# Create the Index Page
template = templateEnv.get_template("index.html")
print ("Creating index.html")
outputText = template.render(entity_list=entity_list, entity_yaml_set=entity_yaml_set,
manifest_list=manifest_list, manifest_yaml_set=manifest_yaml_set)
outputText = template.render(entity_list=entity_list, entity_json_set=entity_json_set,
manifest_list=manifest_list, manifest_json_set=manifest_json_set)
fd = open("%s/index.html" % DOC_DIR, "w")
fd.write(outputText)
fd.close()
Expand Down
67 changes: 67 additions & 0 deletions cidc_schemas/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import os
import argparse
from . import util
from .manifest import ShippingManifest


def main():
    """Entry point of the CLI: parse the command line and run the handler.

    The namespace returned by ``interface()`` carries the selected
    subcommand's handler in its ``func`` attribute; that handler is invoked
    with the namespace itself as its only argument.
    """
    parsed = interface()
    handler = parsed.func
    handler(parsed)


def interface() -> argparse.Namespace:
    """Build the command-line parser and parse the process arguments.

    Three subcommands are registered, each of which stores its handler
    function in the ``func`` attribute of the resulting namespace:

    * ``generate_template`` -- requires ``-m/--manifest_file``,
      ``-s/--schemas_dir`` and ``-o/--out_file``.
    * ``validate_template`` -- takes no options yet.
    * ``convert`` -- requires exactly one of ``--to_json`` / ``--to_yaml``.

    Returns:
        The parsed namespace. ``func`` holds the handler for the chosen
        subcommand and ``command`` holds the subcommand's name.

    Raises:
        SystemExit: raised by argparse (after printing a usage message) when
            no subcommand is given, a required option is missing, or both
            conversion directions are requested at once.
    """
    parser = argparse.ArgumentParser()

    # A subcommand must be mandatory: otherwise a bare invocation yields a
    # namespace without ``func`` and the caller fails with AttributeError
    # instead of a usage message. The attribute is set after construction
    # (rather than passing ``required=True``) so this also works on Python
    # versions older than 3.7, where that keyword does not exist.
    subparsers = parser.add_subparsers(dest='command')
    subparsers.required = True

    # Parser for template generation
    generate_parser = subparsers.add_parser(
        'generate_template',
        help='Create shipping manifest excel template from manifest configuration file')
    generate_parser.add_argument(
        '-m', '--manifest_file',
        # The handler loads this file via ShippingManifest.from_json, so the
        # help text advertises json rather than yaml.
        help='Path to json file containing template configuration',
        required=True)
    generate_parser.add_argument(
        '-s', '--schemas_dir',
        help='Path to directory containing data entity schemas',
        required=True)
    generate_parser.add_argument(
        '-o', '--out_file',
        help='Where to write the resulting excel template',
        required=True)
    generate_parser.set_defaults(func=generate_template)

    # TODO: complete CLI for validating an excel template
    validate_parser = subparsers.add_parser(
        'validate_template',
        help='Validate a populated excel template based on the given configuration files')
    validate_parser.set_defaults(func=validate_template)

    # Parser for schema file format conversion
    conversion_parser = subparsers.add_parser(
        'convert', help='Convert a yaml file to a json file, or vice versa')
    # Exactly one direction must be chosen: with neither flag the handler
    # would silently do nothing, and with both it would ignore --to_yaml.
    direction = conversion_parser.add_mutually_exclusive_group(required=True)
    direction.add_argument(
        '--to_json', help='Path to yaml file to convert to json')
    direction.add_argument(
        '--to_yaml', help='Path to json file to convert to yaml')
    conversion_parser.set_defaults(func=convert)

    return parser.parse_args()


def generate_template(args: argparse.Namespace):
    """Handler for the ``generate_template`` subcommand.

    Collects the absolute path of every entry found directly inside
    ``args.schemas_dir``, hands those paths together with
    ``args.manifest_file`` to ``ShippingManifest.from_json``, and asks the
    resulting object to write itself to ``args.out_file`` via ``to_excel``.

    Args:
        args: parsed namespace providing ``schemas_dir``, ``manifest_file``
            and ``out_file``.
    """
    root = os.path.abspath(args.schemas_dir)

    # Every directory entry is passed along unfiltered.
    schema_paths = []
    for entry in os.listdir(root):
        schema_paths.append(os.path.join(root, entry))

    manifest = ShippingManifest.from_json(args.manifest_file, schema_paths)
    manifest.to_excel(args.out_file)


def validate_template(args: argparse.Namespace):
    """Placeholder handler for the ``validate_template`` subcommand.

    No validation is performed yet; the arguments are ignored and ``None``
    is returned.
    """
    # TODO: call validation module
    return None


def convert(args: argparse.Namespace):
    """Handler for the ``convert`` subcommand.

    When ``args.to_json`` is set, that path is passed to
    ``util.yaml_to_json``; otherwise, when ``args.to_yaml`` is set, that
    path is passed to ``util.json_to_yaml``. In either case the value
    returned by the helper is reported on stdout as the output location.
    When neither attribute is truthy nothing happens.

    Args:
        args: parsed namespace providing ``to_json`` and ``to_yaml``.
    """
    yaml_source = args.to_json
    if yaml_source:
        written = util.yaml_to_json(yaml_source)
        print(f'Wrote {yaml_source} as json to {written}')
        return

    # Only consulted when no yaml -> json conversion was requested.
    json_source = args.to_yaml
    if json_source:
        written = util.json_to_yaml(json_source)
        print(f'Wrote {json_source} as yaml to {written}')


# Run the CLI only when this module is executed directly (for example with
# `python3 -m cidc_schemas.cli`), not when it is imported.
if __name__ == '__main__':
    main()
Loading

0 comments on commit 887b870

Please sign in to comment.