# Summarise MOTBX resources

Convert MOTBX resources (YAML files) to a CSV file.

In [49]:
import os
import json
import yaml
import jsonschema
import csv

# base directory from which every other path below is derived
home_dir = "../"

# input locations
resource_dir = os.path.join(home_dir, "resources/curated")
schema_path = os.path.join(home_dir, "schema", "motbxschema.json")
test_dir = os.path.join(home_dir, "test")

# output CSV, placed inside the test directory
summary_csv = os.path.join(test_dir, "resources.csv")

In [8]:
# load JSON schema from file
# The context manager closes the file handle as soon as parsing is done
# (a bare open() passed to json.load is never closed explicitly), and the
# explicit encoding avoids depending on the platform default: JSON text is
# UTF-8 encoded.
with open(schema_path, "r", encoding = "utf-8") as schema_file:
    schema = json.load(schema_file)

In [12]:
# explore schema: top-level property names first, then the property names
# of the two nested sections
schema_properties = schema["properties"]
print(schema_properties.keys())
for section in ("resource", "resourceMetadata"):
    print(schema_properties[section]["properties"].keys())

dict_keys(['resourceID', 'resource', 'resourceMetadata'])
dict_keys(['resourceCategory', 'resourceSubcategory', 'resourceTitle', 'resourceDescription', 'resourceUrl', 'resourceTags'])
dict_keys(['resourceStatus', 'resourceCurator', 'last_modified', 'resourceLog'])


In [14]:
# define CSV column names: the resource ID followed by every property
# declared for the "resource" section of the schema, in schema order
resource_properties = schema["properties"]["resource"]["properties"]
fieldnames = ["resourceID", *resource_properties]
print(fieldnames)

['resourceID', 'resourceCategory', 'resourceSubcategory', 'resourceTitle', 'resourceDescription', 'resourceUrl', 'resourceTags']


In [47]:
def flatten_resource(resource, fieldnames, row = None):
    """Flatten a nested dictionary into a single-level dictionary for CSV.

    Walks ``resource`` recursively. String values and list values whose key
    is listed in ``fieldnames`` are copied into ``row``; list values are
    joined into one ", "-separated string. Nested dictionaries are descended
    into, but are never copied themselves. Values of any other type are
    ignored.

    Parameters:
        resource: nested dictionary to flatten.
        fieldnames: collection of keys to keep.
        row: optional dictionary to fill in. It is updated in place and
            returned; when omitted, a new empty dictionary is created for
            this call.

    Returns:
        The flat dictionary ``row``.
    """
    # Create the accumulator per call. A mutable default ({}) would be one
    # shared object, so values from an earlier resource would silently
    # carry over into the rows of later resources.
    if row is None:
        row = {}

    for key, value in resource.items():
        if isinstance(value, dict):
            # descend regardless of the key: only leaf keys are filtered
            flatten_resource(value, fieldnames, row = row)
        elif key not in fieldnames:
            continue
        elif isinstance(value, str):
            row[key] = value
        elif isinstance(value, list):
            # str() keeps the join from raising on non-string list items
            row[key] = ", ".join(str(item) for item in value)
    return row

In [53]:
# open CSV file
with open(summary_csv, "w", newline = "", encoding = "utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames = fieldnames)
    writer.writeheader()

    # one format checker is enough for all resources
    format_checker = jsonschema.FormatChecker()

    # iterate through resources
    # NOTE(review): this walks test_dir, while resource_dir is defined but
    # never used -- confirm which directory is meant to be summarised.
    for root, dirs, files in os.walk(test_dir):
        # os.walk yields names in arbitrary order; sorting (dirs in place, so
        # the traversal itself is ordered) makes the CSV row order stable
        dirs.sort()
        for name in sorted(files):
            if not name.endswith(".yaml"):
                continue
            print(name)

            # load test resource and validate
            # explicit UTF-8 instead of the platform default encoding
            with open(os.path.join(root, name), "r", encoding = "utf-8") as fp:
                resource = yaml.safe_load(fp)
            jsonschema.validate(resource, schema, format_checker = format_checker)

            # write to CSV file
            # pass a fresh dict so a row can never inherit values from a
            # previously processed resource
            row = flatten_resource(resource, fieldnames, row = {})
            writer.writerow(row)

../test\resource.yaml
