# Summarise MOTBX resources

Convert the MOTBX resources (one YAML file per resource) into a single CSV summary file.

In [1]:
import os
import yaml
from pathlib import Path
from motbxtools import motbxschema
import pprint
pp = pprint.PrettyPrinter(indent=2)

# All paths below are derived from the current working directory, so the
# notebook has to be started from the repository's 'notebooks' directory.
# This only warns; it does not stop execution.
CWD = Path.cwd()
if CWD.name != "notebooks":
    print("Make sure to run this notebook from the 'notebooks' directory.")

MOTBX_DIR = CWD.parent  # home directory of this repository

# Version metadata. The keys 'latest' and 'previous' are read below;
# 'previous' is indexed with [0] (presumably the most recent earlier
# release comes first -- TODO confirm against MOTBX_version.yaml).
# The encoding is explicit so reading does not depend on the platform locale.
with open(MOTBX_DIR.joinpath("resources/MOTBX_version.yaml"), "r",
          encoding="utf-8") as f:
    MOTBX_VERSION = yaml.safe_load(f)

# path to the directory where the resource YAML files are saved
RESOURCES_DIR = MOTBX_DIR.joinpath("resources/curated")

# directory receiving the summary and changelog CSV files
SUMMARY_DIR = MOTBX_DIR.joinpath("resources/summary")
# Single call instead of os.path.exists() + os.mkdir(): no check-then-create
# race, and it raises FileExistsError if the path exists but is not a
# directory instead of silently continuing.
SUMMARY_DIR.mkdir(exist_ok=True)
SUMMARY_CSV_LATEST = SUMMARY_DIR.joinpath(
    f"MOTBX_{MOTBX_VERSION['latest']}.csv")
SUMMARY_CSV_PREVIOUS = SUMMARY_DIR.joinpath(
    f"MOTBX_{MOTBX_VERSION['previous'][0]}.csv")
CHANGELOG_CSV = SUMMARY_DIR.joinpath(
    f"changelog_{MOTBX_VERSION['latest']}.csv")

# path to JSON SCHEMA file defining structure of MOTBX resources
SCHEMA_JSON = MOTBX_DIR.joinpath("schema/motbxschema.json")

# test fixtures: input resources and the CSV summary written for them
TEST_RESOURCES_DIR = MOTBX_DIR.joinpath("tests/resources_pass")
TEST_SUMMARY_CSV = MOTBX_DIR.joinpath("tests/resources.csv")

Create a CSV summary of the test resources (YAML files).

In [2]:
# Build a collection from the test resources directory and the JSON schema,
# then write its summary to TEST_SUMMARY_CSV with validation switched on.
# (Presumably validate=True checks each resource against SCHEMA_JSON --
# confirm in motbxtools.motbxschema.)
test_collection = motbxschema.MotbxCollection(TEST_RESOURCES_DIR, SCHEMA_JSON)
test_collection.summarise(TEST_SUMMARY_CSV, validate=True)

Loading resources for latest MOTBX version | test2.yaml

Create a CSV summary of the curated resources, together with a changelog relative to the previous MOTBX version.

In [3]:
# Build a collection from the curated resources and write its summary to the
# CSV named after the latest MOTBX version. The previous version's summary
# CSV and a changelog path are passed as well; presumably summarise()
# compares the two summaries and writes the differences to CHANGELOG_CSV --
# confirm in motbxtools.motbxschema.
# NOTE(review): validate=False here, while the test collection is summarised
# with validate=True -- confirm that skipping validation is intended.
motbx_collection = motbxschema.MotbxCollection(RESOURCES_DIR, SCHEMA_JSON)
motbx_collection.summarise(SUMMARY_CSV_LATEST, validate=False,
                           summary_csv_path_old=SUMMARY_CSV_PREVIOUS,
                           changelog_path=CHANGELOG_CSV)

Loading summary of previous MOTBX version  | ID0081
Loading resources for latest MOTBX version | ID0083.yaml

In [5]:
# NOTE(review): the execution counter jumps from In [3] to In [5]; re-run
# with "Restart Kernel -> Run All" before committing so counts are sequential.
# get_info() returns a mapping that is indexed below by "resourceTags" and by
# the tuple key ("resourceCategory", "resourceSubcategory").
info = motbx_collection.get_info()
# Pretty-print the sorted tags ...
pp.pprint(sorted(info["resourceTags"]))
# ... and show the sorted (category, subcategory) entries as the cell output.
sorted(info[("resourceCategory", "resourceSubcategory")])

[ 'DNA',
  'EATRIS-Plus',
  'ISO standard',
  'PCR',
  'array',
  'biological fluid',
  'biomarker',
  'data standard',
  'database',
  'epigenomics',
  'genomics',
  'guideline',
  'internal quality control (IQC)',
  'mRNA',
  'mass spectrometry',
  'metabolomics',
  'metadata standard',
  'miRNA',
  'minimum information standard',
  'multi-omics',
  'phenotypic data',
  'preclinical study',
  'proficiency testing',
  'proteomics',
  'protocol',
  'reference data set',
  'reference material',
  'registry',
  'scientific publication',
  'sequencing',
  'software application',
  'transcriptomics']


[('Data analysis', 'Computing platforms'),
 ('Data analysis', 'Software applications and workflows'),
 ('Data management and stewardship', 'Data and metadata standards'),
 ('Data management and stewardship', 'Databases and catalogues'),
 ('Data management and stewardship', 'Guidelines and best practices'),
 ('Epigenomics', 'Guidelines and best practices'),
 ('Epigenomics', 'Laboratory protocols and methods'),
 ('Genomics', 'Guidelines and best practices'),
 ('Metabolomics', 'Guidelines and best practices'),
 ('Metabolomics', 'Laboratory protocols and methods'),
 ('Metabolomics', 'Translational research use cases'),
 ('Proteomics', 'Guidelines and best practices'),
 ('Proteomics', 'Laboratory protocols and methods'),
 ('Quality control and assessment', 'Guidelines and best practices'),
 ('Quality control and assessment',
  'Proficiency testing and external quality assessment'),
 ('Quality control and assessment', 'Quality certification'),
 ('Quality control and assessment', 'Reference m