From 33eb104d19146522a3b409c10f9f2e39936d23f8 Mon Sep 17 00:00:00 2001 From: Emerson Rocha Date: Sun, 7 Mar 2021 16:49:33 -0300 Subject: [PATCH] hxlm (#11), urnresolver (#13): drafted concept of 'urnref' (when there are several sources or URNs, allow urnresolver to filter sources; at first just use file names) --- hxlm/core/bin/urnresolver.py | 57 ++++++++++++++++++++++-------- hxlm/core/schema/urn/util.py | 26 ++++++++++++-- tests/test_core_bin_urnresolver.py | 25 +++++++------ 3 files changed, 82 insertions(+), 26 deletions(-) diff --git a/hxlm/core/bin/urnresolver.py b/hxlm/core/bin/urnresolver.py index a6c2027..6b871e0 100755 --- a/hxlm/core/bin/urnresolver.py +++ b/hxlm/core/bin/urnresolver.py @@ -67,6 +67,11 @@ # ├── attribute.csv # └── hashtag.csv +# The data: +# ~/.local/var/hxlm/data +# The default place for all individual URNs (excluding the index one) +# ~/.config/hxlm/urn + import sys import os import logging @@ -81,7 +86,7 @@ import hxl.io import hxlm.core.htype.urn as HUrn -import hxlm.core.schema.urn.util as HUrnUtil +# import hxlm.core.schema.urn.util as HUrnUtil # @see https://github.com/hugapi/hug # pip3 install hug --upgrade @@ -143,25 +148,49 @@ def execute_cli(self, args, stdin=STDIN, stdout=sys.stdout, stderr=sys.stderr): """ The execute_cli is the main entrypoint of URNResolver. When - called will convert the HXL source to example format. + called will try to convert the URN to a valid IRI.
""" + # Test commands: + # urnresolver --debug urn:data:xz:hxl:standard:core:hashtag + # urnresolver urn:data:xz:hxl:standard:core:hashtag + # --urn-file tests/urnresolver/all-in-same-dir/ + + if 'debug' in args and args.debug: + print('DEBUG: CLI args [[', args, ']]') + # print('args', args) - urn_item = HUrn.cast_urn(urn=args.infile) + + urn_string = args.infile + + urn_item = HUrn.cast_urn(urn=urn_string) urn_item.prepare() + if 'urn_file' in args and len(args.urn_file) > 0: + print('TODO: try load default configurations') + if 'debug' in args and args.debug: - # valt = HUrnUtil.get_urn_vault_local_info('un', 'locode') - HUrnUtil.debug_local_data('un', 'locode') - HUrnUtil.get_urn_vault_local_info(urn_item) - - # print('valt', valt) - print('args', args) - print('args.infile', args.infile) - print('urn_item', urn_item) - print('about', urn_item.about()) - print('about base_paths', urn_item.about('base_paths')) - print('about object_names', urn_item.about('object_names')) + print('DEBUG: urn_item [[', urn_item, ']]') + print('DEBUG: urn_item.about() [[', urn_item.about(), ']]') + print('DEBUG: urn_item.about(base_paths) [[', + urn_item.about('base_paths'), ']]') + print('DEBUG: urn_item.about(object_names) [[', + urn_item.about('object_names'), ']]') + + # if 'debug' in args and args.debug: + # # valt = HUrnUtil.get_urn_vault_local_info('un', 'locode') + # # HUrnUtil.debug_local_data('un', 'locode') + # # HUrnUtil.get_urn_vault_local_info(urn_item) + + # # print('valt', valt) + # # print('args', args) + # print('args.infile', args.infile) + # print('urn_item', urn_item) + # print('about', urn_item.about()) + # print('about base_paths', urn_item.about('base_paths')) + # print('about object_names', urn_item.about('object_names')) + + stderr.write('ERROR: urn [' + urn_string + '] strict match not found') print(urn_item.get_resources()) diff --git a/hxlm/core/schema/urn/util.py b/hxlm/core/schema/urn/util.py index a35858e..748ed4d 100644 --- 
a/hxlm/core/schema/urn/util.py +++ b/hxlm/core/schema/urn/util.py @@ -139,6 +139,11 @@ def get_urn_resolver_from_csv(urn_file: str, # (aka lazy), but wrong (Emerson Rocha, 2021-03-07 17:25) result = [] + + # print('ttt', Path(urn_file).name) + urnref = Path(urn_file).name + # raise Exception(Path(urn_file).name) + with open(urn_file, 'r') as open_urn_file: csvreader = csv.reader(open_urn_file, delimiter=delimiter) for row in csvreader: @@ -151,7 +156,8 @@ def get_urn_resolver_from_csv(urn_file: str, item = { 'urn': row[0], # 'source_remote': row[1] - 'source': [row[1]] + 'source': [row[1]], + 'urnref': urnref } result.append(item) @@ -173,8 +179,18 @@ def get_urn_resolver_from_json(urn_file: str) -> List[dict]: Returns: List[dict]: the resolver list of dictionaries to parse """ + + # To help filter with several sources of URNs, we add an urnref to allow + # filter on steps + urnref = Path(urn_file).name + with open(urn_file, "r") as read_file: data = json.load(read_file) + + for item in data: + if 'urnref' not in item: + item['urnref'] = urnref + return data # print('data', type(data), data) @@ -194,11 +210,17 @@ def get_urn_resolver_from_yml(urn_file: str) -> List[dict]: List[dict]: the resolver list of dictionaries to parse """ - # print('get_urn_resolver_from_yml') + # To help filter with several sources of URNs, we add an urnref to allow + # filter on steps + urnref = Path(urn_file).name with open(urn_file, "r") as read_file: data = yaml.safe_load(read_file) # print('get_urn_resolver_from_yml data', data) + + for item in data: + if 'urnref' not in item: + item['urnref'] = urnref return data diff --git a/tests/test_core_bin_urnresolver.py b/tests/test_core_bin_urnresolver.py index acebbf9..babcfa4 100755 --- a/tests/test_core_bin_urnresolver.py +++ b/tests/test_core_bin_urnresolver.py @@ -6,34 +6,39 @@ # To test directly # pytest -vv ./tests/test_core_bin_urnresolver.py -import os -import pathlib -TESTDIR = str(pathlib.Path(__file__).parent.absolute()) + 
'/urnresolver' -TEST_SIG_A = 'urn:data:xz:hxl:standard:core:hashtag' - from hxlm.core.schema.urn.util import ( get_urn_resolver_from_csv, get_urn_resolver_from_json, get_urn_resolver_from_yml, # get_urn_resolver_local, ) +import os +import pathlib +TESTDIR = str(pathlib.Path(__file__).parent.absolute()) + '/urnresolver' +TEST_SIG_A = 'urn:data:xz:hxl:standard:core:hashtag' + def test_core_bin_urnresolver_json(): result = get_urn_resolver_from_json(TESTDIR + '/json/urn.json') - # print('result', result) + print('result', result) print('result urnnnn', result[0]['urn']) - assert result[0]['urn'] ==TEST_SIG_A + assert result[0]['urn'] == TEST_SIG_A + assert result[0]['urnref'] == "urn.json" + def test_core_bin_urnresolver_csv(): result = get_urn_resolver_from_csv(TESTDIR + '/csv/urn.csv') # print('result', result) - assert result[0]['urn'] ==TEST_SIG_A + assert result[0]['urn'] == TEST_SIG_A + assert result[0]['urnref'] == "urn.csv" + def test_core_bin_urnresolver_yml(): result = get_urn_resolver_from_yml(TESTDIR + '/yml/urn.yml') # print('result', result) - assert result[0]['urn'] ==TEST_SIG_A + assert result[0]['urn'] == TEST_SIG_A + assert result[0]['urnref'] == "urn.yml" test_core_bin_urnresolver_json() -test_core_bin_urnresolver_yml() \ No newline at end of file +test_core_bin_urnresolver_yml()