diff --git a/csvwlib/converter/ToRDFConverter.py b/csvwlib/converter/ToRDFConverter.py
index 74164a5..be66f54 100644
--- a/csvwlib/converter/ToRDFConverter.py
+++ b/csvwlib/converter/ToRDFConverter.py
@@ -13,7 +13,6 @@ from csvwlib.utils.rdf.RDFGraphUtils import RDFGraphUtils
 
 from csvwlib.utils.url.PropertyUrlUtils import PropertyUrlUtils
 from csvwlib.utils.url.UriTemplateUtils import UriTemplateUtils
-from csvwlib.utils.url.ValueUrlUtils import ValueUrlUtils
 
 CSVW = Namespace('http://www.w3.org/ns/csvw#')
 
@@ -122,9 +121,9 @@ def _parse_row_data(self, atdm_row, subject, table_metadata, property_url, row_n
 
             property_namespace = PropertyUrlUtils.create_namespace(property_url, table_metadata['url'])
             predicate = self._predicate_node(property_namespace, property_url, col_name)
-            self._parse_cell_values(values, col_metadata, subject, predicate)
+            self._parse_cell_values(values, col_metadata, subject, predicate, atdm_row, table_metadata['url'])
 
-    def _parse_cell_values(self, values, col_metadata, subject, predicate):
+    def _parse_cell_values(self, values, col_metadata, subject, predicate, atdm_row, tm_url):
         """ Parses single cell value, values if 'separator' is present"""
         if 'ordered' in col_metadata and col_metadata['ordered'] is True and len(values) > 1:
             next_item = BNode()
@@ -140,13 +139,16 @@ def _parse_cell_values(self, values, col_metadata, subject, predicate):
             self.graph.add((subject, predicate, rdf_list))
         else:
             for value in values:
-                object_node = self._object_node(value, col_metadata)
+                object_node = self._object_node(value, col_metadata, atdm_row, tm_url)
                 self.graph.add((subject, predicate, object_node))
 
     @staticmethod
-    def _object_node(value, col_metadata):
+    def _object_node(value, col_metadata, atdm_row, tm_url):
         if 'valueUrl' in col_metadata:
-            return ValueUrlUtils.create_uri_ref(value, col_metadata['valueUrl'])
+            # NOTE(review): `value` is not used on this path; the template is filled
+            # from atdm_row, so a multi-valued cell resolves every value to the same node.
+            return UriTemplateUtils.insert_value_rdf(
+                col_metadata['valueUrl'], atdm_row, col_metadata.get('name'), tm_url)
         else:
             lang = col_metadata.get('lang')
             if not datatypeutils.is_compatible_with_datatype(value, col_metadata.get('datatype')):
diff --git a/csvwlib/utils/url/UriTemplateUtils.py b/csvwlib/utils/url/UriTemplateUtils.py
index e19bfff..b8a18ec 100644
--- a/csvwlib/utils/url/UriTemplateUtils.py
+++ b/csvwlib/utils/url/UriTemplateUtils.py
@@ -1,5 +1,7 @@
+import re
+
 from rdflib import URIRef, Literal
 
 from csvwlib.utils.ATDMUtils import ATDMUtils
 from csvwlib.utils.json.CommonProperties import CommonProperties
 
@@ -13,6 +15,33 @@ def insert_value_rdf(url, atdm_row, col_name, domain_url):
         filled_url = UriTemplateUtils.insert_value(url, atdm_row, col_name, domain_url)
         return URIRef(filled_url) if filled_url.startswith('http') else Literal(filled_url)
 
+    @staticmethod
+    def expand_template(template: str, row: dict) -> str:
+        """
+        Expand a CSVW-style URI template with values from a row dict.
+
+        Supports simple expansion (``{var}``) and fragment expansion
+        (``{#var}``, which expands to ``#`` followed by the value).
+
+        Args:
+            template: A template string like "http://ex.org/book#{id}"
+            row: A dict mapping column names or reserved vars to values.
+
+        Returns:
+            The expanded string.
+
+        Raises:
+            KeyError: If the template references a variable missing from row.
+        """
+        def replacer(match):
+            operator, var = match.groups()
+            try:
+                return operator + str(row[var])
+            except KeyError:
+                raise KeyError(f"Missing value for template variable '{var}'") from None
+
+        return re.sub(r"\{(#?)([^}]+)\}", replacer, template)
+
     @staticmethod
     def insert_value(url, atdm_row, col_name, domain_url):
         """ Inserts value into uri template - between {...}
@@ -24,18 +53,18 @@ def insert_value(url, atdm_row, col_name, domain_url):
         if '{' not in url:
             return url
 
-        key = url[url.find('{') + 1:url.find('}')]
-        key = key.replace('#', '')
-        prefix = UriTemplateUtils.prefix(url, '')
+        cols = {
+            "_row": str(atdm_row.get('number', '')),
+            "_sourceRow": atdm_row.get('url', '').rsplit('=', 1)[-1],
+            "_name": str(col_name),
+        }
 
-        if key == '_row':
-            return prefix + str(atdm_row['number'])
-        elif key == '_sourceRow':
-            return prefix + atdm_row['url'].rsplit('=')[1]
-        elif key == '_name':
-            return prefix + col_name
-        else:
-            return prefix + ATDMUtils.column_value(atdm_row, key)
+        # Columns without values are skipped so an empty cell only fails
+        # templates that actually reference it.
+        cols.update((name, str(values[0]))
+                    for name, values in atdm_row['cells'].items() if values)
+
+        return UriTemplateUtils.expand_template(url, cols)
 
     @staticmethod
     def expand(url, domain_url):
diff --git a/csvwlib/utils/url/ValueUrlUtils.py b/csvwlib/utils/url/ValueUrlUtils.py
deleted file mode 100644
index 841511b..0000000
--- a/csvwlib/utils/url/ValueUrlUtils.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from rdflib import URIRef
-
-from csvwlib.utils.json.CommonProperties import CommonProperties
-from csvwlib.utils.rdf.Namespaces import Namespaces
-
-
-class ValueUrlUtils:
-
-    @staticmethod
-    def create_uri_ref(value, value_url):
-        if CommonProperties.is_common_property(value_url):
-            return Namespaces.get_term(value_url)
-        if '{' not in value_url:
-            return URIRef(value_url)
-        sufix = ''
-        key = value_url[value_url.find('{') + 1:value_url.find('}')]
-        domain = value_url[:value_url.find('{')]
-        if key.startswith('#'):
-            sufix = '#'
-
-        return URIRef(domain + sufix + value)