/
registry_parser.py
150 lines (111 loc) · 4.24 KB
/
registry_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
"""
Script to process the registry and transform the mapping sets into Turtle to
upload on OxO2
"""
import argparse
import uuid
from typing import List, Tuple
import yaml
from pyld.jsonld import expand
from rdflib import Graph
from sssom.parsers import parse_sssom_table
from sssom.writers import to_json
from sssom_schema import SSSOM, MappingRegistry, MappingSetReference
def registry_parser(config: str) -> MappingRegistry:
    """Load the YAML registry file and build a MappingRegistry from it.

    Args:
        config: Path to the registry YAML file (opened as UTF-8 text).

    Returns:
        A MappingRegistry carrying the registry id, title, description and
        homepage, plus one MappingSetReference per item found under
        ``mapping_set_references``.

    Raises:
        KeyError: If one of the keys read with ``[...]`` below is missing.
    """
    with open(file=config, mode="r", encoding="utf-8") as handle:
        registry = yaml.safe_load(handle)

    def _to_reference(item):
        # A missing or empty group is normalised to None.
        return MappingSetReference(
            mapping_set_id=item["mapping_set_id"],
            mapping_set_group=item.get("mapping_set_group") or None,
            local_name=item["local_name"],
        )

    raw_references = registry["mapping_set_references"]
    return MappingRegistry(
        mapping_registry_id=registry["mapping_registry_id"],
        mapping_registry_title=registry["mapping_registry_title"],
        mapping_registry_description=registry["mapping_registry_description"],
        homepage=registry["homepage"],
        mapping_set_references=[
            _to_reference(item) for item in raw_references
        ],
    )
def generate_uuid(entry: List) -> Tuple[str, str]:
    """Derive a deterministic identifier from a list of string parts.

    The parts are concatenated without any separator and hashed into a
    version-5 UUID under the DNS namespace, so identical parts always give
    the same identifier.

    Args:
        entry: Strings that together form the name to hash.

    Returns:
        A pair ``(prefixed_id, hex_id)`` where ``hex_id`` is the UUID written
        as 32 hexadecimal digits without hyphens and ``prefixed_id`` is
        ``hex_id`` preceded by the formatted ``SSSOM`` namespace.
    """
    hex_id = uuid.uuid5(uuid.NAMESPACE_DNS, "".join(entry)).hex
    return f"{SSSOM}{hex_id}", hex_id
def update_context(entry: dict) -> dict:
    """Retype resource terms in the context and register the ``uuid`` term.

    Every value of ``entry["@context"]`` that is a dict whose ``@type`` equals
    ``"rdfs:Resource"`` has its ``@type`` replaced by ``"@id"``. Afterwards a
    ``uuid`` term typed ``xsd:string`` is added to the context, overwriting
    any existing ``uuid`` term.

    Args:
        entry: Document holding an ``@context`` dict; it is modified in place.

    Returns:
        The same ``entry`` object that was passed in.
    """
    context = entry["@context"]
    for term in context.values():
        # Plain string terms and terms with another (or no) type are kept.
        if isinstance(term, dict) and term.get("@type") == "rdfs:Resource":
            term["@type"] = "@id"
    context["uuid"] = {"@type": "xsd:string"}
    return entry
def add_uuid_n_expand_curie(entry: dict) -> dict:
    """Attach deterministic ids to a mapping set and expand its CURIEs.

    The mapping set gets ``@id`` and ``uuid`` derived from its
    ``mapping_set_id``. Each mapping gets ``@id`` and ``uuid`` derived from
    its subject, predicate, object and justification, an ``@type`` of
    ``"Mapping"``, expanded ``subject_id`` and ``object_id`` values, and a
    default ``confidence`` of ``1.0`` when it has none.

    Args:
        entry: Mapping set document with ``mapping_set_id`` and ``@context``
            keys and an optional ``mappings`` list; it is modified in place.

    Returns:
        The same ``entry`` object that was passed in.

    Raises:
        KeyError: If a mapping lacks one of the four key fields, or the
            document lacks ``mapping_set_id`` or (when it has mappings)
            ``@context``.
    """
    entry["@id"], entry["uuid"] = generate_uuid([entry["mapping_set_id"]])
    mappings = entry.get("mappings")
    if not mappings:
        return entry

    context = get_context(entry)
    for mapping in mappings:
        # The id is computed from the original (unexpanded) CURIEs, so it
        # must be generated before subject_id/object_id are rewritten.
        key_parts = [
            mapping["subject_id"],
            mapping["predicate_id"],
            mapping["object_id"],
            mapping["mapping_justification"],
        ]
        mapping["@id"], mapping["uuid"] = generate_uuid(key_parts)
        mapping["@type"] = "Mapping"
        mapping["subject_id"] = expand_curie(mapping["subject_id"], context)
        mapping["object_id"] = expand_curie(mapping["object_id"], context)
        # Default to 1.0 only when no confidence is available. An explicit
        # 0.0 is a real value and must not be overwritten by the default.
        if mapping.get("confidence") is None:
            mapping["confidence"] = 1.0
    return entry
def get_context(entry: dict) -> dict:
    """Return the value stored under ``@context`` in the given document.

    Args:
        entry: Document expected to carry an ``@context`` key.

    Returns:
        The object stored under ``@context`` (the object itself, not a copy).

    Raises:
        KeyError: If ``entry`` has no ``@context`` key.
    """
    context = entry["@context"]
    return context
def expand_curie(curie: str, context: dict) -> str:
    """Expand a ``prefix:local`` CURIE using the prefix map in ``context``.

    Only the leading ``prefix:`` is substituted. A local part that happens to
    contain the same ``prefix:`` text again is left untouched.

    Args:
        curie: The CURIE to expand. If the text before the first ``:``
            contains ``"http"`` the value is treated as already expanded.
        context: Mapping from prefix to the string that replaces ``prefix:``.

    Returns:
        The expanded identifier, or ``curie`` unchanged when it is treated as
        already expanded or contains no ``:``.

    Raises:
        KeyError: If the prefix (or, without a ``:``, the whole string) is
            not a key of ``context``.
    """
    prefix, separator, local_id = curie.partition(":")
    if "http" in prefix:
        return curie
    # Look the prefix up before anything else so an unknown prefix still
    # fails loudly instead of yielding an unexpanded identifier.
    base = context[prefix]
    if not separator:
        return curie
    return base + local_id
def read_mappings(config: str):
    """Convert every mapping set listed in the registry to a Turtle file.

    For each mapping set reference the SSSOM table at
    ``mappings/<local_name>`` is parsed and converted to JSON, ids are added
    and CURIEs expanded, the context is adjusted, and the result is loaded
    into an RDF graph that is serialised to ``mappings/ttl/<local_name>.ttl``.

    Args:
        config: Path to the registry YAML file.
    """
    registry = registry_parser(config)
    for reference in registry.mapping_set_references.values():
        print(f"Parsing mapping_set_id {reference.mapping_set_id}")

        # Table -> JSON -> ids and expanded CURIEs -> adjusted context.
        mapping_set = parse_sssom_table(f"mappings/{reference.local_name}")
        document = to_json(mapping_set)
        document = add_uuid_n_expand_curie(document)
        document = update_context(document)
        context = get_context(document)

        graph = Graph()
        # First the expanded document, then the context on its own.
        graph.parse(data={"@graph": expand(document, None)}, format="json-ld")
        graph.parse(data={"@context": context}, format="json-ld")
        graph.serialize(
            f"mappings/ttl/{reference.local_name}.ttl", format="turtle"
        )
def main(entry: argparse.Namespace) -> None:
    """Run the conversion for the registry given on the command line.

    Args:
        entry: Parsed command-line arguments. Only ``entry.registry``, the
            path to the registry file, is read.
    """
    read_mappings(entry.registry)
if __name__ == "__main__":
    # Command-line entry point: one positional argument, the registry file.
    cli = argparse.ArgumentParser()
    cli.add_argument("registry", help="registry file with mappings")
    main(cli.parse_args())