In [1]:
import yaml
import pandas as pd
import numpy as np

from copy import deepcopy

In [2]:
def create_matrix_entry(name, id=None, unique_name=None, sort_order=0, aggregation_target=False, parent_matrix=None, nl_translation=None):
    """Build the dictionary describing one matrix in the YAML matrix list.

    Args:
        name: Matrix name; always written to the entry.
        id: Identifier of the entry. Falls back to ``unique_name`` when not
            given; when both are missing no ``"id"`` key is written.
        unique_name: Unique name of the entry; written only when truthy.
        sort_order: Value stored under ``"sort_order"`` (always written).
        aggregation_target: Value stored under ``"aggregation_target"``
            (always written).
        parent_matrix: Name of the parent matrix; written only when truthy.
        nl_translation: Dutch (``nl-be``) translation of ``name``; when truthy
            a single-item ``"translations"`` list is added.

    Returns:
        dict: The matrix entry.
    """
    d = {"name": name, "sort_order": sort_order, "aggregation_target": aggregation_target}
    # The explicit id wins; unique_name stays the fallback so callers that only
    # pass unique_name keep getting an "id" key as before.
    entry_id = id or unique_name
    if entry_id: d["id"] = entry_id
    if unique_name: d["unique_name"] = unique_name
    if parent_matrix: d["parent_matrix"] = parent_matrix
    if nl_translation: d["translations"] = [{"property_name": "name", "language": "nl-be", "translated_value": nl_translation}]
    return d

# Matrix entries keyed by matrix name; filled from the sample types first and
# completed with the sample groups afterwards.
d = {}

# Sample types are read from the PARC occupational-studies export
# (previously used input: "./input/sampletype.csv").
df = pd.read_csv("./PARC/sampletype_occupational studies.csv", sep=';', encoding='mbcs')
dl = df.replace({np.nan:None}).to_dict(orient="records")

for e in dl:
    type_key = e["sampletype_key"]
    entry = create_matrix_entry(
        type_key,
        id=e["sampletype_initials"],
        unique_name=e["sampletype_initials"],
        sort_order=e["sampletype_sort"],
        aggregation_target=False,
        nl_translation=e["sampletype_name_nl"],
    )
    # A sample type only gets a parent when its group is a different matrix.
    group_key = e["samplegroup_key"]
    if group_key and group_key != type_key:
        entry["parent_matrix"] = group_key
    d[type_key] = entry

# Sample groups: key -> Dutch name.
df = pd.read_csv("./input/samplegroup.csv", sep=';', encoding='mbcs')
dl = df.replace({np.nan:None}).to_dict(orient="records")
samplegroup_name_lookup = dict((row["samplegroup_key"], row["samplegroup_name_nl"]) for row in dl)

# Groups that did not already appear as a sample type get their own entry.
for k, v in samplegroup_name_lookup.items():
    if k in d:
        continue
    d[k] = create_matrix_entry(k, id=k, unique_name=k, sort_order=0.1, aggregation_target=False, nl_translation=v)

data = {"matrices": list(filter(None, d.values()))}
with open("MatrixList_data.yaml", "w") as f:
    yaml.dump(data, f)

In [3]:
def create_opg_entry(name, description=None, sort_order=0, is_abstract=False, parent_groups=None):
    """Build the dictionary describing one observable property group.

    Args:
        name: Group name; used for both the ``"id"`` and ``"name"`` keys.
        description: Optional description; written only when truthy.
        sort_order: Value stored under ``"sort_order"`` (always written).
        is_abstract: Value stored under ``"is_abstract"`` (always written).
        parent_groups: Optional iterable of parent group names. ``None`` or an
            empty iterable means "no parents" and no ``"parent_groups"`` key
            is written.

    Returns:
        dict: The group entry. Its ``"parent_groups"`` list, when present, is
        a fresh copy, so mutating it never affects the caller's argument.
    """
    d = {"id": name, "name": name, "is_abstract": is_abstract, "sort_order": sort_order}
    if description: d["description"] = description
    # Copy instead of storing the argument: entries get .append()-ed to later
    # and must not share state with the caller's list (or a shared default).
    if parent_groups: d["parent_groups"] = list(parent_groups)
    return d

# Build the observable property group hierarchy from the category export and
# write it to ObservablePropertyGroupList_data.yaml.
df = pd.read_csv("./input/category.csv", sep=';', encoding='mbcs')
dl = df.replace({np.nan:None}).to_dict(orient="records")

def _add_parent(entry, parent):
    """Register `parent` on `entry` once, keeping first-seen order."""
    # setdefault: an entry created without parents has no "parent_groups" key yet.
    parents = entry.setdefault("parent_groups", [])
    if parent not in parents:
        parents.append(parent)

# Every name that may become a group. pd.notna drops empty cells regardless of
# the column dtype (an identity-based "- {np.nan}" misses NaNs of float columns).
key_columns = ["category_key", "category_level0_en", "category_level1_en", "category_level2_en", "category_level3_en"]
keys = sorted({k for col in key_columns for k in df[col] if pd.notna(k)})
d = {k:None for k in keys}
for r in dl:
    level0 = r["category_level0_en"]
    level1 = r["category_level1_en"]
    level2 = r["category_level2_en"]

    if d[level0] is None:
        d[level0] = create_opg_entry(level0, description=f"Top-level group for {level0} information", is_abstract=True)
    if d[level1] is None:
        d[level1] = create_opg_entry(level1, description=f"1st level group for {level1} information", sort_order=0.1, is_abstract=True, parent_groups=[level0])
    else:
        _add_parent(d[level1], level0)

    main_key = r["category_key"]
    main_description = r["category_description_en"]
    main_sort_order = r["category_sort"]
    if r["category_level3_en"] is None:
        # No third level: the category hangs directly under its level-1 group.
        main_parent = level1
    else:
        if d[level2] is None:
            # NOTE(review): the text says "1st level" (and is_abstract=False) for a
            # level-2 group -- looks copied from the level-1 case; confirm before changing.
            d[level2] = create_opg_entry(level2, description=f"1st level group for {level2} information", sort_order=0.2, is_abstract=False, parent_groups=[level1])
        else:
            _add_parent(d[level2], level1)
        main_parent = level2

    if d[main_key] is None:
        d[main_key] = create_opg_entry(main_key, description=main_description, sort_order=main_sort_order, is_abstract=False, parent_groups=[main_parent])
    else:
        _add_parent(d[main_key], main_parent)

# Parents are de-duplicated on insertion (see _add_parent), so the lists are
# already unique and their order is stable from run to run.
# Names that never received an entry are still None and are left out.
data = {"observable_property_groups": [v for v in d.values() if v]}
with open("ObservablePropertyGroupList_data.yaml", "w") as f:
    yaml.dump(data, f)

In [4]:
#empty: "timeframe";"provenance";"datatype_name";"stakeholder_name";"nonsampleobsprop_key";"unit_name";"category_level1";"category_level2";"category_sort";
#filter: "lingo";"stakeholder_key";

#"category_key";"levelslabels";"min";"max";"significantdecimals";"formula";"conditional";

#"varname";"name";"label";
#"subjectunique";"datatype_key";"unit_key";

def create_obsprop_entry(name=None, unique_name=None, description=None, label=None, value_type=None, categorical=False,
                         quantity_kind=None, default_unit=None, default_immutable=None, default_significantdecimals=None,
                         category_key=None, min=None, max=None, levelslabels=None, formula=None, conditional=None,
                         relevant_observable_entity_types=None, relevant_observation_types=None, indicator=None, **kwargs):
  """Build the dictionary describing one observable property.

  Args:
    name: Property name (always written).
    unique_name: Written to both ``"id"`` and ``"unique_name"``.
    description, label, quantity_kind: Written only when truthy.
    value_type, categorical: Always written.
    default_unit: Written when truthy and different from ``"-"``.
    default_immutable: Written only when truthy.
    default_significantdecimals: Written when given; ``0`` is kept.
    category_key: When truthy, stored as the single item of ``"groups"``.
    min, max: Bounds stored as ``{"field", "value"}`` items under
      ``"value_metadata"``; ``0`` is a valid bound and is kept.
    levelslabels: ``"key=label;key=label"`` string turned into the
      ``"value_options"`` list. Only the first ``=`` of a pair separates key
      from label; empty segments (e.g. after a trailing ``;``) are ignored.
    formula: When truthy, stored as ``{"formula": ...}`` under
      ``"calculation_design"``.
    conditional: When truthy, stored as ``{"conditional": ...}`` under
      ``"validation_design"``.
    relevant_observable_entity_types, relevant_observation_types, indicator:
      Written only when truthy.
    **kwargs: Accepted and ignored, so callers can pass a whole record that
      contains additional keys.

  Returns:
    dict: The observable property entry.
  """
  def _given(value):
    # Explicit "missing" test: a plain truthiness check would discard 0.
    return value is not None and value != ""

  d = {"name": name, "id": unique_name, "unique_name": unique_name, "value_type": value_type, "categorical": categorical}
  if description: d["description"] = description
  if label: d["label"] = label
  # TODO: fix units & add autoconversion
  if quantity_kind: d["quantity_kind"] = quantity_kind
  if default_unit and not(default_unit == "-"): d["default_unit"] = default_unit
  if default_immutable: d["default_immutable"] = default_immutable
  if _given(default_significantdecimals): d["default_significantdecimals"] = default_significantdecimals

  if category_key: d["groups"] = [category_key]

  if levelslabels:
    ll_dict = {}
    for pair in levelslabels.split(";"):
      pair = pair.strip()
      if not pair:
        continue  # a trailing or doubled ";" must not yield an empty option
      key, _, value = pair.partition("=")
      ll_dict[key.strip()] = value.strip()
    if ll_dict:
      d["value_options"] = [{"key":k, "value":v} for k,v in ll_dict.items()]

  value_metadata = []
  if _given(min): value_metadata.append({"field": "min", "value": min})
  if _given(max): value_metadata.append({"field": "max", "value": max})
  if value_metadata: d["value_metadata"] = value_metadata

  # TODO: fix complex properties
  if formula: d["calculation_design"] = {"formula": formula}
  if conditional: d["validation_design"] = {"conditional": conditional}

  if relevant_observable_entity_types: d["relevant_observable_entity_types"] = relevant_observable_entity_types
  if relevant_observation_types: d["relevant_observation_types"] = relevant_observation_types
  if indicator: d["indicator"] = indicator
  return d

def translate_entry(source):
  """Return a copy of `source` with export column names mapped to entry keys.

  For every key of `source` listed in the translation rules, the value is
  written to the rule's target properties of the result. When the rule has a
  translation table and the value is found in it, the translated values are
  used (one per target property); otherwise the raw value is copied to every
  target property. All original keys are kept in the result.

  Table lookups also accept numeric values: whole numbers such as ``1`` or
  ``0.0`` are matched against the string keys ``"1"`` / ``"0"``, because a
  CSV reader typically yields numbers rather than strings for such columns.

  Args:
    source: Mapping holding one exported record.

  Returns:
    dict: Deep copy of `source` extended with the translated properties.
  """
  translation_dict = {
    "datatype_key": {
      "target_props": ["value_type", "categorical"],
      "translation": {
        "categorical": ["string", True],
        "character": ["string", False],
        "decimal": ["decimal", False],
        "integer": ["integer", False],
    }},
    "subjectunique": {
      "target_props": ["default_immutable"],
      "translation": {
        "1": [True],
        "0": [False],
    }},
    "varname": {"target_props": ["unique_name", "name"]},
    "name": {"target_props": ["description"]},
    "label": {"target_props": ["label"]},
    "unit_key": {"target_props": ["default_unit"]},
    "significantdecimals": {"target_props": ["default_significantdecimals"]},
  }

  def _lookup_key(value):
    # Normalise a raw value to the string form used by the translation tables;
    # None means "cannot match any table key".
    if isinstance(value, str):
      return value
    if isinstance(value, (bool, np.bool_)):
      return None
    if isinstance(value, (float, np.floating)):
      if not float(value).is_integer():
        return None  # also covers NaN and infinities
      value = int(value)
    if isinstance(value, (int, np.integer)):
      return str(int(value))
    return None

  result = deepcopy(source)
  # Iterate over `source`, not `result`: values are always read from the
  # untouched input, so rules cannot feed into each other.
  for k,v in source.items():
    rule = translation_dict.get(k)
    if rule is None:
      continue
    lookup = _lookup_key(v)
    translated = rule.get("translation", {}).get(lookup) if lookup is not None else None
    if translated is not None:
      for nk, nv in zip(rule["target_props"], translated):
        result[nk] = nv
    else:
      # NOTE(review): for an unlisted datatype_key this copies the raw value into
      # both "value_type" and "categorical" -- kept as-is, confirm this is intended.
      for nk in rule["target_props"]:
        result[nk] = v
  return result

# Properties from nonsampleobspropTechnicalExport: observed on persons through questionnaires
df = pd.read_csv("./output/nonsampleobspropTechnicalExport.csv", sep=';', encoding='mbcs')
dl = df.replace({np.nan:None}).to_dict(orient="records")
for r in dl:
  r.update(relevant_observable_entity_types=["person"], relevant_observation_types=["questionnaire"])

# Keep only the English rows of the "Parc" stakeholder.
d = []
for r in dl:
  if r["lingo"] != "en" or r["stakeholder_key"] != "Parc":
    continue
  d.append(create_obsprop_entry(**translate_entry(r)))

# Properties from sampleobspropTechnicalExport: observed on persons and samples through sampling
df = pd.read_csv("./output/sampleobspropTechnicalExport.csv", sep=';', encoding='mbcs')
dl = df.replace({np.nan:None}).to_dict(orient="records")
for r in dl:
  r.update(relevant_observable_entity_types=["person", "sample"], relevant_observation_types=["sampling"])

for r in dl:
  if r["lingo"] != "en" or r["stakeholder_key"] != "Parc":
    continue
  d.append(create_obsprop_entry(**translate_entry(r)))

# One entry per id: a later entry replaces an earlier one with the same id
# while keeping the position of the first occurrence.
unique_dict = {}
for v in d:
  if not v:
    continue
  unique_dict[v["id"]] = v
unique_list = [*unique_dict.values()]

data = {"observable_properties": unique_list}
with open("ObservablePropertyList_data.yaml", "w") as f:
  yaml.dump(data, f)