In [13]:
from pathlib import Path

import pandas as pd
import numpy as np
import yaml
from linkml_runtime.dumpers import json_dumper, yaml_dumper

import peh

In [2]:
# Inputs: the EpiInfo field list (Excel workbook) and the code lists (YAML)
# consulted when building value options.
epiinfo_field_codes_path = Path("../source_tables/INQUIRE/Field_codes.yml")
epiinfo_field_list_path = Path("../source_tables/INQUIRE/Field_list.xlsx")

# Output: YAML file the generated observable properties are dumped to.
yaml_file_path = Path("../project_examples/INQUIRE/05-inquire_observableproperties.yaml")

In [4]:
# Load the field list; NaN cells are replaced by None so that missing values
# can be tested with `is None` further down.
df_fields = pd.read_excel(epiinfo_field_list_path).replace(np.nan, None)

# Read the code lists with an explicit encoding instead of relying on the
# platform default, which is not UTF-8 everywhere.
with open(epiinfo_field_codes_path, "r", encoding="utf-8") as stream:
    field_codes = yaml.safe_load(stream)

# Unique (PageId, metaPages_Name) pairs, skipping rows where both are missing.
# itertuples(index=False, name=None) yields plain 2-tuples of the selected
# columns; dict.fromkeys de-duplicates while keeping first-seen order, so the
# result (and everything generated from it) is the same on every run, which
# iterating over a set does not guarantee.
page_list = list(dict.fromkeys(
    (page_id, page_name)
    for page_id, page_name in df_fields[["PageId", "metaPages_Name"]].itertuples(index=False, name=None)
    if not (page_id is None and page_name is None)
))

In [5]:
def get_valueoptions(table_name, column_name):
    """Build the list of value options for one coded field.

    Looks up ``table_name`` in the module-level ``field_codes`` mapping,
    verifies that the entry's ``name`` equals ``column_name`` (ignoring case
    and surrounding whitespace) and converts every item of the entry's
    ``values`` list into a ``{"key": ..., "value": ...}`` dict.

    Each item is split on its FIRST hyphen only: the part before it is the
    key, the remainder is the value, so values that themselves contain a
    hyphen (e.g. ``"2 - Self-employed"``) are kept intact. An item without
    any hyphen uses the whole (stripped) text as both key and value.

    Args:
        table_name: Key of the code list in ``field_codes``.
        column_name: Name the code list is expected to carry.

    Returns:
        A list of ``{"key": str, "value": str}`` dicts, in the order of the
        code list's ``values``.

    Raises:
        KeyError: If ``table_name`` is not present in ``field_codes`` or the
            code list's name does not match ``column_name``.
    """
    value_dict = field_codes[table_name]
    if value_dict["name"].strip().lower() != column_name.strip().lower():
        # Report the name that was actually found in the exception itself
        # rather than through separate print() calls.
        raise KeyError(
            f"ValueOption Lookup KeyError: SourceTableName {table_name} doesn't match TextColumnName {column_name}"
            f" (code list is named {value_dict['name']!r})"
        )

    options = []
    for raw_option in value_dict["values"]:
        key, separator, label = raw_option.partition("-")
        key = key.strip()
        # No hyphen at all -> the key doubles as the value.
        options.append({"key": key, "value": label.strip() if separator else key})
    return options

In [15]:
# One ObservablePropertyGroup per (PageId, metaPages_Name) pair in page_list:
# the page name serves as id, shortname and name; the PageId is the sort order.
observablepropertygroups = [
    peh.ObservablePropertyGroup(
        id=page_name,
        shortname=page_name,
        name=page_name,
        is_abstract=False,
        sort_order=page_id,
    )
    for page_id, page_name in page_list
]
# Metadata fields, declared as (identifier, value_type) pairs; the identifier
# is used for both `id` and `name`.
metadatafields = [
    peh.ObservablePropertyMetadataField(id=field_id, name=field_id, value_type=field_type)
    for field_id, field_type in (
        ("min", "decimal"),
        ("max", "decimal"),
        ("lower", "string"),
        ("upper", "string"),
        ("minlength", "integer"),
        ("maxlength", "integer"),
        ("format", "string"),
    )
]

In [16]:
# Consistency Checks
# The commented-out expressions below are ad-hoc checks on the rows that have a
# SourceTableName. Uncommented, they would select the rows where
#   * the lower-cased SourceTableName differs from "code" + lower-cased CodeColumnName + "1"
#   * the lower-cased TextColumnName differs from the lower-cased CodeColumnName
#df = df_fields[df_fields["SourceTableName"].isnull()==False]
#df[df["SourceTableName"].apply(str.lower) != "code" + df["CodeColumnName"].apply(str.lower) + "1"]
#df[df["TextColumnName"].apply(str.lower) != df["CodeColumnName"].apply(str.lower)]
# Display one record (position 4) as a dict to inspect the available columns.
df_fields.to_dict("records")[4]

{'PageId': 1.0,
 'metaPages_Name': 'Identifier and demographics',
 'TabIndex': 2.0,
 'metaFields_Name': 'Todaysdate',
 'PromptText': "Today's date (as filled by participant on questionnaire)",
 'IsRequired': True,
 'MaxLength': None,
 'Lower': None,
 'Upper': '2023-05-16',
 'SourceTableName': None,
 'CodeColumnName': None,
 'TextColumnName': None,
 'metaFieldTypes_Name': 'Date',
 'IsDropDown': False,
 'metaDataTypes_Name': 'Date',
 'Pattern': 'YYYY-MM-DD',
 'List': None}

In [17]:
# One ObservableProperty per field whose field type is "Legal Values"; the
# field name serves as id, shortname and name, and the value options come from
# the code list referenced by the field's SourceTableName / TextColumnName.
observableproperties = [
    peh.ObservableProperty(
        id=field["metaFields_Name"],
        shortname=field["metaFields_Name"],
        name=field["metaFields_Name"],
        value_options_as_list=get_valueoptions(
            field["SourceTableName"],
            field["TextColumnName"],
        ),
    )
    for field in df_fields.to_dict("records")
    if field["metaFieldTypes_Name"] == "Legal Values"
]

In [18]:
# Collect the three generated lists under their output keys and write them to
# the target YAML file.
data = dict(
    observablepropertygroups_as_list=observablepropertygroups,
    metadata_fields_as_list=metadatafields,
    observableproperties_as_list=observableproperties,
)
yaml_dumper.dump(data, yaml_file_path)