In [53]:
import peh
import json
from pathlib import Path
import pydantic_yaml

from linkml_runtime.loaders import json_loader, yaml_loader
from linkml_runtime.dumpers import json_dumper, yaml_dumper
from peh import NamedThing, NamedThingId, EntityList, Project, Study, Stakeholder, BioChemEntity

from collections import defaultdict

import logging
# getLogger() without a name returns the root logger, which is also the logger
# used by the module-level logging.debug(...) calls in the cells below.
LOGGER = logging.getLogger()
# Let DEBUG records through so the per-file and lookup-failure messages are shown.
LOGGER.setLevel(logging.DEBUG)

In [44]:
# In-memory store shared by the helper functions below:
#   class name -> {object id -> object}
# The outer defaultdict creates an inner mapping on first access to a class name;
# the inner defaultdict has no default factory, so a missing id raises KeyError.
IN_MEMORY_DICT = defaultdict(defaultdict)

def reset_store():
    """Remove every stored object from ``IN_MEMORY_DICT``.

    The store is emptied in place rather than rebound: a plain assignment
    inside this function would only create a local variable and leave the
    module-level store untouched. Clearing in place also keeps any existing
    reference to the store pointing at the same (now empty) mapping.
    """
    IN_MEMORY_DICT.clear()

def save_entity_to_store(entity):
    """Store ``entity`` keyed by the name of its class and by its ``id`` attribute.

    Args:
        entity: Object exposing an ``id`` attribute.
    """
    class_name = entity.__class__.__name__
    save_object_to_store(class_name, entity.id, entity)

def save_object_to_store(obj_class_name, obj_id, obj):
    """Put ``obj`` into the store under ``obj_class_name`` / ``obj_id``.

    An object already stored under the same class name and id is replaced.

    Args:
        obj_class_name: First-level key of the store.
        obj_id: Second-level key of the store.
        obj: The object to keep.
    """
    # Indexing the outer mapping creates the per-class mapping when it is missing.
    objects_by_id = IN_MEMORY_DICT[obj_class_name]
    objects_by_id[obj_id] = obj

def get_object_from_store(obj_class_name, obj_id):
    """Return the object stored under ``obj_class_name`` / ``obj_id``.

    Args:
        obj_class_name: First-level key of the store.
        obj_id: Second-level key of the store.

    Returns:
        The stored object.

    Raises:
        KeyError: If nothing is stored for that class name and id. The same
            message is also logged at DEBUG level before raising.
    """
    # Membership is tested before indexing so that a failed lookup does not
    # create an empty per-class mapping in the store.
    if obj_class_name not in IN_MEMORY_DICT or obj_id not in IN_MEMORY_DICT[obj_class_name]:
        message = f"IN_MEMORY_DICT Storage error: Object of class '{obj_class_name}' with id '{obj_id}' not found."
        logging.debug(message)
        raise KeyError(message)
    return IN_MEMORY_DICT[obj_class_name][obj_id]

def load_entities_from_folder(folder_path):
    """Load every ``*.yaml`` file found recursively under ``folder_path``.

    Each matching file is logged at DEBUG level and handed to
    ``load_entities_from_file`` as a string path.

    Args:
        folder_path: Directory to scan, given either as a ``pathlib.Path`` or
            as a plain string path.
    """
    # Coerce to Path so string arguments work too, and sort the matches so the
    # files are always processed in the same order: entities are stored by
    # class name and id, so when two files define the same one, the file
    # loaded last wins.
    for file_path in sorted(Path(folder_path).glob('**/*.yaml')):
        logging.debug(file_path)
        load_entities_from_file(str(file_path))

def load_entities_from_file(file_path):
    """Parse one YAML file as an ``EntityList`` and store every entity in it.

    Args:
        file_path: Location of the YAML file, passed unchanged to
            ``yaml_loader.load``.
    """
    entity_list = yaml_loader.load(file_path, EntityList)
    # Snapshot the key names first, then walk each keyed collection in turn.
    collection_names = list(entity_list._keys())
    for collection_name in collection_names:
        collection = getattr(entity_list, collection_name)
        for entity in collection:
            save_entity_to_store(entity)

def load_entities_from_tree(root):
    """Recursively store every ``NamedThing`` reachable from ``root``.

    Args:
        root: A ``NamedThing``, a list, a dict, or any other value.
            A ``NamedThing`` is stored and each of its non-``None`` attribute
            values is then searched; for a dict its values are searched; for a
            list its items are searched; any other value is ignored.
    """
    if isinstance(root, NamedThing):
        save_entity_to_store(root)
        for attr_name in list(root._keys()):
            attr_value = getattr(root, attr_name)
            if attr_value is None:
                continue
            # Normalise the attribute to a list of children before recursing.
            if isinstance(attr_value, list):
                children = attr_value
            elif isinstance(attr_value, dict):
                children = list(attr_value.values())
            else:
                children = [attr_value]
            load_entities_from_tree(children)
    # Disabled experiment kept for reference (storing bare id references):
    #    elif isinstance(root, NamedThingId):
    #        save_object_to_store(root.__class__.__name__, str(root), root)
    if isinstance(root, dict):
        # Only the values of a mapping are searched; a snapshot is taken first.
        root = list(root.values())
    if isinstance(root, list):
        for entity in root:
            load_entities_from_tree(entity)

def get_entity(obj_id, obj_class_name=None):
    """Look up a stored entity from an id (or an entity) and an optional class.

    Args:
        obj_id: Either a key value, or an object with an ``id`` attribute, in
            which case that attribute is used as the key.
        obj_class_name: Class or class name to search under. When omitted
            (or falsy), the class name of ``obj_id`` itself is used.

    Returns:
        The object returned by ``get_object_from_store``.

    Raises:
        KeyError: Propagated from ``get_object_from_store`` when no object is
            stored for the resolved class name and id.
    """
    if not obj_class_name:
        class_name = obj_id.__class__.__name__
    else:
        class_name = obj_class_name.__name__ if isinstance(obj_class_name, type) else obj_class_name

    # A trailing "Id" is dropped, so e.g. "StakeholderId" is looked up as "Stakeholder".
    if class_name[-2:] == "Id":
        class_name = class_name[:-2]

    lookup_id = getattr(obj_id, "id", obj_id)
    return get_object_from_store(class_name, lookup_id)

#p = yaml_loader.load(r'..\data\ProjectList_data.yaml', EntityList)
#p = pydantic_yaml.parse_yaml_file_as(peh.Project, r"./Project_data.yaml")

In [45]:
# Start from an empty store so that re-running this cell does not keep stale entries.
IN_MEMORY_DICT = defaultdict(defaultdict)
# Build the path from its components instead of a backslash literal, so the
# data folder is found on POSIX systems as well as on Windows.
load_entities_from_folder(Path('..') / 'data')
# Second pass over what was just loaded: also store the entities nested inside them.
load_entities_from_tree(IN_MEMORY_DICT)

# Overview of the store: one line per class name with the ids stored under it.
for cn, entities_by_id in IN_MEMORY_DICT.items():
    print(cn, list(entities_by_id))

DEBUG:root:..\data\BioChemEntityList_data.yaml
DEBUG:root:..\data\BioChemGroupingList_data.yaml
DEBUG:root:..\data\IndicatorList_data.yaml
DEBUG:root:..\data\MatrixList_data.yaml
DEBUG:root:..\data\ObservablePropertyGroupList_data.yaml
DEBUG:root:..\data\ObservablePropertyList_data.yaml
DEBUG:root:..\data\ObservablePropertyMetadataFieldList_data.yaml
DEBUG:root:..\data\ProjectList_data.yaml
DEBUG:root:..\data\StakeholderList_data.yaml
DEBUG:root:..\data\StudyList_data.yaml
DEBUG:root:..\data\TimepointList_data.yaml


BioChemEntity ['hg']
BioChemGrouping ['Acrylamide', 'Alkaloid', 'Anilines and MOCA', 'Aprotic Solvents', 'Bisphenols', 'Care Products', 'DINCH', 'Diisocyanates', 'Dioxins', 'Flame Retardants', 'Furans', 'Glycol Ethers', 'Glyphosate', 'Herbicides', 'Metals', 'Musks', 'Mycotoxins', 'PCBs', 'Parabens', 'Per-/Poly-Fluorinated Compounds (PFASs)', 'Pesticides', 'Pesticides (Carbamate)', 'Pesticides (Pyrethroids)', 'Phthalates', 'Polycyclic Aromatic Hydrocarbons (PAHs)', 'UV-filters (Benzophenones)', 'Volatile Organic Compounds (VOCs)', 'Volatile Anaesthetics']
Indicator ['Age of a person in years', 'Concentration of mercury in morning urine']
Matrix ['bwb', 'bs', 'bp', 'bbc', 'rbc', 'cbwb', 'cbs', 'cbp', 'cbbc', 'us', 'um', 'uf', 'ud', 'bm', 'h', 'n', 'atn', 'btn', 'nm', 'sa', 'st', 'sem', 'ebc', 'cbt', 'adi', 'dw', 'af', 'plt', 'idust', 'odust', 'oair', 'iair', 'pair', 'soil', 'kgfood', 'egg', 'water', 'vams', 'dbs', 'swb', 'sw', 'blood', 'bloodcells', 'cord blood', 'urine', 'air', 'food']


In [51]:
# Fetch the study once by id and class name, then reuse it for the next two lookups.
study_3xg = get_entity("3XG", "Study")
# Lookup from an entity object: class name and id are taken from the object itself.
print(get_entity(study_3xg))
# Lookup from the stakeholder reference held by the study's first stakeholder entry
# (presumably an id-typed value whose class name ends in "Id" - confirm against peh).
print(get_entity(study_3xg.study_stakeholders_as_list[0].stakeholder))
# Lookup by id with the class given as a type instead of a string.
print(get_entity("VITO", Stakeholder))

Study(id='3XG', shortname='3XG', name=None, description=None, label=None, study_stakeholders_as_list=[StudyStakeholder(stakeholder='VITO', study_roles=[(text='principal_investigator'), (text='data_controller'), (text='data_processor'), (text='data_user')])], timepoints=['3xGST03'], study_entities=[], projects=['HBM4EU', 'PARC', 'EIRENE'])
Stakeholder(id='VITO', shortname='VITO', name='VITO', description=None, label=None, geographic_scope='BE')
Stakeholder(id='VITO', shortname='VITO', name='VITO', description=None, label=None, geographic_scope='BE')


In [55]:
# Write two stored entities back out as YAML files in the current working directory.
# Plain relative file names are used instead of '.\name' literals: a backslash
# is only a path separator on Windows, so on POSIX the old literals would create
# files whose names literally start with '.\'.
vito = get_entity("VITO", Stakeholder)
yaml_dumper.dump(vito, Path('vito.yaml'))
_3XG = get_entity("3XG", Study)
yaml_dumper.dump(_3XG, Path('_3XG.yaml'))