In [130]:
import json
import re
from math import isnan
from pathlib import Path
from typing import List
from typing import Union

import yaml
from dateutil.parser import parse

In [131]:
# Input files: YAML metadata of the mixtures handled in this notebook.
mixture_metadata_file_path = "2014_12_10 Wolf.yaml"
mixture_metadata_file_path2 = "2020_11_10 Klimek Geschossdecke_Quarzkies_290 l.yaml"

# Code handed to the helper functions below as `ingredient_code`.
_INGREDIENT_CODE = "EMODUL_INGREDIENT"

# Properties whose keys keep their original spelling when the YAML is read.
# Each value is a three-element list; presumably
# [openBIS data type, label, description] -- TODO confirm.
default_props = {
    "operator_date": ["TIMESTAMP", "operator_date", "operator_date"],
    "tester_name": ["VARCHAR", "tester_name", "tester_name"],
    "$name": ["VARCHAR", "Name", "Name"],
}

# Substrings that assign a YAML key to an ingredient group. Order matters:
# the first keyword contained in a key wins.
_INGREDIENT_KEYWORDS = [
    "cement",
    "water_total",
    "addition",
    "admixture",
    "aggregate",
]

def _read_metadata_mixture_ingredients(yaml_path: Union[str, Path], mixture_code: str, ingredient_code: str, default_props: dict, keywords: list) -> dict:
    """
    Read a mixture metadata YAML file and group its entries by ingredient keyword.

    Top-level entries whose value is ``None`` are skipped. An entry whose key
    contains one of ``keywords`` is stored, with its key lower-cased, under the
    first such keyword in list order. Every other entry is stored under
    ``'mixture'``: keys listed in ``default_props`` keep their spelling, all
    other keys are lower-cased. Float NaN values are replaced by ``0.0``.
    Finally ``water_cement_ratio`` is moved from the ``'cement'`` group (where
    the keyword match files it when "cement" is a keyword) to ``'mixture'``.

    Args:
        yaml_path: Path of the YAML file to read; it is decoded as UTF-8.
        mixture_code: Not used by this function; kept so existing calls keep working.
        ingredient_code: Not used by this function; kept so existing calls keep working.
        default_props: Mapping whose keys name the properties that keep their
            original spelling. Only the keys are read.
        keywords: Substrings that identify the ingredient groups.

    Returns:
        A dict with one sub-dict per keyword plus a ``'mixture'`` sub-dict.
        An empty YAML document yields empty sub-dicts.
    """
    default_keys = default_props.keys()

    # Decode explicitly as UTF-8 instead of relying on the platform default.
    with open(yaml_path, 'r', encoding='utf-8') as file:
        # safe_load returns None for an empty document.
        loaded = dict(yaml.safe_load(file) or {})

    data = {keyword: {} for keyword in keywords}
    data['mixture'] = {}

    for key, val in loaded.items():
        if val is None:
            continue

        matching_keyword = next((keyword for keyword in keywords if keyword in key), None)
        if matching_keyword is not None:
            data[matching_keyword][str(key).lower()] = val
        elif key not in default_keys:
            data['mixture'][str(key).lower()] = val
        elif key.lower() == 'operator_date':
            # Normalise the date to the openBIS format YYYY-MM-DD.
            if hasattr(val, 'strftime'):
                # YAML already turned an unquoted ISO date into a date object,
                # which dateutil's parse() would reject.
                data['mixture'][key] = val.strftime('%Y-%m-%d')
            else:
                # NOTE(review): dateutil reads an ambiguous numeric date such as
                # "10.12.2014" month-first unless dayfirst=True is passed --
                # confirm the format used in the input files.
                data['mixture'][key] = parse(val).strftime('%Y-%m-%d')
        else:
            data['mixture'][key] = val

    # Converting NaN values to 0.0 as openBIS does not accept NaNs
    for group in data.values():
        for key, val in group.items():
            if isinstance(val, float) and isnan(val):
                group[key] = 0.0

    # The ratio describes the mixture, not the cement. It is missing when the
    # YAML value is null/absent or when 'cement' is not among the keywords, so
    # move it only if it is actually there.
    water_cement_ratio = data.get('cement', {}).pop('water_cement_ratio', None)
    if water_cement_ratio is not None:
        data['mixture']['water_cement_ratio'] = water_cement_ratio

    return data

def _split_addition(addition_combined_dict: dict, ingredient_code: str) -> List[dict]:
    """
    Recieves a dict where additions 1..x are contained and returns a list of their split dicts
    """
    current_addition_counter = 1
    current_addition = "addition" + str(current_addition_counter)

    split_dict = {}

    while f"{ingredient_code.lower()}.{current_addition}--bulkdensity" in addition_combined_dict:
        filtered_dict = {key: val for key, val in addition_combined_dict.items() if current_addition in key}
        split_dict[current_addition] = filtered_dict
        addition_combined_dict = {key: val for key, val in addition_combined_dict.items() if key not in filtered_dict}
        current_addition_counter += 1
        current_addition = "addition" + str(current_addition_counter)

    return split_dict

In [132]:
mixture_sample_code = "EXPERIMENTAL_STEP_EMODUL_MIX"

# Read the YAML file once; the result holds one sub-dict per ingredient
# keyword plus the 'mixture' sub-dict.
mixture_ingredient_dict = _read_metadata_mixture_ingredients(
    yaml_path=mixture_metadata_file_path,
    mixture_code=mixture_sample_code,
    ingredient_code=_INGREDIENT_CODE,
    default_props=default_props,
    keywords=_INGREDIENT_KEYWORDS,
)

# Split the read dictionary into the mixture dictionary and the ingredient dictionary
mixture_metadata = mixture_ingredient_dict.pop('mixture')
mixture_metadata['ingredients'] = {}

# Split 'addition' into addition1, addition2, ... so that each one can be
# uploaded as a separate ingredient
combined_additions = mixture_ingredient_dict.pop('addition')
additions_dict = _split_addition(combined_additions, _INGREDIENT_CODE)
ingredient_metadata = {**mixture_ingredient_dict, **additions_dict}

In [133]:
# Properties that describe how much of an ingredient goes into this mix. They
# are stored with the mixture and removed from the ingredient itself; the same
# exact-name test is used for both directions so every property ends up in
# exactly one place.
_MIX_SPECIFIC_PROPS = ("quantityinmix", "volume")

# Start from the dictionaries produced by the previous cell rather than from
# `ingredient_metadata` itself, so that re-running this cell does not append
# "_identifier" a second time.
ingredients_raw = mixture_ingredient_dict | additions_dict

# Drop the "<ingredient>--" part of every property key and record the
# ingredient type alongside its properties.
ingredients_flat = {
    f"{ing}_identifier": {
        **{key.split("--")[-1]: val for key, val in ing_dict.items()},
        "type": ing,
    }
    for ing, ing_dict in ingredients_raw.items()
}

# Mix-specific quantities go to the mixture ...
mixture_metadata['ingredients'] = {
    ing: {prop: prop_val for prop, prop_val in ing_props.items() if prop in _MIX_SPECIFIC_PROPS}
    for ing, ing_props in ingredients_flat.items()
}

# ... and everything else stays with the ingredient.
ingredient_metadata = {
    ing: {prop: prop_val for prop, prop_val in ing_props.items() if prop not in _MIX_SPECIFIC_PROPS}
    for ing, ing_props in ingredients_flat.items()
}

In [134]:
# Inspect the ingredient-level metadata; the mix-specific "quantityinmix" and
# "volume" entries were filtered out in the previous cell.
ingredient_metadata

{'cement_identifier': {'bulkdensity': 3.1,
  'annotation': 'CEM I 32.5 R Zementwerk Berlin',
  'type': 'cement'},
 'water_total_identifier': {'bulkdensity': 1.0, 'type': 'water_total'},
 'admixture_identifier': {'bulkdensity': 1.14,
  'annotation': 'FM 21/BV 21',
  'type': 'admixture'},
 'aggregate_identifier': {'bulkdensity': 0.0, 'type': 'aggregate'}}

In [135]:
# Inspect the mixture-level metadata, including the per-ingredient entries
# collected under 'ingredients'.
mixture_metadata

{'operator_date': '2014-12-10',
 'tester_name': 'Haamkens',
 'specimen_name': '8.2 (Wolf)',
 'water_effective--quantityinmix': 0.0,
 'water_effective--bulkdensity': 0.0,
 'water_effective--volume': 0.0,
 'air_content--quantityinmix': 0.0,
 'air_content--bulkdensity': 0.0,
 'air_content--volume': 20.0,
 'water_cement_ratio': 0.5454545454545454,
 'ingredients': {'cement_identifier': {'quantityinmix': 330.0,
   'volume': 106.5},
  'water_total_identifier': {'quantityinmix': 180.0, 'volume': 180.0},
  'admixture_identifier': {'quantityinmix': 4.95, 'volume': 4.3},
  'aggregate_identifier': {'quantityinmix': 1720.0, 'volume': 656.2}}}

In [136]:
# ensure_ascii=False writes non-ASCII characters verbatim, so the files are
# opened explicitly as UTF-8 instead of with the platform default encoding
# (which may be unable to encode them).
with open("mixture.json", "w", encoding="utf-8") as file:
    json.dump(mixture_metadata, file, indent=4, ensure_ascii=False)

with open("ingredients.json", "w", encoding="utf-8") as file:
    json.dump(ingredient_metadata, file, indent=4, ensure_ascii=False)