# Read-in example of the synthesis logs with plotting

This notebook shows an example of how to read interesting information from the synthesis files and how to plot that information.

In [5]:
from pathlib import Path
import h5py
import nexusformat.nexus as nx # makes it easier to extract information. 
import numpy as np
from attrs import Factory
from attrs import define, validators, field, cmp_using, fields
import pandas as pd

import logging
from typing import Iterable
from typing import Any, NoReturn


In [15]:
# Setting up some flexible HDF reading methods (copied from DataMerge dataclasses and readers for now): 

# Mixin class for making a dict-like object out of an attrs class
# from: https://github.com/python-attrs/attrs/issues/879







class gimmeItems:  # used to be MutableMappingMixin(MutableMapping)
    """Dictionary-style access layer for attrs classes.

    Mix this into an attrs-decorated class (and ONLY an attrs class) to
    address its attributes with ``obj[name]`` and to walk over its fields
    through keys(), values() and items(). The set of fields is fixed:
    deleting or popping an entry always raises a TypeError.
    """

    __slots__ = ()  # the mixin itself stores nothing per instance

    def __iter__(self) -> Iterable:
        # Iterating yields the field names reported by attrs for this class.
        yield from (attribute.name for attribute in fields(self.__class__))

    def __len__(self) -> int:
        attrsFields = fields(self.__class__)
        return len(attrsFields)

    def __getitem__(self, k: str) -> Any:
        """
        Return attribute ``k``; an unknown name is reported as a KeyError.

        Adapted from:
        https://github.com/python-attrs/attrs/issues/487#issuecomment-660727537
        """
        try:
            value = self.__getattribute__(k)
        except AttributeError as missing:
            # Translate to the exception type mapping users expect.
            raise KeyError(str(missing)) from None
        return value

    def __delitem__(self, v: str) -> NoReturn:
        raise TypeError("Cannot delete fields for attrs classes.")

    def __setitem__(self, k: str, v: Any) -> None:
        setattr(self, k, v)

    def pop(self, key, default=None) -> NoReturn:
        raise TypeError("Cannot pop fields from attrs classes.")

    def keys(self) -> Iterable:
        return iter(self)

    def values(self) -> Iterable:
        yield from (self.__getattribute__(name) for name in self.__iter__())

    def items(self) -> Iterable:
        yield from ((name, self.__getattribute__(name)) for name in self.__iter__())

# end copy

# First, we set the read names and locations here:
def _hdfPathField(location: str):
    """Build one attrs field holding an HDF5 path: coerced with str(), then checked to be a str."""
    return field(
        default=location,
        converter=str,
        validator=validators.instance_of(str),
    )


@define
class HDFPathsObj(gimmeItems):
    """
    Config carrying the HDF5 path locations for reading datafiles.

    Each attribute names one quantity to read; its value is the location of
    that quantity inside the HDF5 file, always kept as a string.
    """

    # The scattering-data entries of the copied DataMerge config
    # (/entry/result/Q, /entry/result/I, /entry/result/ISigma) are left out here.

    SampleID: str = _hdfPathField("/AutoMOF5/MOF_synthesis_1/RawLog/0/SampleID")
    ExperimentID: str = _hdfPathField("/AutoMOF5/ID")
    InjectionSpeed: str = _hdfPathField(
        "/AutoMOF5/MOF_synthesis_1/ExtraInformation/InjectionSpeed"
    )
    # TODO: an InjectionSpeedUnits entry (same HDF5 location as InjectionSpeed)
    # is not defined because reading HDF5 attributes is not supported yet.
    MetalToLinkerRatio: str = _hdfPathField(
        "/AutoMOF5/MOF_synthesis_1/ExtraInformation/MetalToLinkerRatio"
    )
    MetalToMethanolRatio: str = _hdfPathField(
        "/AutoMOF5/MOF_synthesis_1/ExtraInformation/MetalToMethanolRatio"
    )
    ReactionTime: str = _hdfPathField(
        "/AutoMOF5/MOF_synthesis_1/ExtraInformation/ReactionTime"
    )
    ChemicalYield: str = _hdfPathField("/AutoMOF5/Chemicals/ChemicalYield")

# then we configure the defaults for the same: 
def _fallbackField(kind: type, value: Any):
    """Build one attrs field for a fallback value: coerced with ``kind``, then checked to be a ``kind``."""
    return field(
        default=value,
        converter=kind,
        validator=validators.instance_of(kind),
    )


@define
class HDFDefaultsObj(gimmeItems):
    """In case the preset HDF5 paths are empty, defaults can be used for some non-critical items"""

    # Text identifiers fall back to an empty string.
    SampleID: str = _fallbackField(str, "")
    ExperimentID: str = _fallbackField(str, "")

    # Numeric quantities fall back to 0.0 and are always stored as float.
    # I guess these should be pint Units at some point.
    InjectionSpeed: float = _fallbackField(float, 0.0)
    MetalToLinkerRatio: float = _fallbackField(float, 0.0)
    MetalToMethanolRatio: float = _fallbackField(float, 0.0)
    ReactionTime: float = _fallbackField(float, 0.0)
    ChemicalYield: float = _fallbackField(float, 0.0)


@define
class readConfigObj(gimmeItems):
    """
    Bundle of everything the reader needs to know about a datafile:
    where each item lives (hdfPaths) and what to use when it is absent
    (hdfDefaults). Both parts are created with their own defaults when
    not supplied.
    """

    # HDF5 locations of the items to read
    hdfPaths: HDFPathsObj = field(
        factory=HDFPathsObj,
        validator=validators.instance_of(HDFPathsObj),
    )

    # fallback values for items whose location is missing from the file
    hdfDefaults: HDFDefaultsObj = field(
        factory=HDFDefaultsObj,
        validator=validators.instance_of(HDFDefaultsObj),
    )

# this could be used to read the configuration from yaml files: 
def readConfigObjFromYaml(filename: Path) -> readConfigObj:
    """Build a readConfigObj from the ``readConfig`` section of a YAML file.

    Parameters
    ----------
    filename:
        Location of the YAML configuration file (a Path, or anything
        accepted by ``Path()``).

    Returns
    -------
    readConfigObj
        Built from the optional ``HDFPaths`` and ``HDFDefaults`` mappings
        found under ``readConfig``. Entries that are not given keep their
        class defaults; a file without a (non-empty) ``readConfig`` section
        yields an all-default configuration.

    Raises
    ------
    AssertionError
        If ``filename`` does not point to an existing file.
    """
    # PyYAML is imported locally: only this optional helper needs it, so the
    # rest of the notebook keeps working when PyYAML is not installed. The
    # name was previously never imported, so every call ended in a NameError.
    import yaml

    filename = Path(filename)
    assert (
        filename.is_file()
    ), f"Read configuration filename {filename.as_posix()} does not exist"
    with open(filename, "r") as f:
        # safe_load gives None for an empty document; treat that as "no settings"
        configDict = yaml.safe_load(f) or {}

    # a missing or empty readConfig section both mean: use the defaults
    readSection = configDict.get("readConfig") or {}
    if not readSection:
        return readConfigObj(HDFPathsObj(), HDFDefaultsObj())
    logging.debug("readConfig section of %s: %s", filename.as_posix(), readSection)

    HDFPaths = HDFPathsObj(**(readSection.get("HDFPaths") or {}))
    HDFDefaults = HDFDefaultsObj(**(readSection.get("HDFDefaults") or {}))
    return readConfigObj(HDFPaths, HDFDefaults)


# now for the actual reader, modified here slightly to output to Pandas Dataframe:
def scatteringDataObjFromNX(
    filename: Path, readConfig: readConfigObj
) -> pd.DataFrame:
    """Read the configured items from one HDF5/NeXus file into a one-row DataFrame.

    For every entry in ``readConfig.hdfPaths`` the value stored at that HDF5
    location is read. When the location is absent from the file, the entry of
    the same name in ``readConfig.hdfDefaults`` is used instead.

    Parameters
    ----------
    filename:
        Location of the file to read (a Path, or anything accepted by ``Path()``).
    readConfig:
        Carries the HDF5 locations (``hdfPaths``) and fallbacks (``hdfDefaults``).

    Returns
    -------
    pd.DataFrame
        A single row (index 0) with one column per entry in ``hdfPaths``.
        Byte strings are decoded as UTF-8. One-element arrays are reduced to
        their single value; arrays of any other length are flattened and kept
        whole inside their cell.

    Raises
    ------
    AssertionError
        If the file cannot be found, or if a location is missing from the file
        and no default of the same name exists. The message is also logged.
    """
    filename = Path(filename)
    if not filename.is_file():
        # Log AND raise with the message; the previous assert passed the None
        # returned by logging.warning as its message and vanished under -O.
        message = f'{filename=} cannot be accessed from {Path(".").absolute().as_posix()}'
        logging.warning(message)
        raise AssertionError(message)

    kvs = {}
    with h5py.File(filename, "r") as h5f:
        for key, hPath in readConfig.hdfPaths.items():
            if hPath in h5f:
                val = h5f[hPath][()]
                if isinstance(val, np.ndarray):
                    val = val.flatten()
                    if val.size == 1:
                        # unwrap so that a bytes element is decoded like a scalar below
                        val = val[0]
                    else:
                        # wrap so the array occupies one cell of the single row;
                        # passed bare it would clash with the one-entry index
                        val = [val]
            elif key in readConfig.hdfDefaults.keys():
                val = getattr(readConfig.hdfDefaults, key)
            else:
                message = f"NeXus file {filename=} does not contain information for {key=} at specified HDF5 Path {hPath}"
                logging.error(message)
                raise AssertionError(message)
            if isinstance(val, bytes):
                val = val.decode("utf-8")
            kvs[key] = val

    return pd.DataFrame(data=kvs, index=[0])

In [16]:
# Read the example synthesis log from tests/testData with the default
# HDF5 paths and fallback values.
filepath = Path("tests/testData/AutoMOFs05_T000.h5")
info = scatteringDataObjFromNX(filename=filepath, readConfig=readConfigObj())

In [17]:
# Display the DataFrame that was read: one row, one column per configured HDF5 path.
info

Unnamed: 0,SampleID,ExperimentID,InjectionSpeed,MetalToLinkerRatio,MetalToMethanolRatio,ReactionTime,ChemicalYield
0,T000,AutoMOF5,10,0.132734,0.007145,31482.0,0.315922
