# <center>Workflow for on-line GC and HPLC analysis in flow chemistry</center>
# <center>2.1 Experimental notebook - Parsing</center>

---

This is the ``Experimental`` ``notebook`` ``2.1 "Parsing"``, where all the relevant data of the experiments are read in from different resources. For each individual experiment this workflow is to be executed once, and the data can be appended to the project's dataset.

---

---
## Section 0: Imports, Paths, and Logging
---

In this section all the necessary Python packages are imported, and the path to this notebook and the logger for this notebook are set up.

In [20]:
# Activate autoreload to keep on track with changing modules #
%reload_ext autoreload
%autoreload 2

# Import standard libraries #
import os
import json
import logging
import pandas as pd
import ipywidgets as widgets
from pathlib import Path
from IPython.display import display, clear_output

# Import librarian module for file directory handling #
from datamodel_b07_tc.tools import Librarian

# Import modified sdRDM objects #
from datamodel_b07_tc.modified.experiment import Experiment
from datamodel_b07_tc.modified.measurement import Measurement
from datamodel_b07_tc.modified.plantsetup import PlantSetup
from datamodel_b07_tc.modified.dataset import Dataset

# Import datamodel from sdRDM #
from sdRDM import DataModel

# Import tools for parsing and calibration of the raw data #
from datamodel_b07_tc.tools import Calibrator
from datamodel_b07_tc.tools import gc_parser
from datamodel_b07_tc.tools import gstatic_parser
from datamodel_b07_tc.tools import mfm_parser
#from datamodel_b07_tc.tools import DEXPI2sdRDM

# Import tools for analysis of the raw data #
from datamodel_b07_tc.tools import FaradayEfficiencyCalculator
from datamodel_b07_tc.tools import PeakAssigner

In [2]:

def search_files_in_subdirectory(root_directory: Path, directory_keys: list[str], file_filter: str, verbose: bool = None) -> Path:
    """
    Descend through a chain of nested sub directories and list the matching files of the last one.

    Starting at 'root_directory', every entry of 'directory_keys' is looked up by name among the
    sub directories of the directory reached so far. In the directory reached last, all files that
    match 'file_filter' are enumerated and returned. If 'directory_keys' is empty, the files are
    searched directly in 'root_directory'.

    Args:
        root_directory (Path): Root directory
        directory_keys (list[str]): Names of the nested sub directories, ordered from the outermost to the innermost
        file_filter (str): Suffix of files that should be found in the last given sub directory
        verbose (bool, optional): Forwarded to the file enumeration of the librarian. Defaults to None.

    Raises:
        KeyError: If either the specified sub directory or file could not be found

    Returns:
        subdirectory_files: Whatever 'Librarian.enumerate_files' returns for the last sub directory
            (other code in this notebook iterates it with '.items()', so presumably a dict that maps
            an index to the Path of each file -- confirm against the Librarian implementation).
    """

    librarian        = Librarian(root_directory=root_directory)
    target_directory = root_directory

    # Walk down the chain of sub directories, one key at a time #
    for directory_key in directory_keys:
        subdirectories = librarian.enumerate_subdirectories(directory=target_directory)
        matches        = [path for path in (subdirectories or {}).values() if path.parts[-1] == directory_key]

        if not matches:
            raise KeyError("Defined key: '%s' cannot be found in the given root directory: %s"%(directory_key,target_directory))

        # As before, the first sub directory with a matching name wins #
        target_directory = matches[0]

    # Search for all files that match the given filter in the specified sub directory #
    subdirectory_files = librarian.enumerate_files(directory=target_directory, filter=file_filter, verbose=verbose)
    if not subdirectory_files:
        raise KeyError("No files with filter: '%s' found in the given sub directory: %s"%(file_filter,target_directory))

    return subdirectory_files



class widget():
    """
    Interactive selection of the dataset and of the raw data files of one experiment.

    'choose_data' builds and displays the ipywidgets layout. The remaining public methods are the
    callbacks wired to the buttons and to the observed widget traits. The selected file paths are
    collected as strings in one TagsInput widget per category (Echem_files, GC_files, MFM_files,
    calib_files, correction_files, faraday_files) and the parsed dataset is stored in 'datamodel'.
    """

    # Placeholder entry of the file dropdown if no file matches the entered file type
    _NO_FILES_LABEL = "No files with specified suffix"

    def _load_datamodel(self) -> None:
        """Parse the dataset chosen in the dataset dropdown and store the result in 'self.datamodel'.

        Raises:
            KeyError: If the chosen dataset cannot be parsed (the original error is chained).
        """
        try:
            self.datamodel = DataModel.parse( self.dataset_dropdown.value )
        except Exception as error:
            raise KeyError("\nChoosen dataset cannot be interpreted!\n") from error

    def _category_widgets(self) -> dict:
        """Map every entry of the category dropdown to the TagsInput widget that collects its files."""
        return {
            "EChem":                self.Echem_files,
            "GC":                   self.GC_files,
            "MFM":                  self.MFM_files,
            "Calibration":          self.calib_files,
            "Correction factors":   self.correction_files,
            "Faraday coefficients": self.faraday_files,
        }

    # Function to navigate into the selected subfolder
    def go_to_subfolder(self,_):
        """Button callback: make the folder selected in the dropdown the current directory."""
        selected = self.folder_dropdown.value

        # Nothing is selected if the dropdown has no entries at all
        if selected is None:
            return

        self.current_dir.value = str(selected)
        subroot                = self.librarian.enumerate_subdirectories(directory=selected)

        if subroot:
            self.folder_dropdown.options = [ (path.parts[-1],path) for path in subroot.values() ]
        else:
            # Keep the directory itself selectable so that its files can still be listed
            self.folder_dropdown.options = [ ("No subdirectories",selected) ]

    # Function to navigate back from the selected subfolder
    def go_to_parentfolder(self,_):
        """Button callback: make the parent of the current directory the current directory."""
        parentfolder                 = self.parent
        self.current_dir.value       = str(parentfolder)
        parentroot                   = self.librarian.enumerate_subdirectories(directory=parentfolder)
        self.folder_dropdown.options = [ (path.parts[-1],path) for path in parentroot.values() ]

    def add_file(self,_):
        """Button callback: append the selected file to the file list of the selected category."""
        selected_file = self.file_dropdown.value

        # Neither an empty dropdown nor the "no files" placeholder is a file that can be added
        if selected_file is None or selected_file == self._NO_FILES_LABEL:
            return

        target = self._category_widgets().get(self.file_category.value)
        if target is None:
            return

        # The TagsInput widgets are created with allow_duplicates=False, so do not add a path twice
        file_path = str(selected_file)
        if file_path not in target.value:
            target.value = target.value + [file_path]

    def folder_dropdown_option_handler(self,_):
        """Observer of the folder dropdown options: track the current folder and its parent."""
        selected = self.folder_dropdown.value

        if selected is not None:
            if str(selected.parent) == self.current_dir.value:
                # The entries are sub directories of the current directory
                self.parent      = selected.parent.parent
                self.file_folder = selected.parent
            else:
                # "No subdirectories" placeholder: the entry is the current directory itself
                self.parent      = selected.parent
                self.file_folder = selected

        # Reset file type after changing the dropdown
        self.file_type_text.value  = ""
        self.file_dropdown.options = []

    def file_type_input_handler(self,_):
        """Observer of the file type text: list the files of the current folder that match it."""
        file_filter = self.file_type_text.value
        if not file_filter:
            return

        subdirectory_files = self.librarian.enumerate_files(directory=self.file_folder, filter=file_filter)
        file_options       = [ (file.parts[-1],file) for file in subdirectory_files.values() ] if subdirectory_files else []

        if file_options:
            # Show all available files and preselect the first one, so the user sees that files are available
            self.file_dropdown.options = file_options
            self.file_dropdown.value   = file_options[0][1]
        else:
            self.file_dropdown.options = [self._NO_FILES_LABEL]
            self.file_dropdown.value   = self._NO_FILES_LABEL

    def file_category_input_handler(self,_):
        """Observer of the category dropdown: show the chosen category on the add button."""
        self.button_select.description = 'Add file to %s'%(self.file_category.value)

    def dataset_input_handler(self,_):
        """Observer of the dataset dropdown: parse the newly chosen dataset.

        Raises:
            KeyError: If the chosen dataset cannot be parsed.
        """
        self._load_datamodel()

    def choose_data(self,root: Path) -> None:
        """
        Build, wire and display all widgets needed to choose the dataset and the raw data files.

        Args:
            root (Path): Directory in which the browsing starts. It has to contain a sub directory
                         'datasets' with at least one json file.

        Raises:
            KeyError: If no dataset is found or the initially selected dataset cannot be parsed.
        """

        self.librarian        = Librarian(root_directory=root)
        datasets              = search_files_in_subdirectory(root_directory=root, directory_keys=["datasets"], file_filter="json", verbose=False)
        sub_directories       = self.librarian.enumerate_subdirectories(directory=root)

        self.dataset_dropdown = widgets.Dropdown(options=[(path.parts[-1],path) for path in datasets.values()],
                                                description="Choose dataset",
                                                layout=widgets.Layout(width='auto'),
                                                style={'description_width': 'auto'})

        self.folder_dropdown  = widgets.Dropdown(description='Select folder:',
                                                options=[(path.parts[-1],path) for path in sub_directories.values()],
                                                layout=widgets.Layout(width='auto'),
                                                style={'description_width': 'auto'})

        self.file_dropdown    = widgets.Dropdown(description='Select file:',
                                                layout=widgets.Layout(width='auto'),
                                                style={'description_width': 'auto'})

        self.file_category    = widgets.Dropdown(options=['EChem', 'GC', 'MFM',"Calibration","Correction factors","Faraday coefficients"],
                                                value='EChem',
                                                description='for category:',
                                                style={'description_width': 'auto'})

        self.button_go_for    = widgets.Button(description='Move into directory',
                                              layout=widgets.Layout(width='auto'))

        self.button_go_back   = widgets.Button(description='Move one diretory back',
                                              layout=widgets.Layout(width='auto'))

        self.button_select    = widgets.Button(description='Add file to %s'%(self.file_category.value),
                                              layout=widgets.Layout(width='auto'))

        self.file_type_text   = widgets.Text(description='File type:',
                                            placeholder='Enter type here (e.g.: csv, json, ... or * for all files)',
                                            layout=widgets.Layout(width='auto'),
                                            style={'description_width': 'auto'})

        self.current_dir      = widgets.Text(description='Current directory:',
                                            disabled=True,
                                            value=str(root),
                                            layout=widgets.Layout(width='auto'),
                                            style={'description_width': 'auto'})

        self.elec_surf_area   = widgets.FloatText(value=1.0,
                                                  description='Electrode surface area [cm^2]:',
                                                  style={'description_width': 'auto'})

        self.Echem_files      = widgets.TagsInput(allow_duplicates=False)
        self.GC_files         = widgets.TagsInput(allow_duplicates=False)
        self.MFM_files        = widgets.TagsInput(allow_duplicates=False)
        self.calib_files      = widgets.TagsInput(allow_duplicates=False)
        self.correction_files = widgets.TagsInput(allow_duplicates=False)
        self.faraday_files    = widgets.TagsInput(allow_duplicates=False)

        # Browsing starts in root; its parent has to be known before the first folder change,
        # otherwise moving back right away would fail
        self.file_folder      = root
        self.parent           = root.parent

        # Initial value for datamodel
        self._load_datamodel()

        # Functions for the buttons #
        self.button_go_for.on_click(self.go_to_subfolder)
        self.button_go_back.on_click(self.go_to_parentfolder)
        self.button_select.on_click(self.add_file)

        # Attach the event handlers to the trait changes of the respective widgets
        self.file_type_text.observe(self.file_type_input_handler, names='value')
        self.folder_dropdown.observe(self.folder_dropdown_option_handler, names='options')
        self.file_category.observe(self.file_category_input_handler, names='value')
        self.dataset_dropdown.observe(self.dataset_input_handler, names='value')

        # Create the layout
        widgets0  = widgets.HBox([self.dataset_dropdown])
        widgets1  = widgets.VBox([self.current_dir,self.folder_dropdown])
        widgets2  = widgets.HBox([self.button_go_for, self.button_go_back])
        widgets3  = widgets.VBox([self.file_type_text])
        widgets4  = widgets.HBox([self.file_dropdown,self.file_category])
        widgets5  = widgets.VBox([self.button_select])
        widgets6  = widgets.VBox([widgets.Label(value='Files for EChem evaluation:'), self.Echem_files])
        widgets7  = widgets.VBox([widgets.Label(value='Files for GC evaluation:'), self.GC_files])
        widgets8  = widgets.VBox([widgets.Label(value='Files for MFM evaluation:'), self.MFM_files])
        widgets9  = widgets.HBox([widgets.VBox([widgets.Label(value='Files for calibration:'), self.calib_files]),
                                  widgets.VBox([widgets.Label(value='Files for correction factors:'), self.correction_files]),
                                  widgets.VBox([widgets.Label(value='Files for Farraday coefficients:'), self.faraday_files]),
                                  self.elec_surf_area])

        # Combine the layout
        full_layout = widgets.VBox([widgets0,widgets1,widgets2,widgets3,widgets4,widgets5,widgets6,widgets7,widgets8,widgets9])

        # Display the layout
        display(full_layout)



In [3]:
# Define paths for logging output #
# dictConfig lives in the submodule logging.config, which a plain `import logging` does not load
import logging.config

root                = Path.cwd()
logging_config_path = root / "datamodel_b07_tc/tools/logging/config_exp_2_1.json"

# Read in logger specs and configure logger (set name to current notebook) #
with open(logging_config_path, encoding="utf-8") as logging_config_json:
    logging.config.dictConfig( json.load( logging_config_json ) )
logger = logging.getLogger(__name__)

# Set the level of third-party loggers to avoid dumping too much information #
for logger_ in ['markdown_it', 'h5py', 'numexpr', 'git']:
    logging.getLogger(logger_).setLevel('WARNING')

---
## Section 1: Dataset and raw data
---
In this section the dataset as well as the raw data to be analyzed are chosen.

In [4]:
## Add several experiments? 
# e.g.: choose data for experiment 1: data: ...

In [5]:
## Search for dataset and raw data ##

# Browsing starts in the current working directory
root = Path.cwd()

# Build the selection widget and display it for that directory
w = widget()
w.choose_data(root)

VBox(children=(HBox(children=(Dropdown(description='Choose dataset', layout=Layout(width='auto'), options=(('b…

In [6]:
# Shortcut that fills the file lists of the widget `w` directly instead of selecting the files by hand.
# NOTE(review): all paths are absolute and specific to one machine/user -- adapt them before running elsewhere.
e_chem = 'c:\\Users\\darouich\\OneDrive\\Dokumente\\datamodel_b07_tc\\datamodel_b07_tc\\data\\Rohdaten\\01_EChem\\CAD14-Cu@AB\\GSTATIC.DTA'
mfm    = 'c:\\Users\\darouich\\OneDrive\\Dokumente\\datamodel_b07_tc\\datamodel_b07_tc\\data\\Rohdaten\\03_MFM\\CAD14-Cu@AB\\Bench-2h-GSS_CAD14-Cu@AB_200_50c_24h.csv'
# The GC files are consumed in pairs by the parsing cell of this notebook:
# entry i is passed as metadata file, entry i+1 as experimental data file of the same measurement.
gc     = ['c:\\Users\\darouich\\OneDrive\\Dokumente\\datamodel_b07_tc\\datamodel_b07_tc\\data\\Rohdaten\\02_GC\\CAD14-Cu@AB\\JH-1H 2023-02-06 10-00-18\\NV-F0102.D\\report00.CSV',
        'c:\\Users\\darouich\\OneDrive\\Dokumente\\datamodel_b07_tc\\datamodel_b07_tc\\data\\Rohdaten\\02_GC\\CAD14-Cu@AB\\JH-1H 2023-02-06 10-00-18\\NV-F0102.D\\REPORT01.CSV',
        'c:\\Users\\darouich\\OneDrive\\Dokumente\\datamodel_b07_tc\\datamodel_b07_tc\\data\\Rohdaten\\02_GC\\CAD14-Cu@AB\\JH-1H 2023-02-06 10-00-18\\NV-F0103.D\\report00.CSV',
        'c:\\Users\\darouich\\OneDrive\\Dokumente\\datamodel_b07_tc\\datamodel_b07_tc\\data\\Rohdaten\\02_GC\\CAD14-Cu@AB\\JH-1H 2023-02-06 10-00-18\\NV-F0103.D\\REPORT01.CSV']
calib  = 'c:\\Users\\darouich\\OneDrive\\Dokumente\\datamodel_b07_tc\\datamodel_b07_tc\\data\\calibration\\calibration.json'
correc = 'c:\\Users\\darouich\\OneDrive\\Dokumente\\datamodel_b07_tc\\datamodel_b07_tc\\data\\correction_factors\\correction_factors.json'
farada = 'c:\\Users\\darouich\\OneDrive\\Dokumente\\datamodel_b07_tc\\datamodel_b07_tc\\data\\faraday_coefficients\\faraday_coefficients.json'

# Overwrite the file lists of the widget; every list holds the paths as strings
w.Echem_files.value = [e_chem]
w.MFM_files.value   = [mfm]
w.GC_files.value    = gc
w.calib_files.value = [calib]
w.correction_files.value = [correc]
w.faraday_files.value    = [farada]

In [7]:
## Read in selected raw data and save it in Experiment class ##

dataset, lib                 = w.datamodel
experiment                   = Experiment()
gc_experimental_data_df_list = []
gc_metadata_df_list          = []
gc_measurements_list         = []

# Every GC measurement consists of two files: first the metadata file, then the experimental data file.
# Check this before anything is parsed, so that an incomplete selection fails early and with a clear message.
gc_files = list(w.GC_files.value)
if len(gc_files) % 2 != 0:
    raise ValueError("GC files have to be selected in pairs (metadata file followed by experimental data file), "
                     "but %d files are selected."%(len(gc_files)))

potentiostatic_metadata_df, potentiostatic_measurement = Measurement.from_parser( parser=gstatic_parser, metadata_path=w.Echem_files.value[0] )
mfm_experimental_data_df, mfm_measurement              = Measurement.from_parser( parser=mfm_parser, experimental_data_path=w.MFM_files.value[0] )
#experiment.plant_setup                                = PlantSetup.from_parser( parser=DEXPI2sdRDM, path=w.plant_setup_files[0] )

# Parse the GC measurements pair by pair #
for gc_metadata_path, gc_experimental_data_path in zip(gc_files[0::2], gc_files[1::2]):
    gc_metadata_df, gc_experimental_data_df, gc_measurement = Measurement.from_parser(parser=gc_parser,
                                                                                      metadata_path=gc_metadata_path,
                                                                                      experimental_data_path=gc_experimental_data_path)
    gc_experimental_data_df_list.append(gc_experimental_data_df)
    gc_metadata_df_list.append(gc_metadata_df)
    gc_measurements_list.append(gc_measurement)

experiment.measurements      = [potentiostatic_measurement, mfm_measurement, *gc_measurements_list]

## Read in parameters such as calibration, correction factors and faraday coefficients and save it in Experiment class ##

experiment.species_data      = Calibrator.from_json_file(path_to_json_file=w.calib_files.value[0]).calibrate()
electrode_surface_area       = w.elec_surf_area.value
experiment.read_correction_factors(w.correction_files.value[0])
experiment.read_faraday_coefficients(w.faraday_files.value[0])

## Append new experiment to current dataset ##
dataset.experiments.append(experiment)

#with open(w.dataset_dropdown.value, "w") as f: f.write(dataset.json())


---
## Section 2: Analysis of data
---
In this section the raw data is analyzed

In [37]:
class widget2:
    """
    Interactive analysis of one experiment of the dataset.

    'choose_experiment' builds and displays the widgets to choose the experiment, the species of the
    GC analysis and the mean radius, and starts the peak assignment. 'do_postprocessing' calculates
    the Faraday efficiencies of every GC measurement of the chosen experiment from the assigned peaks.
    """

    def choose_experiment_input_handler(self,_):
        """Observer of the experiment dropdown: redraw the layout and redo the peak assignment."""

        # Clear existing widgets
        clear_output(wait=True)

        # Display the layout for experiment and species
        display(self.full_layout)

        # Results of a previously chosen experiment are not valid anymore
        self.faraday_efficiencies = None

        # Also display the peak assignment again
        selected_experiment  = self.experiments_dict[self.experiments_dropdown.value]
        self.gc_measurements = [measurement for measurement in selected_experiment.measurements if measurement.measurement_type == 'GC measurement']
        self.peak_assignment = PeakAssigner.from_gc_measurement(self.gc_measurements, self.species_tags.value)
        self.peak_assignment.assign_peaks()

    def species_tags_input_handler(self,_):
        """Observer of the species tags: hand the changed species over to the peak assignment."""
        self.peak_assignment.modify_dropdown_options( self.species_tags.value )

    def do_postprocessing(self,_):
        """
        Calculate the Faraday efficiencies of all GC measurements of the chosen experiment.

        The results are also stored in 'self.faraday_efficiencies', so that they stay available
        if this method is used as a widget callback (where the return value is discarded).

        Returns:
            list: One result of 'calculate_faraday_efficiencies' per GC measurement, in the order
                  of 'self.gc_measurements'.
        """
        fe_calculator = FaradayEfficiencyCalculator(experiment=self.experiments_dict[self.experiments_dropdown.value],
                                                    electrode_surface_area=self.electrode_surface_area,
                                                    mean_radius=self.mean_radius.value)

        self.faraday_efficiencies = [
            fe_calculator.calculate_faraday_efficiencies(gc_measurement=gc_measurement,
                                                         assigned_peak_areas_dict=assigned_peak_areas_dict)
            for gc_measurement, assigned_peak_areas_dict in zip(self.gc_measurements, self.peak_assignment._assignment_dicts)
        ]

        return self.faraday_efficiencies

    def choose_experiment(self,dataset,electrode_surface_area: float = 1) -> None:
        """
        Build and display the widgets for the analysis and start the peak assignment.

        Args:
            dataset: Dataset whose experiments are offered for the analysis.
            electrode_surface_area (float, optional): Electrode surface area that is handed to the
                Faraday efficiency calculation. Defaults to 1.
        """

        self.experiments_dict     = Dataset(**dataset.__dict__).enumerate('experiments')

        self.experiments_dropdown = widgets.Dropdown(options=[(str(obj.id),idx) for idx,obj in self.experiments_dict.items()],
                                                    description="Choose experiment:",
                                                    layout=widgets.Layout(width='auto'),
                                                    style={'description_width': 'auto'})

        self.species_tags         = widgets.TagsInput(allow_duplicates=False,
                                                      value=['Hydrogen', 'Carbon monoxide', 'Carbon dioxide', 'Methane', 'Ethene', 'Ethane'])

        self.mean_radius          = widgets.IntSlider(value=10,  # Initial value
                                                      min=0,    # Minimum value
                                                      max=20,   # Maximum value
                                                      step=1,   # Step size
                                                      description='Mean radius:')

        self.explanation_label    = widgets.HTML(value='The mass flow at the time of the GC measurement is determined by matching the time of the gc measurement\
                                                        with the corresponding times of the mass flow measurements.<br>Errors in the mass flows due to strong fluctuations\
                                                        are minimized by calculating the mean by averaging over a certain number (=radius) of measuring points before and\
                                                        after the time of the GC measurement.<br> The radius has to be specified in accordance with the strength of fluctuations.')

        # Handle switch of experiment
        self.experiments_dropdown.observe(self.choose_experiment_input_handler,names="value")
        self.species_tags.observe(self.species_tags_input_handler,names="value")

        # Common variables
        self.electrode_surface_area = electrode_surface_area
        self.faraday_efficiencies   = None

        # Widgets
        widgets0  = widgets.HBox([self.experiments_dropdown])
        widgets1  = widgets.VBox([widgets.Label(value='Species in GC analysis:'), self.species_tags])
        widgets2  = widgets.VBox([self.explanation_label,self.mean_radius])

        # Combine the layout
        self.full_layout = widgets.VBox([widgets0,widgets1])

        # Display the layout
        display(self.full_layout)

        # Execute the peak assignment for the initial experiment value
        self.choose_experiment_input_handler(None)

        # Choose mean radius
        display(widgets2)

        # If assignment is saved execute the post processing
        #self.do_postprocessing(None)




In [38]:
# Build the analysis widget and start the peak assignment for the experiments of the dataset
ww = widget2()
ww.choose_experiment(dataset)

VBox(children=(HBox(children=(Dropdown(description='Choose experiment:', layout=Layout(width='auto'), options=…

VBox(children=(HBox(children=(VBox(children=(Label(value='Measurement number 0', layout=Layout(height='30px', …

VBox(children=(HTML(value='The mass flow at the time of the GC measurement is determined by matching the time …

In [16]:

# NOTE(review): `faraday_efficiencies` is not assigned at notebook level in the cells shown here
# (widget2.do_postprocessing only builds a local list of that name) -- confirm where it comes from
# before running this cell.
# Average the results of the individual GC measurements per species (rows sharing the same index label)
mean_faraday_efficiency = pd.concat(faraday_efficiencies).groupby(level=0).mean()
print(mean_faraday_efficiency)

# Store the mean Faraday efficiency for every species of the experiment that has a result
for species_data in experiment.species_data:
    if species_data.species in mean_faraday_efficiency.index:
        faraday_efficiency = mean_faraday_efficiency.loc[species_data.species].values
        species_data.faraday_efficiency = lib.Data(quantity= 'Faraday efficiency', values = faraday_efficiency.tolist(), unit = '%')

  (self._volumetric_fractions_df.loc["Hydrogen"][0] / 100)
  - (self._volumetric_fractions_df.loc["Hydrogen"][0] / 100)
  + (self._volumetric_fractions_df.loc["Carbon monoxide"][0])
  self._volumetric_fractions_df.loc["Carbon monoxide"][0]
  (self._volumetric_fractions_df.loc["Hydrogen"][0] / 100)
  - (self._volumetric_fractions_df.loc["Hydrogen"][0] / 100)
  + (self._volumetric_fractions_df.loc["Carbon monoxide"][0])
  self._volumetric_fractions_df.loc["Carbon monoxide"][0]


[                Faraday_efficiency
 Hydrogen                  7.370217
 Carbon monoxide          66.702143
 Carbon dioxide            0.312773
 Methane                   3.189273
 Ethene                    6.179319
 Ethane                   13.929901,
                 Faraday_efficiency
 Carbon dioxide             5.66519
 Carbon monoxide           0.669252
 Ethene                   26.526025
 Hydrogen                   20.3274
 Methane                   5.363712]

In [None]:
# The parsing cell of this notebook already appends `experiment` to the dataset. Append it here only
# if this very object is not part of the dataset yet, so that the experiment is not stored twice.
if not any(existing is experiment for existing in dataset.experiments):
    dataset.experiments.append(experiment)

#with open(w.dataset_dropdown.value, "w") as f: f.write(dataset.json())