# Workflow for the CRC1333 project B07 - Technical Chemistry

**Import necessary packages.**

In [None]:
# import matplotlib.pyplot as plt
# import numpy as np
# import networkx as nx
# import pandas as pd

from datetime import datetime
from sdRDM import DataModel
from modules import GCParser
from modules import GstaticParser
from modules import MFMParser
from modules import Calculator
from modules import get_volumetric_flow_mean
from modules import get_initial_time_and_current
from modules import assign_peaks
from pathlib import Path
# from DEXPI2sdRDM import DEXPI2sdRDM

**Choose dataset to parse.**

In [None]:
# Remember the current working directory; the ``datasets`` and ``data``
# folders used in later cells are resolved relative to it.
current_directory = Path.cwd()

In [None]:
# Folder inside the working directory that holds the dataset JSON files.
# NOTE: ``path_to_datasets`` is rebound further down to the raw-measurement
# root, so this value only applies to the dataset listing that follows.
path_to_datasets = current_directory / 'datasets'

In [None]:
# List all entries of the dataset directory in a stable, name-sorted order.
# ``iterdir()`` yields entries in arbitrary order, so without sorting the
# printed numbers could change between runs and machines.
files = sorted(path_to_datasets.iterdir())

# Number only the .json files, contiguously from 0. Enumerating before
# filtering left gaps in the keys, so ``json_files[0]`` was not guaranteed
# to exist.
json_files = dict(enumerate(file for file in files if file.suffix == '.json'))

for index, file in json_files.items():
    print(f'{index}: {file.name}')

In [None]:
# Key in ``json_files`` (as printed by the listing cell) of the dataset to load.
index_dataset = 0

**Load dataset for project B07**

In [None]:
# Parse the selected dataset file. The call returns two objects: the dataset
# and ``lib``, through which the model classes and enums are accessed in the
# cells below (``lib.Experiment``, ``lib.Measurement``, ``lib.enums``).
dataset, lib = DataModel.parse(json_files[index_dataset])

In [None]:
# print(dataset.json())

**Instantiate an experiment object which holds all the information about one single experiment.**

In [None]:
# Create a new experiment and register it with the dataset; measurements and
# analysis results are attached to ``experiment`` in the following cells.
experiment = lib.Experiment()
dataset.experiments.append(experiment)

**Load data model from git or markdown file**

In [None]:
# lib = DataModel.from_markdown('specifications/datamodel_b07_tc.md')
# lib = DataModel.from_git(url='https://github.com/FAIRChemistry/datamodel_b07_tc.git')

**Set the path to the raw measurement data and to its per-technique subdirectories.**

In [None]:
# Root folder of the raw measurement data. A raw string is used so that the
# Windows backslashes are taken literally; the former non-raw literal relied
# on invalid escape sequences (``\D``, ``\R``), which newer Python versions
# warn about. The resulting path is unchanged.
# NOTE: this rebinds ``path_to_datasets``, which earlier pointed at the
# folder with the dataset JSON files.
path_to_datasets = Path(r'F:\Doktorand\03_Messungen\Rohdaten')

# Subfolders for the different types of measurement data.
echem_path = path_to_datasets / '01_EChem'
gc_path = path_to_datasets / '02_GC'
mfm_path = path_to_datasets / '03_MFM'
hplc_path = path_to_datasets / '04_HPLC'
pressure_path = path_to_datasets / '05_Pressure'


In [None]:
# Show the resolved electrochemistry folder as a quick sanity check.
print(echem_path)

## **Parsing experimental data and metadata from various output files.**

### **Choose files with raw data.**

### gstatic

In [None]:
# Collect the subdirectories of the electrochemistry folder. Plain files are
# skipped because the selection in the next cell is used as a directory, and
# the entries are sorted because ``iterdir()`` yields them in arbitrary
# order, which made the printed numbers unstable.
echem_directories = dict(
    enumerate(sorted(entry for entry in echem_path.iterdir() if entry.is_dir()))
)

# Print the names of the subdirectories.
for index, directory in echem_directories.items():
    print(f"{index}: {directory.name}")

In [None]:
# Choose the desired subdirectory by its number.
number_of_desired_directory = 1
selected_directory = echem_directories[number_of_desired_directory]
# Print the directory that was actually selected. The former
# ``print(directory)`` showed the leftover loop variable of the listing cell,
# i.e. the last listed entry, regardless of the number chosen here.
print(selected_directory)

In [None]:
# Provide name of the file of interest.
file_name = 'GSTATIC.DTA'
# Full path of that file inside the selected subdirectory.
file_path = selected_directory / file_name
print(file_path)

**Create an instance of the ``GstaticParser`` to parse Gamry output files and show available files in the selected directory.**

In [None]:
# Point the parser at the ``data`` folder in the working directory and print
# the files it reports, each with the key used for the selection below.
gstaticparser = GstaticParser(current_directory / 'data')
dict_of_gstatic_files = gstaticparser.enumerate_available_files()
for index, gstatic_file in dict_of_gstatic_files.items():
    print(f"{index}: {gstatic_file}")
# available_files = gstaticparser.available_files
# print(available_files)

**Choose a specific file.**

In [None]:
# Pick the Gamry file to parse by its key in the listing; the bare name on
# the last line displays the selection as the cell output.
gstatic_file = dict_of_gstatic_files[0]
gstatic_file

**Extract the metadata from it using the ``GstaticParser`` and load into the data model.**

In [None]:
# Extract the metadata of the selected Gamry file; the parser returns a table
# for display together with the metadata object stored in the data model.
gstatic_metadata_df, gstatic_metadata = gstaticparser.extract_metadata(gstatic_file)
# NOTE(review): the MFM and GC cells pass ``MeasurementType.<X>.value`` and a
# list built from ``.values()``; here the enum member itself and the raw
# parser result are passed — confirm this difference is intended.
potentiometric_measurement = lib.Measurement(measurement_type=lib.enums.MeasurementType.POTENTIOSTATIC, metadata=gstatic_metadata)
# Assignment (not append): this replaces whatever measurements the experiment
# held before, making this measurement the first entry.
experiment.measurements = [potentiometric_measurement]
gstatic_metadata_df

### Mass flow meter

**Create an instance of the ``MFMParser`` to parse MFM output files and show available files in the selected directory.**

In [None]:
# Point the parser at the ``data`` folder in the working directory and print
# the files it reports, each with the key used for the selection below.
mfmparser = MFMParser(current_directory / 'data')
dict_of_mfm_files = mfmparser.enumerate_available_files()
for index, mfmfile in dict_of_mfm_files.items():
    print(f"{index}: {mfmfile}")

**Choose a specific file.**

In [None]:
# Pick the mass-flow-meter file to parse by its key in the listing; the bare
# name on the last line displays the selection as the cell output.
mfm_file = dict_of_mfm_files[4]
mfm_file

**Extract the experimental data from it using the ``MFMParser`` and load into the data model.**

In [None]:
# Parse the selected MFM file: the parser hands back a table for display and
# a mapping whose values are stored as the measurement's experimental data.
mfm_experimental_data_df, mfm_experimental_data = mfmparser.extract_exp_data(
    mfm_file
)

# Wrap the parsed values in a measurement of type MFM and attach it to the
# experiment.
mfm = lib.Measurement(
    measurement_type=lib.enums.MeasurementType.MFM.value,
    experimental_data=list(mfm_experimental_data.values()),
)
experiment.measurements.append(mfm)

**Print first 10 rows of the mfm experimental data.**

In [None]:
# Show the first 10 rows. ``truncate(after=10)`` cuts by index label and
# keeps the row labelled 10 as well (11 rows on a default integer index);
# ``head(10)`` selects exactly 10 rows by position.
truncated_mfm_experimental_data_df = mfm_experimental_data_df.head(10)
truncated_mfm_experimental_data_df

### Gas chromatography

**Create an instance of the ``GCParser`` to parse GC output files and show available files in the selected directory.**

In [None]:
# Point the parser at the ``data`` folder in the working directory and print
# the files it reports, each with the key used for the selections below.
gcparser = GCParser(current_directory / 'data')
dict_of_gc_files = gcparser.enumerate_available_files()
for index, gc_file in dict_of_gc_files.items():
    print(f"{index}: {gc_file}")

**Select gc file that contains the metadata**

In [None]:
# Pick the GC file holding the metadata by its key in the listing; the bare
# name on the last line displays the selection as the cell output.
gc_metadata_file = dict_of_gc_files[2]
gc_metadata_file

**Select file that contains the experimental data**

In [None]:
# Pick the GC file holding the experimental data by its key in the listing;
# the bare name on the last line displays the selection as the cell output.
gc_experimental_data_file = dict_of_gc_files[3]
gc_experimental_data_file

**Extract the metadata and experimental data from them and load into the dataset.**

In [None]:
# Parse both selected GC files. Each call returns a table for display and a
# mapping whose values are stored in the data model.
gc_metadata_df, gc_metadata = gcparser.extract_metadata(gc_metadata_file)
gc_experimental_data_df, gc_experimental_data = gcparser.extract_exp_data(
    gc_experimental_data_file
)

# Bundle metadata and experimental data into one GC measurement and attach it
# to the experiment.
gc = lib.Measurement(
    measurement_type=lib.enums.MeasurementType.GC.value,
    metadata=list(gc_metadata.values()),
    experimental_data=list(gc_experimental_data.values()),
)
experiment.measurements.append(gc)

# Display the metadata table as the cell output.
gc_metadata_df

In [None]:
# Display the GC experimental data table returned by the parser.
gc_experimental_data_df

**Print current state of the data model**

In [None]:
# Print the experiment, including the measurements attached so far, as
# produced by its ``json()`` method.
print(experiment.json())

**Assign peak areas to species.** \
The peak areas recorded by the GC have to be matched with the correct species. The individual ``Area`` is selected by its corresponding ``Peak_Number``. It is possible that the same species is accountable for multiple peaks, i.e. multiple peaks are assigned to the same species.


In [None]:
# Map each species label to the list of GC peak numbers assigned to it.
# A species may list several peaks. The C2H4 and C2H6 entries are disabled.
assign_peak_dict={
    'H2': [1],
    'CO2': [2],
    'CO': [6],
    'CH4': [3],
    # 'C2H4': [5],
    # 'C2H6': [4],
}
# Resolve the peak numbers against the dataset; the result maps each species
# to its peak area (printed below).
peak_area_dict = assign_peaks(dataset, assign_peak_dict)

# NOTE: ``species`` stays bound to the last key after this loop and is read
# again by a later debug cell (``calibration_df.loc[species]``).
for species, peak_area in peak_area_dict.items():
    print(f"{species}: {peak_area}")

**Set calibration input values and import into the data model.** \
To determine the concentrations of the individual species, a calibration has to be performed in advance to match the individual values for ``Area`` with their corresponding concentrations.

In [None]:
# Calibration input per species label. Every active entry is a three-element
# list: the species enum member followed by two equally long lists of numbers.
# NOTE(review): presumably the first list holds the measured peak areas and
# the second the matching concentrations — confirm against
# ``Calculator.calibrate``.
calibration_input_dict = {
    'H2': [
        lib.enums.Species.HYDROGEN,
        [71, 153, 330],
        [5, 10, 20],
    ],
    'CO': [
        lib.enums.Species.CARBONMONOXIDE,
        [797, 1328, 7223],
        [0.5, 1, 5],
    ],
    'CO2': [
        lib.enums.Species.CARBONDIOXIDE,
        [0, 38653],
        [0, 50],
    ],
    'CH4': [
        lib.enums.Species.METHANE,
        [5727, 11991],
        [5, 10],
    ],
    # Disabled entries. Note that the C2H6 entry lists the enum member last,
    # unlike all other entries; fix the order before re-enabling it.
    # 'C2H4': [
    #     lib.enums.Species.ETHENE,
    #     [1122, 4864, 7297],
    #     [0.5, 2, 3],
    # ],
    # 'C2H6': [
    #     [0, 12168],
    #     [0, 5],
    #     lib.enums.Species.ETHANE,
    # ],
}

**Instantiate the ``Calculator`` module.**

In [None]:
# Create the calculator used for calibration and all flow calculations below.
# NOTE(review): ``path_to_dataset`` (singular) is not assigned anywhere in the
# visible notebook — only ``path_to_datasets`` is. Unless it is defined
# elsewhere this raises NameError; confirm which path ``Calculator`` expects.
calculator=Calculator(path_to_dataset=path_to_dataset)

**Calibrate using the ``calibrate`` method of the ``Calculator`` module.**

In [None]:
# Print the ``.value`` of each species enum member (the first element of every
# calibration entry). Only the entries are needed, so iterate over the values
# directly instead of unpacking an unused key.
for value in calibration_input_dict.values():
    print(value[0].value)

In [None]:
# Run the calibration. The call returns a table (displayed as the cell output
# and indexed by species in a later cell) and a mapping whose values are
# stored as the calibrations of the analysis object.
calibration_df, calibration_dict=calculator.calibrate(calibration_input_dict)
calibration_df
# for species, value in calibration_dict.items():
#     print(f"{species}: {value}")
#     # print(lib.Calibration(value))

In [None]:
# Store every calibration result in a fresh analysis object and attach that
# object to the experiment.
calculation = lib.Analysis()
calculation.calibrations = list(calibration_dict.values())
experiment.analysis = calculation

**Print current state of the dataset**

In [None]:
# Print the experiment again, now including the attached analysis, as
# produced by its ``json()`` method.
print(experiment.json())

In [None]:
# Debug output: second entry of the calibration row for one species.
# NOTE(review): ``species`` is not set in this cell; it is the leftover loop
# variable of the peak-assignment cell, so the row shown depends on which
# cells ran before. ``[1]`` is presumably meant as a positional lookup —
# confirm and consider ``.iloc[1]``.
print(calibration_df.loc[species][1])

**Calculate volumetric fractions in %.**

In [None]:
# Combine the assigned peak areas with the calibration table to obtain the
# volumetric fractions; the result is displayed as the cell output.
volumetric_fractions_df = calculator.calculate_volumetric_fractions(peak_area_dict=peak_area_dict, calibration_df=calibration_df)
volumetric_fractions_df

**Set the correction factors.**

In [None]:
# Correction factor per species label, passed to the conversion-factor
# calculation. No factors are set for C2H4 and C2H6 yet.
correction_factors_dict = {
    'H2': 1.01,
    'CO': 0.74,
    'CO2': 1.00,
    'CH4': 0.76,
    # 'C2H4': ,
    # 'C2H6': ,
}

**Calculate the conversion factor using the correction factors.**

In [None]:
# Derive the conversion factor from the volumetric fractions and the
# per-species correction factors; it scales the measured flow further below.
conversion_factor = calculator.calculate_conversion_factor(
    volumetric_fractions_df=volumetric_fractions_df, correction_factors_dict=correction_factors_dict
)
conversion_factor

**Get mean flow rate in ml/min at the time of the gc measurement.**

In [None]:
# Mean volumetric flow taken from the experiment's measurements.
# NOTE(review): the meaning of the second argument (10) is not visible here —
# presumably an averaging window; confirm against get_volumetric_flow_mean.
volumetric_flow_mean = get_volumetric_flow_mean(experiment, 10)
volumetric_flow_mean

**Calculate the real volumetric flow in ml/min.**

In [None]:
# Scale the measured mean flow by the conversion factor to obtain the real
# volumetric flow.
real_volumetric_flow = volumetric_flow_mean*conversion_factor
real_volumetric_flow

In [None]:
# vol_flow_real= calculator.calculate_real_volumetric_flow(conversion_factor = conversion_factor, measured_volumetric_flow_mean = vol_flow_mean )
# vol_flow_real

**Calculate volumetric flow fractions in %.**

In [None]:
# Combine the real volumetric flow with the volumetric fractions to obtain
# the flow fraction of each species; displayed as the cell output.
volumetric_flow_fractions_df=calculator.calculate_volumetric_flow_fractions(
    real_volumetric_flow=real_volumetric_flow, volumetric_fractions_df=volumetric_fractions_df
)
volumetric_flow_fractions_df

**Calculate material flow in mmol/min.**

In [None]:
# Convert the volumetric flow fractions into material flows.
# NOTE(review): the method name is spelled ``calcualte_material_flow``;
# confirm it matches the spelling in the ``Calculator`` class.
material_flow_df = calculator.calcualte_material_flow(volumetric_flow_fractions_df=volumetric_flow_fractions_df)
material_flow_df

**Get initial current in mA and initial time in s.**

In [None]:
# Read the initial current and initial time from the experiment.
# NOTE(review): the function name lists time before current, while the result
# is unpacked as (current, time) — confirm the return order.
initial_current, initial_time = get_initial_time_and_current(experiment)
print(f'Initial current in mA: {initial_current}')
print(f'Initial time in s: {initial_time}')

**Calculate theoretical material flow in mmol/min.**

In [None]:
# Electrode surface area used for the theoretical material flow.
electrode_surface_area = 1.0 # cm^2
# Theoretical material flow computed from the initial current, the initial
# time and the electrode surface area; displayed as the cell output.
theoretical_material_flow_df=calculator.calculate_theoretical_material_flow(
    initial_current=initial_current, initial_time=initial_time, electrode_surface_area=electrode_surface_area
)
theoretical_material_flow_df

**Calculate Faraday efficiency and load into dataset.**

In [None]:
# Faraday efficiency as the ratio of the measured to the theoretical material
# flow column.
# NOTE(review): the result is only displayed; nothing in this cell writes it
# into the dataset, although the heading says so.
faraday_efficiency_df = material_flow_df['Material_flow'] / theoretical_material_flow_df['Theoretical_material_flow']
faraday_efficiency_df

In [None]:
# The experiment was already appended to ``dataset.experiments`` right after
# it was created; appending unconditionally here stores it a second time.
# Only append when this exact object is not registered yet.
if not any(existing is experiment for existing in dataset.experiments):
    dataset.experiments.append(experiment)

In [None]:
# Serialize before opening the file: opening with "w" truncates the file
# immediately, so a failure inside ``dataset.json()`` would otherwise destroy
# the existing dataset file.
serialized_dataset = dataset.json()
with open(json_files[index_dataset], "w") as f:
    f.write(serialized_dataset)

In [None]:
# class DirectedGraph:
    
#     def __init__(self, name, connections):
#         self.name = name
#         self.connections = connections
#         self.connec_tuple = []
#         self.make_connectivity()

        
#     def make_connectivity(self):
#         if len(self.connections) == 0: 
#             return([])
#         for connection in self.connections:
#             self.connec_tuple.append((self.name, connection))

In [None]:
# mod_1 = DirectedGraph(input0, [tubing0])
# mod_2 = DirectedGraph(input1, [tubing1])
# mod_3 = DirectedGraph(tubing0, [valve0])
# mod_4 = DirectedGraph(tubing1, [valve0])
# mod_5 = DirectedGraph(valve0, [tubing2])
# mod_6 = DirectedGraph(fc, [tubing2])
# mod_7 = DirectedGraph(tubing2, [valve1])
# mod_8 = DirectedGraph(valve1, [tubing4])
# mod_9 = DirectedGraph(tubing4, [valve2])
# mod_10 = DirectedGraph(valve2, [tubing5])
# mod_11 = DirectedGraph(tubing5, [gc])
# mod_12 = DirectedGraph(tubing5, [fr])

# modules = [mod_1, mod_2, mod_3, mod_4, mod_5, mod_6, mod_7, mod_8, mod_9, mod_10, mod_11, mod_12]
# module_names = [flow_mod.name for flow_mod in modules]

In [None]:
# area = exp_df['Area']
# peak_area_dict = {
#     'co2': area.iloc[[1]].sum(),
#     'co': area.iloc[[5,6]].sum(),
#     'h2': area.iloc[[0]].sum(),
#     'ch4': area.iloc[[2,3]].sum(),
#     'c2h4': area.iloc[[4]].sum(),
#     # 'c2h6': area.iloc[[0,1]].sum()
# }
# peak_area_dict

In [None]:
# flowG = nx.DiGraph()
# flowG.add_nodes_from(module_names)
# for module in modules:
#     flowG.add_edges_from(module.connec_tuple)
# my_pos = nx.spring_layout(flowG, seed = 5) # this fixes the style of the graph, if it's ugly change the seed
# nx.draw(flowG, pos = my_pos, with_labels=True)

# hallo