# Workflow for the CRC1333 project B07 - Technical Chemistry

**Import necessary packages.**

In [80]:
# import matplotlib.pyplot as plt
# import numpy as np
# import networkx as nx
# import pandas as pd

from datetime import datetime
from sdRDM import DataModel
from modules import GCParser
from modules import GstaticParser
from modules import MFMParser
from modules import Calculator
from modules import get_volumetric_flow_mean
from modules import get_initial_time_and_current
from modules import assign_peaks
from pathlib import Path
# from DEXPI2sdRDM import DEXPI2sdRDM

**Load data model from git or markdown file**

In [81]:
# lib = DataModel.from_markdown('specifications/datamodel_b07_tc.md')
# lib = DataModel.from_git(url='https://github.com/FAIRChemistry/datamodel_b07_tc.git')

**Set path to current working directory and available datasets.**

In [82]:
# Anchor all relative paths at the directory the notebook is started from.
current_directory = Path.cwd()

# The JSON datasets are kept in the 'datasets' subfolder of that directory.
path_to_dataset = current_directory.joinpath('datasets')

In [83]:
# Collect the JSON datasets in a deterministic (name-sorted) order.
# ``iterdir`` yields entries in arbitrary order, and the numbering has to
# happen *after* filtering so that the keys are contiguous (0, 1, ...) even
# when the folder also holds non-JSON files; otherwise ``json_files[0]`` may
# not exist.
files = sorted(path_to_dataset.iterdir())
json_files = dict(enumerate(file for file in files if file.suffix == ".json"))

# Show the available datasets together with the key used to select one.
for index, file in json_files.items():
    print(f"{index}: {file.name}")

0: b07.json


**Choose dataset to parse.**

In [84]:
# Key into ``json_files`` as shown in the listing printed above.
index_dataset = 0

**Load dataset for project B07**

In [85]:
# ``DataModel.parse`` returns a pair: the parsed dataset and ``lib``, which the
# cells below use to build model objects (``lib.Experiment``, ``lib.Measurement``, ...).
dataset, lib = DataModel.parse(json_files[index_dataset])

**Instantiate an experiment object which holds all the information about one single experiment.**

In [86]:
# Create an empty experiment and register it with the dataset right away; the
# following cells fill it with measurements and calculations.
experiment = lib.Experiment()
dataset.experiments.append(experiment)

In [87]:
# print(dataset.json())

### **Parsing experimental data and metadata from various output files.**

### gstatic

**Create an instance of the ``GstaticParser`` to parse Gamry output files and show available files in the selected directory.**

In [88]:
# Point the parser at the raw-data folder and list the files it offers, keyed
# by the index that is used to pick one in the next cell.
gstaticparser = GstaticParser(current_directory / 'data')
dict_of_gstatic_files = gstaticparser.enumerate_available_files()
for index, gstatic_file in dict_of_gstatic_files.items():
    print(f"{index}: {gstatic_file}")
# available_files = gstaticparser.available_files
# print(available_files)

0: GSTATIC(Potentiostat)


**Choose a specific file.**

In [89]:
# Pick the Gamry file by its key from the listing above; the bare name on the
# last line echoes the selection as the cell output.
gstatic_file = dict_of_gstatic_files[0]
gstatic_file

'GSTATIC(Potentiostat)'

**Extract the metadata from it using the ``GstaticParser`` and load into the data model.**

In [90]:
# Parse the selected Gamry file: a DataFrame for display plus the metadata
# that is attached to the measurement.
gstatic_metadata_df, gstatic_metadata = gstaticparser.extract_metadata(gstatic_file)
# NOTE(review): the file is named GSTATIC, the enum member is POTENTIOSTATIC and
# the variable says "potentiometric" — confirm which measurement type is meant.
potentiometric_measurement = lib.Measurement(measurement_type=lib.enums.MeasurementType.POTENTIOSTATIC, metadata=gstatic_metadata)
# Assignment (not append) resets the list, so this is always the first measurement.
experiment.measurements = [potentiometric_measurement]
gstatic_metadata_df

Unnamed: 0,Parameter,Data_type,Value,Description
0,PSTAT,PSTAT,REF3000-19129,Potentiostat
1,IINIT,QUANT,-2.00000E+002,Initial I (mA/cm^2)
2,TINIT,QUANT,3.60000E+003,Initial Time (s)
3,IFINAL,QUANT,-2.00000E+002,Final I (mA/cm^2)
4,TFINAL,QUANT,0.00000E+000,Final Time (s)
5,SAMPLETIME,QUANT,1.00000E+000,Sample Period (s)
6,AREA,QUANT,1.00000E+000,Sample Area (cm^2)
7,DENSITY,QUANT,7.87000E+000,Density (g/cm^3)
8,EQUIV,QUANT,2.79200E+001,Equiv. Wt
9,IRCOMP,TOGGLE,T,IR Comp


### mfm

**Create an instance of the ``MFMParser`` to parse MFM output files and show available files in the selected directory.**

In [91]:
# Point the MFM parser at the raw-data folder and list the files it offers,
# keyed by the index used for the selection in the next cell.
mfmparser = MFMParser(current_directory / 'data')
dict_of_mfm_files = mfmparser.enumerate_available_files()
for index, mfmfile in dict_of_mfm_files.items():
    print(f"{index}: {mfmfile}")

0: Bench-2h-GSS_D9-pH70-1303-C4-2103-1_200_50c_24h
1: data
2: PX409-USBH
3: test_data_mfm


**Choose a specific file.**

In [92]:
# Pick the MFM file by its key from the listing above and echo the selection.
mfm_file = dict_of_mfm_files[3]
mfm_file

'test_data_mfm'

**Extract the experimental data from it using the ``MFMParser`` and load into the data model.**

In [93]:
# Parse the selected MFM file: a DataFrame for display plus a mapping whose
# values are stored as the experimental data of the measurement.
mfm_experimental_data_df, mfm_experimental_data = mfmparser.extract_exp_data(mfm_file)
mfm = lib.Measurement(
    measurement_type=lib.enums.MeasurementType.MFM.value,
    experimental_data=list(mfm_experimental_data.values()),
)
# Add the MFM measurement to the ones already stored on the experiment.
experiment.measurements.append(mfm)
mfm_experimental_data_df

Unnamed: 0,Datetime,Time,Signal,Flow_rate
0,2023-03-21 12:14:06,7002,2337,3.651953
1,2023-03-21 12:14:08,7004,2333,3.645957
2,2023-03-21 12:14:10,7006,2340,3.656559
3,2023-03-21 12:14:12,7008,2337,3.651343
4,2023-03-21 12:14:14,7010,2345,3.664264
...,...,...,...,...
74,2023-03-21 12:16:34,7150,2330,3.641395
75,2023-03-21 12:16:36,7152,2329,3.638808
76,2023-03-21 12:16:38,7154,2331,3.641589
77,2023-03-21 12:16:40,7156,2327,3.636026


### gc

**Create an instance of the ``GCParser`` to parse GC output files and show available files in the selected directory.**

In [94]:
# Point the GC parser at the raw-data folder and list the files it offers,
# keyed by the index used for the selections in the next cells.
gcparser = GCParser(current_directory / 'data')
dict_of_gc_files = gcparser.enumerate_available_files()
for index, gc_file in dict_of_gc_files.items():
    print(f"{index}: {gc_file}")

0: report00
1: REPORT01


**Choose the file that contains the metadata.**

In [95]:
# Key 0 of the GC listing above ('report00') is used as the metadata file.
gc_metadata_file = dict_of_gc_files[0]

**Choose the file that contains the experimental data.**

In [96]:
# Key 1 of the GC listing above ('REPORT01') is used as the experimental-data file.
gc_experimental_data_file = dict_of_gc_files[1]

**Extract the metadata and experimental data from them using the ``GCParser`` and load into the data model.**

In [97]:
# Parse both GC report files; each call yields a DataFrame for display and a
# mapping whose values are stored on the measurement.
gc_metadata_df, gc_metadata = gcparser.extract_metadata(gc_metadata_file)
gc_experimental_data_df, gc_experimental_data = gcparser.extract_exp_data(
    gc_experimental_data_file
)
# Note: the name ``gc`` would shadow the standard-library ``gc`` module if that
# were ever imported in this notebook.
gc = lib.Measurement(
    measurement_type=lib.enums.MeasurementType.GC.value,
    metadata=list(gc_metadata.values()),
    experimental_data=list(gc_experimental_data.values()),
)
# Add the GC measurement to the ones already stored on the experiment.
experiment.measurements.append(gc)
gc_metadata_df

Unnamed: 0,parameter,value,description
0,Sample Name,,
1,Sample Info,,
2,Data File,D:\GC\Kurz\D9-pH70-1303-C4-2103-1\JH-1H 2023-0...,NV-F0201.D
3,Acq. Instrument,Instrument 1,
4,Analysis Method,D:\GC\Kurz\D9-pH70-1303-C4-2103-1\JH-1H 2023-0...,JH_GASPRODUKTE_30MIN.M
5,Method Info,,
6,Results Created,21.03.2023 12:44:58,
7,Results Created by,MS,
8,Acq. Method,JH_GASPRODUKTE_30MIN.M,
9,Injection Date,"21-Mar-23, 12:14:56",


In [98]:
# Display the parsed GC peak table (one row per detected peak).
gc_experimental_data_df

Unnamed: 0,Peak_number,Retention_time,Signal,Peak_type,Peak_area,Peak_height,Peak_area_percentage
0,1,1.734678,1,BBAN,61.917892,12.646031,0.071954
1,2,2.905288,1,BBA,70992.4375,4047.07666,82.499118
2,3,3.420898,2,BV,206.927628,56.43293,0.240467
3,4,3.652101,2,VB,59.064323,20.202883,0.068638
4,5,6.014212,2,BB,6365.441895,1324.453613,7.397173
5,6,13.005888,1,BV,1924.299561,50.554913,2.236196
6,7,14.186999,2,BB,183.061951,59.090946,0.212733
7,8,15.824186,1,VV,2871.221191,20.406471,3.336598
8,9,17.882446,1,VV,2126.794922,16.856892,2.471513
9,10,20.661367,1,VB,1031.903687,8.965669,1.199158


**Print current state of the data model**

In [99]:
# Dump the whole experiment as JSON to inspect what has been loaded so far.
# The output is long: it contains every metadata and data entry.
print(experiment.json())

{
  "id": "experiment0",
  "measurements": [
    {
      "id": "measurement0",
      "measurement_type": "Potentiostatic Measurement",
      "metadata": [
        {
          "id": "metadata164",
          "parameter": "PSTAT",
          "value": "REF3000-19129",
          "data_type": "PSTAT",
          "description": "Potentiostat"
        },
        {
          "id": "metadata165",
          "parameter": "IINIT",
          "value": "-2.00000E+002",
          "data_type": "QUANT",
          "description": "Initial I (mA/cm^2)"
        },
        {
          "id": "metadata166",
          "parameter": "TINIT",
          "value": "3.60000E+003",
          "data_type": "QUANT",
          "description": "Initial Time (s)"
        },
        {
          "id": "metadata167",
          "parameter": "IFINAL",
          "value": "-2.00000E+002",
          "data_type": "QUANT",
          "description": "Final I (mA/cm^2)"
        },
        {
          "id": "metadata168",
          "parameter

**Assign peak areas to species.** \
The peak areas recorded by the GC have to be matched with the correct species. The individual ``Peak_area`` is selected by its corresponding ``Peak_number``. It is possible that the same species is responsible for multiple peaks, i.e. multiple peaks are assigned to the same species.


In [100]:
# Show the GC peak table again as a reference for the peak numbers assigned below.
gc_experimental_data_df

Unnamed: 0,Peak_number,Retention_time,Signal,Peak_type,Peak_area,Peak_height,Peak_area_percentage
0,1,1.734678,1,BBAN,61.917892,12.646031,0.071954
1,2,2.905288,1,BBA,70992.4375,4047.07666,82.499118
2,3,3.420898,2,BV,206.927628,56.43293,0.240467
3,4,3.652101,2,VB,59.064323,20.202883,0.068638
4,5,6.014212,2,BB,6365.441895,1324.453613,7.397173
5,6,13.005888,1,BV,1924.299561,50.554913,2.236196
6,7,14.186999,2,BB,183.061951,59.090946,0.212733
7,8,15.824186,1,VV,2871.221191,20.406471,3.336598
8,9,17.882446,1,VV,2126.794922,16.856892,2.471513
9,10,20.661367,1,VB,1031.903687,8.965669,1.199158


In [101]:
# Species -> list of GC peak numbers whose areas belong to that species.
# C2H4 ([5]) and C2H6 ([4]) are currently not assigned.
assign_peak_dict = dict(H2=[1], CO2=[2], CO=[6], CH4=[3])

# Look up the peak area for every assigned species and show the result.
peak_area_dict = assign_peaks(dataset, assign_peak_dict)
peak_area_dict

{'H2': 61.9178924560547,
 'CO2': 70992.4375,
 'CO': 1924.29956054688,
 'CH4': 206.927627563477}

**Set calibration input values and import into the data model.** \
To determine the concentrations of the individual species, a calibration has to be performed in advance to match the individual values for ``Peak_area`` with their corresponding concentrations.

In [102]:
# Calibration points per species. Each entry is a three-element list:
#   [0] peak areas measured for the calibration gases,
#   [1] the matching concentrations (stored with the PERCENTAGE unit below),
#   [2] the species enum the calibration belongs to.
# Positions 0 and 1 must have the same length (one area per concentration).
calibration_input_dict={
    'H2':[
        [71,153,330],
        [5,10,20],
        lib.enums.Species.HYDROGEN
    ],
    'CO':[
        [797,1328,7223],
        [0.5,1,5],
        lib.enums.Species.CARBONMONOXIDE
    ],
    'CO2': [
        [0,38653],
        [0,50],
        lib.enums.Species.CARBONDIOXIDE
    ],
    'CH4':[
        [5727,11991],
        [5,10],
        lib.enums.Species.METHANE
    ],
    'C2H4':[
        [1122,4864,7297],
        [0.5,2,3],
        lib.enums.Species.ETHENE
    ],
    # 'C2H6':[
    #     [0,12168],
    #     [0,5],
    #     lib.enums.Species.ETHANE
    # ],
}

In [103]:
# Turn every calibration entry (peak areas, concentrations, species) into a
# ``Calibration`` object of the data model, keeping the order of the input dict.
calibration_list = [
    lib.Calibration(
        peak_area=[lib.Data(values=peak_areas, unit=lib.enums.Unit.NONE)],
        concentration=[
            lib.Data(values=concentrations, unit=lib.enums.Unit.PERCENTAGE)
        ],
        species=calibrated_species,
    )
    for peak_areas, concentrations, calibrated_species in calibration_input_dict.values()
]


In [104]:
# Bundle the calibrations in a ``Calculation`` object and attach it to the
# experiment so that the calculator can read them from there.
calculation = lib.Calculation()
calculation.calibrations = calibration_list
experiment.calculations = calculation

In [105]:
# print(experiment.json())

{
  "id": "experiment0",
  "measurements": [
    {
      "id": "measurement0",
      "measurement_type": "Potentiostatic Measurement",
      "metadata": [
        {
          "id": "metadata164",
          "parameter": "PSTAT",
          "value": "REF3000-19129",
          "data_type": "PSTAT",
          "description": "Potentiostat"
        },
        {
          "id": "metadata165",
          "parameter": "IINIT",
          "value": "-2.00000E+002",
          "data_type": "QUANT",
          "description": "Initial I (mA/cm^2)"
        },
        {
          "id": "metadata166",
          "parameter": "TINIT",
          "value": "3.60000E+003",
          "data_type": "QUANT",
          "description": "Initial Time (s)"
        },
        {
          "id": "metadata167",
          "parameter": "IFINAL",
          "value": "-2.00000E+002",
          "data_type": "QUANT",
          "description": "Final I (mA/cm^2)"
        },
        {
          "id": "metadata168",
          "parameter

Instantiate the ``Calculator`` module.

In [106]:
# The calculator is given the dataset folder; what it reads from there is not
# visible in this notebook.
calculator=Calculator(path_to_dataset=path_to_dataset)

Calibrate using the ``calibrate`` method of the ``Calculator`` module.

In [107]:
# ``calibrate`` returns a result table (slope, intercept and coefficient of
# determination per species, see output) and an experiment object, which is
# rebound to the name ``experiment`` here.
calibration_result_df, experiment=calculator.calibrate(experiment)
calibration_result_df

Unnamed: 0,Slope,Intercept,Coefficient_of_determination
H2,0.057688,1.013564,0.999656
CO,0.000691,0.012657,0.99927
CO2,0.001294,0.0,1.0
CH4,0.000798,0.42864,1.0
C2H6,0.000405,0.042251,0.999953


Calculate the volumetric fractions using the ``calculate_volumetric_fractions`` method of the ``Calculator`` module.

In [108]:
# Combine the assigned peak areas with the calibration results to obtain one
# volumetric fraction per species; species without an assigned peak show NaN.
volumetric_fractions_df = calculator.calculate_volumetric_fractions(peak_area_dict=peak_area_dict, calibration_result_df=calibration_result_df)
volumetric_fractions_df

Unnamed: 0,Volumetric_fraction
H2,4.585501
CO,1.342875
CO2,91.833024
CH4,0.593812
C2H6,


Calculate the conversion factor from the volumetric fractions of the individual species and their correction factors using the ``calculate_conversion_factor`` method of the ``Calculator`` module.

In [109]:
# Per-species correction factors that enter the conversion factor below.
# NOTE(review): presumably gas-specific correction factors of the flow meter —
# confirm the source of these values. C2H4 and C2H6 have no factor yet.
correction_factors_dict = dict(H2=1.01, CO=0.74, CO2=1.00, CH4=0.76)

In [110]:
# Reduce the volumetric fractions and the correction factors to one scalar
# conversion factor (see output); it scales the measured flow further below.
conversion_factor = calculator.calculate_conversion_factor(
    volumetric_fractions_df=volumetric_fractions_df, correction_factors_dict=correction_factors_dict
)
conversion_factor

0.9698176246539418

**Get mean flow rate at the time of the gc measurement.**

In [111]:
# NOTE(review): the meaning of the second argument (10) is not visible here —
# presumably an averaging window around the GC injection; confirm in ``modules``
# and give it a named constant.
volumetric_flow_mean = get_volumetric_flow_mean(experiment, 10)
volumetric_flow_mean

3.79809261904762

In [112]:
# Correct the measured mean flow with the conversion factor computed above.
real_volumetric_flow = volumetric_flow_mean*conversion_factor
real_volumetric_flow

3.6834571620204315

In [113]:
# vol_flow_real= calculator.calc_real_vol_flow(conversion_factor = f_conv, volumetric_flow_measured_mean = vol_flow_mean )
# vol_flow_real

In [114]:
# Split the corrected total flow into one volumetric flow per species.
# NOTE(review): the displayed values equal flow * fraction with the fraction in
# percent (e.g. CO2 ~ 91.8), i.e. there is no division by 100 — verify the units.
volumetric_flow_fractions_df=calculator.calculate_volumetric_flow_fractions(
    real_volumetric_flow=real_volumetric_flow, volumetric_fractions_df=volumetric_fractions_df
)
volumetric_flow_fractions_df

Unnamed: 0,Volumetric_flow_fraction
H2,16.890495
CO,4.946423
CO2,338.26301
CH4,2.187281
C2H6,


In [115]:
# Molar volume used to convert volumetric flows into material (molar) flows;
# the value matches an ideal gas at 0 °C and 1 atm.
V_m = 22.41396954  # * 10^(-3) m^3 / mol
rename = {'Volumetric_flow_fraction': 'Material_flow'}

# Relabel the column first, then divide every volumetric flow by the molar volume.
material_flow_df = volumetric_flow_fractions_df.rename(columns=rename).divide(V_m)
material_flow_df

Unnamed: 0,Material_flow
H2,0.75357
CO,0.220685
CO2,15.091615
CH4,0.097586
C2H6,


**Get initial current in mA and initial time in s.**

In [116]:
# Despite the helper's name ("time_and_current") the values unpack as
# (current, time): the output (-200.0, 3600.0) matches IINIT and TINIT of the
# Gamry metadata table shown earlier.
initial_current, initial_time = get_initial_time_and_current(experiment)
initial_current, initial_time

(-200.0, 3600.0)

In [117]:
# Same value as the AREA entry (1.0 cm^2) of the Gamry metadata table shown
# earlier; it is set by hand here, so keep the two in sync.
electrode_surface_area = 1.0
# Theoretical material flow per species, derived from the initial current,
# the initial time and the electrode area.
theoretical_material_flow_df=calculator.calculate_theoretical_material_flow(
    initial_current=initial_current, initial_time=initial_time, electrode_surface_area=electrode_surface_area
)
theoretical_material_flow_df

Unnamed: 0,Theoretical_material_flow
H2,0.000124
CO,0.000124
CO2,0.000124
CH4,0.000497
C2H4,0.000746
C2H6,0.000995


In [118]:
# Print every species next to its material flow, one pair per line.
for species, material_flow in zip(material_flow_df.index, material_flow_df['Material_flow']):
    print(species, material_flow)


H2 0.7535699963626504
CO 0.22068482592404684
CO2 15.091615485592387
CH4 0.09758561173354853
C2H6 nan


**Calculate Faraday efficiency**

In [127]:
# Element-wise ratio of measured to theoretical material flow. The division
# aligns both columns on their species labels, so a species missing a value on
# either side yields NaN (C2H4 and C2H6 in the output). The result is a Series,
# not a DataFrame, despite the ``_df`` suffix.
# NOTE(review): the displayed ratios are far above 1 (e.g. CO2 ~ 1.2e5), which
# is not plausible for an efficiency — presumably the two flows use different
# units or the percent fractions were not divided by 100; verify.
faraday_efficiency_df = material_flow_df['Material_flow'] / theoretical_material_flow_df['Theoretical_material_flow']
faraday_efficiency_df

C2H4              NaN
C2H6              NaN
CH4         196.15792
CO         1774.40406
CO2     121343.294338
H2        6059.037613
dtype: object

In [120]:
# ``experiment`` was already appended to ``dataset.experiments`` right after it
# was instantiated. Only append again if the name has since been rebound to a
# different object (``calculator.calibrate`` returns an experiment); otherwise
# the very same experiment would be stored, and written to JSON, twice.
if not any(existing is experiment for existing in dataset.experiments):
    dataset.experiments.append(experiment)

In [121]:
# Serialize before opening the file: opening with "w" truncates it at once, so
# a failure inside ``dataset.json()`` would otherwise wipe the dataset that was
# loaded from this very path.
serialized_dataset = dataset.json()
with open(json_files[index_dataset], "w") as f:
    f.write(serialized_dataset)

In [122]:
# class DirectedGraph:
    
#     def __init__(self, name, connections):
#         self.name = name
#         self.connections = connections
#         self.connec_tuple = []
#         self.make_connectivity()

        
#     def make_connectivity(self):
#         if len(self.connections) == 0: 
#             return([])
#         for connection in self.connections:
#             self.connec_tuple.append((self.name, connection))

In [123]:
# mod_1 = DirectedGraph(input0, [tubing0])
# mod_2 = DirectedGraph(input1, [tubing1])
# mod_3 = DirectedGraph(tubing0, [valve0])
# mod_4 = DirectedGraph(tubing1, [valve0])
# mod_5 = DirectedGraph(valve0, [tubing2])
# mod_6 = DirectedGraph(fc, [tubing2])
# mod_7 = DirectedGraph(tubing2, [valve1])
# mod_8 = DirectedGraph(valve1, [tubing4])
# mod_9 = DirectedGraph(tubing4, [valve2])
# mod_10 = DirectedGraph(valve2, [tubing5])
# mod_11 = DirectedGraph(tubing5, [gc])
# mod_12 = DirectedGraph(tubing5, [fr])

# modules = [mod_1, mod_2, mod_3, mod_4, mod_5, mod_6, mod_7, mod_8, mod_9, mod_10, mod_11, mod_12]
# module_names = [flow_mod.name for flow_mod in modules]

In [124]:
# area = exp_df['Area']
# peak_area_dict = {
#     'co2': area.iloc[[1]].sum(),
#     'co': area.iloc[[5,6]].sum(),
#     'h2': area.iloc[[0]].sum(),
#     'ch4': area.iloc[[2,3]].sum(),
#     'c2h4': area.iloc[[4]].sum(),
#     # 'c2h6': area.iloc[[0,1]].sum()
# }
# peak_area_dict

In [125]:
# flowG = nx.DiGraph()
# flowG.add_nodes_from(module_names)
# for module in modules:
#     flowG.add_edges_from(module.connec_tuple)
# my_pos = nx.spring_layout(flowG, seed = 5) # this fixes the style of the graph, if it's ugly change the seed
# nx.draw(flowG, pos = my_pos, with_labels=True)