# Workflow for the CRC1333 project B07 - Technical Chemistry

**Import necessary packages.**

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import networkx as nx
import sklearn
import pandas as pd
import pint_pandas
from datetime import datetime
from sdRDM import DataModel
from modules import GCMDParser
from modules import GCEDParser
from modules import GstaticParser
from modules import MFMParser
from modules import Calculator
from pathlib import Path
# from DEXPI2sdRDM import DEXPI2sdRDM

### Set up the data model and the correct path to the working directory.

**Load data model from markdown file.**

In [2]:
model = DataModel.from_markdown('specifications/datamodel_b07_tc.md')
# b07 = model.Dataset()

**Set path to current working directory.**

In [3]:
path_to_dataset = Path.cwd()

### Load metadata into the datamodel.

**Load P&ID file.**

In [4]:
# filename = "column"
# path = "./f'{filename}'"

**Load DEXPI conform P&ID file into the data model using the ``DEXPI2sdRDM`` module.**

In [5]:
# DEXPI2sdRDM(
# )

**Create an instance of the ``GstaticParser`` to parse Gamry output files and show available files in the selected directory.**

In [6]:
gstaticparser = GstaticParser(path_to_dataset / 'data')
dict_of_gstatic_files = gstaticparser.enumerate_available_files()
for index, gstatic_file in dict_of_gstatic_files.items():
    print(f"{index}: {gstatic_file}")
# available_files = gstaticparser.available_files
# print(available_files)

0: GSTATIC(Potentiostat)


**Chose specific file and extract the metadata from it using the ``GstaticParser``.**

In [7]:
gstatic_file = dict_of_gstatic_files[0]
gstatic_meta_df = gstaticparser.extract_metadata(gstatic_file)
gstatic_meta_df

Unnamed: 0,Abbreviation,Type,Quantity,Description_and_unit
0,TITLE,LABEL,Galvanostatic Scan,Test Identifier
1,DATE,LABEL,7.2.2023,Date
2,TIME,LABEL,9:14:53,Time
3,NOTES,NOTES,1,Notes...
4,,,,
5,PSTAT,PSTAT,REF3000-19129,Potentiostat
6,IINIT,QUANT,-2.00000E+002,Initial I (mA/cm^2)
7,TINIT,QUANT,3.60000E+003,Initial Time (s)
8,IFINAL,QUANT,-2.00000E+002,Final I (mA/cm^2)
9,TFINAL,QUANT,0.00000E+000,Final Time (s)


**Get initial current in mA and initial time in s.**

In [8]:
I_init =np.abs(float(gstatic_meta_df.loc[gstatic_meta_df["Abbreviation"] == "IINIT", "Quantity"].values[0]))
T_init = float(gstatic_meta_df.loc[gstatic_meta_df["Abbreviation"] == "TINIT", "Quantity"].values[0])
I_init, T_init

(200.0, 3600.0)

**Create an instance of the ``MFMParser`` to parse MFM output files and show available files in the selected directory.**

In [9]:
mfmparser = MFMParser(path_to_dataset / 'data')
dict_of_mfm_files = mfmparser.enumerate_available_files()
for index, mfmfile in dict_of_mfm_files.items():
    print(f"{index}: {mfmfile}")

0: Bench-2h-GSS_D9-pH70-1303-C4-2103-1_200_50c_24h
1: PX409-USBH


**Chose specific file and extract the metadata from it using the ``MFMParser``.**

In [10]:
mfm_file = dict_of_mfm_files[0]

In [11]:
mfm_exp_data_df = mfmparser.extract_exp_data(mfm_file)
mfm_exp_data_df

Unnamed: 0,datetime,time,signal,flow
0,2023-03-21 10:17:24,0.0,3039.0,4.747912
1,2023-03-21 10:17:26,2.0,3251.0,5.079678
2,2023-03-21 10:17:28,4.0,3265.0,5.101816
3,2023-03-21 10:17:30,6.0,3280.0,5.125732
4,2023-03-21 10:17:32,8.0,3290.0,5.140502
...,...,...,...,...
256964,2023-01-31 09:15:40,513928.0,3318.0,5.184328
256965,2023-01-31 09:15:40,513930.0,3318.0,5.184328
256966,2023-01-31 09:15:40,513932.0,3318.0,5.184328
256967,2023-01-31 09:15:40,513934.0,3318.0,5.184328


In [12]:
timestamp_object = mfm_exp_data_df.at[100000,'datetime'].to_pydatetime()
timestamp_object
# datetime.fromtimestamp(timestamp_object)

datetime.datetime(2023, 1, 30, 20, 47, 52)

In [13]:
gcmdparser = GCMDParser(path_to_dataset / 'data')
dict_of_gcmd_files = gcmdparser.enumerate_available_files()
for index, gcmd_file in dict_of_gcmd_files.items():
    print(f"{index}: {gcmd_file}")

0: report00
1: REPORT01


In [14]:
gcmd_file = dict_of_gcmd_files[0]
# print(gcmdparser._available_files[gcm]
# )
metadata_df = gcmdparser.extract_metadata(gcmd_file)
metadata_df

Unnamed: 0,column_1,column_2,column_3
0,Sample Name,,
1,Sample Info,,
2,Data File,D:\GC\Kurz\D9-pH70-1303-C4-2103-1\JH-1H 2023-0...,NV-F0201.D
3,Acq. Instrument,Instrument 1,
4,Analysis Method,D:\GC\Kurz\D9-pH70-1303-C4-2103-1\JH-1H 2023-0...,JH_GASPRODUKTE_30MIN.M
5,Method Info,,
6,Results Created,21.03.2023 12:44:58,
7,Results Created by,MS,
8,Acq. Method,JH_GASPRODUKTE_30MIN.M,
9,Injection Date,"21-Mar-23, 12:14:56",


In [15]:
inj_date = metadata_df.at[9, 'column_2']
inj_date

'21-Mar-23, 12:14:56'

In [16]:
inj_date_datetime = datetime.strptime(inj_date, '%d-%b-%y, %H:%M:%S')
inj_date_datetime

datetime.datetime(2023, 3, 21, 12, 14, 56)

In [17]:
vol_flow = []
m = 10
for i, time in enumerate(mfm_exp_data_df['datetime']):
    if time.to_pydatetime() == inj_date_datetime:
        for j in range(i-m,i+m+1):
            vol_flow.append(mfm_exp_data_df.at[j,'flow'])
vol_flow_mean = sum(vol_flow) / (m*2+1)
vol_flow_mean

3.79809261904762

**Create an instance of the ``GCEDParser`` to parse GC experimental data output files and show available files in the selected directory.**

In [18]:
gcedparser = GCEDParser(path_to_dataset / 'data')
dict_of_gced_files = gcedparser.enumerate_available_files()
for index, gced_file in dict_of_gced_files.items():
    print(f"{index}: {gced_file}")
# available_files = parser.available_files
# print(available_files)

0: report00
1: REPORT01


**Chose specific file and extract the metadata from it using the ``GCParser``.**

In [19]:
gcedfile = dict_of_gced_files[1]
exp_df = gcedparser.extract_exp_data(gced_file)
exp_df

Unnamed: 0,Peak_Number,Retention_Time,Signal,Peak_Type,Area,Height,Area_Percentage
0,1,1.734678,1,BBAN,61.917892,12.646031,0.071954
1,2,2.905288,1,BBA,70992.4375,4047.07666,82.499118
2,3,3.420898,2,BV,206.927628,56.43293,0.240467
3,4,3.652101,2,VB,59.064323,20.202883,0.068638
4,5,6.014212,2,BB,6365.441895,1324.453613,7.397173
5,6,13.005888,1,BV,1924.299561,50.554913,2.236196
6,7,14.186999,2,BB,183.061951,59.090946,0.212733
7,8,15.824186,1,VV,2871.221191,20.406471,3.336598
8,9,17.882446,1,VV,2126.794922,16.856892,2.471513
9,10,20.661367,1,VB,1031.903687,8.965669,1.199158


**Assign peak areas to species.** \
The peak areas recorded by the GC have to be matched with the correct species. The individial ``Area`` is selected by its corresponding ``Peak_Number``. It is possible that the same species is accountable for multiple peaks, i.d. multiple peaks are assigned to the same species.


In [20]:
assign_peak_dict={
    'CO2': [2],
    'CO': [6],
    'H2': [1],
    'CH4': [3],
    'C2H4': [5],
    # 'C2H6': [4],
}
peak_area_dict = {}
for key, value in assign_peak_dict.items():
    peak_area_dict[key]=exp_df.query('Peak_Number==@value')['Area'].sum()
peak_area_dict

{'CO2': 70992.4375,
 'CO': 1924.29956054688,
 'H2': 61.9178924560547,
 'CH4': 206.927627563477,
 'C2H4': 6365.44189453125}

**Set calibration input values and import into the data model.** \
To determine the concentrations of the individual species, a calibration has to be performed in advance to match the individual values for ``Area`` with their corresponding concentrations.

In [None]:
cali_input_dict={
    'CO2': [[0,38653],[0,50]],
    'CO':[[797,1328,7223],[0.5,1,5]],
    'H2':[[71,153,330], [5,10,20]],
    'CH4':[[5727,11991], [5,10]],
    'C2H4':[[1122,4864,7297], [0.5,2,3]],
    'C2H6':[[0,12168], [0,5]],
}

In [21]:
cali_dict={}
for key, value in cali_input_dict.items():
    cali_dict[key]=model.Calibration(
        peak_area=[
            model.Data(
                values=value[0],
                # unit=model.
                unit=model.enums.Unit.VOLFRACTION
            )
        ],
        concentration=[
            model.Data(  
                values=value[1],
                unit=model.enums.Unit.VOLFRACTION
            )
        ]    
    )

ValidationError: 1 validation error for Data
unit
  List element of type '<class 'str'>' cannot be added. Expected type '<enum 'Unit'>' (type=type_error)

In [None]:
calculation=model.Calculation(calibrations= list(cali_dict.values()))
experiment_1=model.Experiment(calculations=calculation)
b07=model.Dataset(experiments=[experiment_1])
# b07.add_experiment_to_experiments(experiment_1)
general_information=model.GeneralInformation(title='fotze')
b07.general_information=general_information
b07.__dict__
# b07

{'id': 'dataset0',
 'general_information': GeneralInformation(id='generalinformation0', title='fotze', description=None, authors=[]),
 'experiments': [Experiment(id='experiment0', plant_setup=None, calculations=Calculation(id='calculation0', calibrations=[Calibration(id='calibration0', peak_area=[Data(id='data0', values=[0.0, 38653.0], unit=<Unit.VOLFRACTION: 'vol%'>)], concentration=[Data(id='data1', values=[0.0, 50.0], unit=<Unit.VOLFRACTION: 'vol%'>)], slope=None, intercept=None, coefficient_of_determination=None), Calibration(id='calibration1', peak_area=[Data(id='data2', values=[797.0, 1328.0, 7223.0], unit=<Unit.VOLFRACTION: 'vol%'>)], concentration=[Data(id='data3', values=[0.5, 1.0, 5.0], unit=<Unit.VOLFRACTION: 'vol%'>)], slope=None, intercept=None, coefficient_of_determination=None), Calibration(id='calibration2', peak_area=[Data(id='data4', values=[71.0, 153.0, 330.0], unit=<Unit.VOLFRACTION: 'vol%'>)], concentration=[Data(id='data5', values=[5.0, 10.0, 20.0], unit=<Unit.VOL

In [None]:
b07.general_information.title

'fotze'

In [None]:
# model.Datamodel.visualize_tree(

In [None]:
model.Dataset.visualize_tree()

Dataset
├── id
├── general_information
│   └── GeneralInformation
│       ├── id
│       ├── title
│       ├── description
│       └── authors
│           └── Author
│               ├── id
│               ├── name
│               └── affiliation
└── experiments
    └── Experiment
        ├── id
        ├── plant_setup
        │   └── PlantSetup
        │       ├── id
        │       ├── devices
        │       │   └── Device
        │       │       ├── id
        │       │       ├── manufacturer
        │       │       ├── device_type
        │       │       ├── series
        │       │       └── on_off
        │       ├── tubing
        │       │   └── Tubing
        │       │       ├── id
        │       │       ├── material
        │       │       ├── inner_diameter
        │       │       ├── outer_diameter
        │       │       ├── length
        │       │       └── insulation
        │       │           └── Insulation
        │       │               ├── id
        │       │    

Instantiate the ``Calculator`` module.

In [None]:
calculator=Calculator(path_to_dataset=path_to_dataset)

Calibrate using the ``calibrate`` method of the ``Calculator`` module.

In [None]:
cali_result_df=calculator.calibrate(calibration_input_dict=cali_input_dict)
cali_result_df

Unnamed: 0,slope,intercept,coef_det
CO2,0.001294,0.0,1.0
CO,0.000691,0.012657,0.99927
H2,0.057688,1.013564,0.999656
CH4,0.000798,0.42864,1.0
C2H4,0.000405,0.042251,0.999953
C2H6,0.000411,0.0,1.0


Calculate volume fractions using the ``calculate_volume_fractions`` method of the ``Calculator`` module.

In [None]:
vol_frac_df = calculator.calculate_volume_fractions(peak_area_dict=peak_area_dict, calibration_result_df=cali_result_df)
vol_frac_df

Unnamed: 0,volume_fraction
CO2,91.833024
CO,1.342875
H2,4.585501
CH4,0.593812
C2H4,2.617203
C2H6,


Calculate the conversion factors of the individual species using the ``calculate_conversion_factors`` method of the ``Calculator`` module and the correction factors.

In [None]:
f_corr_dict= {
    'H2':1.01,
    'CO':0.74,
    'CO2':1.00,
    'CH4':0.76,
    # 'C2H4':,
    # 'C2H6':,
}

In [None]:
vol_frac_df.loc['H2'][0]

4.585500577795688

In [None]:
f_conv = calculator.calculate_conversion_factor(volume_fractions=vol_frac_df, correction_factors=f_corr_dict)
f_conv

0.9698176246539418

In [None]:
vol_flow_r_df = calculator.calculate_vol_flow_r(conversion_factor = f_conv, volumetric_flow_measured = mfm_exp_data_df )
vol_flow_r_df

TypeError: cannot perform __mul__ with this index type: DatetimeArray

In [None]:
# class DirectedGraph:
    
#     def __init__(self, name, connections):
#         self.name = name
#         self.connections = connections
#         self.connec_tuple = []
#         self.make_connectivity()

        
#     def make_connectivity(self):
#         if len(self.connections) == 0: 
#             return([])
#         for connection in self.connections:
#             self.connec_tuple.append((self.name, connection))

In [None]:
# mod_1 = DirectedGraph(input0, [tubing0])
# mod_2 = DirectedGraph(input1, [tubing1])
# mod_3 = DirectedGraph(tubing0, [valve0])
# mod_4 = DirectedGraph(tubing1, [valve0])
# mod_5 = DirectedGraph(valve0, [tubing2])
# mod_6 = DirectedGraph(fc, [tubing2])
# mod_7 = DirectedGraph(tubing2, [valve1])
# mod_8 = DirectedGraph(valve1, [tubing4])
# mod_9 = DirectedGraph(tubing4, [valve2])
# mod_10 = DirectedGraph(valve2, [tubing5])
# mod_11 = DirectedGraph(tubing5, [gc])
# mod_12 = DirectedGraph(tubing5, [fr])

# modules = [mod_1, mod_2, mod_3, mod_4, mod_5, mod_6, mod_7, mod_8, mod_9, mod_10, mod_11, mod_12]
# module_names = [flow_mod.name for flow_mod in modules]

In [None]:
# area = exp_df['Area']
# peak_area_dict = {
#     'co2': area.iloc[[1]].sum(),
#     'co': area.iloc[[5,6]].sum(),
#     'h2': area.iloc[[0]].sum(),
#     'ch4': area.iloc[[2,3]].sum(),
#     'c2h4': area.iloc[[4]].sum(),
#     # 'c2h6': area.iloc[[0,1]].sum()
# }
# peak_area_dict

In [None]:
# flowG = nx.DiGraph()
# flowG.add_nodes_from(module_names)
# for module in modules:
#     flowG.add_edges_from(module.connec_tuple)
# my_pos = nx.spring_layout(flowG, seed = 5) # this fixes the style of the graph, if it's ugly change the seed
# nx.draw(flowG, pos = my_pos, with_labels=True)