In [1]:
import pandas as pd
from functools import reduce

# Source file for each DataFrame, keyed by the name used for it in `dfs`.
file_paths = {
    'dfNitrate': '../data/nitrate.parquet',
    'dfAmmonium': '../data/ammonium.parquet',
    'dfOxygenA': '../data/oxygen_a.parquet',
    'dfOxygenB': '../data/oxygen_b.parquet',
    'dfPhosphate': '../data/phosphate.parquet',
    'dfWater': '../data/water.csv'
}

# Per-frame column renames. The parquet frames all expose their value as
# 'hstWaarde', so each one is renamed to a distinct measurement name. The water
# CSV gets its 'DateTime' column renamed to 'datumEindeMeting', the same name
# the other frames use for their end-of-measurement column.
column_mappings = {
    'dfNitrate': {'hstWaarde': 'nitrate'},
    'dfAmmonium': {'hstWaarde': 'ammonium'},
    'dfOxygenA': {'hstWaarde': 'oxygena'},
    'dfOxygenB': {'hstWaarde': 'oxygenb'},
    'dfPhosphate': {'hstWaarde': 'phosphate'},
    'dfWater': {'EDE_09902MTW_K100.MTW': 'throughput', 'DateTime': 'datumEindeMeting'}
}


def load_frame(file_path):
    """Read one source file into a DataFrame, picking the reader by extension.

    Parameters
    ----------
    file_path : str
        Path ending in ``.parquet`` or ``.csv``.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If the extension is neither ``.parquet`` nor ``.csv``. Failing here
        avoids a silently missing entry that would only surface later as a
        ``KeyError`` when the frame is looked up.
    """
    if file_path.endswith(".parquet"):
        return pd.read_parquet(file_path)
    if file_path.endswith(".csv"):
        # The CSV is read as semicolon-delimited.
        # NOTE(review): no `decimal=` / `thousands=` is passed; confirm the
        # numeric columns of this file parse as intended.
        return pd.read_csv(file_path, delimiter=';')
    raise ValueError(
        f"Unsupported file type for {file_path!r}; expected .parquet or .csv"
    )


# Load every source and apply its renames in one pass. `rename` returns a new
# frame, so nothing is mutated in place; frames without a mapping are kept
# under their original column names.
dfs = {
    df_name: load_frame(file_path).rename(columns=column_mappings.get(df_name, {}))
    for df_name, file_path in file_paths.items()
}

# Parse the water table's 'datumEindeMeting' column into datetimes, reading
# ambiguous dates day-first.
water = dfs['dfWater']
water['datumEindeMeting'] = pd.to_datetime(water['datumEindeMeting'], dayfirst=True)

# Strip the 'historianTagnummer' column from whichever frames carry it;
# frames without it are left untouched.
column_to_drop = 'historianTagnummer'
for frame in dfs.values():
    if column_to_drop not in frame.columns:
        continue
    frame.drop(columns=column_to_drop, inplace=True)

# Inner-join the sensor frames one after another on the measurement interval,
# so only intervals present in every listed frame survive.
# Note: dfOxygenA is loaded into `dfs` but is not part of this merge list.
interval_keys = ['datumBeginMeting', 'datumEindeMeting']
dfs_to_merge = [dfs['dfNitrate'], dfs['dfAmmonium'], dfs['dfOxygenB'], dfs['dfPhosphate']]
df_merged = dfs_to_merge[0]
for sensor_frame in dfs_to_merge[1:]:
    df_merged = df_merged.merge(sensor_frame, on=interval_keys, how='inner')

# Ensure both interval bounds are datetimes before joining with the water
# table, whose 'datumEindeMeting' has already been parsed.
for bound in interval_keys:
    df_merged[bound] = pd.to_datetime(df_merged[bound])

# Attach the water table by matching on the end-of-measurement timestamp;
# rows without a matching timestamp on both sides are dropped (inner join).
df_merged = pd.merge(df_merged, dfs['dfWater'], how='inner', on='datumEindeMeting')



In [2]:
# Display the merged frame; for a long frame the rendered output elides the
# middle rows and shows only the head and tail.
df_merged

Unnamed: 0,nitrate,datumBeginMeting,datumEindeMeting,ammonium,oxygenb,phosphate,throughput,wwResolution
0,6.14,2021-01-01 00:00:00,2021-01-01 00:01:00,4.487,0.029,0.185,2754761089,60000
1,6.118,2021-01-01 00:01:00,2021-01-01 00:02:00,4.263,0.028,0.126,2609343947,60000
2,5.963,2021-01-01 00:02:00,2021-01-01 00:03:00,4.173,0.028,0.108,2535541065,60000
3,5.884,2021-01-01 00:03:00,2021-01-01 00:04:00,4.194,0.028,0.122,2780798536,60000
4,5.876,2021-01-01 00:04:00,2021-01-01 00:05:00,4.214,0.028,0.136,2192832206,60000
...,...,...,...,...,...,...,...,...
218853,6.882,2021-12-31 23:54:00,2021-12-31 23:55:00,2.247,0.552,1.925,9076167848,60000
218854,6.89,2021-12-31 23:55:00,2021-12-31 23:56:00,2.256,0.549,1.935,1763518071,60000
218855,6.904,2021-12-31 23:56:00,2021-12-31 23:57:00,2.264,0.559,1.944,1135575971,60000
218856,6.985,2021-12-31 23:57:00,2021-12-31 23:58:00,2.273,0.554,2.092,1879508758,60000
