In [None]:
import pandas as pd
from functools import reduce

# Source file for every DataFrame, keyed by the name it is stored under in `dfs`.
# Five measurement series are Parquet files; the water table is a CSV.
file_paths = dict(
    dfNitrate='../data/nitrate.parquet',
    dfAmmonium='../data/ammonium.parquet',
    dfOxygenA='../data/oxygen_a.parquet',
    dfOxygenB='../data/oxygen_b.parquet',
    dfPhosphate='../data/phosphate.parquet',
    dfWater='../data/water.csv',
)

# Column renames per DataFrame ({old name: new name}).
# Each measurement frame renames its 'hstWaarde' column to a series-specific name.
column_mappings = {
    frame: {'hstWaarde': renamed}
    for frame, renamed in (
        ('dfNitrate', 'nitrate'),
        ('dfAmmonium', 'ammonium'),
        ('dfOxygenA', 'oxygena'),
        ('dfOxygenB', 'oxygenb'),
        ('dfPhosphate', 'phosphate'),
    )
}
# The water table renames two columns; 'DateTime' takes the name of the
# end-of-measurement column that the later merge joins on.
column_mappings['dfWater'] = {
    'EDE_09902MTW_K100.MTW': 'throughput',
    'DateTime': 'datumEindeMeting',
}

# Read every configured file into `dfs`, choosing the pandas reader from the
# file extension. Paths with any other extension are skipped without an error,
# so their key will simply be absent from `dfs`.
dfs = {}
for name, path in file_paths.items():
    if path.endswith(".csv"):
        # The CSV is parsed with ';' as the field separator
        dfs[name] = pd.read_csv(path, delimiter=';')
        continue
    if path.endswith(".parquet"):
        dfs[name] = pd.read_parquet(path)

# Apply the configured column renames in place; a mapping whose DataFrame
# was not loaded is ignored
for name, renames in column_mappings.items():
    if name not in dfs:
        continue
    dfs[name].rename(columns=renames, inplace=True)

# Parse the water table's 'datumEindeMeting' column into datetimes,
# reading ambiguous dates as day-first
water_df = dfs['dfWater']
water_df['datumEindeMeting'] = pd.to_datetime(
    water_df['datumEindeMeting'],
    dayfirst=True,
)

# Remove 'historianTagnummer' from every loaded DataFrame that has it;
# frames without that column are left untouched
column_to_drop = 'historianTagnummer'
for frame in dfs.values():
    frame.drop(columns=column_to_drop, errors='ignore', inplace=True)

# 'wwResolution' is dropped from the water table only; this raises KeyError
# if the column is absent
dfs['dfWater'].drop(columns='wwResolution', inplace=True)

# Inner-join the measurement frames one after another on the begin/end
# timestamps, so only intervals present in every frame survive.
# NOTE(review): 'dfOxygenA' is listed in file_paths and column_mappings but is
# not part of this merge — confirm that leaving it out is intentional.
merge_keys = ['datumBeginMeting', 'datumEindeMeting']
dfs_to_merge = [
    dfs[name]
    for name in ('dfNitrate', 'dfAmmonium', 'dfOxygenB', 'dfPhosphate')
]
df_merged, *remaining = dfs_to_merge
for frame in remaining:
    df_merged = pd.merge(df_merged, frame, on=merge_keys, how='inner')

# Convert both interval-boundary columns of the merged frame to datetimes
for boundary in ('datumBeginMeting', 'datumEindeMeting'):
    df_merged[boundary] = pd.to_datetime(df_merged[boundary])

# Attach the water table: inner join on the end-of-measurement timestamp,
# keeping only rows whose 'datumEindeMeting' occurs in both frames
df_merged = pd.merge(
    left=df_merged,
    right=dfs['dfWater'],
    on='datumEindeMeting',
    how='inner',
)



In [None]:
# Bare expression at the end of the cell, so the notebook renders df_merged
# as the cell output
df_merged