In [1]:
"""
Minimal WaterTAP MVP:
- Reads a synthetic SCADA-style CSV file for a conventional WTP
- Builds a zero-order flowsheet with:
    Feed -> Pump -> Coag/Floc -> Sedimentation -> Media Filtration -> Chlorination
- Calibrates (trains) selected removal fractions on ONE training point in the CSV
- Simulates all rows in the CSV with the calibrated parameters
- Exports a result CSV and generates plots of flow & water quality at each unit

Expected input CSV: wtp_training_data.csv with columns:
    day
    Q_raw_m3_per_hr
    toc_raw_mg_per_L
    tss_raw_mg_per_L
    toc_treated_mg_per_L
    tss_treated_mg_per_L
"""

'\nMinimal WaterTAP MVP:\n- Reads a synthetic SCADA-style CSV file for a conventional WTP\n- Builds a zero-order flowsheet with:\n    Feed -> Pump -> Coag/Floc -> Sedimentation -> Media Filtration -> Chlorination\n- Calibrates (trains) selected removal fractions on ONE training point in the CSV\n- Simulates all rows in the CSV with the calibrated parameters\n- Exports a result CSV and generates plots of flow & water quality at each unit\n\nExpected input CSV: wtp_training_data.csv with columns:\n    day\n    Q_raw_m3_per_hr\n    toc_raw_mg_per_L\n    tss_raw_mg_per_L\n    toc_treated_mg_per_L\n    tss_treated_mg_per_L\n'

In [2]:
import pandas as pd
import matplotlib.pyplot as plt

from pyomo.environ import (
    ConcreteModel,
    TransformationFactory,
    value,
    units as pyunits,
)
from pyomo.network import Arc

from idaes.core import FlowsheetBlock
from idaes.core.util.initialization import propagate_state
from idaes.core.util import scaling as iscale

from watertap.core.wt_database import Database
from watertap.core.zero_order_properties import WaterParameterBlock


from watertap.unit_models.zero_order import (
    feed_zo,
    pump_zo,
    coag_and_floc_zo,
    sedimentation_zo,
    media_filtration_zo,
    chlorination_zo,
)

from watertap.core.solvers import get_solver



In [3]:
# ---------------------------------------------------------------------------
# 1. Data loading helper
# ---------------------------------------------------------------------------

import pandas as pd

def load_training_data(csv_path: str) -> pd.DataFrame:
    """
    Load the synthetic SCADA dataset and map it into the minimal set of
    columns expected by the WaterTAP training script.

    Expected final columns:
        - day
        - Q_raw_m3_per_hr
        - toc_raw_mg_per_L
        - tss_raw_mg_per_L
        - toc_treated_mg_per_L
        - tss_treated_mg_per_L
    """
    df = pd.read_csv(csv_path)

    # ------------------------------------------------------------------
    # 1) Time handling → create a simple day index
    # ------------------------------------------------------------------
    if "timestamp" in df.columns:
        ts = pd.to_datetime(df["timestamp"])
    else:
        # fabricate timestamps if not present (hourly)
        ts = pd.date_range("2025-01-01", periods=len(df), freq="H")
        df["timestamp"] = ts

    # Day index: 1,2,... (used only as a label in the MVP script)
    df["day"] = (ts.dt.normalize() - ts.dt.normalize().min()).dt.days + 1

    # ------------------------------------------------------------------
    # 2) Flow → m3/h from m3/s
    # ------------------------------------------------------------------
    if "Q_raw_m3_per_hr" not in df.columns:
        if "flow_m3_s" in df.columns:
            df["Q_raw_m3_per_hr"] = df["flow_m3_s"] * 3600.0
        else:
            raise ValueError(
                "Cannot derive 'Q_raw_m3_per_hr': 'flow_m3_s' not found in CSV."
            )

    # ------------------------------------------------------------------
    # 3) Raw TOC (mg/L)
    # ------------------------------------------------------------------
    if "toc_raw_mg_per_L" not in df.columns:
        if "TOC_raw_mgL" in df.columns:
            df["toc_raw_mg_per_L"] = df["TOC_raw_mgL"]
        else:
            # conservative fallback if you ever change the generator
            df["toc_raw_mg_per_L"] = 2.5

    # ------------------------------------------------------------------
    # 4) Raw TSS (mg/L)
    # ------------------------------------------------------------------
    if "tss_raw_mg_per_L" not in df.columns:
        if "TSS_raw_mgL" in df.columns:
            df["tss_raw_mg_per_L"] = df["TSS_raw_mgL"]
        elif "initial_turbidity_ntu" in df.columns:
            # crude 1:1 NTU ≈ mg/L if TSS column missing
            df["tss_raw_mg_per_L"] = df["initial_turbidity_ntu"]
        else:
            df["tss_raw_mg_per_L"] = 10.0  # generic fallback

    # ------------------------------------------------------------------
    # 5) Treated TOC (mg/L)
    #    Here we treat NVTOC as the finished-water TOC proxy.
    # ------------------------------------------------------------------
    if "toc_treated_mg_per_L" not in df.columns:
        if "NVTOC_raw_mgL" in df.columns:
            df["toc_treated_mg_per_L"] = df["NVTOC_raw_mgL"]
        else:
            # assume ~40% removal as a default if only raw TOC is available
            df["toc_treated_mg_per_L"] = df["toc_raw_mg_per_L"] * 0.6

    # ------------------------------------------------------------------
    # 6) Treated TSS (mg/L)
    #    Use filtered turbidity as a proxy; very common for WTPs.
    # ------------------------------------------------------------------
    if "tss_treated_mg_per_L" not in df.columns:
        if "filtered_turbidity_NTU" in df.columns:
            df["tss_treated_mg_per_L"] = df["filtered_turbidity_NTU"]
        elif "final_turbidity_ntu" in df.columns:
            df["tss_treated_mg_per_L"] = df["final_turbidity_ntu"]
        elif "TSS_settled_mgL" in df.columns:
            df["tss_treated_mg_per_L"] = df["TSS_settled_mgL"]
        else:
            df["tss_treated_mg_per_L"] = df["tss_raw_mg_per_L"] * 0.02

    # ------------------------------------------------------------------
    # 7) Sanity check: make sure everything the script needs is present
    # ------------------------------------------------------------------
    required_cols = [
        "day",
        "Q_raw_m3_per_hr",
        "toc_raw_mg_per_L",
        "tss_raw_mg_per_L",
        "toc_treated_mg_per_L",
        "tss_treated_mg_per_L",
    ]
    missing = [c for c in required_cols if c not in df.columns]
    if missing:
        raise ValueError(
            f"Missing required columns in CSV even after mapping: {missing}"
        )

    return df


In [4]:
# ---------------------------------------------------------------------------
# 2. Build zero-order WTP flowsheet (single operating point)
# ---------------------------------------------------------------------------

def build_wtp_flowsheet(db: Database) -> ConcreteModel:
    """
    Build a steady-state zero-order conventional WTP flowsheet:

    feed -> pump -> coag/floc -> sedimentation -> media filtration -> chlorination

    Uses a WaterParameterBlock with H2O + solutes ["toc", "tss"].
    """
    m = ConcreteModel()
    m.fs = FlowsheetBlock(dynamic=False)

    # Simple water + solute property package
    # (WaterParameterBlock is the standard for zero-order units in WaterTAP) 
    m.fs.properties = WaterParameterBlock(
        solute_list=["toc", "tss"]
    )

    # Unit models (all zero-order) 
    m.fs.feed = feed_zo.FeedZO(
        property_package=m.fs.properties,
        database=db,
    )

    m.fs.pump = pump_zo.PumpZO(
        property_package=m.fs.properties,
        database=db,
        process_subtype="default",
    )

    m.fs.coag = coag_and_floc_zo.CoagulationFlocculationZO(
        property_package=m.fs.properties,
        database=db,
        process_subtype="metals_removal",  # or other subtype present in DB
    )

    m.fs.sed = sedimentation_zo.SedimentationZO(
        property_package=m.fs.properties,
        database=db,
        process_subtype="conventional",
    )

    m.fs.filter = media_filtration_zo.MediaFiltrationZO(
        property_package=m.fs.properties,
        database=db,
        process_subtype="gravity_sand",
    )

    m.fs.disinfection = chlorination_zo.ChlorinationZO(
        property_package=m.fs.properties,
        database=db,
        process_subtype="naocl",  # or other DB subtype
    )

    # Connect units via Arcs 
    m.fs.s01 = Arc(source=m.fs.feed.outlet, destination=m.fs.pump.inlet)
    m.fs.s02 = Arc(source=m.fs.pump.outlet, destination=m.fs.coag.inlet)
    m.fs.s03 = Arc(source=m.fs.coag.outlet, destination=m.fs.sed.inlet)
    m.fs.s04 = Arc(source=m.fs.sed.outlet, destination=m.fs.filter.inlet)
    m.fs.s05 = Arc(source=m.fs.filter.outlet, destination=m.fs.disinfection.inlet)

    # Expand network arcs into equality constraints
    TransformationFactory("network.expand_arcs").apply_to(m)

    # Load default zero-order parameters from database
    for u in [
        m.fs.feed,
        m.fs.pump,
        m.fs.coag,
        m.fs.sed,
        m.fs.filter,
        m.fs.disinfection,
    ]:
        u.load_parameters_from_database()

    # Very basic scaling: scale volumetric flows and concentrations
    for comp in ["toc", "tss"]:
        iscale.set_scaling_factor(
            m.fs.feed.inlet.conc_mass_comp[0, comp], 1e3
        )
    iscale.set_scaling_factor(
        m.fs.feed.inlet.flow_vol[0], 1
    )

    iscale.calculate_scaling_factors(m)

    return m

In [5]:
# ---------------------------------------------------------------------------
# 3. Fix inputs from one row and create calibration (training) objective
# ---------------------------------------------------------------------------

def fix_operating_conditions_from_row(m: ConcreteModel, row: pd.Series):
    """
    Fix feed conditions based on a single row of the training DataFrame.
    """

    t = 0  # steady-state time index

    # Convert Q from m3/h to m3/s
    Q_raw_m3_per_hr = float(row["Q_raw_m3_per_hr"])
    Q_raw_m3_per_s = Q_raw_m3_per_hr / 3600.0

    toc_raw_mg_L = float(row["toc_raw_mg_per_L"])
    tss_raw_mg_L = float(row["tss_raw_mg_per_L"])

    # mg/L -> kg/m^3
    toc_raw_kg_m3 = toc_raw_mg_L * 1e-3
    tss_raw_kg_m3 = tss_raw_mg_L * 1e-3

    # Feed conditions applied at fs.feed.inlet
    m.fs.feed.inlet.flow_vol[t].fix(Q_raw_m3_per_s * pyunits.m**3/pyunits.s)
    m.fs.feed.inlet.temperature[t].fix(298 * pyunits.K)
    m.fs.feed.inlet.pressure[t].fix(101325 * pyunits.Pa)
    m.fs.feed.inlet.conc_mass_comp[t, "toc"].fix(
        toc_raw_kg_m3 * pyunits.kg/pyunits.m**3
    )
    m.fs.feed.inlet.conc_mass_comp[t, "tss"].fix(
        tss_raw_kg_m3 * pyunits.kg/pyunits.m**3
    )


def make_parameters_trainable(m: ConcreteModel):
    """
    Free (unfix) selected zero-order removal fractions so they can be calibrated.

    We free removal fractions for TOC and TSS on the coag, sedimentation,
    filter, and disinfection units and their water recovery.
    """
    t = 0
    train_units = [
        m.fs.coag,
        m.fs.sed,
        m.fs.filter,
        m.fs.disinfection,
    ]

    for u in train_units:
        # Recovery of water
        if hasattr(u, "recovery_frac_mass_H2O"):
            u.recovery_frac_mass_H2O[t].unfix()
            u.recovery_frac_mass_H2O[t].setlb(0.9)
            u.recovery_frac_mass_H2O[t].setub(1.0)

        # Solute removal fractions (if present)
        if hasattr(u, "removal_frac_mass_comp"):
            for comp in ["toc", "tss"]:
                if (t, comp) in u.removal_frac_mass_comp:
                    u.removal_frac_mass_comp[t, comp].unfix()
                    u.removal_frac_mass_comp[t, comp].setlb(0.0)
                    u.removal_frac_mass_comp[t, comp].setub(1.0)


def add_calibration_objective(m: ConcreteModel, row: pd.Series):
    """
    Add a least-squares objective to fit model effluent (after disinfection)
    to the measured treated water concentrations in the training row.
    """

    t = 0

    toc_meas_mg_L = float(row["toc_treated_mg_per_L"])
    tss_meas_mg_L = float(row["tss_treated_mg_per_L"])

    toc_meas_kg_m3 = toc_meas_mg_L * 1e-3
    tss_meas_kg_m3 = tss_meas_mg_L * 1e-3

    toc_meas = toc_meas_kg_m3 * pyunits.kg/pyunits.m**3
    tss_meas = tss_meas_kg_m3 * pyunits.kg/pyunits.m**3

    # Model predictions at plant outlet (after disinfection)
    toc_model = m.fs.disinfection.outlet.conc_mass_comp[t, "toc"]
    tss_model = m.fs.disinfection.outlet.conc_mass_comp[t, "tss"]

    m.toc_resid = Expression(expr=toc_model - toc_meas)
    m.tss_resid = Expression(expr=tss_model - tss_meas)

    m.obj = Objective(expr=m.toc_resid**2 + m.tss_resid**2)

In [6]:
# ---------------------------------------------------------------------------
# 4. Solve helper (training then simulation)
# ---------------------------------------------------------------------------

def solve_model(m: ConcreteModel, tee: bool = False):
    """
    Use the default WaterTAP Ipopt solver. 
    """
    solver = get_solver()
    results = solver.solve(m, tee=tee)
    return results

In [7]:
# ---------------------------------------------------------------------------
# 5. Main: train on one row, then simulate all rows & export results
# ---------------------------------------------------------------------------

def run_training_and_simulation(
    csv_path: str = "wtp_training_data.csv",
    results_csv_path: str = "wtp_simulation_results.csv",
):

    db = Database()
    df = load_training_data(csv_path)

    # --- TRAINING: use the first row as a calibration point for MVP ---
    train_row = df.iloc[0]

    m_train = build_wtp_flowsheet(db)
    fix_operating_conditions_from_row(m_train, train_row)
    make_parameters_trainable(m_train)
    add_calibration_objective(m_train, train_row)

    if degrees_of_freedom(m_train) != 0:
        raise RuntimeError(
            f"Training model DOF != 0 (is {degrees_of_freedom(m_train)}), "
            "check degrees of freedom."
        )

    print("Solving calibration (training) problem on row 0...")
    solve_model(m_train, tee=True)

    # Save calibrated parameters for reuse
    calibrated_params = {}

    t = 0
    for unit_name in ["coag", "sed", "filter", "disinfection"]:
        u = getattr(m_train.fs, unit_name)
        unit_params = {}

        if hasattr(u, "recovery_frac_mass_H2O"):
            unit_params["recovery_frac_mass_H2O"] = value(
                u.recovery_frac_mass_H2O[t]
            )

        if hasattr(u, "removal_frac_mass_comp"):
            for comp in ["toc", "tss"]:
                if (t, comp) in u.removal_frac_mass_comp:
                    unit_params[f"removal_{comp}"] = value(
                        u.removal_frac_mass_comp[t, comp]
                    )

        calibrated_params[unit_name] = unit_params

    print("Calibrated parameters:")
    for k, v in calibrated_params.items():
        print(k, v)

    # --- SIMULATION: run flowsheet for ALL rows using calibrated parameters ---

    sim_results = []

    for idx, row in df.iterrows():
        m = build_wtp_flowsheet(db)
        fix_operating_conditions_from_row(m, row)

        # Apply calibrated parameters as fixed values
        t = 0
        for unit_name, unit_params in calibrated_params.items():
            u = getattr(m.fs, unit_name)

            if "recovery_frac_mass_H2O" in unit_params and hasattr(
                u, "recovery_frac_mass_H2O"
            ):
                u.recovery_frac_mass_H2O[t].fix(
                    unit_params["recovery_frac_mass_H2O"]
                )

            if hasattr(u, "removal_frac_mass_comp"):
                for comp in ["toc", "tss"]:
                    key = f"removal_{comp}"
                    if key in unit_params and (t, comp) in u.removal_frac_mass_comp:
                        u.removal_frac_mass_comp[t, comp].fix(
                            unit_params[key]
                        )

        # Solve (forward simulation)
        print(f"Solving simulation for row {idx} (day {row['day']})...")
        solve_model(m, tee=False)

        # Extract inlet & outlet flows & concs for each unit
        t = 0
        record = {
            "day": row["day"],
            "Q_raw_m3_per_hr": row["Q_raw_m3_per_hr"],
            "toc_raw_mg_per_L": row["toc_raw_mg_per_L"],
            "tss_raw_mg_per_L": row["tss_raw_mg_per_L"],
            "toc_treated_measured_mg_per_L": row["toc_treated_mg_per_L"],
            "tss_treated_measured_mg_per_L": row["tss_treated_mg_per_L"],
        }

        # Helper to convert flow+conc to m3/h and mg/L from model
        def _flow_m3h(port):
            return value(port.flow_vol[t]) * 3600.0

        def _conc_mgL(port, comp):
            # kg/m3 -> mg/L
            return value(port.conc_mass_comp[t, comp]) * 1e3

        units = {
            "feed": m.fs.feed,
            "pump": m.fs.pump,
            "coag": m.fs.coag,
            "sed": m.fs.sed,
            "filter": m.fs.filter,
            "disinfection": m.fs.disinfection,
        }

        for uname, u in units.items():
            # Each zero-order unit has inlet/outlet ports with flow_vol & conc_mass_comp
            pin = u.inlet
            pout = u.outlet

            record[f"{uname}_Q_in_m3_per_hr"] = _flow_m3h(pin)
            record[f"{uname}_Q_out_m3_per_hr"] = _flow_m3h(pout)

            for comp in ["toc", "tss"]:
                record[f"{uname}_conc_in_{comp}_mg_per_L"] = _conc_mgL(pin, comp)
                record[f"{uname}_conc_out_{comp}_mg_per_L"] = _conc_mgL(pout, comp)

        sim_results.append(record)

    df_results = pd.DataFrame(sim_results)
    df_results.to_csv(results_csv_path, index=False)
    print(f"Simulation results saved to {results_csv_path}")

    # Create visualisations at the end
    plot_results(df_results)

In [8]:
# ---------------------------------------------------------------------------
# 6. Visualisation: flows & water quality at each unit inlet/outlet
# ---------------------------------------------------------------------------

def plot_results(df_results: pd.DataFrame):
    """
    Generate simple visualisations of:
        - Flow (m3/h) at inlet/outlet of each unit
        - TOC and TSS (mg/L) at inlet/outlet of each unit
    """
    days = df_results["day"]

    units = ["feed", "pump", "coag", "sed", "filter", "disinfection"]

    # ---- Plot flows ----
    plt.figure(figsize=(10, 6))
    for uname in units:
        plt.plot(
            days,
            df_results[f"{uname}_Q_in_m3_per_hr"],
            linestyle="--",
            label=f"{uname} Q_in",
        )
        plt.plot(
            days,
            df_results[f"{uname}_Q_out_m3_per_hr"],
            linestyle="-",
            label=f"{uname} Q_out",
        )

    plt.xlabel("Day")
    plt.ylabel("Flow [m³/h]")
    plt.title("Unit Inlet/Outlet Flows")
    plt.legend()
    plt.tight_layout()
    plt.show()

    # ---- Plot TOC ----
    plt.figure(figsize=(10, 6))
    for uname in units:
        plt.plot(
            days,
            df_results[f"{uname}_conc_in_toc_mg_per_L"],
            linestyle="--",
            label=f"{uname} TOC_in",
        )
        plt.plot(
            days,
            df_results[f"{uname}_conc_out_toc_mg_per_L"],
            linestyle="-",
            label=f"{uname} TOC_out",
        )

    plt.xlabel("Day")
    plt.ylabel("TOC [mg/L]")
    plt.title("Unit Inlet/Outlet TOC")
    plt.legend()
    plt.tight_layout()
    plt.show()

    # ---- Plot TSS ----
    plt.figure(figsize=(10, 6))
    for uname in units:
        plt.plot(
            days,
            df_results[f"{uname}_conc_in_tss_mg_per_L"],
            linestyle="--",
            label=f"{uname} TSS_in",
        )
        plt.plot(
            days,
            df_results[f"{uname}_conc_out_tss_mg_per_L"],
            linestyle="-",
            label=f"{uname} TSS_out",
        )

    plt.xlabel("Day")
    plt.ylabel("TSS [mg/L]")
    plt.title("Unit Inlet/Outlet TSS")
    plt.legend()
    plt.tight_layout()
    plt.show()

In [9]:
# ---------------------------------------------------------------------------
# 7. Entry point
# ---------------------------------------------------------------------------

if __name__ == "__main__":
    run_training_and_simulation(
        csv_path="wtp_watertap_training_dataset.csv",
        results_csv_path="wtp_simulation_results.csv",
    )

2025-11-27 23:41:32 [ERROR] idaes.core.base.process_block: Failure in build: fs.feed
Traceback (most recent call last):
  File "C:\Users\abel_\anaconda3\envs\WaterTAP_Env\Lib\site-packages\idaes\core\base\process_block.py", line 41, in _rule_default
    b.build()
  File "C:\Users\abel_\anaconda3\envs\WaterTAP_Env\Lib\site-packages\watertap\unit_models\zero_order\feed_zo.py", line 49, in build
    super().build()
  File "C:\Users\abel_\anaconda3\envs\WaterTAP_Env\Lib\site-packages\idaes\models\unit_models\feed.py", line 130, in build
    super(FeedData, self).build()
  File "C:\Users\abel_\anaconda3\envs\WaterTAP_Env\Lib\site-packages\idaes\core\base\unit_model.py", line 110, in build
    super(UnitModelBlockData, self).build()
  File "C:\Users\abel_\anaconda3\envs\WaterTAP_Env\Lib\site-packages\idaes\core\base\process_base.py", line 130, in build
    self._get_config_args()
  File "C:\Users\abel_\anaconda3\envs\WaterTAP_Env\Lib\site-packages\idaes\core\base\process_base.py", line 251, 

ValueError: key 'database' not defined for ConfigDict '' and implicit (undefined) keys are not allowed