In [1]:
import pandas as pd
import pwlf
import numpy as np 
import matplotlib.pyplot as plt

In [2]:
def outflow_curve_filter(outflow_df, pump_number, feature, bin_size, delay, pump_selection = False):
    # Prepare column names based on the selected pump number
    if pump_selection == True:
        if pump_number == 1:
            outflow_df[(outflow_df['pump1_speed'] > 0) & (outflow_df['pump3_speed'] == 0) & (outflow_df['pump4_speed'] == 0)]
        elif pump_number == 3:
            outflow_df[(outflow_df['pump1_speed'] == 0) & (outflow_df['pump3_speed'] >= 0) & (outflow_df['pump4_speed'] == 0)]
        elif pump_number == 4:
            outflow_df[(outflow_df['pump1_speed'] == 0) & (outflow_df['pump3_speed'] == 0) & (outflow_df['pump3_speed'] > 0)]
            
        pump_speed_col = f'pump{pump_number}_speed'
    
    outflow_df = outflow_df[outflow_df["time"] <= '2024-01-01'].reset_index()
    
    outflow_df.loc[outflow_df[pump_speed_col] <= 0, 'outflow'] = 0
    outflow_df.loc[outflow_df[pump_speed_col] < 0, pump_speed_col] = 0
    
    outflow_df = outflow_df[(outflow_df["outflow"] >= 0) & (outflow_df["outflow"] < 1000)]
    outflow_df["outflow"] = outflow_df["outflow"].shift(delay)
    
    max_speed = outflow_df[pump_speed_col].max()
    bins = range(0, int(max_speed) + bin_size, bin_size)
    
    # Bin the pump speeds
    outflow_df['speed_bin'] = pd.cut(outflow_df[pump_speed_col], bins, right=False)
    
    # Group by the new 'speed_bin' column and calculate mean values
    outflow_df = outflow_df.groupby('speed_bin')[[feature, pump_speed_col]].median().reset_index()
    
    # Calculate the midpoint of each speed bin
    outflow_df['speed_mid'] = outflow_df['speed_bin'].apply(lambda x: (x.left + x.right) / 2)
    
    return outflow_df.dropna()

In [3]:
outflow_df  = pd.read_parquet("../data/static_models/outflow_miso.par")

pump1_power_df = pd.read_parquet("../data/static_models/pump1_power_siso.par")
#pump4_power_df = pd.read_parquet("./data/static_models/pump4_power_siso.par")
#pump3_power_df = pd.read_parquet("./data/static_models/pump3_power_siso.par")

In [4]:
static_qout_p1 = outflow_curve_filter(outflow_df = outflow_df,
                                           pump_number = 3,
                                           feature = "outflow",
                                           bin_size = 5, 
                                           delay=2,
                                           pump_selection=True)


  outflow_df = outflow_df.groupby('speed_bin')[[feature, pump_speed_col]].median().reset_index()


In [5]:
def fit_piecewise_outflow(outflow_static_curve_df: pd.DataFrame, breakpoints: list, pump_number: int):
    
    pwlf_model = None
    pump_speed_col = f'pump{pump_number}_speed'
    pwlf_model = pwlf.PiecewiseLinFit(outflow_static_curve_df[pump_speed_col], outflow_static_curve_df["outflow"])
    pwlf_model.fit_with_breaks(breakpoints)
    yhat = pwlf_model.predict(outflow_static_curve_df[pump_speed_col])
    print(pwlf_model.beta)
    
    return outflow_static_curve_df[pump_speed_col],  outflow_static_curve_df["outflow"],  yhat 

In [6]:
x, y, yhat = fit_piecewise_outflow(outflow_static_curve_df= static_qout_p1, 
                      breakpoints=[0, 600, np.max(static_qout_p1["pump3_speed"])], 
                      pump_number=3)

[3.22161731 0.08378681 0.83715562]
