In [None]:
import numpy as np
import cvxpy as cp
import pandas as pd

# Fit the seven parameters of a three-node (upper / middle / lower) water-heater
# tank model by ordinary least squares: one 3x7 regressor block W_j and one
# 3-vector target z_j per consecutive pair of samples, stacked and solved with CVXPY.
df = pd.read_csv("output_site_null_ewh.csv")

# Constants
cp_val = 4181.3  # presumably specific heat of water, J/(kg*K) — TODO confirm units
rho = 1000  # presumably water density, kg/m^3 — TODO confirm it is consistent with V in litres
dt = 8 # TODO(author): open question — what should this be? Euler steps per timestep?
T_a = 22  # presumably ambient temperature — TODO confirm
V = 250 # total tank volume (L)

# Data: one temperature series per tank node, taken from three CSV columns.
T_u = df["T_WH3"].values
T_m = df["T_WH7"].values
T_l = df["T_WH10"].values

W_list = []
z_list = []

# Column order of every W_j (matches param_names below):
#   0: Ul, 1: Um, 2: Uu, 3: Kml, 4: Kum, 5: Vm, 6: Vu
for j in range(len(T_u) - 1):  # stop one short because each block uses sample j+1
    Wj = np.zeros((3, 7))  # Each Wj is 3x7

    # Row 1: Upper node (δtpu)
    Wj[0, 2] = dt * (T_u[j] - T_a)       # U_u
    Wj[0, 4] = dt * (T_u[j] - T_m[j])    # K_um
    Wj[0, 6] = rho * cp_val * (T_u[j+1] - T_u[j])  # V_u

    # Row 2: Middle node (δtpm)
    Wj[1, 1] = dt * (T_m[j] - T_a)       # U_m
    Wj[1, 3] = dt * (T_m[j] - T_l[j])    # K_ml
    Wj[1, 4] = dt * (T_m[j] - T_u[j])    # K_um
    Wj[1, 5] = rho * cp_val * (T_m[j+1] - T_m[j])  # V_m

    # Row 3: Lower node. The lower volume is not a free parameter: together with
    # the V * ... term in z_j below, the -V_m and -V_u columns encode
    # (V - V_m - V_u) as the lower-node volume.
    Wj[2, 0] = dt * (T_l[j] - T_a)       # U_l
    Wj[2, 3] = dt * (T_l[j] - T_m[j])    # K_ml
    Wj[2, 5] = -rho * cp_val * (T_l[j+1] - T_l[j])  # -V_m
    Wj[2, 6] = -rho * cp_val * (T_l[j+1] - T_l[j])  # -V_u

    # TODO(author): "not sure what this should be".
    # NOTE(review): the first two targets below are exactly Wj[0, 6] and Wj[1, 5],
    # so theta with Vu = Vm = 1 and all U/K = 0 reproduces them with zero residual.
    # The recorded output of this cell (Vm = Vu = 1.0000, U/K ~ 0, residual ~ 6e-6)
    # matches that degenerate solution, so the fit carries no physical information.
    # The original inline comment suggests the intended target is the node's heater
    # power times the time interval — confirm and replace once that column is known.
    z_j = np.array([
        # δtpu upper node
        rho * cp_val * (T_u[j+1] - T_u[j]), # intended (per author): power at node 3 * time interval (60 s?)

        # δtpm middle node
        rho * cp_val * (T_m[j+1] - T_m[j]),

        # lower node energy change
        V * rho * cp_val * (T_l[j] - T_l[j+1]) # author note: node 16 cold water input
    ])

    W_list.append(Wj)
    z_list.append(z_j)

# Stack based on time dimension
W = np.vstack(W_list)  # (3N x 7)
z_full = np.hstack(z_list)  # (3N, )

# CVXPY OLS problem: minimise ||W @ theta - z_full||^2 with no constraints.
theta = cp.Variable(7) # [Ul, Um, Uu, Kml, Kum, Vm, Vu]: U - tank insulation, K - thermal conductivity, V - node volume
cost = cp.sum_squares(W @ theta - z_full) # z_j = W_j @ theta
problem = cp.Problem(cp.Minimize(cost))
problem.solve()

# Output estimated parameters
param_names = ["Ul", "Um", "Uu", "Kml", "Kum", "Vm", "Vu"]
print("\nThe optimal value is", problem.value)
for name, val in zip(param_names, theta.value):
    print(f"{name} = {val:.4f}")

# Residual 2-norm, i.e. the square root of the optimal value printed above.
print("The norm of the residual is ", cp.norm(W @ theta - z_full, p=2).value)



The optimal value is 4.072623873503658e-11
Ul = -0.0000
Um = -0.0000
Uu = -0.0000
Kml = 0.0000
Kum = 0.0000
Vm = 1.0000
Vu = 1.0000
The norm of the residual is  6.381711270109027e-06


In [17]:
# Show the first per-timestep regressor block (3 node rows x 7 parameter columns).
# NOTE(review): the recorded output has 26.40 in the U_u / U_m slots, which is
# (T - T_a) scaled by 1, not by the dt = 8 set in the fitting cell — it looks like
# this output is stale from a run with dt = 1; re-run after Restart & Run All.
W_list[0]

array([[ 0.00000000e+00,  0.00000000e+00,  2.64034763e+01,
         0.00000000e+00,  1.30645071e-06,  0.00000000e+00,
        -1.83262222e+05],
       [ 0.00000000e+00,  2.64034750e+01,  0.00000000e+00,
         2.84034750e+01, -1.30645071e-06, -1.83304418e+05,
         0.00000000e+00],
       [-2.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        -2.84034750e+01,  0.00000000e+00, -0.00000000e+00,
        -0.00000000e+00]])

In [24]:
#Rearrange energy balance formulas to compute T[j + 1]

def predict_temperatures(T_init, timesteps, theta, dt=1, cp=4181.3, rho=1000, T_a=22, V=250):
    """Roll the three-node tank model forward with explicit Euler steps.

    Each step applies the node energy balances rearranged for T[j + 1], using
    only ambient losses (U terms) and inter-node exchange (K terms); no heater
    input or water draw is modelled here.

    Parameters
    ----------
    T_init : sequence of 3 floats
        Starting temperatures ordered [upper, middle, lower].
    timesteps : int
        Number of Euler steps to take.
    theta : sequence of 7 floats
        Model parameters ordered (Ul, Um, Uu, Kml, Kum, Vm, Vu).
    dt : float
        Step length. Should match the dt used when theta was fitted.
    cp, rho : float
        Specific heat and density used in the thermal-mass term. Note that
        inside this function ``cp`` shadows any module-level ``cp`` alias.
    T_a : float
        Ambient temperature.
    V : float
        Total tank volume; the lower node gets ``V - Vm - Vu``.

    Returns
    -------
    tuple of three numpy arrays (upper, middle, lower), each of length
    ``timesteps + 1`` with the initial temperature at index 0.
    """
    Ul, Um, Uu, Kml, Kum, Vm, Vu = theta
    # The lower node takes whatever volume the upper and middle nodes leave.
    Vl = V - Vm - Vu
    Tu_pred, Tm_pred, Tl_pred = [], [], []

    # Starting temperatures [upper, middle, lower] nodes
    Tu_pred.append(T_init[0])
    Tm_pred.append(T_init[1])
    Tl_pred.append(T_init[2])

    for _ in range(timesteps):
        # Latest predictions are the state for this step.
        Tu = Tu_pred[-1]
        Tm = Tm_pred[-1]
        Tl = Tl_pred[-1]

        # Upper node: ambient loss plus exchange with the middle node.
        Tu_next = Tu + (1 / (rho * cp * Vu)) * (
            Uu * dt * (T_a - Tu) +
            Kum * dt * (Tm - Tu)
        )

        # Middle node: ambient loss plus exchange with both neighbours.
        Tm_next = Tm + (1 / (rho * cp * Vm)) * (
            Um * dt * (T_a - Tm) +
            Kml * dt * (Tl - Tm) +
            Kum * dt * (Tu - Tm)
        )

        # Lower node: thermal mass uses Vl = V - Vm - Vu (previously this
        # divided by V + Vm + Vu, leaving Vl unused and breaking energy
        # conservation between the middle and lower nodes).
        Tl_next = Tl - (1 / (rho * cp * Vl)) * (
            Ul * dt * (Tl - T_a) +
            Kml * dt * (Tl - Tm)
        )

        Tu_pred.append(Tu_next)
        Tm_pred.append(Tm_next)
        Tl_pred.append(Tl_next)

    return np.array(Tu_pred), np.array(Tm_pred), np.array(Tl_pred)

# Validation

In [None]:
# Display the DataFrame loaded with pd.read_csv in the fitting cell.
df

In [None]:
# Simulate 15 steps from [upper, middle, lower] start temperatures with the fitted theta.
# NOTE(review): dt is left at the function default here, while the fitting cell
# sets dt = 8 — confirm which step length is intended.
predict_temperatures([48.4034762611966,48.403474954745896,20], 15, theta.value)


# NOTE(review): `type` is not assigned anywhere in the visible notebook; if it is
# not defined elsewhere this compares the builtin `type` to 1 and always takes the
# EWH branch. `csv_to_features` and `model` below are likewise not defined in the
# visible cells — presumably they come from another notebook/cell; verify that
# this cell survives Restart & Run All.
if type == 1: # HPWH
    val_data = pd.read_csv("output_site_90159.csv")

else: #EWH
    val_data = pd.read_csv("output_site_90159_ewh.csv") 

# Alternative validation files kept by the author:
#val_data = pd.read_csv("output_site_90023_ewh.csv")
#val_data = pd.read_csv("output_site_10441.csv")
#val_data = pd.read_csv("output_site_null.csv")
X_val, y_val = csv_to_features(val_data, type)

# Keep only the samples whose every target value is at least 40.
X_val_filtered = []
y_val_filtered = []
for x, y in zip(X_val, y_val):
    if all(val >= 40 for val in y):
        X_val_filtered.append(x)
        y_val_filtered.append(y)
X_val = X_val_filtered
y_val = y_val_filtered

# NOTE(review): LinearRegression is imported but not used in the visible code.
from sklearn.linear_model import LinearRegression

# Error metrics on the filtered validation set.
from sklearn.metrics import r2_score
y_predict_val = model.predict(X_val)

r_2_val = r2_score(y_val, y_predict_val)
print(f"test coefficient of determination: {r_2_val}")

from sklearn.metrics import mean_squared_error

# 'uniform_average' averages the per-output MSE values with equal weight.
mse = mean_squared_error(y_val, y_predict_val, multioutput='uniform_average')
print("Mean Squared Error:", mse)


(array([48.40347626, 48.40347626, 48.40347626, 48.40347626, 48.40347626,
        48.40347626, 48.40347626, 48.40347626, 48.40347626, 48.40347626,
        48.40347626, 48.40347626, 48.40347626, 48.40347626, 48.40347626,
        48.40347626]),
 array([48.40347495, 48.40347495, 48.40347495, 48.40347495, 48.40347495,
        48.40347495, 48.40347495, 48.40347495, 48.40347495, 48.40347495,
        48.40347495, 48.40347495, 48.40347495, 48.40347495, 48.40347495,
        48.40347495]),
 array([20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.,
        20., 20., 20.]))

Experiments
1) Perfect foresight of future hourly average water use patterns (accurate in daily shape but not granular)
    36, 54, and 72 gal/day draw profiles
    Simulation-based test: 28.8 to 72 gal/day

2) CasADi python optimization package, IPOPT solver

3) Use cvxpy for OLS, etc.