In [None]:
import numpy as np
import cvxpy as cp
import pandas as pd

type = 1  # Change this to 1 for HPWH
if type == 1:
    df = pd.read_csv("output_site_null_60_12.csv")

else:
    df = pd.read_csv("output_site_null_ewh.csv")

# Constants
cp_val = 4181.3
rho = 1000
dt = 8 #what should this be? euler steps per timestep?
T_a = 22
V = 250 #(L)

# Data
P_u = [value if mode == "Upper On" else 0 for value, mode in zip(df["Average Electric Power"], df["Water Heating Mode"])]
P_m = [value if mode == "Lower On" else 0 for value, mode in zip(df["Average Electric Power"], df["Water Heating Mode"])]
T_u = df["T_WH3"].values
T_m = df["T_WH10"].values
T_l = df["T_WH12"].values

W_list = []
z_list = []

for j in range(len(T_u) - 1):  # because of j+1
    Wj = np.zeros((3, 7))  # Each Wj is 3x7

    # Row 1: Upper node (δtpu)
    Wj[0, 2] = dt * (T_u[j] - T_a)       # U_u
    Wj[0, 4] = dt * (T_u[j] - T_m[j])    # K_um
    Wj[0, 6] = rho * cp_val * (T_u[j+1] - T_u[j])  # V_u

    # Row 2: Middle node (δtpm)
    Wj[1, 1] = dt * (T_m[j] - T_a)       # U_m
    Wj[1, 3] = dt * (T_m[j] - T_l[j])    # K_ml
    Wj[1, 4] = dt * (T_m[j] - T_u[j])    # K_um
    Wj[1, 5] = rho * cp_val * (T_m[j+1] - T_m[j])  # V_m

    # Row 3: Lower node
    Wj[2, 0] = dt * (T_l[j] - T_a)       # U_l
    Wj[2, 3] = dt * (T_l[j] - T_m[j])    # K_ml
    Wj[2, 5] = -rho * cp_val * (T_l[j+1] - T_l[j])  # -V_m
    Wj[2, 6] = -rho * cp_val * (T_l[j+1] - T_l[j])  # -V_u

    # not sure what this should be
    z_j = np.array([
        # δtpu upper node
        dt * (P_u[j]), #Power node 3 * time interval (60s?)

        # δtpm middle node
        dt * (P_m[j]),

        # lower node energy change
        V * rho * cp_val * (T_l[j] - T_l[j+1]) #node 16 cold water input
    ])

    W_list.append(Wj)
    z_list.append(z_j)

# Stack based on time dimension
W = np.vstack(W_list)  # (3N x 7)
z_full = np.hstack(z_list)  # (3N, )

# CVXPY OLS problem
theta = cp.Variable(7) #Ul, Um, Uu, Kml, Kum, Vm, Vu] U - Tank Insulation, K - thermal conductivity, V - node volume
cost = cp.sum_squares(W @ theta - z_full) #z_j = W_j @ theta
problem = cp.Problem(cp.Minimize(cost))
problem.solve()

# Output estimated parameters
param_names = ["Ul", "Um", "Uu", "Kml", "Kum", "Vm", "Vu"]
print("\nThe optimal value is", problem.value)
for name, val in zip(param_names, theta.value):
    print(f"{name} = {val:.4f}")

print("The norm of the residual is ", cp.norm(W @ theta - z_full, p=2).value)



The optimal value is 2.171681078458453e+19
Ul = 236869.9054
Um = -152650.9392
Uu = -5704.3321
Kml = 917171.3392
Kum = 927299.7436
Vm = 40.3624
Vu = 50.1466
The norm of the residual is  4660129910.698255


In [98]:
T_u_positive = [value for value in T_u if value > 0]
T_u_positive

[]

In [100]:
#Rearrange energy balance formulas to compute T[j + 1]

def predict_temperatures(T_init, timesteps, theta, draws, dt=1, cp=4181.3, rho=1000, T_a=22, V =250):
    Ul, Um, Uu, Kml, Kum, Vm, Vu = theta
    Vl = V - Vm  - Vu
    Tu_pred, Tm_pred, Tl_pred = [], [], []

    # Starting Temperatures [upper, middle, lower] nodes
    Tu_pred.append(T_init[0])
    Tm_pred.append(T_init[1])
    Tl_pred.append(T_init[2])

    for t in range(timesteps - 1):
        Tu = Tu_pred[-1] #get last predictions
        Tm = Tm_pred[-1]
        Tl = Tl_pred[-1]
        vt = draws[t] #draws

        #rearrange energy balance equations for Euler Update
        # Upper node
        Tu_next = Tu + (1 / (rho * cp * Vu)) * (
            Uu * dt * vt * (T_a - Tu) +
            Kum * dt * vt * (Tm - Tu)
        )

        # Middle node
        Tm_next = Tm + (1 / (rho * cp * Vm)) * (
            Um * dt * vt *  (T_a - Tm) +
            Kml * dt * vt * (Tl - Tm) +
            Kum * dt * vt * (Tu - Tm)
        )

        # Lower node
        Tl_next = Tl - (1 / (rho * cp * (V + Vm + Vu)))  *(
            Ul * dt * vt *  (Tl - T_a) +
            Kml * dt * vt * (Tl - Tm)
        )

        Tu_pred.append(Tu_next)
        Tm_pred.append(Tm_next)
        Tl_pred.append(Tl_next)

    return np.array(Tu_pred), np.array(Tm_pred), np.array(Tl_pred)

# Validation

In [101]:
import pandas as pd
from sklearn.metrics import mean_squared_error

# Choose data based on type

if type == 1:  # HPWH
    val_data = pd.read_csv("output_site_90023.csv")
else:  # EWH
    val_data = pd.read_csv("output_site_90023_ewh.csv")

# Parameters
window_size = 8
predictions = []
true_values = []

val_data = val_data[val_data['Hot Water Outlet Temperature (C)'] >= 40] #filter for above 40 temperatures

# Sanity check, iterate through idle
for index in range(len(val_data) - window_size):
    t_u = val_data['T_WH3'].iloc[index]
    t_m = val_data['T_WH10'].iloc[index]
    t_l = val_data['T_WH12'].iloc[index]
    draws = val_data['Draw Data'].iloc[index: index + window_size].reset_index(drop=True)
    
    y_pred = predict_temperatures([t_u, t_m, t_l], window_size, theta.value, draws)
    y_true = [
        val_data['T_WH3'].iloc[index  : index  + window_size].values,
        val_data['T_WH10'].iloc[index : index  + window_size].values,
        val_data['T_WH12'].iloc[index : index + window_size].values
    ]
    
    predictions.append(y_pred)
    true_values.append(y_true)
    index += 1

# Flatten lists for metrics, depending on model output format
# Example: compute MSE for each level
import numpy as np

y_pred_array = np.array(predictions)  # shape: (n_sam09ples, 3, window_size) or similar
y_true_array = np.array(true_values)

mse_upper = mean_squared_error(y_true_array[:, 0, :].flatten(), y_pred_array[:, 0, :].flatten())
mse_middle = mean_squared_error(y_true_array[:, 1, :].flatten(), y_pred_array[:, 1, :].flatten())
mse_lower = mean_squared_error(y_true_array[:, 2, :].flatten(), y_pred_array[:, 2, :].flatten())

print(f"MSE Upper: {mse_upper:.3f}")
print(f"MSE Middle: {mse_middle:.3f}")
print(f"MSE Lower: {mse_lower:.3f}")


MSE Upper: 1.303
MSE Middle: 7.826
MSE Lower: 49.976


In [87]:
predictions[0]


(array([48.40347495, 48.40347495, 48.40347495, 48.40347495, 48.40347495,
        48.40347495, 48.40347495, 48.40347495]),
 array([48.40347626, 48.40347626, 48.40347626, 48.40347626, 48.40347626,
        48.40347626, 48.40347626, 48.40347626]),
 array([48.40347495, 48.40347495, 48.40347495, 48.40347495, 48.40347495,
        48.40347495, 48.40347495, 48.40347495]))

In [102]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from datetime import datetime
import ipywidgets as widgets
from IPython.display import display
import matplotlib.dates as mdates

#Predictions for 1 timestep

# Constants
intervals = 96  # 1440 minutes / 15 min interval = 96 per day
year = "2013"
window = 8


# Create datetime labels for x-axis
start_time = datetime(2013, 1, 1, 0, 0)
end_time = datetime(2013, 1, 1, 23, 59)
datetime_list = pd.date_range(start=start_time, end=end_time, freq="15min")
time_labels = [dt.strftime("%H:%M") for dt in datetime_list]

import copy
# Interactive plot function
def plot_predictions_by_hour(day, hour):
    plt.figure(figsize=(10, 5))
    
    start_idx = intervals * day
    
    x_day = (val_data[start_idx : start_idx + intervals])
    predictions_day = predictions[start_idx : start_idx + intervals]
    actual_day = true_values[start_idx : start_idx + intervals]
    draw_data_day = x_day['Draw Data']
    power_day = x_day['Average Electric Power']


    time_idx = hour * 4  # 4 intervals per hour

    pred_times = time_labels[time_idx : time_idx + 8]
    y_pred = predictions_day[time_idx] #returns predictions for top, middle, and bottom nodes
    y_true = actual_day[time_idx] 


    draw_slice = draw_data_day[time_idx : time_idx + 8]
    power_slice = power_day[time_idx : time_idx + 8] * 100

    # Convert day number to readable date
    day_num = str((day + 17) % 365).rjust(3, '0')
    date_label = datetime.strptime(year + "-" + day_num, "%Y-%j").strftime("%m-%d-%Y")

    # Primary axis for temperatures
    fig, ax1 = plt.subplots(figsize=(10, 5))

    ax1.plot(pred_times, y_true[0], label="Actual Outlet Temp", color="black", marker='o')
    ax1.plot(pred_times, y_pred[0], label="Predicted Outlet Temp", color="orange", marker='x', linestyle="--")
    ax1.set_ylabel("Temperature (°C)")
    ax1.set_title(f"{date_label} @ {time_labels[time_idx]} — Predictions for t+0 to t+7")
    ax1.set_xticks(pred_times)
    ax1.set_xticklabels(pred_times, rotation=45)
    ax1.grid(True)

    # Secondary axis for water draw
        # Secondary axis for water draw
    ax2 = ax1.twinx()
    ax2.plot(pred_times, draw_slice, label="Water Drawn (L)", color="blue", linestyle="dotted", marker='s')
    ax2.plot(pred_times, power_slice, label="Average Power (kWH)", color = "green")
    ax2.set_ylabel("Water Drawn (L)")
    ax2.set_ylim(0, 200)  # Ensure 0 is at the bottom


    # Combined legend
    lines_1, labels_1 = ax1.get_legend_handles_labels()
    lines_2, labels_2 = ax2.get_legend_handles_labels()
    ax1.legend(lines_1 + lines_2, labels_1 + labels_2, loc='upper left', bbox_to_anchor=(1.05, 1))

    plt.tight_layout()
    plt.show()
    print(power_slice/100)

# Slider widgets
day_slider = widgets.IntSlider(value=25, min=0, max=len(val_data)//intervals - 1, step=1, description="Day:")
hour_slider = widgets.IntSlider(value=0, min=0, max=23, step=1, description="Hour:")

# Display interactive plot
widgets.interact(plot_predictions_by_hour, day=day_slider, hour=hour_slider)



interactive(children=(IntSlider(value=25, description='Day:', max=9676), IntSlider(value=0, description='Hour:…

<function __main__.plot_predictions_by_hour(day, hour)>

Experiments
1) Perfect foresight of future hourly average water use patterns (accurate in daily shape but not granular)
    36, 54, and 72 gal/day draw profilesout
    Simulation based test 28.8 to 72 gal/day

2) CasADi python optimization package, IPOPT solver

3) Use cvxpy for OLS, etc.