## Importing Libraries

In [None]:
import os

# Work from the parent directory so the relative paths used later in the
# notebook ("Data/...", "./models", "./Figures/") resolve from there.
os.chdir("../")
# Expose only the CUDA device with index 1 to this process. This is set before
# torch is imported in the next cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [None]:
import torch
from gpytorch.kernels import (
    RBFKernel,
    ScaleKernel,
    PeriodicKernel,
    MaternKernel,
    CosineKernel,
    LinearKernel,
)
from skgpytorch.models import SVGPRegressor, SGPRegressor
import jax.numpy as jnp
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow_probability.substrates.jax as tfp

# Shorthand for the distributions module of TensorFlow Probability (JAX substrate).
dist = tfp.distributions
import pandas as pd
import jax.numpy as jnp
from datetime import datetime
from sklearn.preprocessing import StandardScaler
import numpy as np
from utilities import plot, errors

from datetime import datetime

## Latexify

In [None]:
# Import the figure helpers from probml_utils. If the package is missing,
# install it from GitHub with the IPython %pip magic (notebook-only syntax)
# and retry the import.
try:
    from probml_utils import latexify, savefig, is_latexify_enabled
except ModuleNotFoundError:
    %pip install git+https://github.com/probml/probml-utils.git
    from probml_utils import latexify, savefig, is_latexify_enabled

# Environment variables presumably read by probml_utils: LATEXIFY to enable
# LaTeX-style figure formatting and FIG_DIR as the savefig output folder.
# TODO confirm against the probml_utils source.
os.environ["LATEXIFY"] = "1"
os.environ["FIG_DIR"] = "./Figures/"

## Data loader

In [None]:
def _load_building(key, values, appliances, n, bias=False):
    """Load one building's CSV and turn it into sliding-window features.

    Args:
        key: Building id; fills ``Data/Building{key}_NILM_data_basic.csv``.
        values: Mapping with ``"start_time"`` and ``"end_time"`` given as
            ``YYYY-MM-DD`` strings (both days are included).
        appliances: Appliance column names; their readings are summed to form
            the input signal.
        n: Window length.
        bias: If true, add a constant offset of 100 to the summed signal.

    Returns:
        Tuple ``(windows, y, spread, timestamps, raw_timestamps)`` where
        ``windows`` holds one length-``n`` window per sample, ``y`` is the
        "Refrigerator" column, ``spread`` is max - min of each window,
        ``timestamps`` is the integer timestamp divided by 10**18 and
        ``raw_timestamps`` is the untouched "Timestamp" column.

    Raises:
        ValueError: If the start date is after the end date.
    """
    start_date = datetime.strptime(values["start_time"], "%Y-%m-%d").date()
    end_date = datetime.strptime(values["end_time"], "%Y-%m-%d").date()
    if start_date > end_date:
        # Raising a bare string is itself a TypeError in Python 3.
        raise ValueError("Start Date must be smaller than Enddate.")

    x = None
    y = None
    for appliance in appliances:
        df = pd.read_csv(
            f"Data/Building{key}_NILM_data_basic.csv",
            usecols=["Timestamp", "main", appliance],
        )
        df["date"] = pd.to_datetime(df["Timestamp"]).dt.date
        df = df[(df["date"] >= start_date) & (df["date"] <= end_date)].dropna()
        readings = df[appliance].values
        # Out-of-place sum: an in-place "+=" would write into the array backing
        # the DataFrame (and into ``y`` when the target appliance comes first),
        # and fails on read-only or integer-typed columns.
        x = readings if x is None else x + readings
        if appliance == "Refrigerator":
            y = readings

    if bias:
        x = x + 100 * np.ones(x.shape[0])

    # Timestamps are taken from the frame of the last appliance read.
    raw_timestamps = df["Timestamp"].values
    timestamps = (pd.to_datetime(df["Timestamp"]).astype(int) / 10**18).values

    # Zero-pad both ends so that every sample sits at the centre of a window.
    half = n // 2
    padded = jnp.pad(x, (half, half), "constant", constant_values=(0, 0))
    n_windows = len(padded) - n + 1
    # windows[i, j] == padded[i + j]; built with one gather instead of a
    # Python loop over every sample.
    offsets = jnp.arange(n_windows)[:, None] + jnp.arange(n)[None, :]
    windows = padded[offsets]
    spread = jnp.max(windows, axis=1) - jnp.min(windows, axis=1)
    return windows, y, spread, timestamps, raw_timestamps


def _load_split(split, appliances, n, bias=False):
    """Load every building of a split and stack the per-building results."""
    if not split:
        raise ValueError("At least one building is required in each split.")

    parts = [
        _load_building(key, values, appliances, n, bias)
        for key, values in split.items()
    ]
    windows, targets, spreads, stamps, raw_stamps = zip(*parts)

    x = jnp.concatenate(windows)
    y = jnp.concatenate([jnp.asarray(t) for t in targets]).reshape(-1, 1)
    x_range = jnp.concatenate(spreads).reshape(-1, 1)
    # Kept in NumPy float64: these values need more precision than JAX's
    # default float32 provides.
    timestamps = np.concatenate(stamps).reshape(-1, 1)
    raw_timestamps = [stamp for chunk in raw_stamps for stamp in chunk]
    return x, y, x_range, timestamps, raw_timestamps


def dataset_load(appliances, train, test=None, linear=False, bias=False):
    """Build standardized sequence-to-point train/test sets from the NILM CSVs.

    For every building the listed appliance columns are summed into one signal,
    cut into centred windows of 99 samples, and paired with the "Refrigerator"
    reading of the centre sample. All scalers are fitted on the training split
    only and then applied to the test split.

    Args:
        appliances: Appliance column names to sum; must contain "Refrigerator".
        train: Mapping ``building id -> {"start_time", "end_time"}`` with dates
            as ``YYYY-MM-DD`` strings.
        test: Mapping with the same layout for the test split. Required, even
            though the signature defaults it to ``None``.
        linear: If true, append the standardized per-window range (max - min)
            as an extra last feature column.
        bias: If true, add a constant 100 to the test signal only.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test, x_train_timestamp,
        x_test_timestamp, x_test_timestamp_true, scaler_x, scaler_y,
        scaler_time, scaler_range)``. ``y_train`` is standardized, ``y_test``
        is left in original units, the ``*_timestamp`` entries are standardized
        torch tensors of shape (N, 1) and ``x_test_timestamp_true`` is the list
        of raw test timestamps.

    Raises:
        ValueError: If ``test`` is missing, a split is empty, "Refrigerator" is
            not among ``appliances``, or a start date is after its end date.
    """
    if test is None:
        raise ValueError("dataset_load requires a `test` split.")
    if "Refrigerator" not in appliances:
        raise ValueError('`appliances` must include the target "Refrigerator".')

    n = 99

    x_train, y_train, x_train_range, train_time, _ = _load_split(
        train, appliances, n
    )
    x_test, y_test, x_test_range, test_time, x_test_timestamp_true = _load_split(
        test, appliances, n, bias=bias
    )

    scaler_x = StandardScaler()
    scaler_y = StandardScaler()
    scaler_time = StandardScaler()
    scaler_range = StandardScaler()

    x_train = jnp.array(scaler_x.fit_transform(x_train))
    y_train = jnp.array(scaler_y.fit_transform(y_train))
    x_train_timestamp = torch.tensor(scaler_time.fit_transform(train_time)).reshape(
        -1, 1
    )
    x_train_range = jnp.array(scaler_range.fit_transform(x_train_range))

    x_test = jnp.array(scaler_x.transform(x_test))
    x_test_timestamp = (
        torch.tensor(scaler_time.transform(test_time))
        .reshape(-1, 1)
        .to(torch.float64)
    )
    x_test_range = jnp.array(scaler_range.transform(x_test_range))

    if linear:
        # The window range becomes feature column index n (the 100th column).
        x_train = jnp.concatenate((x_train, x_train_range), axis=1)
        x_test = jnp.concatenate((x_test, x_test_range), axis=1)

    return (
        x_train,
        y_train,
        x_test,
        y_test,
        x_train_timestamp,
        x_test_timestamp,
        x_test_timestamp_true,
        scaler_x,
        scaler_y,
        scaler_time,
        scaler_range,
    )

In [None]:
# Training split: building id -> date range (both days included). The id fills
# the "Data/Building{key}_NILM_data_basic.csv" path inside dataset_load.
train = {
    1: {"start_time": "2011-04-28", "end_time": "2011-05-15"},
    3: {"start_time": "2011-04-19", "end_time": "2011-05-22"},
}
# Test split: a building that is not part of the training split.
test = {
    2: {"start_time": "2011-04-21", "end_time": "2011-05-21"},
}
# Appliance columns that dataset_load sums into the input signal;
# "Refrigerator" is also the prediction target.
appliances = ["Microwave", "Refrigerator", "Dish Washer"]

In [None]:
# Switches forwarded to dataset_load and reused by later cells when choosing
# the checkpoint and figure names.
linear, bias = False, False

# dataset_load returns an 11-tuple; unpack it in three groups
# (arrays, timestamps, fitted scalers).
_loaded = dataset_load(appliances, train, test, linear=linear, bias=bias)
x_train, y_train, x_test, y_test = _loaded[:4]
x_train_timestamp, x_test_timestamp, x_test_time_true = _loaded[4:7]
scaler_x, scaler_y, scaler_time, scaler_range = _loaded[7:]

In [None]:
# Sanity check: show the shapes of the train/test inputs and targets.
x_train.shape, x_test.shape, y_train.shape, y_test.shape

In [None]:
# Convert the loaded arrays to float32 torch tensors for the GP model.
# Inputs keep their 2-D layout; targets are flattened to 1-D.
x, xt = (torch.tensor(np.array(arr)).float() for arr in (x_train, x_test))
y, yt = (
    torch.tensor(np.array(arr)).flatten().float() for arr in (y_train, y_test)
)

## GP Model

In [None]:
def GP_model(train, test, linear, ard, model_name):
    """Build the sparse GP regressor and optionally fit it and/or restore weights.

    The training data is taken from the notebook-level tensors ``x`` and ``y``;
    every 20th row of ``x`` is used as an inducing point. The model and its
    data are placed on the "cuda" device.

    Args:
        train: If true, fit the model, plot ``loss[0]`` and save the state dict
            to ``./models/<model_name>``.
        test: If true, load the state dict from ``./models/<model_name>``.
        linear: If true, add a scaled linear kernel to the Matern kernel.
        ard: Number of ARD dimensions for the Matern kernel.
        model_name: Checkpoint file name inside ``./models``.

    Returns:
        The ``SGPRegressor`` instance.
    """
    kernel = ScaleKernel(MaternKernel(nu=2.5, ard_num_dims=ard))
    if linear:
        # NOTE(review): `(99)` is the plain int 99, not a one-element tuple.
        # Left untouched because changing it may alter the stored shape of
        # `active_dims` and break existing checkpoints - TODO confirm.
        kernel = kernel + ScaleKernel(LinearKernel(active_dims=(99)))
    inducing_points = x[np.arange(0, x.shape[0], 20)]

    model = SGPRegressor(x.to("cuda"), y.to("cuda"), kernel, inducing_points).to("cuda")
    model_path = os.path.join("./models", model_name)
    if train:
        loss = model.fit(lr=1e-2, n_epochs=1500, verbose=1, random_state=0)

        plt.plot(np.asarray(loss[0]))

        # torch.save does not create missing parent directories.
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        torch.save(model.state_dict(), model_path)
    if test:
        model.load_state_dict(torch.load(model_path))

    return model

In [None]:
# The checkpoint file depends on whether the linear kernel term is enabled.
model_name = "Seq_to_pt_linear_final.pt" if linear else "Seq_to_pt_final.pt"

# Restore saved weights only; no fitting in this run.
model = GP_model(
    train=False,
    test=True,
    linear=linear,
    ard=x.shape[1],
    model_name=model_name,
)

## Prediction

In [None]:
# Predict on the test windows. The model was fitted on standardized targets,
# so its mean and uncertainty must be mapped back to original units.
pred_dist = model.predict((xt).to("cuda"))

# Mean: full inverse transform (multiply by scale_, add mean_).
y_mean = scaler_y.inverse_transform(
    pred_dist.loc.detach().reshape(-1, 1).cpu()
).squeeze()

print(y_test.shape, y_mean.shape)
# Clamp negative predictions to zero. Only a lower bound is given: passing
# y_mean.max() as the upper bound would collapse every value to that maximum
# whenever it is negative.
y_mean = np.clip(y_mean, 0, None, out=y_mean)

# Uncertainty: a shift does not change spread, so only the scale applies.
# inverse_transform would wrongly add mean_ to these quantities; the variance
# scales with scale_**2 and the standard deviation with scale_.
# float() keeps the float32 dtype of the predictions.
y_scale = float(scaler_y.scale_[0])
var_pred = pred_dist.variance.detach().cpu().numpy().reshape(-1) * y_scale**2
std_pred = pred_dist.stddev.detach().cpu().reshape(-1) * y_scale

In [None]:
# Evaluate the predictions with the metric helpers from utilities.errors,
# in the same order as they are printed.
mae, msll, qce = (
    errors.mae(torch.tensor(y_mean), yt),
    errors.msll(var_pred, y_mean, yt),
    errors.qce(std_pred, y_mean, yt),
)
print("mae, msll, qce - ", mae, msll, qce)

## Figure 3

In [None]:
# Segments of the test series to plot, given as the 4th and 5th arguments of
# plot.prediction_plots (presumably start offset and length - confirm there).
# The bias experiment uses a single segment.
start = [4170] if bias else [4000, 4800, 13000]
idx = [300] if bias else [500, 200, 300]

# Un-standardize the first 99 feature columns and keep column 49, the centre
# sample of each window.
# NOTE(review): this rebinds the global `x`, which GP_model reads as its
# training inputs; re-running the model cell after this one uses the wrong `x`.
x = scaler_x.inverse_transform(xt[:, 0:99])[:, 49]
i = 0
for i, (begin, length) in enumerate(zip(start, idx)):
    figure_name = "Seq_to_point_bias" if bias else "Seq_to_point_plt" + str(i + 1)
    plot.prediction_plots(x, yt, y_mean, begin, length, var_pred, figure_name, i)

## Calibration

In [None]:
# Calibration plot of the test predictions, saved under a name that encodes
# the active bias/linear settings.
fig, ax = plt.subplots(1)
# NOTE(review): latexify runs after the figure exists; if it sets the figure
# size through rcParams, this figure may not pick it up - confirm.
latexify(width_scale_factor=2.5, fig_height=1.75)
sigma_pred = jnp.sqrt(var_pred)
df, df1 = plot.calibration_regression(
    y_mean.squeeze(),
    sigma_pred.squeeze(),
    y_test.squeeze(),
    "test",
    "r",
    ax,
)
ax.legend()

# (bias, linear) -> figure name.
calibration_fig_names = {
    (True, True): "Sequence_to_point_linear_bias_calibration",
    (True, False): "Sequence_to_point_bias_calibration",
    (False, True): "Sequence_to_point_linear_calibration",
    (False, False): "Sequence_to_point_calibration",
}
savefig(calibration_fig_names[(bool(bias), bool(linear))])

In [None]:
# Compute the calibration table with errors.find_p_hat; its index and values
# are then passed to errors.ace to get a single calibration error.
cal = errors.find_p_hat(np.array(yt), y_mean, sigma_pred)
# The index of the table is used as the first argument of errors.ace
# (presumably the nominal probability levels - confirm in utilities.errors).
p = cal.index
mae_cal = errors.ace(p.values, cal.values)
print("calibration error: ", mae_cal)