## Importing Packages

In [None]:
# Session setup: working directory and GPU selection.
import os

# Move one directory up; the relative paths used by later cells
# ("Data/...", "./models", "./Figures/") are resolved from there.
os.chdir("../")
# Expose only the CUDA device with index 2 to this process. This is set
# here, before torch is imported in the next cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

In [None]:
import torch
from gpytorch.kernels import (
    RBFKernel,
    ScaleKernel,
    PeriodicKernel,
    MaternKernel,
    CosineKernel,
)
from skgpytorch.models import SVGPRegressor, SGPRegressor, ExactGPRegressor

In [None]:
import jax.numpy as jnp
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow_probability.substrates.jax as tfp

dist = tfp.distributions
import pandas as pd
import jax.numpy as jnp
from datetime import datetime
from sklearn.preprocessing import StandardScaler
from gpytorch.constraints import GreaterThan
from gpytorch.metrics import (
    mean_standardized_log_loss,
    negative_log_predictive_density,
    mean_squared_error,
)
import time
import numpy as np
from utilities import errors, plot

## Latexifying Figures

In [None]:
# Import the figure helpers from probml_utils, installing the package from
# GitHub on first use. NOTE: `%pip` is an IPython magic, so this cell only
# runs inside a notebook/IPython session, not as a plain Python script.
try:
    from probml_utils import latexify, savefig, is_latexify_enabled
except ModuleNotFoundError:
    %pip install git+https://github.com/probml/probml-utils.git
    from probml_utils import latexify, savefig, is_latexify_enabled

# Environment switches presumably read by probml_utils (TODO confirm):
# enable LaTeX-styled figures and set the directory figures are saved to.
os.environ["LATEXIFY"] = "1"
os.environ["FIG_DIR"] = "./Figures/"

## Data Loader

In [None]:
def _load_split(appliances, spec, target="Refrigerator", offset=0.0):
    """Load and concatenate the readings of every building in one split.

    Parameters
    ----------
    appliances : sequence of str
        Appliance column names; their readings are summed into the input signal.
    spec : dict
        Maps a building id to ``{"start_time": "YYYY-MM-DD",
        "end_time": "YYYY-MM-DD"}``; both dates are inclusive.
    target : str
        Appliance column used as the regression target.
    offset : float
        Constant added to the summed input signal (0 leaves it unchanged).

    Returns
    -------
    tuple
        ``(x, y, timestamps, raw_timestamps)``: three 1-D numpy arrays and a
        list holding the values of the ``Timestamp`` column.

    Raises
    ------
    ValueError
        If a window's start date is after its end date.
    """
    # usecols must not contain duplicates; keep the caller's order.
    columns = list(dict.fromkeys(appliances))
    x_parts, y_parts, stamp_parts, raw_times = [], [], [], []

    for building, window in spec.items():
        start_date = datetime.strptime(window["start_time"], "%Y-%m-%d").date()
        end_date = datetime.strptime(window["end_time"], "%Y-%m-%d").date()
        if start_date > end_date:
            raise ValueError("Start Date must be smaller than Enddate.")

        # Read all appliance columns in ONE pass so that dropna() removes the
        # same rows for every appliance. Reading and cleaning each appliance
        # separately can leave columns of different length or row alignment,
        # which breaks (or silently corrupts) the sum below.
        df = pd.read_csv(
            f"Data/Building{building}_NILM_data_basic.csv",
            usecols=["Timestamp", "main", *columns],
        )
        parsed = pd.to_datetime(df["Timestamp"])
        df["date"] = parsed.dt.date
        df = df[(df["date"] >= start_date) & (df["date"] <= end_date)].dropna()

        # Out-of-place accumulation: never mutates the frame's buffers and
        # tolerates a mix of integer and float columns.
        x = np.zeros(len(df))
        for name in appliances:
            x = x + df[name].to_numpy()
        if offset:
            x = x + offset

        x_parts.append(x)
        y_parts.append(df[target].to_numpy())
        # Nanoseconds since epoch, rescaled; standardised by the caller.
        stamp_parts.append((parsed.loc[df.index].astype("int64") / 10**18).to_numpy())
        raw_times.extend(df["Timestamp"])

    if not x_parts:
        return np.empty(0), np.empty(0), np.empty(0), raw_times
    return (
        np.concatenate(x_parts),
        np.concatenate(y_parts),
        np.concatenate(stamp_parts),
        raw_times,
    )


def dataset_load(appliances, train, test=None, bias=False):
    """Build standardised train/test tensors from the per-building CSV files.

    The model input is the sum of the listed appliance columns, and the target
    is the "Refrigerator" column. Inputs, targets and timestamps of the
    training split are standardised with freshly fitted ``StandardScaler``
    objects; the test inputs and timestamps reuse those scalers, while the
    test target is returned unscaled.

    Parameters
    ----------
    appliances : sequence of str
        Appliance column names; must contain "Refrigerator".
    train : dict
        ``{building_id: {"start_time": "YYYY-MM-DD", "end_time": "YYYY-MM-DD"}}``
        describing the training windows (dates inclusive).
    test : dict, optional
        Same structure for the test windows. ``None`` or an empty dict yields
        empty test tensors.
    bias : bool
        When true, 100 is added to the (unscaled) test input signal.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test, x_train_timestamp,
        x_test_timestamp, scaler_x, scaler_y, scaler_time, test_time,
        train_time)``. The ``x_*`` tensors are float32 with shape ``(N, 1)``,
        the ``y_*`` tensors float32 with shape ``(N,)``, and the ``*_time``
        entries are lists of the original ``Timestamp`` values.

    Raises
    ------
    ValueError
        If "Refrigerator" is not in ``appliances`` or a window has its start
        date after its end date.
    """
    # Kept from the original implementation: global side effect on torch.
    torch.set_default_dtype(torch.float64)

    if "Refrigerator" not in appliances:
        raise ValueError('appliances must contain the target column "Refrigerator".')

    x_tr, y_tr, ts_tr, train_time = _load_split(appliances, train)
    x_te, y_te, ts_te, test_time = _load_split(
        appliances, test or {}, offset=100.0 if bias else 0.0
    )

    scaler_x = StandardScaler()
    scaler_y = StandardScaler()
    scaler_time = StandardScaler()

    x_train = scaler_x.fit_transform(x_tr.reshape(-1, 1))
    y_train = scaler_y.fit_transform(y_tr.reshape(-1, 1))
    x_train_timestamp = scaler_time.fit_transform(ts_tr.reshape(-1, 1))

    x_test = x_te.reshape(-1, 1)
    x_test_timestamp = ts_te.reshape(-1, 1)
    if len(x_test):
        # StandardScaler.transform rejects zero-sample input, so only scale
        # when a test split was actually requested.
        x_test = scaler_x.transform(x_test)
        x_test_timestamp = scaler_time.transform(x_test_timestamp)

    def _as_float32(values):
        return torch.as_tensor(np.asarray(values), dtype=torch.float32)

    return (
        _as_float32(x_train).reshape(-1, 1),
        _as_float32(y_train).reshape(-1),
        _as_float32(x_test).reshape(-1, 1),
        _as_float32(y_te).reshape(-1),
        _as_float32(x_train_timestamp).reshape(-1, 1),
        _as_float32(x_test_timestamp).reshape(-1, 1),
        scaler_x,
        scaler_y,
        scaler_time,
        test_time,
        train_time,
    )

In [None]:
# Train/test windows. Each key is the building number that is substituted
# into the CSV file name ("Data/Building<key>_NILM_data_basic.csv"); the
# start/end dates are "YYYY-MM-DD" strings and both ends are inclusive.
train = {
    1: {"start_time": "2011-04-28", "end_time": "2011-05-15"},
    3: {"start_time": "2011-04-19", "end_time": "2011-05-22"},
}
test = {
    2: {"start_time": "2011-04-21", "end_time": "2011-05-21"},
}

# Appliance columns that are summed into the model input; "Refrigerator"
# doubles as the regression target inside dataset_load.
appliances = ["Microwave", "Refrigerator", "Dish Washer"]

In [None]:
# When True, dataset_load adds a constant offset of 100 to the test input,
# producing an artificially biased test set.
bias = False
# Load the standardised tensors, the fitted scalers and the timestamp lists.
(
    x_train,
    y_train,
    x_test,
    y_test,
    x_train_timestamp,
    x_test_timestamp,
    scaler_x,
    scaler_y,
    scaler_time,
    test_time,
    train_time,
) = dataset_load(appliances, train, test, bias=bias)

In [None]:
# Sanity check: display the shapes of the loaded tensors.
x_train.shape, x_train_timestamp.shape, x_test.shape, y_train.shape, y_test.shape

In [None]:
# Aliases used by the model and plotting cells below.
x_train_full = x_train
# NOTE(review): self-assignment, has no effect.
y_train = y_train
x_test_full = x_test
# Display the training shape and the test dtype.
x_train_full.shape, x_test_full.dtype

## GP Model

In [None]:
def GP_model(train, test):
    """Build the sparse GP regressor, optionally training it and/or loading weights.

    The model is built on the notebook-level ``x_train_full`` / ``y_train``
    tensors and placed on the CUDA device.

    Parameters
    ----------
    train : bool
        When true, fit the model, plot ``loss[0]`` of the value returned by
        ``model.fit`` and save the state dict to the checkpoint file.
    test : bool
        When true, load the state dict from the checkpoint file.

    Returns
    -------
    SGPRegressor
        The constructed (and possibly trained / restored) model.

    Notes
    -----
    The parameter names shadow the notebook-level ``train`` / ``test`` split
    dictionaries inside this function; here they are plain flags.
    """
    # Single definition of the checkpoint location for both save and load.
    model_path = os.path.join("./models", "Point_to_point_main_power.pt")

    kernel = ScaleKernel(MaternKernel(nu=2.5))
    # Every 20th training input serves as an inducing point.
    inducing_points = x_train_full[np.arange(0, x_train_full.shape[0], 20)]
    model = SGPRegressor(
        x_train_full.to("cuda"), y_train.to("cuda"), kernel, inducing_points
    ).to("cuda")

    if train:
        loss = model.fit(lr=1e-3, n_epochs=3000, verbose=1, thetas=None, random_state=0)
        plt.plot(np.asarray(loss[0]))

        # torch.save does not create missing directories; make sure the
        # target folder exists so a long training run is not lost.
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        torch.save(model.state_dict(), model_path)
    if test:
        model.load_state_dict(torch.load(model_path))
    return model

In [None]:
# Skip training and restore the previously saved weights.
model = GP_model(train=False, test=True)

## Prediction

In [None]:
# Predictive distribution for the (standardised) test inputs.
pred_dist = model.predict((x_test_full).to("cuda"))

# The target was standardised as y_s = (y - mean_) / scale_, so in original
# units:  mean = mean_s * scale_ + mean_,  var = var_s * scale_**2,
# std = std_s * scale_.  Only the mean may go through inverse_transform;
# pushing the variance/stddev through it would add mean_ and use the wrong
# power of scale_.
y_scale = float(scaler_y.scale_[0])

y_mean = pred_dist.loc
y_mean = scaler_y.inverse_transform(y_mean.detach().reshape(-1, 1).cpu()).squeeze()

print(y_test.shape, y_mean.shape)
# Power readings cannot be negative: clip the predicted mean at zero.
y_mean = np.clip(y_mean, 0, None)

var_pred = pred_dist.variance.detach().cpu().numpy().reshape(-1) * y_scale**2
std_pred = torch.tensor(pred_dist.stddev.detach().cpu().numpy().reshape(-1) * y_scale)

## Metrics

In [None]:
# Evaluate the predictions on the test split with the project-local
# `utilities.errors` helpers (names suggest mean absolute error, mean
# standardised log loss and a quantile calibration error — TODO confirm
# against utilities/errors).
mae = errors.mae(torch.tensor(y_mean), y_test)
msll = errors.msll(var_pred, y_mean, y_test)
qce = errors.qce(std_pred, y_mean, y_test)
print("mae, msll, qce - ", mae, msll, qce)

## Figure 1

In [None]:
# Windows of the test series to visualise: start offsets and window lengths.
start = [4000, 4800, 13000]
idx = [500, 200, 300]

# The biased experiment shows a single window.
if bias:
    start = [4170]
    idx = [300]

# Test input back in original units.
x = scaler_x.inverse_transform(x_test_full.cpu().reshape(-1, 1))

for i, (window_start, window_len) in enumerate(zip(start, idx)):
    # Only the figure name differs between the biased and the regular run.
    fig_name = "Point_to_point_bias" if bias else "Point_to_point_plt" + str(i + 1)
    plot.prediction_plots(
        x, y_test, y_mean, window_start, window_len, var_pred, fig_name, i
    )

## Calibration

In [None]:
# Calibration plot of the predictive distribution on the test split.
fig, ax = plt.subplots(1)
latexify(width_scale_factor=2.5, fig_height=1.75)

# Predictive standard deviation derived from the predictive variance.
sigma_pred = jnp.sqrt(var_pred)
df, df1 = plot.calibration_regression(
    y_mean.squeeze(), sigma_pred.squeeze(), y_test.squeeze(), "test", "r", ax
)
ax.legend()

# The biased experiment is stored under its own file name.
savefig("Point_to_point_bias_calibration" if bias else "Point_to_point_calibration")

In [None]:
# Scalar calibration error from the project-local `utilities.errors` helpers.
# `cal` is used as an indexed object (presumably a pandas Series — TODO
# confirm): its index and values are handed to errors.ace.
cal = errors.find_p_hat(np.array(y_test), y_mean, sigma_pred)
p = cal.index
mae_cal = errors.ace(p.values, cal.values)
print("calibration error: ", mae_cal)

## Figure 2(a)

In [None]:
# Dense, evenly spaced grid of input values (original units) on which the
# learned input -> appliance mapping is evaluated.
x_lin_max = 3000
x_lin = np.linspace(0, x_lin_max, num=15656)

# Matching evenly spaced grid over the time range of the test split.
test_times = scaler_time.inverse_transform(x_test_timestamp)
x_time = np.linspace(test_times.min(), test_times.max(), num=15656)

# Standardise the grid with the training scaler and shape it as model input.
x_lin_scale = scaler_x.transform(x_lin.reshape(-1, 1)).flatten()
x_new = torch.tensor(x_lin_scale).reshape(-1, 1).to(torch.float32)

x_new.shape, x_new.dtype

In [None]:
# Predict on the dense input grid and map the predictive mean back to
# original units. NOTE: this overwrites the `pred_dist` / `y_mean` computed
# for the test split in the Prediction cell.
pred_dist = model.predict(x_new.to("cuda"))
y_mean = pred_dist.loc
y_mean = scaler_y.inverse_transform(y_mean.cpu().reshape(-1, 1))

In [None]:
# Figure 2(a): predicted mean over the input grid, overlaid on the training
# points (both shown in original units).
plt.figure()
latexify(width_scale_factor=2, fig_height=1.5)
# NOTE(review): `start` and `idx` are not used in this cell.
start = 500
idx = 4000
plt.plot(x_lin, y_mean, "k", label=" Predicted Mean", alpha=0.7)
plt.scatter(
    scaler_x.inverse_transform(x_train_full[:, 0].reshape(-1, 1)),
    scaler_y.inverse_transform(y_train.reshape(-1, 1)),
    s=4,
    label="Appliance Power",
)
# Show only the 0-1500 part of the x axis.
plt.xlim(00, 1500)
sns.despine()

plt.xlabel("Train Mains")
plt.ylabel("Train Appliance Power")
# Labelled vertical markers (the first is drawn at x=145 but labelled ~150).
plt.axvline(x=145, color="olive", linestyle="dotted", label="Mains = ~150")
plt.axvline(x=188, color="red", linestyle="dotted", label="Mains = ~188")

# Unlabelled vertical markers.
plt.axvline(x=490, color="magenta", linestyle="dotted")
plt.axvline(x=1250, color="brown", linestyle="dotted")

plt.legend(frameon=False, fontsize=6, bbox_to_anchor=(0.35, 0.55))
savefig("Main_vs_app_mean")

## Figure 2(b)

In [None]:
# Figure 2(b): kernel density estimates of the target in the train and test
# splits. The train target is mapped back to original units first; the test
# target is used as loaded.
plt.figure()
latexify(width_scale_factor=2.5, fig_height=1.75)
sns.kdeplot(
    data={
        "Train Appliance": scaler_y.inverse_transform(
            y_train.reshape(-1, 1).cpu()
        ).squeeze(),
        "Test Appliance": (y_test.cpu()).squeeze(),
    }
)
sns.despine()
savefig("kde")

## Figure 2(c)

In [None]:
# Split the raw test timestamps into date and time-of-day strings for the
# axis label of Figure 2(c).
values = test_time

x_ticks_labels = pd.to_datetime(values)
# NOTE(review): bare expression in the middle of a cell — it is not displayed
# and has no effect.
x_ticks_labels
# Drop the "-04:00" suffix (assumes every timestamp string carries exactly
# this UTC offset — TODO confirm against the data).
time_ = [(i.split("-04:00")[0].strip()) for i in test_time[:]]

# "<date> <time>" -> date part and time part.
date = [(i.split(" ")[0].strip()) for i in time_[:]]
mins_data = [(i.split(" ")[1].strip()) for i in time_[:]]
# NOTE(review): raises IndexError for any timestamp that does not contain
# ":00"; `secs` is not used by the cells visible here.
secs = [(i.split(":00")[1].strip()) for i in time_[:]]


# English month names, indexed by month number minus one. Spelled out
# explicitly so the labels do not depend on the process locale.
_MONTH_NAMES = (
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
)


def date_con(input_string: str) -> str:
    """Convert a ``YYYY-MM-DD`` date string into ``"<day> <Month name>"``.

    Parameters
    ----------
    input_string : str
        Date with exactly three "-"-separated fields (year, month, day).

    Returns
    -------
    str
        The day field as given (leading zero kept), a space, and the English
        month name, e.g. ``"28 April"``. A month number outside 1-12 yields an
        empty month name.

    Raises
    ------
    ValueError
        If the string does not have three "-"-separated fields or the month
        field is not an integer.
    """
    _year, month, day = input_string.split("-")
    month_number = int(month)
    ret_month = _MONTH_NAMES[month_number - 1] if 1 <= month_number <= 12 else ""
    return f"{day} {ret_month}"

In [None]:
# Figure 2(c): scatter of mains power over a short time window.
idx = 100
fig, ax = plt.subplots(1, 1)
start = 13100
time_plot = scaler_time.inverse_transform(x_test_timestamp.cpu().reshape(-1, 1))
latexify(width_scale_factor=3, fig_height=1.75)

# The same window is applied to every series below.
window = slice(start, start + idx)

# NOTE(review): the x values come from the *test* timestamps while the y
# values come from `x_train`; confirm this pairing is intended.
ax.scatter(
    time_plot[window],
    scaler_x.inverse_transform(x_train)[window],
    label="Train Main",
    s=6,
)
mins = mins_data[window]
dates = date[window]
ax.set_ylabel("Train Mains Power")

# Hide the x ticks; the covered time span is spelled out in the axis label.
plt.tick_params(axis="x", which="both", bottom=False, top=False, labelbottom=False)

# "<day> <Month> (HH:MM)" for both ends of the window.
first_stamp = f"{date_con(dates[0])} ({mins[0][:-3]})"
last_stamp = f"{date_con(dates[-1])} ({mins[-1][:-3]})"
ax.set_xlabel(f"Time\n{first_stamp} to  {last_stamp}")

sns.despine()
savefig("Train_Scatter")