In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import scipy.stats as stats
import sys
sys.path.append('/Users/wastechs/Documents/git-repos/energy-efficiency')
from lib.util.helper import query_table, weekday_time_series
from validation.gp.create_train_inference import create_train_inference_gp
from lib.util import helper, data_preprocessing
import torch
from torch.utils.data import TensorDataset, DataLoader
import gpytorch
import seaborn as sns
import plotly.express as px
import plotly
import plotly.graph_objects as go
import torch
import tqdm
import gpytorch
from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution
from gpytorch.variational import VariationalStrategy
%matplotlib widget

In [2]:
df = query_table(table='vacuum_pump_1_30T')

In [3]:
df['kw'] = round(df['kw'], 2)
df['kw'] = df['kw'].apply(lambda x: 0.0 if x == -0.0 else x)

In [10]:
#df = df[df.index < '2017-12-09']
df = df[(df.index >= '2017-11-05') & (df.index < '2017-12-09')]

print(df.shape)

time_int_range = np.arange(0, df.shape[0]*30, 30)
df['t'] = time_int_range
df['t'] = (df['t'] - df['t'].min()) / (df['t'].max() - df['t'].min())

training = df[df.index < '2017-12-08']
testing = df[df.index >= '2017-12-08']

X_train = torch.from_numpy(training['t'].values).to(torch.double)
y_train = torch.from_numpy(training['kw'].values).to(torch.double)

X_test = torch.from_numpy(df['t'].values).to(torch.float32)
X_test_sub = torch.from_numpy(testing['t'].values).to(torch.double)
y_test = torch.from_numpy(testing['kw'].values).to(torch.double)

# Standardizing helps with hyperparameter initialization
y_train_mean = torch.mean(y_train)
y_train_std = torch.std(y_train)

y_train = (y_train - y_train_mean) / (y_train_std)
y_test = (y_test - y_train_mean) / (y_train_std)

(1632, 3)


In [11]:
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32)

test_dataset = TensorDataset(X_test_sub, y_test)
test_loader = DataLoader(test_dataset, batch_size=32)

In [12]:
X_train.dtype, X_test.dtype, y_train.dtype, y_test.dtype

(torch.float64, torch.float32, torch.float64, torch.float64)

In [13]:
## v1 - end loss = 1.09 ##

class GPModel(ApproximateGP):
    def __init__(self, inducing_points):

        variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))
        variational_strategy = VariationalStrategy(
            self, inducing_points, variational_distribution, learn_inducing_locations=True)

        super(GPModel, self).__init__(variational_strategy)

        period_constraint_short = gpytorch.constraints.Interval(0.034, 0.045) ## short term 1

        seasonal_periodic_short = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.PeriodicKernel(
                period_length_constraint=period_constraint_short
                )
            )


        ## Local Variations ##
        local_variation_alpha = gpytorch.priors.GammaPrior(1, 0.5)

        local_variation = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RQKernel()
            )
            
        local_variation.alpha = 1

        self.covar_module = seasonal_periodic_short + local_variation
        self.mean_module = gpytorch.means.ZeroMean()

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# initialize likelihood and model
inducing_points = X_train[:500]
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = GPModel(inducing_points=inducing_points)

In [15]:
smoke_test = ('CI' in os.environ)
num_epochs = 1 if smoke_test else 4

model.double()
likelihood.double()

model.train()
likelihood.train()

optimizer = torch.optim.Adam([
    {'params': model.parameters()},
    {'params': likelihood.parameters()},
], lr=0.01)


mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=y_train.size(0))

epochs_iter = tqdm.notebook.tqdm(range(num_epochs), desc="Epoch")
for i in epochs_iter:
    # Within each iteration, we will go over each minibatch of data
    minibatch_iter = tqdm.notebook.tqdm(train_loader, desc="Minibatch", leave=False)
    for x_batch, y_batch in minibatch_iter:
        optimizer.zero_grad()
        output = model(x_batch)
        loss = -mll(output, y_batch)
        minibatch_iter.set_postfix(loss=loss.item())
        loss.backward()
        optimizer.step()

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Minibatch:   0%|          | 0/50 [00:00<?, ?it/s]

Minibatch:   0%|          | 0/50 [00:00<?, ?it/s]

Minibatch:   0%|          | 0/50 [00:00<?, ?it/s]

Minibatch:   0%|          | 0/50 [00:00<?, ?it/s]

In [17]:
model.eval()
likelihood.eval()

with torch.no_grad():
    for x_batch, y_batch in test_loader:
        preds = model(x_batch)
        #means = torch.cat([means, preds.mean])

In [19]:
preds.mean

tensor([-0.0007, -0.0013, -0.0022, -0.0035, -0.0050, -0.0068, -0.0086, -0.0105,
        -0.0122, -0.0136, -0.0144, -0.0143, -0.0133, -0.0112, -0.0079, -0.0036],
       dtype=torch.float64)