In [406]:
%matplotlib inline
import itertools
import numpy as np
import time
import gpflow
import tensorflow as tf
import matplotlib.pyplot as plt
from gpflow.ci_utils import ci_niter
from gpflow.utilities import print_summary, set_trainable, deepcopy
from IPython.display import clear_output
from tensorflow_probability import distributions as tfd
import tensorflow_probability as tfp
# for reproducibility of this notebook:
tf.random.set_seed(42)
# The synthetic data, the sensor locations and the inducing-point selection
# all draw from NumPy's global RNG, so it has to be seeded as well.
np.random.seed(42)
import time
import ipywidgets as widgets
# Output widget that run_adam captures its per-iteration progress message into
# (it is cleared and rewritten on every optimisation step).
# NOTE(review): no visible cell displays this widget — confirm it is shown somewhere.
out = widgets.Output()

In [372]:
def pollution(x, y, t, with_noise=False, noise_scale=1.1):
    """
    Synthetic PM2.5 level at location (x, y) and time t.

    The noise-free signal is a baseline of 3 plus sinusoidal terms in time
    (sin(t/300), cos(t/200), cos(t/5)) and in space (sin(x), sin(y)).

    :param x: first spatial coordinate
    :param y: second spatial coordinate
    :param t: time index
    :param with_noise: if truthy, return a draw from a normal distribution
        centred on the noise-free value instead of the value itself
    :param noise_scale: standard deviation of that normal distribution;
        defaults to 1.1, the value that was previously hard-coded
    :return: the (optionally noisy) PM2.5 value
    """
    pm25 = 3 + (0.4*np.sin(t/300) + 0.1*np.cos(t/200) + 0.3*np.cos(t/5) + 0.3*np.sin(x) + 0.5*np.sin(y))
    if with_noise:
        # Draws from NumPy's global RNG, so results depend on np.random.seed.
        pm25 = np.random.normal(pm25, scale=noise_scale)
    return pm25

In [373]:
def get_sensor_locs(N_train_sensors, N_pool_sensors, N_test_sensors):
    """
    Draw random 2-D sensor coordinates and split them into three groups.

    All coordinates come from a single uniform draw scaled to [0, 10) and are
    then cut, in order, into the training, pool and test sensors.

    :param N_train_sensors: number of sensors in the training group
    :param N_pool_sensors: number of sensors in the candidate pool
    :param N_test_sensors: number of sensors in the test group
    :return: (train_locs, pool_locs, test_locs), each an array with 2 columns
    """
    first_pool = N_train_sensors
    first_test = first_pool + N_pool_sensors
    coords = 10*np.random.random_sample([first_test + N_test_sensors, 2])
    return coords[:first_pool], coords[first_pool:first_test], coords[first_test:]

In [374]:
def get_training_data(train_locs, N_start, N_end):
    """
    Build noisy training observations for every sensor at every time step.

    One row is generated per (time, sensor) pair for t in
    range(N_start, N_end), with time as the outer loop, using
    pollution(..., with_noise=True).

    :param train_locs: array of sensor coordinates, one (x, y) row per sensor
    :param N_start: first time step (inclusive)
    :param N_end: last time step (exclusive)
    :return: (X, Y) where X has columns [x, y, t] and Y is a single column
        of PM2.5 values; both have zero rows when there are no time steps
        or no sensors
    """
    rows = []
    for t in range(N_start, N_end):
        for i in range(len(train_locs)):
            x, y = train_locs[i, 0], train_locs[i, 1]
            rows.append([x, y, t, pollution(x, y, t, with_noise=True)])
    # reshape keeps the array 2-D even when `rows` is empty, so the column
    # slices below no longer raise IndexError on empty input.
    training_set = np.array(rows).reshape(-1, 4)
    X = training_set[:, 0:3]
    Y = training_set[:, 3:4]
    return X, Y

In [375]:
def get_test_data(test_locs, N_start, N_end):
    """
    Build noise-free test observations for every sensor at every time step.

    One row is generated per (time, sensor) pair for
    t = N_start+1, ..., N_end (inclusive), with time as the outer loop.

    :param test_locs: array of sensor coordinates, one (x, y) row per sensor
    :param N_start: time step immediately before the first generated one
    :param N_end: last generated time step (inclusive)
    :return: (X, Y) where X has columns [x, y, t] and Y is a single column
        of PM2.5 values; both have zero rows when there are no time steps
        or no sensors
    """
    rows = []
    for t in range(N_start+1, N_end+1):
        for i in range(len(test_locs)):
            x, y = test_locs[i, 0], test_locs[i, 1]
            rows.append([x, y, t, pollution(x, y, t)])
    # reshape keeps the array 2-D even when `rows` is empty, so the column
    # slices below no longer raise IndexError on empty input.
    test_set = np.array(rows).reshape(-1, 4)
    X = test_set[:, 0:3]
    Y = test_set[:, 3:4]
    return X, Y

In [376]:
def get_model(Xtrain, Ytrain, N_inducing=15):
    """
    Build an SGPR model on the training data with fixed inducing points.

    The inducing points are a random subset of the rows of Xtrain and are
    marked non-trainable. The kernel is an RBF over input columns 0-2
    (initial lengthscales 1, 1, 500) plus a Bias kernel.

    :param Xtrain: training inputs, one row per observation
    :param Ytrain: training targets
    :param N_inducing: number of inducing points to select
    :return: the constructed gpflow.models.SGPR model
    """
    n_points = len(Xtrain)
    # Sample without replacement so the same row is never picked twice:
    # duplicated inducing inputs would produce identical rows in the
    # inducing-point covariance matrix. Replacement is only allowed when more
    # inducing points are requested than there are rows, which keeps that
    # call pattern working as before.
    Zid = np.random.choice(n_points, size=N_inducing, replace=N_inducing > n_points)
    Z = Xtrain[Zid]
    k = gpflow.kernels.RBF(active_dims=[0,1,2], lengthscales=[1, 1, 500]) + (
        gpflow.kernels.Bias(active_dims=[0,1,2]))
    model = gpflow.models.SGPR(data=(Xtrain,Ytrain), inducing_variable = Z, kernel=k)
    # Keep the inducing locations fixed during optimisation.
    gpflow.set_trainable(model.inducing_variable, False)
    return model

In [479]:
def run_adam(model, iterations):
    """
    Utility function running the Adam optimizer

    The current iteration number is written to the module-level `out` widget
    on every step, and a timing summary is printed once at the end.

    :param model: GPflow model; its `training_loss` is minimised with respect
        to its `trainable_variables`
    :param iterations: number of optimisation steps to run
    :return logf: list of training-loss values recorded on every 20th step
        (steps 0, 20, 40, ...); empty when `iterations` is 0
    """

    logf = []
    training_loss = model.training_loss
    optimizer = tf.optimizers.Adam()

    @tf.function
    def optimization_step():
        optimizer.minimize(training_loss, model.trainable_variables)

    @out.capture()
    def print_step(step):
        print("iteration {}".format(step))

    # Optimise the model's trainable variables
    tic = time.perf_counter()
    for step in range(iterations):
        optimization_step()
        # Replace the previous progress line instead of appending to it.
        out.clear_output(wait=True)
        print_step(step)
        if step % 20 == 0:
            logf.append(training_loss().numpy())
    elapsed = time.perf_counter() - tic
    # Guard the average so that a zero-iteration call does not divide by zero.
    per_iter = elapsed/iterations if iterations > 0 else 0.0
    print("Trained for {} iterations in {:.2f} seconds, average {:.2f} seconds per iteration".format(
        iterations, elapsed, per_iter))
    return logf

In [480]:
def get_pool_data(pool_locs, N_start, N_end):
    """
    Build noise-free observations for every pool sensor at every time step.

    One row is generated per (time, sensor) pair for
    t = N_start+1, ..., N_end (inclusive), with time as the outer loop.

    :param pool_locs: array of sensor coordinates, one (x, y) row per sensor
    :param N_start: time step immediately before the first generated one
    :param N_end: last generated time step (inclusive)
    :return: (X, Y) where X has columns [x, y, t] and Y is a single column
        of PM2.5 values; both have zero rows when there are no time steps
        or no sensors
    """
    rows = []
    for t in range(N_start+1, N_end+1):
        for i in range(len(pool_locs)):
            x, y = pool_locs[i, 0], pool_locs[i, 1]
            rows.append([x, y, t, pollution(x, y, t)])
    # reshape keeps the array 2-D even when `rows` is empty (e.g. once the
    # pool has been exhausted), so the column slices below no longer raise
    # IndexError on empty input.
    pool_set = np.array(rows).reshape(-1, 4)
    X = pool_set[:, 0:3]
    Y = pool_set[:, 3:4]
    return X, Y

In [490]:
def experiment_loop(Nt=24*180, N_add=24*30, N_train_sensors=10, N_pool_sensors=10,
                    N_test_sensors=5, N_adam_iterations=100, N_horizon=24):
    """
    Run the sensor-selection experiment.

    Each run trains a fresh model on the current training sensors, scores it
    on the test sensors, and then moves the pool sensor with the largest
    average predictive variance into the training set. One pool sensor is
    moved per run, so there are N_pool_sensors runs in total. The defaults
    reproduce the values that were previously hard-coded.

    :param Nt: number of time steps of training data in the first run
        (default: hourly values for 180 days)
    :param N_add: number of time steps added to the training window after
        each run (default: 30 days of hourly values)
    :param N_train_sensors: number of sensors in the initial training set
    :param N_pool_sensors: number of candidate sensors (and number of runs)
    :param N_test_sensors: number of test sensors
    :param N_adam_iterations: optimisation steps passed to run_adam per run
    :param N_horizon: number of time steps after the training window used
        for the test and pool predictions
    :return: dict with one entry per run under each of the keys
        "sensor_locs" ([train, pool, test] locations used in that run),
        "models", "loss", "MAE" and "removed sensor" (the pool location
        moved to the training set at the end of that run)
    """
    MAE = tf.losses.MeanAbsoluteError()

    train_locs, pool_locs, test_locs = get_sensor_locs(N_train_sensors,
                                                       N_pool_sensors,
                                                       N_test_sensors)

    sensor_locs = []
    model_log = []
    loss_log = []
    mae_log = []
    removed_sensor = []

    tic1 = time.perf_counter()

    for run in range(1, N_pool_sensors + 1):

        print("Starting run {}".format(run))

        tic2 = time.perf_counter()

        # record the sensor sets this run is trained and evaluated on
        sensor_locs.append([train_locs, pool_locs, test_locs])

        # get training data for the current training locations
        Xtrain, Ytrain = get_training_data(train_locs, 0, Nt)

        # get model, default num inducing points = 15
        model = get_model(Xtrain, Ytrain)

        # train model using adam optimizer
        loss = run_adam(model, N_adam_iterations)

        # test model at test locations against actual data for the horizon
        Xtest, Ytest = get_test_data(test_locs, Nt, Nt + N_horizon)
        pred, _ = model.predict_f(Xtest)

        # get average mae over all test sensors
        mae = MAE(Ytest, pred).numpy()

        # get predictions for pool sensors over the same horizon
        Xpool, _ = get_pool_data(pool_locs, Nt, Nt + N_horizon)
        _, var = model.predict_f(Xpool)

        # Pool rows are ordered time-major, so after the reshape each column
        # holds one sensor; average over timepoints and move the sensor with
        # maximum variance to the train set
        var = var.numpy().reshape(-1, len(pool_locs))
        var = np.average(var, axis=0)
        idx = np.argmax(var)
        train_locs = np.vstack((train_locs, pool_locs[idx][None, :]))

        # add sensor to logs and remove from pool locations
        removed_sensor.append(pool_locs[idx])
        pool_locs = np.delete(pool_locs, idx, axis=0)

        # move to next time step
        Nt = Nt + N_add

        # save logs
        model_log.append(model)
        loss_log.append(loss)
        mae_log.append(mae)

        toc = time.perf_counter()
        run_elapsed = toc - tic2
        total_elapsed = toc - tic1

        print("Run {} complete in {:.2f} seconds, total time elapsed is {:.2f} seconds".format(
            run, run_elapsed, total_elapsed))

    logs = {
        "sensor_locs": sensor_locs,
        "models": model_log,
        "loss": loss_log,
        "MAE": mae_log,
        "removed sensor": removed_sensor
           }

    return logs
        

In [493]:
# Run the experiment; each run prints a start line, a training-time summary
# and a completion line with the elapsed time.
logs = experiment_loop()

Starting run 1
Trained for 100 iterations in 4.50 seconds, average 0.05 seconds per iteration
Run 1 complete in 5.03 seconds, total time elapsed is 5.03 seconds
Starting run 2
Trained for 100 iterations in 5.79 seconds, average 0.06 seconds per iteration
Run 2 complete in 6.46 seconds, total time elapsed is 11.49 seconds
Starting run 3
Trained for 100 iterations in 6.61 seconds, average 0.07 seconds per iteration
Run 3 complete in 7.42 seconds, total time elapsed is 18.91 seconds
Starting run 4
Trained for 100 iterations in 7.80 seconds, average 0.08 seconds per iteration
Run 4 complete in 8.79 seconds, total time elapsed is 27.70 seconds
Starting run 5
Trained for 100 iterations in 9.19 seconds, average 0.09 seconds per iteration
Run 5 complete in 10.58 seconds, total time elapsed is 38.28 seconds
Starting run 6
Trained for 100 iterations in 10.59 seconds, average 0.11 seconds per iteration
Run 6 complete in 12.19 seconds, total time elapsed is 50.47 seconds
Starting run 7
Trained for

In [469]:
# NOTE(review): `full_experiment_logs` is not assigned in any cell visible here
# (the experiment run stores its result in `logs`) — confirm where it is defined.
full_experiment_logs.keys()

dict_keys(['sensor_locs', 'models', 'loss', 'MAE', 'removed sensor'])

In [487]:
# Displays the entire log dictionary, which produces a very long output.
# NOTE(review): `full_experiment_logs` is not assigned in the visible cells — confirm its origin.
full_experiment_logs

{'sensor_locs': [[array([[6.8493558 , 3.91944172],
          [9.71498365, 3.40662936],
          [7.06903401, 7.58018513],
          [8.09593601, 6.38802745],
          [0.71231396, 4.35489589],
          [3.07401252, 3.218029  ],
          [0.31631791, 1.987888  ],
          [0.57921792, 0.05893325],
          [9.79496924, 0.22255694],
          [4.51772878, 1.31218406]]),
   array([[7.0899624 , 2.96768321],
          [0.97293533, 7.56294258],
          [0.11192967, 9.28664111],
          [0.34938126, 4.70361915],
          [6.88692502, 7.33284581],
          [2.96685528, 7.59622938],
          [4.99496691, 9.31689573],
          [9.55481578, 8.36446328],
          [1.97475223, 2.06233971],
          [4.7733693 , 1.06482044]]),
   array([[6.57002214, 8.90595492],
          [5.32595348, 1.83161499],
          [2.47876599, 9.25492279],
          [5.24818238, 4.65020948],
          [5.11041534, 0.78511435]])],
  [array([[6.8493558 , 3.91944172],
          [9.71498365, 3.40662936],
      