In [1]:
import numpy as np
from surrogate_model import Aleatoric_NN, Epistemic_NN
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import copy
import pickle
import os
import json
from offloader import Offloader, OffloadVector

## Active Learning experiment

In [2]:
# --- Experiment configuration ---------------------------------------------
n_init = 2000      # number of points in the initial training set
n_test = 2000      # number of held-out test points
# Acquisition batch sizes to compare: 500 doubled six times (500 ... 32000).
k = [500 * 2 ** power for power in range(7)]
m = 4              # the candidate pool drawn per iteration holds m * j points
iterations = 10    # active-learning rounds per batch size

# Result accumulators, filled while the experiment runs.
active_learning_mses, baseline_mses, baseline_ns = [], [], []

In [3]:
# Input domain as ([x_min, x_max], [y_min, y_max]).
dom = ([-3, 3], [-2, 2])

def six_hump_camel_function(X):
    """Evaluate the six-hump camel function.

    f(x, y) = (4 - 2.1*x^2 + x^4/3) * x^2 + x*y + (-4 + 4*y^2) * y^2

    Parameters
    ----------
    X : array_like, shape (..., 2)
        Points to evaluate; the last axis holds the (x, y) coordinates.
        The usual (n_points, 2) matrix behaves exactly as before. A single
        point of shape (2,) and plain nested lists are accepted as well.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Function values with shape ``X.shape[:-1]``.
    """
    # asarray is a no-op for ndarrays and lets lists/tuples through.
    X = np.asarray(X)
    # Ellipsis indexing selects the coordinate on the last axis for any rank.
    x = X[..., 0]
    y = X[..., 1]

    x2 = np.power(x, 2)
    x4 = np.power(x, 4)
    y2 = np.power(y, 2)

    return (4.0 - 2.1 * x2 + (x4 / 3.0)) * x2 + x * y + (-4.0 + 4.0 * y2) * y2

In [4]:
# Dense grid x grid lattice over `dom`; each row of X is one (x, y) point.
grid = 1200
X1 = np.linspace(*dom[0], grid)
X2 = np.linspace(*dom[1], grid)
x1, x2 = np.meshgrid(X1, X2)
# Flatten both coordinate matrices and pair them column-wise -> (grid*grid, 2).
X = np.column_stack((x1.ravel(), x2.ravel()))


In [5]:
# Seeded generator so the train/test split is reproducible after a kernel restart.
rng = np.random.RandomState(42)
# Draw from `rng` itself: the global np.random state is NOT affected by the
# seed above, so np.random.randint would give a different split on every run.
# randint samples with replacement, so the test set may contain repeated points.
# Indices are bounded by the current pool size so they can never go out of range.
test_indices = rng.randint(0, X.shape[0], size = n_test)
testset_X = X[test_indices,:]
# Remove the test points from the candidate pool.
# NOTE: this rebinds X to a smaller array, so re-running the cell without
# rebuilding the grid removes further points.
X = np.delete(X, test_indices, axis=0)

In [6]:
# Sanity check: test-set shape on the first line, remaining pool shape on the second.
print(testset_X.shape, X.shape, sep="\n")

(2000, 2)
(1438003, 2)


In [7]:
# Ground-truth targets for the held-out test points, from the analytic function.
testset_y = six_hump_camel_function(testset_X)

In [8]:
# Initial training inputs: n_init rows drawn (with replacement) from the pool.
# Uses the seeded `rng` rather than the unseeded global np.random state so the
# initial set is the same on every fresh run.
init_set = X[rng.randint(0, X.shape[0], n_init), :]

In [9]:
# Sanity check: display the shape of the test targets.
testset_y.shape

(2000,)

In [10]:
# Targets for the initial training set, computed with the analytic function.
y = six_hump_camel_function(init_set)

In [11]:
def pre(task_folder, x, y):
    """Write one input point to ``<task_folder>/example/new_content.json``.

    The file contains the JSON object ``{"x": x, "y": y}``. The ``example``
    sub-folder must already exist. Nothing is returned.
    """
    target_path = os.path.join(task_folder, "example", "new_content.json")
    payload = {'x': x, 'y': y}
    with open(target_path, "w") as handle:
        json.dump(payload, handle)
    

In [12]:
def post(task_folder, x, y):
    """Load and return the parsed JSON from ``<task_folder>/example/new_new_content.json``.

    ``x`` and ``y`` are accepted but not used by this function.
    """
    result_path = os.path.join(task_folder, "example", "new_new_content.json")
    with open(result_path, "r") as handle:
        return json.load(handle)


In [13]:
# Client for the remote offloading service; "tmp" is passed as the local offload folder.
offloader = Offloader("offload.dt4si.nl", "api/v1", offload_folder="tmp")

Offloader connects to: http://offload.dt4si.nl/api/v1


In [14]:
# Active-learning experiment.
# For every acquisition batch size j in k: train a fresh Epistemic_NN on the
# initial set, then run `iterations` rounds in which the j most uncertain of
# m*j random candidates are selected and used to update the model. Afterwards
# the model is saved and its test MSE recorded.
for j in k:
    total_n = 0
    nn = Epistemic_NN()
    # Copies, so every batch size starts from the same untouched initial data.
    start_set = copy.deepcopy(init_set)
    y_new = copy.deepcopy(y)
    nn.create_model(start_set)
    nn.fit(start_set, y_new, epochs = 25)
    for i in range(iterations):
        # Candidate pool drawn from the seeded generator (the global np.random
        # state is unseeded, which made runs irreproducible).
        new_set = X[rng.randint(0,X.shape[0],m*j),:]
        evaluation_m, evaluation_s = nn.predict(new_set)
        # Indices of the j candidates with the largest predicted uncertainty
        # (unordered). Assumes evaluation_s is 1-D -- TODO confirm.
        highest_error_indices = np.argpartition(evaluation_s, -j)[-j:]

        # NOTE(review): only the FIRST candidate is offloaded here, while the
        # model update below is fed all j selected rows. The vector still has
        # to be rebuilt for multiple data points, and y_new_ converted to an
        # array of j targets, before updateModel can work -- the response
        # schema of new_new_content.json is not visible from this notebook.
        vec = [{'x':new_set[0,0], 'y':new_set[0,1]}]
        off = OffloadVector(offloader, pre, post, "ls && pip install numpy && python test_functions.py", "python:3", vec, local=False, auto_delete=False)
        off.add_file("example", "")
        off.add_file("test_functions.py", "")
        off.get_file(os.path.join("example","new_new_content.json"))

        y_new_ = off.run() #get elements
        # Show the values that were just fetched (previously this printed the
        # unchanged initial labels `y_new`).
        print(y_new_)

        # Local (non-offloaded) equivalent, kept for reference:
        #y_new_ = six_hump_camel_function(new_set[highest_error_indices,:])
        print(f"Mean prediction uncertainty: {np.mean(evaluation_s[highest_error_indices])}")
        # NOTE(review): start_set / y_new are not grown while the two lines
        # below stay commented out, so the reported trainset size is constant.
        #start_set = np.concatenate((start_set,  new_set[highest_error_indices]), axis = 0)
        print(f"trainset size: {start_set.shape[0]}")
        #y_new = np.concatenate((y_new, y_new_), axis=0)
        nn.updateModel(new_set[highest_error_indices,:], y_new_, epochs = 20, verbose=0)
        total_n+=j
        print(f"finished n={j}, iteration={i}")
    nn.model.save(f"AL_{j}.h5")

    # Evaluate on the held-out test set and record the results for this j.
    test_m, test_s = nn.predict(testset_X)
    active_learning_mses.append(mean_squared_error(testset_y, test_m))
    print(f"mse: {active_learning_mses}")
    # Total number of points acquired for this j (iterations * j).
    baseline_ns.append(total_n)
    print(f"baseline_ns: {baseline_ns}")



Offloading vector folder: tmp\offload-xocf6
Argo workflow name: vector-99898
total: 1, pending: 1, running: 0, succeeded: 0, failed: 0, finished: False
total: 1, pending: 0, running: 0, succeeded: 0, failed: 0, finished: True


OffloadFailedException: The offloading failed, see argo.dt4si.nl, workflow: vector-99898