In [1]:
import botorch.acquisition
import torch
import numpy
from NNTraining import k_fold_training, PandasDataset
import matplotlib.pyplot as plt
import pandas as pd
import botorch as bt
import numpy as np
from botorch.models import SingleTaskGP
from gpytorch.constraints import GreaterThan
from gpytorch.mlls import ExactMarginalLogLikelihood
from botorch.acquisition.analytic import LogExpectedImprovement
from botorch import fit_gpytorch_mll
from botorch.optim import optimize_acqf
import gpytorch

In [2]:
# Read the raw iris table; the CSV is parsed without a header row, so column names are supplied here
IRIS_COLUMNS = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
dataset = pd.read_csv('iris.csv', names=IRIS_COLUMNS)
# Every column except the label is a model input
feature_cols = [col for col in dataset.columns if col != 'class']
dataset

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [10]:
# Encode the string class labels as integer ids and wrap the table for training.
#
# `dataset` is rebound to a PandasDataset at the bottom of this cell (objective_function
# reads it under that name). Keep a separate handle on the raw DataFrame so that running
# the cell a second time does not apply DataFrame-style indexing to the wrapped object.
if isinstance(dataset, pd.DataFrame):
    iris_frame = dataset

#set feature and class names
class_names = list(iris_frame['class'].unique())
class_to_id = {species: i for i, species in enumerate(class_names)}

# Build a new frame instead of overwriting the 'class' column in place, so the raw labels
# stay available and every run of this cell starts from the same input.
# (The feature columns are passed through as loaded; no normalisation is applied here.)
encoded_frame = iris_frame.assign(**{'class': iris_frame['class'].map(class_to_id)})

#turn into training-friendly pytorch dataset format
dataset = PandasDataset(encoded_frame, feature_cols, 'class')

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [4]:
def objective_function(hyperparams):
    """Score one hyperparameter vector by its mean k-fold training accuracy.

    Parameters
    ----------
    hyperparams : torch.Tensor
        1-D tensor with at least six entries. The callers in this notebook draw
        them from the unit cube. Entries 0-3 are mapped to the network topology
        via ``ceil(30 * h)``, entry 4 to the batch size via
        ``ceil((h + 1e-8) * 128)``, and entry 5 is used as the learning rate.

    Returns
    -------
    The ``np.mean`` of the values returned by ``k_fold_training``.

    Notes
    -----
    The input tensor is not modified; the decoding works on a private copy.
    """
    # Copy first: the zero-nudge below is an in-place write and must not leak
    # into the caller's tensor (e.g. rows of the initial design or a candidate).
    hyperparams = hyperparams.detach().clone()
    # An exact 0 would ceil to a size of 0, so push it just above zero.
    hyperparams[hyperparams == 0] += 1e-10
    topology = torch.ceil(30 * hyperparams[0:4]).int() #first 4 params are topological structure
    batch_size = int(torch.ceil((hyperparams[4] + 1e-8) * 128))
    # Hand the learning rate over as a plain Python float, like batch_size,
    # rather than as a 0-d float64 tensor.
    learning_rate = float(hyperparams[5])
    accs = k_fold_training(dataset, 4, topology, 3, stratified=True, epochs=50, k=10,
                           batch_size=batch_size, learning_rate=learning_rate)
    return np.mean(accs)

def GPUCB_function(t: int, model, error, points_per_dim: int = 100, noise: float = 1e2):
    '''Return the grid point that maximises the GP-UCB score mu + sqrt(beta_t * sigma^2).

    The posterior mean and variance are computed by hand from the model's kernel
    and training data on a regular grid over the unit cube.

    Parameters
    ----------
    t : int
        Iteration counter (must be >= 1); enters the exploration weight
        beta_t = 2 * log(dims * t^2 * pi^2 / (6 * error)).
    model :
        Object exposing ``train_inputs`` (tuple whose first entry is an
        ``n x dims`` tensor), ``train_targets`` and ``covar_module``.
        NOTE(review): ``train_targets`` is whatever the model stores, which may
        be a transformed version of the raw observations - confirm.
    error : float
        Confidence parameter (must be > 0).
    points_per_dim : int
        Grid resolution per dimension. The grid has ``points_per_dim ** dims``
        rows, so the default is only practical for very low-dimensional inputs.
    noise : float
        Value added to the diagonal of the training covariance matrix.
        NOTE(review): the default 1e2 is kept from the original code; it is
        large compared with the 1e-8 used for the same term elsewhere in this
        notebook - confirm which is intended.

    Returns
    -------
    torch.Tensor
        The ``dims``-vector of the maximising grid point.

    Raises
    ------
    ValueError
        If ``t < 1`` or ``error <= 0`` (beta_t would be undefined).
    '''
    if t < 1 or error <= 0:
        raise ValueError("t must be >= 1 and error must be > 0")

    #train inputs are a tuple of the form (inputs,...), so take the first entry
    train_points = model.train_inputs[0]
    dims = train_points.shape[-1]
    # Use the model's own targets instead of a notebook global so the function
    # is self-contained and X / Y are guaranteed to have matching lengths.
    targets = model.train_targets.reshape(-1, 1).to(train_points)

    # Build the grid in the dtype/device of the training data.
    axis = torch.linspace(1e-10, 1, points_per_dim, dtype=train_points.dtype, device=train_points.device)
    mesh = torch.meshgrid(*[axis for _ in range(dims)], indexing='xy')
    grid = torch.stack([m.flatten() for m in mesh], dim=-1)

    kernel = model.covar_module
    with torch.no_grad():  # only the arg-max is needed, no gradients
        jitter = torch.eye(train_points.shape[0], dtype=train_points.dtype, device=train_points.device) * noise
        covariance_matrix = kernel(train_points).to_dense() + jitter #adds noise
        # One kernel column per grid point: shape (grid, n, 1)
        k_matrix = kernel(train_points, grid.unsqueeze(1)).to_dense()
        k_row = k_matrix.transpose(1, 2)
        mu = (k_row @ torch.linalg.solve(covariance_matrix, targets)).reshape(-1)
        #variance of every point with itself, minus the part explained by the data
        sigma = kernel(grid.unsqueeze(1), grid.unsqueeze(1)).to_dense().reshape(-1)
        sigma = sigma - (k_row @ torch.linalg.solve(covariance_matrix, k_matrix)).reshape(-1)
        # Round-off can push the variance slightly below zero, which would turn
        # the square root (and with it the arg-max) into NaN.
        sigma = sigma.clamp_min(0)

    beta = float(2*np.log((dims * t**2 * torch.pi**2)/(6*error)))
    max_arg = torch.argmax(mu + torch.sqrt(sigma*beta))
    return grid[max_arg]


In [8]:
# Initial design: evaluate the objective at 20 random points drawn from the 6-d unit cube
init_x = torch.rand(20, 6, dtype=torch.float64)
# One objective value per row of init_x, stored as a column
init_y = torch.tensor([objective_function(point) for point in init_x]).reshape(-1, 1)

RuntimeError: mat1 and mat2 must have the same dtype, but got Double and Float

In [1]:
# Bayesian-optimisation loop: N_BO_ITERATIONS further objective evaluations, each chosen by
# maximising an upper-confidence-bound acquisition on the current GP.
N_BO_ITERATIONS = 60
UCB_BETA_DIVISOR = 5  # the GP-UCB beta_t schedule is divided by this before use

# If this cell ran before, init_y already holds appended observations while init_x does not.
# Trim it back to the initial design so X and Y have matching lengths again.
init_y = init_y[: init_x.shape[0]]

model = SingleTaskGP(train_X=init_x, train_Y=init_y)
dims, tol = init_x.shape[-1], 0.05
# Loop-invariant search box (unit cube), built once in the same dtype as the training data
bounds = torch.stack([torch.zeros(dims, dtype=init_x.dtype), torch.ones(dims, dtype=init_x.dtype)])

for i in range(1, N_BO_ITERATIONS + 1):
    # Re-fit the GP hyperparameters on everything observed so far
    mll = ExactMarginalLogLikelihood(likelihood=model.likelihood, model=model)
    fit_gpytorch_mll(mll)

    beta = 2*np.log((dims * i**2 * torch.pi**2)/(6*tol))
    acquisition_function = botorch.acquisition.analytic.UpperConfidenceBound(model, beta/UCB_BETA_DIVISOR)

    candidates, acquisition_value = optimize_acqf(acq_function=acquisition_function,
                                                  bounds=bounds,
                                                  q=1,
                                                  num_restarts=20,
                                                  raw_samples=1024,
                                                  options={"batch_limit": 5, "maxiter": 200})

    # Evaluate the proposed point; keep the observation as an explicit 1 x 1 tensor
    # (n x m), which is the shape both the model update and the running record need.
    next_result = torch.tensor([[objective_function(candidates[0])]], dtype=init_y.dtype)
    model = model.condition_on_observations(candidates.to(init_x.dtype), next_result)
    init_y = torch.cat((init_y, next_result), dim=0)
        

NameError: name 'SingleTaskGP' is not defined

In [23]:
# Pick the training input with the highest recorded objective value and decode it
# with the same scaling that objective_function applies.
hyperparams = model.train_inputs[0][init_y.argmax()]
topology = (30 * hyperparams[:4]).ceil().int() #first 4 params are topological structure
batch_size = (128 * (hyperparams[4] + 1e-8)).ceil().int()
learning_rate = hyperparams[5]
print(topology, batch_size, learning_rate, init_y.max())

tensor([26, 18, 27, 12], dtype=torch.int32) tensor(104, dtype=torch.int32) tensor(0.0152, dtype=torch.float64) tensor(0.9800, dtype=torch.float64)


In [None]:
# Compare two arg-max locations of the UCB score mu + sqrt(beta_t * variance) on a regular grid:
# one from the model's own predictive distribution, one from a hand-computed kernel posterior.
dims = len(model.train_inputs[0][0])
# NOTE(review): the grid has 100**dims rows, which is only tractable for very small dims.
grid = torch.meshgrid(*[torch.linspace(0,1,100) for _ in range(dims)], indexing='xy')
grid = torch.stack([m.flatten() for m in grid], dim=-1)
distribution = model(grid) #evaluates the model at every point in the grid and returns the distribution
mu = distribution.mean
sigma_t = distribution.stddev
t, error = (20, 0.05)
beta_t = 2*np.log((dims * t**2 * torch.pi**2)/(6*error))
# sigma_t is a standard deviation, so it is weighted by sqrt(beta_t)
max_arg = torch.argmax(mu + sigma_t*np.sqrt(beta_t))

# Hand-computed posterior from the kernel and the training data
kernel = model.covar_module
train_points = model.train_inputs[0]
covariance_matrix = kernel(train_points).to_dense() + torch.eye(train_points.shape[0]) * 1e-8 #adds noise 
k_matrix = kernel(train_points, grid.unsqueeze(1)).to_dense()
mu_t = k_matrix.transpose(1,2) @ torch.linalg.solve(covariance_matrix, init_y)
mu_t = mu_t.squeeze()
grid_covariance = kernel(grid.unsqueeze(1), grid.unsqueeze(1)).to_dense().squeeze() #variance of every point with itself
grid_covariance -= (k_matrix.transpose(1,2) @ torch.linalg.solve(covariance_matrix, k_matrix)).squeeze() #noise value
# grid_covariance is a variance: take the square root of beta_t * variance, clamping the
# small negative values that round-off can produce so the score never becomes NaN.
gpucb_max = torch.argmax(mu_t + torch.sqrt(grid_covariance.clamp_min(0)*beta_t))
print(grid[max_arg],grid[gpucb_max])

In [None]:
# Kernel value of every grid point with itself, with the singleton dimensions removed
sigma = kernel(grid[:, None], grid[:, None]).to_dense().squeeze()
# Show the recorded objective values
init_y

In [None]:
# Diagonal of the training covariance matrix, shifted by a small constant
torch.diagonal(covariance_matrix) + 1e-4

In [None]:
#GPUCB(1,model,0.05)
# Rebuild the regular grid over the unit cube and read off the model mean at every grid point
dims = model.train_inputs[0][0].shape[0]
grid = torch.stack(
    [coords.reshape(-1)
     for coords in torch.meshgrid(*(torch.linspace(0, 1, 100) for _ in range(dims)), indexing='xy')],
    dim=-1,
)
distribution = model(grid) #evaluates the model at every point in the grid and returns the distribution
mu = distribution.mean[..., None] #mean at every grid point, with a trailing singleton dimension
# sigma = distribution.variance.unsqueeze(-1)
# beta = 2*torch.log((dims * t**2 * torch.pi**2)/(6*error))


In [None]:
# Evaluate a freshly constructed RBF kernel on the first five grid points.
# Note: this rebinds `kernel`, which earlier cells bound to the model's covariance module.
kernel = gpytorch.kernels.RBFKernel()
kernel(grid[:5], grid[:5]).to_dense()