Copyright 2021-2023 Lawrence Livermore National Security, LLC and other MuyGPyS Project Developers. See the top-level COPYRIGHT file for details.

SPDX-License-Identifier: MIT

# Deep Kernels with MuyGPs in PyTorch Tutorial

In this tutorial, we outline how to construct a simple deep kernel model using the PyTorch implementation of MuyGPs.


In [None]:
# Set the MuyGPyS "muygpys_backend" option to "torch" for this notebook.
from MuyGPyS import config
config.update("muygpys_backend","torch")

In [None]:
import numpy as np
import torch

We use the MNIST classification problem as a benchmark. We will use the deep kernel MuyGPs model to classify images of handwritten digits between 0 and 9. We will use a fully-connected architecture, meaning we will have to vectorize each image prior to training. We download the training and testing data using the torchvision.datasets API.

In [None]:
import torchvision
import os
from torch.nn.functional import one_hot

# Directory that receives the downloaded MNIST files.
root = './data'
# exist_ok=True tolerates an already-present directory and creates missing parents.
os.makedirs(root, exist_ok=True)

# Download and transform MNIST dataset: convert each image to a tensor, then
# normalize with mean 0.5 and standard deviation 1.0.
trans = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5,), (1.0,)),
])
train_set = torchvision.datasets.MNIST(root=root, train=True, transform=trans, download=True)
test_set = torchvision.datasets.MNIST(root=root, train=False, transform=trans, download=True)


# Ten different digits and 784 (28 x 28) features per image. The sample counts
# are read from the datasets instead of being hard-coded
# (60,000 training samples and 10,000 test samples for MNIST).
num_classes = 10
num_train_samples = len(train_set)
num_test_samples = len(test_set)
num_features = 784


def _vectorize_dataset(dataset, feature_count, class_count):
    """Flatten every image of ``dataset`` and one-hot encode its label.

    Args:
        dataset: Indexable collection of ``(image, label)`` pairs that
            supports ``len``; each image must flatten to ``feature_count``
            values and each label must be an integer class index.
        feature_count: Number of columns in the returned feature tensor.
        class_count: Number of classes used for the one-hot encoding.

    Returns:
        A ``(features, responses)`` pair of tensors with shapes
        ``(len(dataset), feature_count)`` and ``(len(dataset), class_count)``.
    """
    sample_count = len(dataset)
    features = torch.zeros((sample_count, feature_count))
    responses = torch.zeros((sample_count, class_count))
    for i in range(sample_count):
        # Index the dataset once per sample: every lookup re-applies the
        # transform pipeline, so fetching image and label separately doubles the work.
        image, label = dataset[i]
        features[i, :] = image.flatten()
        responses[i, :] = one_hot(
            torch.tensor(label).to(torch.int64), num_classes=class_count
        )
    return features, responses


# Construct training and test data feature tensors. Vectorize the images of
# digits and one-hot encode the classes.
train_features, train_responses = _vectorize_dataset(train_set, num_features, num_classes)
test_features, test_responses = _vectorize_dataset(test_set, num_features, num_classes)

We set up our nearest neighbor lookup structure using the NN_Wrapper data structure in MuyGPs. We then sample a batch of training points and construct tensors containing the features and targets of the batched elements, along with the targets of their nearest neighbors.

In [None]:
from torch import nn
import random
from torch.optim.lr_scheduler import ExponentialLR
from MuyGPyS.neighbors import NN_Wrapper
from MuyGPyS.optimize.batch import sample_batch

# Turn on autograd anomaly detection for the rest of the notebook.
torch.autograd.set_detect_anomaly(True)

# For reproducibility we seed NumPy's global random number generator.
np.random.seed(0)
test_count = test_features.shape[0]
train_count = train_features.shape[0]

# Define the nearest neighbor data structure, setting the number of nearest
# neighbors and choosing the NN algorithm.
nn_count = 30
nbrs_lookup = NN_Wrapper(train_features, nn_count, nn_method="hnsw")

# We will make use of batching in our hyperparameter training.
batch_count = 500
sampled_indices, sampled_nn_indices = sample_batch(nbrs_lookup, batch_count, train_count)

# Cast the sampled index arrays to int64 and wrap them as torch tensors.
batch_indices = torch.from_numpy(sampled_indices.astype(np.int64))
batch_nn_indices = torch.from_numpy(sampled_nn_indices.astype(np.int64))

# Features and targets of the batch elements, and targets of their neighbors.
batch_features = train_features[batch_indices, :]
batch_targets = train_responses[batch_indices, :]
batch_nn_targets = train_responses[batch_nn_indices, :]

# NOTE(review): the batch tensors above are sliced before this move, so they
# are not moved to the GPU here - confirm that this is intended.
if torch.cuda.is_available():
    train_features, train_responses = train_features.cuda(), train_responses.cuda()
    test_features, test_responses = test_features.cuda(), test_responses.cuda()

We now construct a custom MuyGPs deep kernel class inheriting from PyTorch's `nn.Module`.

In [None]:
# Progress message for the notebook output.
print('Building and Training Neural Network')

#Import MuyGPs_layer and MultivariateMuyGPs_layer objects for composing PyTorch models.
from MuyGPyS.torch.muygps_layer import MuyGPs_layer, MultivariateMuyGPs_layer


#Build a custom object inheriting from the neural network Module in PyTorch
#Our model is composed of an embedding (neural network) and a MuyGPs Gaussian process layer.
#The embedding includes linear layers and ReLU activation functions.

class SVDKMuyGPs(nn.Module):
    """Deep kernel model: a fully-connected embedding followed by a MuyGPs layer.

    The embedding maps 784-dimensional inputs to 400 and then 100 dimensions,
    with a ReLU after each linear layer. Its output is passed to a
    ``MuyGPs_layer`` built from the kernel hyperparameters and batch tensors
    given to the constructor.

    Args:
        num_models: Stored on the instance; not forwarded to ``MuyGPs_layer``
            (the commented-out multivariate layer took it as first argument).
        kernel_eps: Forwarded to ``MuyGPs_layer`` and stored as ``self.eps``.
        nu: Forwarded to ``MuyGPs_layer`` and stored.
        length_scale: Forwarded to ``MuyGPs_layer`` and stored.
        batch_indices: Forwarded to ``MuyGPs_layer`` and stored.
        batch_nn_indices: Forwarded to ``MuyGPs_layer`` and stored.
        batch_targets: Forwarded to ``MuyGPs_layer`` and stored.
        batch_nn_targets: Forwarded to ``MuyGPs_layer`` and stored.
    """

    def __init__(self,num_models,kernel_eps,nu,length_scale,batch_indices,batch_nn_indices,batch_targets,batch_nn_targets):
        super().__init__()
        # ReLU's first positional argument is ``inplace``; it is spelled out here.
        embedding_layers = (
            nn.Linear(28**2, 400),
            nn.ReLU(inplace=True),
            nn.Linear(400, 100),
            nn.ReLU(inplace=True),
        )
        self.embedding = nn.Sequential(*embedding_layers)

        # Keep the constructor arguments available as plain attributes.
        self.eps = kernel_eps
        self.nu = nu
        self.length_scale = length_scale
        self.batch_indices = batch_indices
        self.num_models = num_models
        self.batch_nn_indices = batch_nn_indices
        self.batch_targets = batch_targets
        self.batch_nn_targets = batch_nn_targets

        # A MultivariateMuyGPs_layer is the alternative here; it additionally
        # takes num_models as its first argument.
        self.GP_layer = MuyGPs_layer(
            kernel_eps,
            nu,
            length_scale,
            batch_indices,
            batch_nn_indices,
            batch_targets,
            batch_nn_targets,
        )

    def forward(self,x):
        """Embed ``x`` and return the GP layer's (predictions, variances, sigma_sq)."""
        embedded = self.embedding(x)
        predictions, variances, sigma_sq = self.GP_layer(embedded)
        return predictions, variances, sigma_sq

# Train Deep Kernel MuyGPs model using low-level API implementation

In [None]:
# Construct the deep kernel model via the SVDKMuyGPs object.
# When GP_layer is a MultivariateMuyGPs_layer instead, the hyperparameters are
# passed per class, e.g. kernel_eps=1e-3*torch.ones(num_classes),
# nu=1/2*torch.ones(num_classes), length_scale=2.0*torch.ones(num_classes).
model = SVDKMuyGPs(
    num_models=num_classes,
    kernel_eps=1e-6,
    nu=0.5,
    length_scale=1.0,
    batch_indices=batch_indices,
    batch_nn_indices=batch_nn_indices,
    batch_targets=batch_targets,
    batch_nn_targets=batch_nn_targets,
)
if torch.cuda.is_available():
    model = model.cuda()

training_iterations = 10

# Use the Adam optimizer with an initial learning rate of 1e-3 and an
# exponential decay rate of 0.97.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = ExponentialLR(optimizer, gamma=0.97)

# Loss objects available to the training loop.
mse_loss = nn.MSELoss()
l1_loss = nn.L1Loss()
bce_loss = nn.BCELoss()
ce_loss = nn.CrossEntropyLoss()

#Build standard PyTorch training loop function

def _refresh_neighbors():
    """Rebuild the neighbor lookup in the current embedding space.

    Embeds the training and batch features with ``model.embedding``, builds a
    new ``NN_Wrapper`` over the embedded training features, queries the batch
    neighbors and stores the new neighbor indices and targets on ``model``.

    Returns:
        The newly built ``NN_Wrapper``.
    """
    device = train_features.device
    # The embeddings are only used for neighbor search, so no autograd graph is
    # needed. ``.cpu()`` is required before ``.numpy()`` when tensors are on CUDA.
    with torch.no_grad():
        embedded_train = model.embedding(train_features).detach().cpu().numpy()
        embedded_batch = model.embedding(batch_features.to(device)).detach().cpu().numpy()
    lookup = NN_Wrapper(embedded_train, nn_count, nn_method="hnsw")
    nn_indices, _ = lookup._get_nns(embedded_batch, nn_count=nn_count)
    nn_indices = torch.from_numpy(nn_indices.astype(np.int64))
    # NOTE(review): GP_layer received the initial batch neighbor tensors in
    # SVDKMuyGPs.__init__; confirm it reads these updated model attributes.
    model.batch_nn_indices = nn_indices
    model.batch_nn_targets = train_responses[nn_indices, :]
    return lookup


def train(nbrs_lookup):
    """Run the notebook-level training loop on the global ``model``.

    Uses the notebook globals ``model``, ``optimizer``, ``scheduler``,
    ``ce_loss``, ``training_iterations``, ``train_features`` and
    ``batch_targets``. After each optimizer step the neighbor lookup is rebuilt
    in the updated embedding space.

    Args:
        nbrs_lookup: Existing neighbor lookup; it is replaced by the lookups
            rebuilt during training.

    Returns:
        ``(nbrs_lookup, model)``: the final neighbor lookup and the trained model.
    """
    # Neighbors are refreshed every ``update_frequency`` iterations.
    update_frequency = 1
    for i in range(training_iterations):
        model.train()
        optimizer.zero_grad()
        predictions, variances, sigma_sq = model(train_features)

        # Use cross-entropy loss since this is a classification problem. The
        # targets are moved to the predictions' device (a no-op on the CPU).
        loss = ce_loss(predictions, batch_targets.to(predictions.device))
        loss.backward()
        optimizer.step()
        scheduler.step()
        if i % update_frequency == 0:
            print('Iter %d/%d - Loss: %.10f' % (i + 1, training_iterations, loss.item()))
            model.eval()
            nbrs_lookup = _refresh_neighbors()
        torch.cuda.empty_cache()
    # Final refresh so the returned lookup matches the final embedding.
    nbrs_lookup = _refresh_neighbors()
    return nbrs_lookup, model

nbrs_lookup, model_trained = train(nbrs_lookup)
model_trained.eval()

Predict test responses using trained model.

In [None]:
from MuyGPyS.examples.muygps_torch import predict_model
predictions, variances, sigma_sq = predict_model(
    model=model_trained,
    test_features=test_features,
    train_features=train_features,
    train_responses=train_responses,
    nbrs_lookup=nbrs_lookup,
    nn_count=nn_count,
)

# Accuracy = fraction of test samples whose arg-max predicted class equals the
# arg-max of the one-hot label. The divisor is the actual number of test
# samples, and the result is moved to the CPU before conversion to NumPy.
predicted_classes = torch.argmax(predictions, dim=1)
true_classes = torch.argmax(test_responses, dim=1).to(predicted_classes.device)
accuracy = torch.sum(predicted_classes == true_classes) / test_responses.shape[0]

print("MNIST Prediction Accuracy Using Low-Level Torch Implementation:")
print(accuracy.cpu().numpy())

# Train Deep Kernel MuyGPs model using high-level API function

In [None]:
#Import high-level API function train_deep_kernel_muygps
from MuyGPyS.examples.muygps_torch import train_deep_kernel_muygps

# Build a fresh model with the same hyperparameters as the low-level example.
model = SVDKMuyGPs(
    num_models=num_classes,
    kernel_eps=1e-6,
    nu=0.5,
    length_scale=1.0,
    batch_indices=batch_indices,
    batch_nn_indices=batch_nn_indices,
    batch_targets=batch_targets,
    batch_nn_targets=batch_nn_targets,
)
# Move the model to the GPU when available, as the low-level example does.
if torch.cuda.is_available():
    model = model.cuda()

# Train with the "ce" loss option, using the same iteration count, learning
# rate and decay as the low-level loop.
# NOTE(review): the earlier comment mentioned a leave-one-out likelihood loss,
# but the option passed here is "ce" - confirm which one is intended.
nbrs_lookup, model_trained = train_deep_kernel_muygps(
    model=model,
    train_features=train_features,
    train_responses=train_responses,
    batch_indices=batch_indices,
    nbrs_lookup=nbrs_lookup,
    training_iterations=10,
    optimizer_method=torch.optim.Adam,
    learning_rate=1e-3,
    scheduler_decay=0.97,
    loss_function="ce",
    update_frequency=1,
    verbose=True,
)

model_trained.eval()

Predict test responses using trained model.

In [None]:
from MuyGPyS.examples.muygps_torch import predict_model
predictions, variances, sigma_sq = predict_model(
    model=model_trained,
    test_features=test_features,
    train_features=train_features,
    train_responses=train_responses,
    nbrs_lookup=nbrs_lookup,
    nn_count=nn_count,
)

# Accuracy = fraction of test samples whose arg-max predicted class equals the
# arg-max of the one-hot label. The divisor is the actual number of test
# samples, and the result is moved to the CPU before conversion to NumPy.
predicted_classes = torch.argmax(predictions, dim=1)
true_classes = torch.argmax(test_responses, dim=1).to(predicted_classes.device)
accuracy = torch.sum(predicted_classes == true_classes) / test_responses.shape[0]

print("MNIST Prediction Accuracy Using High-Level Training API:")
print(accuracy.cpu().numpy())

In [None]:
# `signature` comes from the standard-library inspect module; it is imported
# here because no earlier cell brings it into scope.
from inspect import signature
print(signature(ce_loss))