In [1]:
from core import *
import torch
import torch.utils.data as Data
import numpy as np
from torch.autograd import Variable
import random
import math
import itertools

from bandits import *

Use GPU: False


In [None]:
# Whether PyTorch can see a CUDA device (queried once, up front).
use_cuda = torch.cuda.is_available()

# Seed every random number generator this notebook imports so that
# "Restart Kernel -> Run All" reproduces the same curves. This must happen
# before the DataLoader is built: with shuffle=True and no explicit generator,
# its sampling order is derived from PyTorch's global RNG.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# hypers that do not need to be tuned
N_Steps = 6000  # steps run per hyperparameter setting; actual training uses 6000

# Load the UCI Mushroom Dataset: 8124 datapoints, each with 22 categorical
# features and one label - edible/poisonous. The features are transformed to a
# one-hot encoding.
# The missing values (marked with ?) are treated as a different class for now.

mushroom_dataset = pd.read_csv('mushrooms.csv')

# Map the single-letter class labels onto the project's numeric constants.
train_labels = mushroom_dataset['class']
train_labels = train_labels.replace(['p', 'e'],
                                    [POISONOUS_CONSTANT, EDIBLE_CONSTANT])

# Every remaining column is one-hot encoded ('?' simply becomes one more
# category, as noted above).
train_features = pd.get_dummies(mushroom_dataset.drop(['class'], axis=1))

train_features = torch.tensor(train_features.values, dtype=torch.float)
train_labels = torch.tensor(train_labels.values)

# batch_size=1 with shuffle=True: the loader yields one randomly ordered
# (features, label) pair at a time.
trainset = torch.utils.data.TensorDataset(train_features, train_labels)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=1,
                                          shuffle=True, num_workers=0)

# TODO: cannot specify the hyper for priors outside the network

# Candidate values for the hyperparameter search.
LearningRate_candidates = [1e-4, 1e-3]
Epsilon_candidates = [0.1, 0.01, 0.001]
mixture_PI_candidates = [0.25, 0.5, 0.75]
mixture_sigma1_candidates = [math.exp(-0), math.exp(-1), math.exp(-2)]
mixture_sigma2_candidates = [math.exp(-6), math.exp(-7), math.exp(-8)]

# Fixed optimizer settings (passed to Adam as lr / eps by the search loop);
# the LearningRate / Epsilon candidate lists above are not swept by it.
learning_rate = 0.0001
epsilon = 0.001

# Container for one score per hyperparameter setting.
# NOTE(review): the name suggests validation error - confirm the intended metric.
hyper_val_error_dict = {}

if __name__ == '__main__':

    # Grid search over the prior hyperparameters (pi, sigma1, sigma2) only.
    # learning_rate and epsilon stay fixed; their candidate lists could be
    # added to this product later.
    hyper_list = itertools.product(mixture_PI_candidates,
                                   mixture_sigma1_candidates,
                                   mixture_sigma2_candidates)

    for pi, sigma1, sigma2 in hyper_list:

        print("*"*50)

        print('PI: {}'.format(pi))
        print('Sigma1: {}'.format(sigma1))
        print('Sigma2: {}'.format(sigma2))

        # Initialize network: a fresh agent and environment per setting, so no
        # learned state carries over between configurations.
        optimizer_constructor = torch.optim.Adam
        optimizer_params = {'lr': learning_rate, 'eps': epsilon}
        prior_params = {'pi': pi, 'sigma1': sigma1, 'sigma2': sigma2}

        bnn_agent = BNNAgent(optimizer_constructor=optimizer_constructor,
                             optim_params=optimizer_params,
                             prior_params=prior_params)
        bnn_env = Environment(bnn_agent, trainloader)

        # Per-step traces for this configuration (rebound on every iteration).
        loss = []
        regret = []

        for i_step in range(N_Steps):

            # Training: play one round, then log the value it returned and the
            # environment's running regret.
            loss.append(bnn_env.play_round())
            regret.append(bnn_env.cumulative_regret)

            if (i_step + 1) % 100 == 0:
                print('Step {}. Regret {}'.format(i_step, bnn_env.cumulative_regret))

        # Keep one score per configuration. Without this the search discards
        # every result except the last one, because loss / regret / bnn_env
        # are overwritten on the next iteration. Reading the attribute (rather
        # than regret[-1]) also works when N_Steps is 0.
        hyper_val_error_dict[(pi, sigma1, sigma2)] = bnn_env.cumulative_regret

        # Title each figure with its configuration; otherwise the plots from
        # different settings are indistinguishable when skimming the output.
        run_label = 'pi={}, sigma1={:.3g}, sigma2={:.3g}'.format(pi, sigma1, sigma2)

        plt.plot(np.array(loss))
        plt.title(run_label)
        plt.xlabel('Step')
        plt.ylabel('Loss')
        plt.show()

        plt.plot(np.array(regret))
        plt.title(run_label)
        plt.xlabel('Step')
        plt.ylabel('Cumulative Regret')
        plt.show()

**************************************************
PI: 0.25
Sigma1: 1.0
Sigma2: 0.0024787521766663585
Step 99. Regret 635.0
Step 199. Regret 1385.0
Step 299. Regret 1900.0
Step 399. Regret 2365.0
Step 499. Regret 2855.0
Step 599. Regret 3470.0
Step 699. Regret 4150.0
Step 799. Regret 4585.0
Step 899. Regret 5165.0
Step 999. Regret 5825.0
Step 1099. Regret 6325.0
Step 1199. Regret 7065.0
