In [1]:
from core import *
import torch
import torchvision
import torch.utils.data as Data
import numpy as np
from torch.autograd import Variable
import random
import math
import itertools

from bandits import *

Use GPU: True


In [0]:
# True when torch can see a CUDA device.
use_cuda = torch.cuda.is_available()

# Fixed settings that are not part of the hyperparameter search.
N_Steps = 4000  # in actual training, we use 600

# UCI Mushroom Dataset: 8124 datapoints, each described by 22 categorical
# features plus one edible/poisonous label.
mushroom_dataset = pd.read_csv('mushrooms.csv')

# Targets: swap the 'p' / 'e' markers in the 'class' column for the
# project-wide label constants.
label_series = mushroom_dataset['class'].replace(
    ['p', 'e'], [POISONOUS_CONSTANT, EDIBLE_CONSTANT])

# Inputs: every column except 'class', one-hot encoded. Missing values
# (marked with ?) simply become one more category for now.
feature_frame = pd.get_dummies(mushroom_dataset.drop(columns=['class']))

train_features = torch.tensor(feature_frame.values, dtype=torch.float)
train_labels = torch.tensor(label_series.values)

# Serve one randomly drawn (features, label) pair per iteration.
trainset = torch.utils.data.TensorDataset(train_features, train_labels)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=1,
    shuffle=True,
    num_workers=1,
)

# TODO: the prior hyperparameters cannot yet be set from outside the network.

# Cost estimate for the full grid over the first five lists below:
#   4 * 2 * 3 * 3 * 3 = 216 configurations, times N_epochs = 100
#   -> 21600 epochs = 648000 seconds = 180 hours.
N_Samples_Testing_candidates = [1, 2, 5, 10]
LearningRate_candidates = [1e-4, 1e-3]
mixture_PI_candidates = [0.25, 0.5, 0.75]
# sigma1 takes the values exp(-0), exp(-1), exp(-2).
mixture_sigma1_candidates = [math.exp(-exponent) for exponent in (0, 1, 2)]
# sigma2 takes the values exp(-6), exp(-7), exp(-8).
mixture_sigma2_candidates = [math.exp(-exponent) for exponent in (6, 7, 8)]
Epsilon_candidates = [0.1, 0.01, 0.001]

# Starts empty; nothing in this cell fills it.
hyper_val_error_dict = dict()

if __name__ == '__main__':
    # Sweep every (learning rate, epsilon) pair: build a fresh epsilon-greedy
    # agent and environment, play N_Steps rounds, then plot the per-round
    # loss and the cumulative regret for that configuration.

    # Only two of the candidate lists are swept for now; more may be added
    # to this product later.
    hyper_list = itertools.product(LearningRate_candidates, Epsilon_candidates)

    for LearningRate, epsilon in hyper_list:

        print("*"*50)

        print("Learning rate: {}".format(LearningRate))
        print("Epsilon: {}".format(epsilon))

        # Initialize network
        optimizer_constructor = torch.optim.Adam
        # NOTE(review): the swept `epsilon` is handed to the optimizer as its
        # 'eps' setting, while the agent's own `epsilon` argument below stays
        # fixed at .05 -- confirm this is the intended meaning of the sweep.
        optimizer_params = {'lr': LearningRate, 'eps': epsilon}

        eg_agent = EGreedyNNAgent(epsilon=.05,
                                  optimizer_constructor=optimizer_constructor,
                                  optim_params=optimizer_params)
        eg_env = Environment(eg_agent, trainloader)

        # Per-round history for this configuration.
        loss = []
        regret = []

        for i_step in range(N_Steps):

            # Training: play one round, then record the value it returned
            # and the environment's running regret.
            loss.append(eg_env.play_round())
            regret.append(eg_env.cumulative_regret)

            if (i_step + 1) % 100 == 0:
                # Report the number of rounds completed (i_step is 0-based),
                # so the message matches the regret value it prints.
                print('Step {}. Regret {}'.format(i_step + 1,
                                                  eg_env.cumulative_regret))

        # Tag both figures with the configuration so plots from different
        # sweep iterations can be told apart.
        run_title = 'lr={}, eps={}'.format(LearningRate, epsilon)

        plt.plot(np.array(loss))
        plt.title(run_title)
        plt.xlabel('Step')
        plt.ylabel('Loss')
        plt.show()

        plt.plot(np.array(regret))
        plt.title(run_title)
        plt.xlabel('Step')
        plt.ylabel('Cumulative Regret')
        plt.show()

**************************************************
Learning rate: 0.0001
Epsilon: 0.1
