In [329]:
#The 3 following cells aim at simuling the behavior of NN models with a simple model for MNIST

EPOCH_SIZE = 512
TEST_SIZE = 256

#This is a function that can be used by several NN model
def train(model, optimizer ,func ,train_loader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.train()
    #for (data, target) in train_loader:
    for batch_idx, (data, target) in enumerate(train_loader):
        # We set this just for the example to run quickly.
        if batch_idx * len(data) > EPOCH_SIZE:
           # print("hehe")
            return
        # We set this just for the example to run quickly.
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = func(output, target)
        loss.backward()
        optimizer.step()
        
#This is a function that can be used by several NN model (it only does accuracy ATM)
def test(model, func, data_loader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(data_loader):
            # We set this just for the example to run quickly.
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)

            total += target.size(0)
            correct += (predicted == target).sum().item()


                
    return correct / total

In [233]:
# A random mnist from the internet to get a correct model to reason about

class train_mnist():
    def __init__(self,config):
        
        self.config = {
       , "sigmoid_func": 1
      ,  "hidden_dim":43
      ,  "n_layer":2    }
        for key, value in config.items():
            self.config[key] = value
        config = self.config
        
        self.i = 0
        
        mnist_transforms = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize((0.1307, ), (0.3081, ))])

        self.train_loader = DataLoader(
            datasets.MNIST("~/data", train=True, download=True, transform=mnist_transforms),
            batch_size=64,
            shuffle=True)
        self.test_loader = DataLoader(
            datasets.MNIST("~/data", train=False, transform=mnist_transforms),
            batch_size=64,
            shuffle=True)

        sigmoid_func_uniq = nn.Tanh()


        self.model = LeNet(192,int(round(config.get("hidden_dim",64))),10,
                    int( round(config.get("n_layer",1))),
                    config.get("droupout_prob",0.1) ,sigmoid_func_uniq)
        
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=config.get("lr", 0.01), 
                                     betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
                                    eps=config.get("eps", 1e-08), 
                                     weight_decay=config.get("weight_decay", 0), 
                                     amsgrad=True)

    
    def adapt(self, config):
        for key, value in config.items():
            self.config[key] = value
        config = self.config
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=config.get("lr", 0.01), 
                                     betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
                                    eps=config.get("eps", 1e-08), 
                                     weight_decay=config.get("weight_decay", 0), 
                                     amsgrad=True)
        return copy.deepcopy(self)
    
    
# All NN models should have a function train1 and test1 that calls the common train and test defined above.
# train1 and test1 is then used in the scheduler
    def train1(self):
        print("iteration: " + str(self.i) )
        self.i+=1
        train(self.model, self.optimizer, F.nll_loss, self.train_loader)

    def test1(self):
        return test(self.model, F.nll_loss, self.test_loader)

# This should be a hyperspace instead of constants.

# __INCEPTION_SCORE_begin__
class LeNet(nn.Module):
    """
    LeNet for MNist classification, used for inception_score
    """

    def __init__(self,input_dim, hidden_dim, output_dim, n_layers,
                 drop_prob, sigmoid ):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)



# Convolution Neural network using Pytorch 
class ConvNet(nn.Module):
    def __init__(self,input_dim, hidden_dim, output_dim, n_layers,
                 drop_prob, sigmoid ):
        super(ConvNet, self).__init__()
        
        self.sigmoid = sigmoid
        self.i_d = input_dim
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.conv1 = nn.Conv2d(1, 3, kernel_size=3)

        self.fc = nn.Linear(input_dim, output_dim)
        self.first= nn.Linear(input_dim, hidden_dim)
        self.hidden = [nn.Linear(hidden_dim,hidden_dim) for _ in range(self.n_layers)]
        self.drop_out = nn.Dropout(drop_prob)

        self.last = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.sigmoid(F.max_pool2d(self.conv1(x), 3))
        x = x.view(-1, self.i_d)
        x=self.first(x)
        x=self.drop_out(x)
        for i in range(self.n_layers):
            x=self.hidden[i](x)
            x=self.drop_out(x)
        x = self.last(x)
        return F.log_softmax(x, dim=1)


In [191]:
config_linear_reg = {
    "lr": 0.031,
    "l2_regularization": .01,
    "minibatch_size": 512,
    "hidden_dims": [150, 100, 75],
    "droupout_prob": 0.28,
    "weight_decay": .017,
    "batch_size": 64
}

class LinearReg(nn.Module):
    def __init__(self, config):
        super(LinearReg, self).__init__()

        hidden_dims = config.get("hidden_dims", [150, 100, 75])
        self.linears = nn.ModuleList([nn.Linear(28*28, hidden_dims[0], bias=True), nn.ReLU()])
        for i in range(1, len(hidden_dims) ):
            self.linears.append(nn.Linear(hidden_dims[i-1], hidden_dims[i], bias=True))
            self.linears.append(nn.ReLU())
        self.model = Net(self.linears) 

        mnist_transforms = transforms.Compose([transforms.ToTensor(), \
                                               transforms.Normalize((0.1307,), (0.3081,))])

        self.train_loader = DataLoader(datasets.MNIST("~/data", train=True, download=True, transform=mnist_transforms),\
                                       batch_size=config.get("batch_size", 64), shuffle=True)

        self.test_loader = DataLoader(datasets.MNIST("~/data", train=False, transform=mnist_transforms),\
                                      batch_size=config.get("batch_size", 64), shuffle=True)
        
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=config.get("lr", 0.01), 
                                     betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
                                    eps=config.get("eps", 1e-08), 
                                     weight_decay=config.get("weight_decay", 0), 
                                     amsgrad=True)
    def adapt(self, config):
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=config.get("lr", 0.01), \
                                          betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),\
                                          eps=config.get("eps", 1e-08), weight_decay=config.get("weight_decay", 0), \
                                          amsgrad=True)
    def forward(self, x):
        for i, layer in enumerate(self.linears):
            x = l(x)
        return x     
    
    def train1(self):
        train(self.model, self.optimizer, F.nll_loss, self.train_loader)

    def test1(self):
        return test(self.model, F.nll_loss, self.test_loader)
    
    
class Net(nn.Module):
    def __init__(self,linears):
        super(Net, self).__init__()
        self.linears = linears

    def forward(self, x):
        for i, layer in enumerate(self.linears):
            x = layer(x)
        return x 


In [120]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch
import torch.nn.functional as F
import math
import numpy as np
import random
import math 


In [205]:
config = {
    "lr": 0.031,
    "l2_regularization": .01,
    "minibatch_size": 512,
    "hidden_dims": [150, 100, 75],
    "droupout_prob": 0.28,
    "weight_decay": .017,
    "batch_size": 64
}

a = LinearReg(config)
a.train1()
print(a.test1())


TypeError: __init__() missing 5 required positional arguments: 'hidden_dim', 'output_dim', 'n_layers', 'drop_prob', and 'sigmoid'

In [242]:
# A random mnist from the internet to get a correct model to reason about

class toy():
    def __init__(self,config):
        self.hyperparameter = config.get("lr",0)
    
    def adapt(self,config):
        self.hyperparameter = config.get("lr",0)
        return self
    
    def train1(self):
        self.hyperparameter = self.hyperparameter * 2 
    def test1(self):
        return self.hyperparameter

# This should be a hyperspace instead of constants.
config= {
     "lr": 0.031
}
a = train_mnist(config)
a.train1()
print(a.test1())

class Parent():
    def __init__(self,hyperspace,configuration, model, loss):
        
        self.hyperspace = hyperspace
        self.configuration_list = np.array(configuration) 
        self.loss_list = [np.array(loss)] 
        self.model = model
    
    def update(self,configuration,loss, model):
        self.configuration_list = np.append(self.configuration_list,configuration) 
        self.loss_list = np.append(self.loss_list,loss)
        self.model = model
    
    def get_hyperspace(self):
        return self.hyperspace

    def get_model(self):
        return self.model
    
    def get_loss(self):
        return self.loss_list

iteration: 0
0.1714


In [214]:
a.train1()
print(a.test1())


0.3674


In [218]:
a.train1()
print(a.test1())


0.6875


In [350]:
import copy


class Scheduler():
    def __init__(self, model, num_iteration, num_config,
                 oracle):
        #Oracle manages the Bayesian optimization
        self.oracle = oracle
        self.iteration = num_iteration
        self.num_config = num_config 
        self.sqrt_config =  math.floor(math.sqrt(num_config)) #math.ceil(num_config/10)
        
        #self.h is for the m "h" used at every loop, h is a configuration from the search space
        self.h = np.repeat({},num_config) 
        
        #self.out is for storing the result of the algorithm, ie all "h" from all iterations
        #from all sqrt(m) best models per iterations.
        self.out = np.zeros((num_iteration,self.sqrt_config))
        
        #self.hyperspaces is for storing the sqrt(m) hyperspaces used by the algorithm
        self.hyperspaces = np.zeros(self.sqrt_config)
        

        
        #self.model is the m model that will explore new hyperspace points at every iterations
        self.models = np.repeat(model,num_config)
        
        #self.parents is the sqrt(m) best model from last iteration
        self.parents = np.repeat(model,self.sqrt_config)

        #self.losses remembers the performances of all m models at one iteration to decide which ones are the sqrt(m) best from self.models.
        self.losses = np.zeros(num_config)
        
        self.k = [0] # c'est pour avoir un pointeur sur k, c'est pas paralélisable pour le moment du coup.
    
    def initialisation(self):
        num_config = self.num_config
        extended_Hyperspace = Trials()
        #for k in range(num_config):
        fmin_objective = partial(test_function, models=self.models,h=self.h,losses=self.losses,parent_model=self.models, k_f = self.k,iteration = 0)
        self.oracle.compute_Batch(extended_Hyperspace ,num_config , 0 ,fmin_objective)
            
        indexes = np.argsort(self.losses)     
        self.out[0] = (self.losses[indexes])[0:self.sqrt_config]
        self.hyperspaces = np.repeat(extended_Hyperspace,self.sqrt_config)    
        self.parents = np.array([Parent(copy.deepcopy(extended_Hyperspace),(self.h[indexes])[i], (self.models[indexes])[i],(self.losses[indexes])[i])  
                                 for i in range(self.sqrt_config) ])         
        
    def loop(self):
        sqrt_config = self.sqrt_config
        iteration = self.iteration
        for i in range(1,iteration):
            for j in range(sqrt_config):
                k = j*sqrt_config
                parent = self.parents[j]
                extended_Hyperspace = parent.get_hyperspace()
                print("\n loss of parent " + str(parent.get_loss()[-1]) )
                print("\n loss " + str(parent.get_loss()))

                
                for l in range(int(self.num_config/sqrt_config)):
                    fmin_objective = partial(test_function, models=self.models,h=self.h,losses=self.losses,parent_model=parent.get_model(), k_f = k,iteration = i)
                    self.oracle.compute_Once(extended_Hyperspace ,i ,fmin_objective)
                    k = k+1
                
            indexes = np.argsort(self.losses)     
            #parent_idx = np.floor(indexes/sqrt_config)[:sqrt_config]
            parent_idx = indexes[:sqrt_config]
            print(self.losses)
            print(indexes)
            #print(parent_idx)
 
            temp = np.empty(self.sqrt_config, dtype=Parent)
            for a,x in enumerate(parent_idx):
                x = int(x)
                temp[a] = copy.deepcopy(self.parents[math.floor(x/self.num_config*sqrt_config)])
                temp[a].update(self.h[x], self.losses[x], self.models[x])
            self.parents = temp
  


    
    
CONFIGURATION = 20
ITERATIONS = 10
config= {
     "lr": hp.uniform("lr",0,.1)
    , "droupout_prob": hp.uniform("droupout_prob",0,1),
             "weight_decay": hp.uniform("weight_decay",0,1)


}
model = train_mnist
oracle = Oracle(config)
scheduler = Scheduler(
    model,
    ITERATIONS,
    CONFIGURATION,
    oracle) 

scheduler.initialisation()     
scheduler.loop()     


                                                      
 lr, 0.07533558198243544
dropout_prob, 0.693140938181428                       
weight_decay, 0.23867346786112376                     
iteration: 0                                          
accuracy, 0.0932                                      
                                                                     
 lr, 0.0845619699486023
dropout_prob, 0.746749775007297                                      
weight_decay, 0.1500726587336071                                     
iteration: 0                                                         
accuracy, 0.204                                                      
                                                                     
 lr, 0.09664422978578663
dropout_prob, 0.931574790684421                                     
weight_decay, 0.08278096295636182                                   
iteration: 0                                                        
accuracy, 0.1295        

iteration: 1                                                    
accuracy, 0.1776                                                
100%|██████████| 22/22 [00:02<00:00,  9.93trial/s, best loss: -0.669]
                                                       
 lr, 0.014072665611075305
dropout_prob, 0.5581414957908524                                
weight_decay, 0.369921037600242                                 
iteration: 1                                                    
accuracy, 0.098                                                 
100%|██████████| 23/23 [00:02<00:00, 10.55trial/s, best loss: -0.669]
                                                       
 lr, 0.026009679432969002
dropout_prob, 0.39196835366412996                               
weight_decay, 0.0769975868942736                                
iteration: 1                                                    
accuracy, 0.0892                                                
100%|██████████| 24/24 [00:02<00:00, 10.95tria

weight_decay, 0.2494164522901307                                
iteration: 2                                                    
accuracy, 0.566                                                 
100%|██████████| 26/26 [00:03<00:00,  7.97trial/s, best loss: -0.669]
                                                       
 lr, 0.06290662030590038
dropout_prob, 0.6095316985448869                                
weight_decay, 0.21708388923685676                               
iteration: 2                                                    
accuracy, 0.1536                                                
100%|██████████| 27/27 [00:03<00:00,  8.17trial/s, best loss: -0.669]
                                                       
 lr, 0.01923843644468677
dropout_prob, 0.22008344628588658                               
weight_decay, 0.33298016533347335                               
iteration: 2                                                    
accuracy, 0.1527                                

100%|██████████| 30/30 [00:02<00:00, 12.92trial/s, best loss: -0.3936]
[-0.566  -0.1536 -0.1527 -0.6166 -0.138  -0.5205 -0.19   -0.0894 -0.132
 -0.0773 -0.1028 -0.3365 -0.1135 -0.1086 -0.0892 -0.2643 -0.1135 -0.2092
 -0.0974 -0.2007]
[ 3  0  5 11 15 17 19  6  1  2  4  8 12 16 13 10 18  7 14  9]

 loss of parent -0.6166

 loss [-0.3936 -0.669  -0.6166]
                                                       
 lr, 0.006050530587450748
dropout_prob, 0.6221405348692626                                
weight_decay, 0.1064705226453958                                
iteration: 3                                                    
accuracy, 0.7815                                                
100%|██████████| 31/31 [00:03<00:00,  9.41trial/s, best loss: -0.7815]
                                                       
 lr, 0.026688497689751685
dropout_prob, 0.7063946256626648                                
weight_decay, 0.13448942677276504                               
iteration: 3         

iteration: 3                                                    
accuracy, 0.1028                                                
100%|██████████| 34/34 [00:03<00:00, 11.32trial/s, best loss: -0.4428]
                                                       
 lr, 0.06376639562858674
dropout_prob, 0.43775427675795486                               
weight_decay, 0.1212760631468595                                
iteration: 3                                                    
accuracy, 0.0981                                                
100%|██████████| 35/35 [00:02<00:00, 16.10trial/s, best loss: -0.4428]
[-0.7815 -0.1136 -0.585  -0.7738 -0.7166 -0.2051 -0.164  -0.7877 -0.1028
 -0.0762 -0.1621 -0.3881 -0.305  -0.1028 -0.4428 -0.1028 -0.0981 -0.2092
 -0.0974 -0.2007]
[ 7  0  3  4  2 14 11 12 17  5 19  6 10  1  8 13 15 16 18  9]

 loss of parent -0.7877

 loss [-0.3936 -0.669  -0.566  -0.7877]
                                                       
 lr, 0.03177763211281947
dropout_prob, 

dropout_prob, 0.5688761563020416                                
weight_decay, 0.3200887851193691                                
iteration: 4                                                    
accuracy, 0.1953                                                
100%|██████████| 38/38 [00:02<00:00, 16.85trial/s, best loss: -0.7815]
                                                       
 lr, 0.04942818754483203
dropout_prob, 0.40419092632936665                               
weight_decay, 0.7070509751556157                                
iteration: 4                                                    
accuracy, 0.1245                                                
100%|██████████| 39/39 [00:02<00:00, 16.64trial/s, best loss: -0.7815]
                                                       
 lr, 0.0910178432169928
dropout_prob, 0.7907724489767428                                
weight_decay, 0.40209520828113404                               
iteration: 4                                   

accuracy, 0.1874                                                
100%|██████████| 41/41 [00:02<00:00, 15.58trial/s, best loss: -0.7815]
                                                       
 lr, 0.030949177901648556
dropout_prob, 0.9849186411211723                                
weight_decay, 0.06719701682243173                               
iteration: 1                                                    
accuracy, 0.1061                                                
100%|██████████| 42/42 [00:02<00:00, 16.06trial/s, best loss: -0.7815]
                                                       
 lr, 0.004992783312678181
dropout_prob, 0.713620253751165                                 
weight_decay, 0.6343811130888085                                
iteration: 1                                                    
accuracy, 0.1135                                                
100%|██████████| 43/43 [00:03<00:00, 12.47trial/s, best loss: -0.7815]
                                      

weight_decay, 0.6778687064268569                                
iteration: 6                                                    
accuracy, 0.1028                                                
100%|██████████| 50/50 [00:03<00:00, 15.33trial/s, best loss: -0.7877]

 loss of parent -0.3287

 loss [-0.3936 -0.669  -0.6166 -0.7738 -0.7533 -0.3287]
                                                       
 lr, 0.00047124184789688797
dropout_prob, 0.718108892187628                                 
weight_decay, 0.15001726652388933                               
iteration: 6                                                    
accuracy, 0.3555                                                
100%|██████████| 46/46 [00:02<00:00, 18.55trial/s, best loss: -0.8304]
                                                       
 lr, 0.05128820084614362
dropout_prob, 0.49805170489572814                               
weight_decay, 0.7686751709566366                                
iteration: 6              

accuracy, 0.1135                                                
100%|██████████| 53/53 [00:02<00:00, 23.34trial/s, best loss: -0.8143]
                                                       
 lr, 8.98465076273221e-05
dropout_prob, 0.3063761396595653                                
weight_decay, 0.15716040259494374                               
iteration: 7                                                    
accuracy, 0.6182                                                
100%|██████████| 54/54 [00:03<00:00, 17.27trial/s, best loss: -0.8143]
                                                       
 lr, 0.05805490179839351
dropout_prob, 0.42531565882520006                               
weight_decay, 0.039318066547826366                              
iteration: 7                                                    
accuracy, 0.0973                                                
100%|██████████| 55/55 [00:02<00:00, 25.88trial/s, best loss: -0.8143]

 loss of parent -0.3555

 loss [-0.393

                                                       
 lr, 0.05885375591640321
dropout_prob, 0.8493384648947297                                
weight_decay, 0.5795902539679801                                
iteration: 8                                                    
accuracy, 0.097                                                 
100%|██████████| 57/57 [00:02<00:00, 19.15trial/s, best loss: -0.7877]
                                                       
 lr, 0.00010617342337718161
dropout_prob, 0.7032703045037285                                
weight_decay, 0.7546003350325764                                
iteration: 8                                                    
accuracy, 0.6536                                                
100%|██████████| 58/58 [00:02<00:00, 20.20trial/s, best loss: -0.7877]
                                                       
 lr, 0.04717152450382445
dropout_prob, 0.9999224337605909                                
weight_decay, 0.19279636919

iteration: 9                                                    
accuracy, 0.2645                                                
100%|██████████| 65/65 [00:02<00:00, 22.57trial/s, best loss: -0.7877]

 loss of parent -0.6729

 loss [-0.3936 -0.669  -0.566  -0.7877 -0.7457 -0.6454 -0.6473 -0.6511 -0.6729]
                                                       
 lr, 0.06983726568509771
dropout_prob, 0.6200715024221032                                
weight_decay, 0.26618859507604165                               
iteration: 9                                                    
accuracy, 0.1137                                                
100%|██████████| 61/61 [00:02<00:00, 20.81trial/s, best loss: -0.7877]
                                                       
 lr, 0.022137961170488947
dropout_prob, 0.840760416921249                                 
weight_decay, 0.9328729655500371                                
iteration: 9                                                    
accu

In [331]:
from hyperopt import hp, fmin, tpe, Trials

# Oracle (Paul) TODO
class Oracle():
    def __init__(self, searchspace ):
        #self.hyperspace is the original (input) searchspace
        self.searchspace = searchspace

    def compute_Once(self,trials, iteration,function): #add space
        space = copy.deepcopy(self.searchspace)
        curr_eval = getattr(trials,'_ids')
        if curr_eval == set():
            curr_eval = 0
        else:
            curr_eval = max(curr_eval) +1
        space["itération"] =  hp.quniform("itération",-.5+iteration,.5+iteration, 1) 
        fmin(function, space, algo=partial(tpe.suggest, n_startup_jobs=1), max_evals=curr_eval
+1, trials=trials)
        
        
    def compute_Batch(self,trials, nb_eval, iteration,function): #add space
        space = copy.deepcopy(self.searchspace)
        curr_eval = getattr(trials,'_ids')
        if curr_eval == set():
            curr_eval = 0
        else:
            curr_eval = max(curr_eval) +1
            
        space["itération"] =  hp.quniform("itération",-.5+iteration,.5+iteration, 1) 
        fmin(function, space, algo=partial(tpe.suggest, n_startup_jobs=1), max_evals=curr_eval
+nb_eval, trials=trials)
    


In [347]:
def test_function(x,models,h,losses, parent_model,k_f,iteration):
    if iteration == 0:
        k = k_f[0]
        models[k] = parent_model[k](x)
        k_f[0] += 1

    else:      
        k = k_f
        models[k] = parent_model.adapt(x)
    print("\n lr, " + str(x["lr"]))
    print("dropout_prob, " + str(x["droupout_prob"]))
    print("weight_decay, " + str(x["weight_decay"]))

    

    h[k] = x
    models[k].train1()
    loss = models[k].test1()
    losses[k] = -loss
    print("accuracy, " + str(loss))
    return -loss

In [345]:
#Normal Bayes Opt

def function(x):
    print(x)
    model = train_mnist(x)
    for _ in range(10):
            model.train1()
    loss = model.test1()
    return -loss

In [344]:
class BayesOpt():
    def __init__(self, searchspace ):
        self.searchspace = searchspace

    def compute_Once(self,function): 
        fmin(function, self.searchspace, algo=partial(tpe.suggest, n_startup_jobs=1), max_evals=20, trials=Trials())
    
config= {
     "lr": hp.uniform("lr",0,.11),
     "droupout_prob": hp.uniform("droupout_prob",0,1),
     "weight_decay": hp.uniform("weight_decay",0,1)
    
}
oracle = BayesOpt(config)

In [346]:
oracle.compute_Once(function)

{'droupout_prob': 0.08195461068614285, 'lr': 0.05882682078394428, 'weight_decay': 0.6724825422787089}
iteration: 0                                          
iteration: 1                                          
iteration: 2                                          
iteration: 3                                          
iteration: 4                                          
iteration: 5                                          
iteration: 6                                          
iteration: 7                                          
iteration: 8                                          
iteration: 9                                          
{'droupout_prob': 0.04943277773811725, 'lr': 0.0577914251573257, 'weight_decay': 0.7355425948950876}
iteration: 0                                                         
iteration: 1                                                         
iteration: 2                                                         
iteration: 3                         

iteration: 3                                                          
iteration: 4                                                          
iteration: 5                                                          
iteration: 6                                                          
iteration: 7                                                          
iteration: 8                                                          
iteration: 9                                                          
{'droupout_prob': 0.27935944997473994, 'lr': 0.08857529363524526, 'weight_decay': 0.47648938338642055}
iteration: 0                                                          
iteration: 1                                                          
iteration: 2                                                          
iteration: 3                                                          
iteration: 4                                                          
iteration: 5                                 