In [1]:
# IN THIS MODULE: IMPORTS, CNN, TRAIN, TEST, MNIS_FUNCTION, SPACE

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from hyperopt import hp
from ray.tune.suggest.hyperopt import HyperOptSearch
from ray.tune.suggest.skopt import SkOptSearch
from ray import tune
from ray.tune.suggest.bayesopt import BayesOptSearch
import time
import ray
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest.ax import AxSearch
import argparse
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest.nevergrad import NevergradSearch
import nevergrad as ng
import json
import os
from ray.tune import Trainable
from ray.tune.schedulers.hb_bohb import HyperBandForBOHB
from ray.tune.suggest.bohb import TuneBOHB
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.schedulers import AsyncHyperBandScheduler
#from ray.tune.suggest.dragonfly import DragonflySearch
from ray.tune.suggest.zoopt import ZOOptSearch
from ray.tune.schedulers import AsyncHyperBandScheduler
from zoopt import ValueType
import torch
import adabelief_pytorch
global_checkpoint_period=np.inf

In [2]:

#FNN : https://www.deeplearningwizard.com/deep_learning/practical_pytorch/pytorch_feedforward_neuralnetwork/


class Net42(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net42, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.predict = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        x = sigmoid_func_uniq(self.hidden(x))
        x = self.predict(x)
        return (x)

    def squash(self, input_tensor):
        squared_norm = (input_tensor ** 2).sum(-1, keepdim=True)
        denom = ((1. + squared_norm) * torch.sqrt(squared_norm))
        if torch.isinf(denom).sum().item()>0:
              output_tensor = input_tensor / torch.sqrt(squared_norm)
        else:
              output_tensor = squared_norm * input_tensor / ((1. + squared_norm) * torch.sqrt(squared_norm))
        return output_tensor



#https://blog.floydhub.com/gru-with-pytorch/
class GRUNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, drop_prob=0.2):
        super(GRUNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        self.gru = nn.GRU(input_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()
        
    def forward(self, x, h):
        out, h = self.gru(x, h)
        out = self.fc(self.relu(out[:,-1]))
        return out, h
    
    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device)
        return hidden
    
#https://blog.floydhub.com/gru-with-pytorch/
class LSTMNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, drop_prob=0.2):
        super(LSTMNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()
        
    def forward(self, x, h):
        out, h = self.lstm(x, h)
        out = self.fc(self.relu(out[:,-1]))
        return out, h
    
    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device))
        return hidden

class LSTMTagger(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

    
#https://www.deeplearningwizard.com/deep_learning/practical_pytorch/pytorch_recurrent_neuralnetwork/    
class RNNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()
        # Hidden dimensions
        self.hidden_dim = hidden_dim
        # Number of hidden layers
        self.layer_dim = layer_dim
        # Building your RNN
        # batch_first=True causes input/output tensors to be of shape
        # (batch_dim, seq_dim, input_dim)
        # batch_dim = number of samples per batch
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='relu')
        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Initialize hidden state with zeros
        # (layer_dim, batch_size, hidden_dim)
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_()
        # We need to detach the hidden state to prevent exploding/vanishing gradients
        # This is part of truncated backpropagation through time (BPTT)
        out, hn = self.rnn(x, h0.detach())
        # Index hidden state of last time step
        # out.size() --> 100, 28, 10
        # out[:, -1, :] --> 100, 10 --> just want last time step hidden states! 
        out = self.fc(out[:, -1, :]) 
        # out.size() --> 100, 10
        return out

    
    
class ConvNet(nn.Module):
    def __init__(self,input_dim, hidden_dim, output_dim, n_layers,
                 drop_prob, sigmoid ):
        super(ConvNet, self).__init__()
        
        self.sigmoid = sigmoid
        self.i_d = input_dim
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.conv1 = nn.Conv2d(1, 3, kernel_size=3)

        self.fc = nn.Linear(input_dim, output_dim)
        self.first= nn.Linear(input_dim, hidden_dim)
        self.hidden = nn.Linear(hidden_dim,hidden_dim)
        self.drop_out = nn.Dropout(drop_prob)

        self.last = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.sigmoid(F.max_pool2d(self.conv1(x), 3))
        x = x.view(-1, self.i_d)
        x=self.first(x)
        x=self.drop_out(x)
        for _ in range(self.n_layers):
            x=self.hidden(x)
            x=self.drop_out(x)
        x = self.last(x)
        return F.log_softmax(x, dim=1)


    
    
class LogReg(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LogReg, self).__init__()
        self.input_d = input_dim
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        x = x.view(-1, self.input_d)
        out = self.linear(x)
        return out

    
    
class NeurNet(nn.Module):
    def __init__(self,input_dim, hidden_dim, output_dim, n_layers,
                 drop_prob, sigmoid ):
        super(NeurNet, self).__init__()
        
        self.sigmoid = sigmoid
        self.i_d = input_dim
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        self.first= nn.Linear(input_dim, hidden_dim)
        self.hidden = nn.Linear(hidden_dim,hidden_dim)
        self.drop_out = nn.Dropout(drop_prob)

        self.last = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = x.view(-1, self.i_d)
        x=self.first(x)
        x=self.drop_out(x)
        for _ in range(self.n_layers):
            x=self.hidden(x)
            x=self.drop_out(x)
        x = self.last(x)
        return x
 
    

class Net(nn.Module):
    """
    LeNet for MNist classification, used for inception_score
    """

    def __init__(self,sigmoid = F.log_softmax):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        #return F.log_softmax(x, dim=1)
        return sigmoid(x)
    
class NetG(torch.nn.Module):
    def __init__(self, cols, size_hidden, n_output):
        super(NetG, self).__init__()
        self.hidden = torch.nn.Linear(cols, size_hidden)   # hidden layer
        self.predict = torch.nn.Linear(size_hidden, n_output)   # output layer

    def forward(self, x):
        x = F.relu(self.hidden(x))      # activation function for hidden layer
        x = self.predict(x)             # linear output
        return x


In [3]:

    
# Change these values if you want the training to run quicker or slower.
EPOCH_SIZE = 512*32*2
TEST_SIZE = 256*32

def train(model, optimizer ,func ,train_loader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.train()
    #for (data, target) in train_loader:
    for batch_idx, (data, target) in enumerate(train_loader):
        # We set this just for the example to run quickly.
        if batch_idx * len(data) > EPOCH_SIZE:
           # print("hehe")
            return
        # We set this just for the example to run quickly.
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = func(output, target)
        loss.backward()
        optimizer.step()

        
def test(model, func, data_loader, clas):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(data_loader):
            if batch_idx * len(data) > TEST_SIZE:
                break
            if(clas == 1): #classification
                # We set this just for the example to run quickly.
                data, target = data.to(device), target.to(device)
                outputs = model(data)
                _, predicted = torch.max(outputs.data, 1)
                total += target.size(0)
                correct += (predicted == target).sum().item()
                
            if(clas == 2): #regression
                data, target = data.to(device), target.to(device)
                from torch.autograd import Variable

                X = Variable(torch.FloatTensor(data)) 
                result = model(X)
                pred=result.data[:,0].numpy()
                out = target.data[:,0].numpy()
                #print( pred)
                #print(out)
                #pred.fillna(X_train.mean(), inplace=True)
                total += target.size(0)
                #correct += r2_score(pred,out)
                correct+=func(result,target).numpy()   
            if(clas == 3): #RNN
                val_h = model.init_hidden(50) #batch size
                val_losses = []
                model.eval()
                #val_h = tuple([each.data for each in val_h])

                #if(train_on_gpu): FALSE
                #    data, target = data.cuda(), target.cuda()

                output, val_h = model(data, val_h)
                val_loss = func(output.squeeze(), target.float())

                val_losses.append(val_loss.item())

               # model.train()
                correct = np.mean(val_losses)
                total = 1;
                
    return correct / total

def train_mnist(config):
    # Data Setup
    mnist_transforms = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    train_loader = DataLoader(
        datasets.MNIST("~/data", train=True, download=True, transform=mnist_transforms),
        batch_size=64,
        shuffle=True)
    test_loader = DataLoader(
        datasets.MNIST("~/data", train=False, transform=mnist_transforms),
        batch_size=64,
        shuffle=True)
    
    sigmoid_func_uniq = get_sigmoid_func(config.get("sigmoid_func", 0))

    if(config.get("model", 0.4)<0.5):
        model = ConvNet(192,int(round(config.get("hidden_dim",64))),10,
                    int( round(config.get("n_layer",1))),
                    config.get("droupout_prob",0.1) ,sigmoid_func_uniq)
    else:
        model = NeurNet(784,int(round(config.get("hidden_dim",64))),10,
                    int( round(config.get("n_layer",1))),
                    config.get("droupout_prob",0.1) ,sigmoid_func_uniq)

   # optimizer = optim.SGD(    model.parameters(), lr=config["lr"], momentum=config["momentum"])
    if(config.get("adam",1) >= 0.5):
        optimizer = torch.optim.Adam(model.parameters(), lr=config.get("lr", 0.01), 
                             #    betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
 #                                           eps=config.get("eps", 1e-08), 
                                     weight_decay=config.get("weight_decay", 0), 
                                         amsgrad=True)
    else: 
        optimizer = adabelief_pytorch.AdaBelief(model.parameters(), lr=config.get("lr", 0.01), 
                             #    betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
                  #                       eps=config.get("eps", 1e-08),
                                                weight_decay=config.get("weight_decay", 0))
    
    for i in range(config.get("steps",10)):
        train(model, optimizer,F.nll_loss ,train_loader)
        acc = test(model, F.nll_loss, test_loader,1)

        # Send the current training result back to Tune
        tune.report(mean_accuracy=acc)

        if i % 5 == 0:
            # This saves the model to the trial directory
            torch.save(model.state_dict(), "./model.pth")


In [4]:

import torch
import torchvision                                                       
import torchvision.transforms as transforms
#https://teaching.pages.centralesupelec.fr/deeplearning-lectures-build/00-pytorch-fashionMnist.html
import os.path        

def train_fashion_mnist(config):
    from keras.datasets import fashion_mnist
    dataset_dir = os.path.join(os.path.expanduser("~"), 'Datasets', 'FashionMNIST')
    valid_ratio = 0.2  # Going to use 80%/20% split for train/valid

    # Load the dataset for the training/validation sets
    train_valid_dataset = torchvision.datasets.FashionMNIST(root=dataset_dir,
                                           train=True,
                                           transform= None, #transforms.ToTensor(),
                                           download=True)

    # Split it into training and validation sets
    nb_train = int((1.0 - valid_ratio) * len(train_valid_dataset))
    nb_valid =  int(valid_ratio * len(train_valid_dataset))
    train_dataset, valid_dataset = torch.utils.data.dataset.random_split(train_valid_dataset, [nb_train, nb_valid])


    # Load the test set
    test_dataset = torchvision.datasets.FashionMNIST(root=dataset_dir,
                                                     transform= None, #transforms.ToTensor(),
                                                    train=False)
    
    class DatasetTransformer(torch.utils.data.Dataset):

        def __init__(self, base_dataset, transform):
            self.base_dataset = base_dataset
            self.transform = transform

        def __getitem__(self, index):
            img, target = self.base_dataset[index]
            return self.transform(img), target

        def __len__(self):
            return len(self.base_dataset)


    train_dataset = DatasetTransformer(train_dataset, transforms.ToTensor())
    valid_dataset = DatasetTransformer(valid_dataset, transforms.ToTensor())
    test_dataset  = DatasetTransformer(test_dataset , transforms.ToTensor())
    ############################################################################################ Dataloaders
    num_threads = 4     # Loading the dataset is using 4 CPU threads
    batch_size  = 512*8   # Using minibatches of 128 samples

    train_loader1 = torch.utils.data.DataLoader(dataset=train_dataset,
                                              batch_size=batch_size,
                                              shuffle=True,                # <-- this reshuffles the data at every epoch
                                              num_workers=num_threads)

    valid_loader1 = torch.utils.data.DataLoader(dataset=valid_dataset,
                                              batch_size=batch_size, 
                                              shuffle=False,
                                              num_workers=num_threads)


    test_loader1 = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=num_threads)

    sigmoid_func_uniq = get_sigmoid_func(config.get("sigmoid_func", 0))

    if(config.get("model", 0.4)<0.5):
        model = ConvNet(192,int(round(config.get("hidden_dim",64))),10,
                    int( round(config.get("n_layer",1))),
                    config.get("droupout_prob",0.1) ,sigmoid_func_uniq)
    else:
        model = NeurNet(784,int(round(config.get("hidden_dim",64))),10,
                    int( round(config.get("n_layer",1))),
                    config.get("droupout_prob",0.1) ,sigmoid_func_uniq)
        
   # optimizer = optim.SGD(    model.parameters(), lr=config["lr"], momentum=config["momentum"])
    if(config.get("adam",1) >= 0.5):
        optimizer = torch.optim.Adam(model.parameters(), lr=config.get("lr", 0.01), 
                             #    betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
 #                                           eps=config.get("eps", 1e-08), 
                                     weight_decay=config.get("weight_decay", 0), 
                                         amsgrad=True)
    else: 
        optimizer = adabelief_pytorch.AdaBelief(model.parameters(), lr=config.get("lr", 0.01), 
                             #    betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
                  #                       eps=config.get("eps", 1e-08),
                                                weight_decay=config.get("weight_decay", 0))
    
    
    for i in range(config.get("steps",10)):
        train(model, optimizer,F.nll_loss ,train_loader1)
        acc = test(model, F.nll_loss, test_loader1,1)
        # Send the current training result back to Tune
        tune.report(mean_accuracy=acc)

        if i % 5 == 0:
            # This saves the model to the trial directory
            torch.save(model.state_dict(), "./model.pth")



In [5]:
def train_TREC(config):
    import torch
    from torchtext import data
    from torchtext import datasets
    import random

    SEED = 1234
    savedPath = os.getcwd()
    os.chdir('/home/antoine/Projet/NovelTuning')
    
    
    #torch.manual_seed(SEED)
    #torch.backends.cudnn.deterministic = True

    TEXT = data.Field(tokenize = 'spacy')
    LABEL = data.LabelField()

    train_data, test_data = datasets.TREC.splits(TEXT, LABEL,root='data/trec', fine_grained=False)

    train_data, valid_data = train_data.split(random_state = random.seed(SEED))

    MAX_VOCAB_SIZE = 25_000


    sigmoid_func_uniq = get_sigmoid_func(config.get("sigmoid_func", 0))

    
    TEXT.build_vocab(train_data, 
                     max_size = MAX_VOCAB_SIZE, 
                     vectors = 'glove.6B.100d', 
                     unk_init = torch.Tensor.normal_)

    LABEL.build_vocab(train_data)

    os.chdir(savedPath)
    
    BATCH_SIZE = 64

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
        (train_data, valid_data, test_data), 
        batch_size = BATCH_SIZE,
        device = device)



   
    import torch.nn as nn
    import torch.nn.functional as F

    class CNN(nn.Module):
        def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                     dropout, pad_idx):

            super().__init__()

            self.embedding = nn.Embedding(vocab_size, embedding_dim)

            self.convs = nn.ModuleList([
                                        nn.Conv2d(in_channels = 1, 
                                                  out_channels = n_filters, 
                                                  kernel_size = (fs, embedding_dim)) 
                                        for fs in filter_sizes
                                        ])

            self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)

            self.dropout = nn.Dropout(dropout)

        def forward(self, text):

            #text = [sent len, batch size]

            text = text.permute(1, 0)

            #text = [batch size, sent len]

            embedded = self.embedding(text)

            #embedded = [batch size, sent len, emb dim]

            embedded = embedded.unsqueeze(1)

            #embedded = [batch size, 1, sent len, emb dim]

            conved = [sigmoid_func_uniq(conv(embedded)).squeeze(3) for conv in self.convs]

            #conv_n = [batch size, n_filters, sent len - filter_sizes[n]]

            pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]

            #pooled_n = [batch size, n_filters]

            cat = self.dropout(torch.cat(pooled, dim = 1))

            #cat = [batch size, n_filters * len(filter_sizes)]

            return self.fc(cat)
    INPUT_DIM = 7503
    EMBEDDING_DIM = 100
    N_FILTERS = 100
    FILTER_SIZES = [2,3,4]
    OUTPUT_DIM = len(LABEL.vocab)
    DROPOUT = 0.5
    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
    def count_parameters(model):
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

 #   print(f'The model has {count_parameters(model):,} trainable parameters')

    pretrained_embeddings = TEXT.vocab.vectors

    model.embedding.weight.data.copy_(pretrained_embeddings)

    UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

    model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
    model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

    import torch.optim as optim

    #optimizer = optim.Adam(model.parameters())
    if(optimizer_is_adam == True):
        optimizer = torch.optim.Adam(model.parameters(), lr=config.get("lr", 0.01), 
                                 betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
                                         eps=config.get("eps", 1e-08), weight_decay=config.get("weight_decay", 0), 
                                         amsgrad=True)
    else: 
        optimizer = adabelief_pytorch.AdaBelief(model.parameters(), lr=config.get("lr", 0.01), 
                                 betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
                                         eps=config.get("eps", 1e-08), weight_decay=config.get("weight_decay", 0))
    
    
    criterion = nn.CrossEntropyLoss()

    model = model.to(device)
    criterion = criterion.to(device)

    def categorical_accuracy(preds, y):
        """
        Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
        """
        max_preds = preds.argmax(dim = 1, keepdim = True) # get the index of the max probability
        correct = max_preds.squeeze(1).eq(y)
        return correct.sum() / torch.FloatTensor([y.shape[0]])


    def train(model, iterator, optimizer, criterion):

        epoch_loss = 0
        epoch_acc = 0

        model.train()

        for batch in iterator:

            optimizer.zero_grad()

            predictions = model(batch.text)

            loss = criterion(predictions, batch.label)

            acc = categorical_accuracy(predictions, batch.label)

            loss.backward()

            optimizer.step()

            epoch_loss += loss.item()
            epoch_acc += acc.item()

        return epoch_loss / len(iterator), epoch_acc / len(iterator)

    def evaluate(model, iterator, criterion):

        epoch_loss = 0
        epoch_acc = 0

        model.eval()

        with torch.no_grad():

            for batch in iterator:

                predictions = model(batch.text)

                loss = criterion(predictions, batch.label)

                acc = categorical_accuracy(predictions, batch.label)

                epoch_loss += loss.item()
                epoch_acc += acc.item()

        return epoch_loss / len(iterator), epoch_acc / len(iterator)

    import time

    def epoch_time(start_time, end_time):
        elapsed_time = end_time - start_time
        elapsed_mins = int(elapsed_time / 60)
        elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
        return elapsed_mins, elapsed_secs


    best_valid_loss = float('inf')

    for e in range(ITERATIONS):

        start_time = time.time()

        train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
        valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)


        tune.report(loss=valid_acc)

        if e % 5 == 0:
            # This saves the model to the trial directory
            torch.save(model.state_dict(), "./model.pth")

 #       print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
  #      print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
        





In [6]:


def train_diabetes(config):
    import numpy as  np
    import pandas as pd
    from sklearn import datasets

    from sklearn.datasets import load_diabetes

    (X,Y) = load_diabetes( return_X_y=True, as_frame=True)
    X = pd.DataFrame(X)
    Y = pd.DataFrame(Y)
    #normalizing
    Y= Y.apply(
    lambda x: (x - x.mean()) / x.std()
    )
    
    sigmoid_func_uniq = get_sigmoid_func(config.get("sigmoid_func", 0))

    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=1234)

    import torch

    x_train = torch.tensor(X_train.values, dtype=torch.float)
    y_train = torch.tensor(y_train.values, dtype=torch.float)
    x_test = torch.tensor(X_test.values, dtype=torch.float)
    y_test = torch.tensor(y_test.values, dtype=torch.float)
  #  y_train = y_train.type(torch.LongTensor)
  #  y_test = y_test.type(torch.LongTensor)

    import torch.nn.functional as F
    
    #print(y_train)
    train_datasets = torch.utils.data.TensorDataset(x_train, y_train)
    train_loader = torch.utils.data.DataLoader(train_datasets, batch_size=100, shuffle=True)
    
    test_datasets = torch.utils.data.TensorDataset(x_test, y_test)
    test_loader = torch.utils.data.DataLoader(test_datasets, batch_size=100, shuffle=True)  

    net = NeurNet(10,int(round(config.get("hidden_dim",64))),1,
                    int( round(config.get("n_layer",1))),
                    config.get("droupout_prob",0.1) ,sigmoid_func_uniq)
    
    net = LogReg(10,1)
        
   # optimizer = torch.optim.SGD(net.parameters(), lr=0.02)
    if(optimizer_is_adam == True):
        optimizer = torch.optim.Adam(net.parameters(), lr=config.get("lr", 0.01), 
                                 betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
                                         eps=config.get("eps", 1e-08), weight_decay=config.get("weight_decay", 0), 
                                         amsgrad=True)
    else: 
        optimizer = adabelief_pytorch.AdaBelief(net.parameters(), lr=config.get("lr", 0.01), 
                                 betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
                                         eps=config.get("eps", 1e-08), weight_decay=config.get("weight_decay", 0))
    
    
        
    loss_func = torch.nn.MSELoss() 
    for i in range(ITERATIONS):
        train(net, optimizer,loss_func, train_loader)
        acc = test(net,loss_func ,test_loader, 2)

        # Send the current training result back to Tune
        tune.report(loss=acc)

        if i % 5 == 0:
            # This saves the model to the trial directory
            torch.save(net.state_dict(), "./model.pth")



In [7]:


def train_boston(config):
    import numpy as  np
    import pandas as pd
    from sklearn import datasets
    data = datasets.load_boston()

    X = pd.DataFrame(data.data, columns=data.feature_names)
    Y = pd.DataFrame(data.target, columns=["MEDV"])

    #normalizing
    Y= Y.apply(
    lambda x: (x - x.mean()) / x.std()
    )
    
    sigmoid_func_uniq = get_sigmoid_func(config.get("sigmoid_func", 0))
    print(sigmoid_func_uniq)
    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.30, random_state=1234)
    X_test,X_last,y_test,y_last = train_test_split(X_test, y_test, test_size=0.50, random_state=1234)
    
    import torch

    x_train = torch.tensor(X_train.values, dtype=torch.float)
    y_train = torch.tensor(y_train.values, dtype=torch.float)
    x_test = torch.tensor(X_test.values, dtype=torch.float)
    y_test = torch.tensor(y_test.values, dtype=torch.float)
    x_last = torch.tensor(X_last.values, dtype=torch.float)
    y_last = torch.tensor(y_last.values, dtype=torch.float)
  #  y_train = y_train.type(torch.LongTensor)
  #  y_test = y_test.type(torch.LongTensor)

    
    train_datasets = torch.utils.data.TensorDataset(x_train, y_train)
    train_loader = torch.utils.data.DataLoader(train_datasets, batch_size=100, shuffle=True)
    
    test_datasets = torch.utils.data.TensorDataset(x_test, y_test)
    test_loader = torch.utils.data.DataLoader(test_datasets, batch_size=100, shuffle=True)  
       
    last_datasets = torch.utils.data.TensorDataset(x_last, y_last)
    last_loader = torch.utils.data.DataLoader(last_datasets, batch_size=100, shuffle=True)  
    
    
    if(config.get("model", 0.4)<0.5):
        net = NeurNet(13,int(round(config.get("hidden_dim",64))),1,
                    int( round(config.get("n_layer",1))),
                    config.get("droupout_prob",0.1) ,sigmoid_func_uniq)
    else:
        net = LogReg(13,1)
        
    if(optimizer_is_adam == True):
        optimizer = torch.optim.Adam(net.parameters(), lr=config.get("lr", 0.01), 
        #                         betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
         #                                eps=config.get("eps", 1e-08),
                                     weight_decay=config.get("weight_decay", 0), 
                                         amsgrad=True)
    else: 
        optimizer = torch.optim.SGD(net.parameters(), lr=config.get("lr", 0.01), 
         #                        betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
          #                               eps=config.get("eps", 1e-08),
                                                momentum=config.get("sigmoid_func", 0))
    
    
    
    loss_func = torch.nn.MSELoss() 
    for i in range(ITERATIONS):
        train(net, optimizer,loss_func, train_loader)
        acc = test(net,loss_func ,test_loader, 2)
        test1= test(net,loss_func ,last_loader, 2)
        # Send the current training result back to Tune
        
        tune.report(loss=acc,mean_accuracy=test1)
        
        if i % 5 == 0:
            # This saves the model to the trial directory
            torch.save(net.state_dict(), "./model.pth")



In [8]:
   #https://colab.research.google.com/github/agungsantoso/deep-learning-v2-pytorch/blob/master/sentiment-rnn/Sentiment_RNN_Exercise.ipynb#scrollTo=AVzirwGqpmva
def train_IMDB(config):
    train_x = np.load('/home/antoine/Projet/NovelTuning/train_x.npy')
    train_y = np.load('/home/antoine/Projet/NovelTuning/train_y.npy')
    val_x = np.load('/home/antoine/Projet/NovelTuning/val_x.npy')
    val_y = np.load('/home/antoine/Projet/NovelTuning/val_y.npy')
    test_x = np.load('/home/antoine/Projet/NovelTuning/test_x.npy')
    test_y = np.load('/home/antoine/Projet/NovelTuning/test_y.npy')
    len_vocab_to_int = 74072
    ## print out the shapes of your resultant feature data
    from torch.utils.data import TensorDataset, DataLoader

    import torch

    # create Tensor datasets
    train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
    valid_data = TensorDataset(torch.from_numpy(val_x), torch.from_numpy(val_y))
    test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))

    # dataloaders
    batch_size = 50

    # make sure to SHUFFLE your data
    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
    valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size)
    test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)
    # obtain one batch of training data
    dataiter = iter(train_loader)
    sample_x, sample_y = dataiter.next()

    # First checking if GPU is available
    train_on_gpu=torch.cuda.is_available()


    sigmoid_func_uniq = get_sigmoid_func(config.get("sigmoid_func", 0))

        
    class SentimentRNN(nn.Module):
        def __init__(self, embedding_dim, hidden_dim, output_size, n_layers,
                 drop_prob, sigmoid , vocab_size):
            super(SentimentRNN, self).__init__()
            self.output_size = output_size
            self.n_layers = n_layers
            self.hidden_dim = hidden_dim

            self.embedding = nn.Embedding(vocab_size, embedding_dim)
            self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers,
                                dropout=drop_prob, batch_first=True)
            #self.lstm = nn.GRU(embedding_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)

            self.dropout = nn.Dropout(0.3)

            self.fc = nn.Linear(hidden_dim, output_size)
            self.sig = nn.Sigmoid()

        def forward(self, x, hidden):
            hidden= tuple([each.data for each in hidden])
            batch_size = x.size(0)
            embeds = self.embedding(x)
            lstm_out, hidden = self.lstm(embeds, hidden)

            # stack up lstm outputs
            lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)

            # dropout and fully connected layer
            out = self.dropout(lstm_out)
            out = self.fc(out)

            # sigmoid function
            sig_out = self.sig(out)

            # reshape to be batch_size first
            sig_out = sig_out.view(batch_size, -1)
            sig_out = sig_out[:, -1] # get last batch of labels

            # return last sigmoid output and hidden state
            return sig_out, hidden


        def init_hidden(self, batch_size):
            weight = next(self.parameters()).data
            hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                            weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())
            return hidden
        

    class SentimentRNN1(nn.Module):
        def __init__(self, embedding_dim, hidden_dim, output_size, n_layers,
                 drop_prob, sigmoid , vocab_size):
            super(SentimentRNN1, self).__init__()
            self.output_size = output_size
            self.n_layers = n_layers
            self.hidden_dim = hidden_dim

            self.embedding = nn.Embedding(vocab_size, embedding_dim)
            #self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers,
                            #    dropout=drop_prob, batch_first=True)
            self.lstm = nn.GRU(embedding_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)

            self.dropout = nn.Dropout(0.3)

            self.fc = nn.Linear(hidden_dim, output_size)
            self.sig = nn.Sigmoid()

        def forward(self, x, hidden):
            hidden = hidden.data
            batch_size = x.size(0)
            embeds = self.embedding(x)
            lstm_out, hidden = self.lstm(embeds, hidden)

            # stack up lstm outputs
            lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)

            # dropout and fully connected layer
            out = self.dropout(lstm_out)
            out = self.fc(out)

            # sigmoid function
            sig_out = self.sig(out)

            # reshape to be batch_size first
            sig_out = sig_out.view(batch_size, -1)
            sig_out = sig_out[:, -1] # get last batch of labels

            # return last sigmoid output and hidden state
            return sig_out, hidden


        def init_hidden(self, batch_size):
            weight = next(self.parameters()).data
            hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                            weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())
            (a,b) = hidden
            return a
        
        
        
    # Instantiate the model w/ hyperparams
    vocab_size = len_vocab_to_int + 1 # +1 for zero padding + our word tokens
    output_size = 1
    embedding_dim = 400 
    hidden_dim = int(round(config.get("hidden_dim",64)))
    n_layers =  2+ int( round(config.get("n_layer",1)))


    
    net = SentimentRNN1(embedding_dim, hidden_dim, output_size, n_layers,
                           config.get("droupout_prob",0.1),
                           sigmoid_func_uniq, vocab_size)    
    net = SentimentRNN(embedding_dim, hidden_dim, output_size, n_layers,
                           config.get("droupout_prob",0.1),
                           sigmoid_func_uniq, vocab_size)
    
    # loss and optimization functions
    lr=0.001

    criterion = nn.BCELoss()
   # print(*(n for n in net.parameters()))
    #optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    # training params
    if(optimizer_is_adam == True):
        optimizer = torch.optim.Adam(net.parameters(), lr=config.get("lr", 0.01), 
                                 betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
                                         eps=config.get("eps", 1e-08), weight_decay=config.get("weight_decay", 0), 
                                         amsgrad=True)
    else: 
        optimizer = adabelief_pytorch.AdaBelief(net.parameters(), lr=config.get("lr", 0.01), 
                                 betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
                                         eps=config.get("eps", 1e-08), weight_decay=config.get("weight_decay", 0))
    
    
   

    counter = 0
    print_every = 1
    clip=5 # gradient clipping

    # move model to GPU, if available
    if(train_on_gpu):
        net.cuda()

    net.train()
    # train for some number of epochs
    EPOCH_SIZE = 128*32
    TEST_SIZE = 64*32

    def train_rnn():

        h = net.init_hidden(batch_size)
        for batch_idx, (inputs, labels) in enumerate(train_loader):
            # We set this just for the example to run quickly.
            if batch_idx * len(inputs) > EPOCH_SIZE:
                return

            if(train_on_gpu):
                inputs, labels = inputs.cuda(), labels.cuda()

            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history

            # zero accumulated gradients
            net.zero_grad()

            # get the output from the model
            output, h = net(inputs, h)

            # calculate the loss and perform backprop
            loss = criterion(output.squeeze(), labels.float())
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            optimizer.step() 

    for i in range(ITERATIONS):
        train_rnn()
        acc = test(net,criterion,valid_loader,3)

        tune.report(loss=acc)

        if i % 5 == 0:
            # This saves the model to the trial directory
            torch.save(net.state_dict(), "./model.pth")

        

In [51]:
import torch
from torchtext import data
from torchtext import datasets
import random

SEED = 1234
savedPath = os.getcwd()
os.chdir('/home/antoine/Projet/NovelTuning')


#torch.manual_seed(SEED)
#torch.backends.cudnn.deterministic = True

TEXT = data.Field(tokenize = 'spacy')
LABEL = data.LabelField()

train_data, test_data = datasets.TREC.splits(TEXT, LABEL,root='data/trec', fine_grained=False)

train_data, valid_data = train_data.split(random_state = random.seed(SEED))




MAX_VOCAB_SIZE = 25_000


TEXT.build_vocab(train_data, 
                 max_size = MAX_VOCAB_SIZE, 
                 vectors = 'glove.6B.100d', 
                 unk_init = torch.Tensor.normal_)

LABEL.build_vocab(train_data)

import dill
from pathlib import Path

import torch
from torchtext.data import Dataset

def save_dataset(dataset, path):
    if not isinstance(path, Path):
        path = Path(path)
    path.mkdir(parents=True, exist_ok=True)
    torch.save(dataset.examples, path/"examples.pkl", pickle_module=dill)
    torch.save(dataset.fields, path/"fields.pkl", pickle_module=dill)

def load_dataset(path):
    if not isinstance(path, Path):
        path = Path(path)
    examples = torch.load(path/"examples.pkl", pickle_module=dill)
    fields = torch.load(path/"fields.pkl", pickle_module=dill)
    return Dataset(examples, fields)


save_examples(train_data,'sdf')
examples = load_examples(filename)
examples = [data.Example().fromlist(d, fields) for d in examples]

# Build dataset
mydataset = Dataset(examples, fields)

os.chdir(savedPath)


INFO:torchtext.vocab:Loading vectors from .vector_cache/glove.6B.100d.txt.pt


AttributeError: 'Example' object has no attribute 'src'

In [52]:
def train_TREC(config):
    import torch
    from torchtext import data
    from torchtext import datasets
    import random

    SEED = 1234
    savedPath = os.getcwd()
    os.chdir('/home/antoine/Projet/NovelTuning')
    
    
    #torch.manual_seed(SEED)
    #torch.backends.cudnn.deterministic = True

    TEXT = data.Field(tokenize = 'spacy')
    LABEL = data.LabelField()

    train_data, test_data = datasets.TREC.splits(TEXT, LABEL,root='data/trec', fine_grained=False)

    train_data, valid_data = train_data.split(random_state = random.seed(SEED))

    

    
    MAX_VOCAB_SIZE = 25_000


    sigmoid_func_uniq = get_sigmoid_func(config.get("sigmoid_func", 0))

    
    TEXT.build_vocab(train_data, 
                     max_size = MAX_VOCAB_SIZE, 
                     vectors = 'glove.6B.100d', 
                     unk_init = torch.Tensor.normal_)

    LABEL.build_vocab(train_data)

    os.chdir(savedPath)
    
    
    
    BATCH_SIZE = 64
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
        (train_data, valid_data, test_data), 
        batch_size = BATCH_SIZE,
        device = device)



   
    import torch.nn as nn
    import torch.nn.functional as F

    class CNN(nn.Module):
        def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                     dropout, pad_idx):

            super().__init__()

            self.embedding = nn.Embedding(vocab_size, embedding_dim)

            self.convs = nn.ModuleList([
                                        nn.Conv2d(in_channels = 1, 
                                                  out_channels = n_filters, 
                                                  kernel_size = (fs, embedding_dim)) 
                                        for fs in filter_sizes
                                        ])

            self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)

            self.dropout = nn.Dropout(dropout)

        def forward(self, text):

            #text = [sent len, batch size]

            text = text.permute(1, 0)

            #text = [batch size, sent len]

            embedded = self.embedding(text)

            #embedded = [batch size, sent len, emb dim]

            embedded = embedded.unsqueeze(1)

            #embedded = [batch size, 1, sent len, emb dim]

            conved = [sigmoid_func_uniq(conv(embedded)).squeeze(3) for conv in self.convs]

            #conv_n = [batch size, n_filters, sent len - filter_sizes[n]]

            pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]

            #pooled_n = [batch size, n_filters]

            cat = self.dropout(torch.cat(pooled, dim = 1))

            #cat = [batch size, n_filters * len(filter_sizes)]

            return self.fc(cat)
    INPUT_DIM = 7503
    EMBEDDING_DIM = 100
    N_FILTERS = 100
    FILTER_SIZES = [2,3,4]
    OUTPUT_DIM = len(LABEL.vocab)
    DROPOUT = 0.5
    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
    def count_parameters(model):
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

 #   print(f'The model has {count_parameters(model):,} trainable parameters')

    pretrained_embeddings = TEXT.vocab.vectors

    model.embedding.weight.data.copy_(pretrained_embeddings)

    UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

    model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
    model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

    import torch.optim as optim

    #optimizer = optim.Adam(model.parameters())
    if(optimizer_is_adam == True):
        optimizer = torch.optim.Adam(model.parameters(), lr=config.get("lr", 0.01), 
                                 betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
                                         eps=config.get("eps", 1e-08), weight_decay=config.get("weight_decay", 0), 
                                         amsgrad=True)
    else: 
        optimizer = adabelief_pytorch.AdaBelief(model.parameters(), lr=config.get("lr", 0.01), 
                                 betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
                                         eps=config.get("eps", 1e-08), weight_decay=config.get("weight_decay", 0))
    
    
    criterion = nn.CrossEntropyLoss()

    model = model.to(device)
    criterion = criterion.to(device)

    def categorical_accuracy(preds, y):
        """
        Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
        """
        max_preds = preds.argmax(dim = 1, keepdim = True) # get the index of the max probability
        correct = max_preds.squeeze(1).eq(y)
        return correct.sum() / torch.FloatTensor([y.shape[0]])


    def train(model, iterator, optimizer, criterion):

        epoch_loss = 0
        epoch_acc = 0

        model.train()

        for batch in iterator:
            optimizer.zero_grad()

            predictions = model(batch.text)

            loss = criterion(predictions, batch.label)

            acc = categorical_accuracy(predictions, batch.label)

            loss.backward()

            optimizer.step()

            epoch_loss += loss.item()
            epoch_acc += acc.item()

        return epoch_loss / len(iterator), epoch_acc / len(iterator)

    def evaluate(model, iterator, criterion):

        epoch_loss = 0
        epoch_acc = 0

        model.eval()

        with torch.no_grad():

            for batch in iterator:

                predictions = model(batch.text)

                loss = criterion(predictions, batch.label)

                acc = categorical_accuracy(predictions, batch.label)

                epoch_loss += loss.item()
                epoch_acc += acc.item()

        return epoch_loss / len(iterator), epoch_acc / len(iterator)

    import time

    def epoch_time(start_time, end_time):
        elapsed_time = end_time - start_time
        elapsed_mins = int(elapsed_time / 60)
        elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
        return elapsed_mins, elapsed_secs


    best_valid_loss = float('inf')

    for e in range(ITERATIONS):

        start_time = time.time()

        train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
        valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)


        tune.report(mean_accuracy=valid_acc)

        if e % 5 == 0:
            # This saves the model to the trial directory
            torch.save(model.state_dict(), "./model.pth")

 #       print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
  #      print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
        





In [53]:
#Configs
parser = argparse.ArgumentParser()
parser.add_argument(
    "--smoke-test", action="store_true", help="Finish quickly for testing")
args, _ = parser.parse_known_args()          
        
    
    
experiment_metrics = dict(metric="mean_accuracy", mode="max")
#experiment_metrics = dict(metric="loss", mode="min")


ITERATIONS = 1
NUM_TUNED= 1
    


#[nn.ReLU, nn.Softmax(), nn.Tanh(),nn.Sigmoid() ]

tune_kwargs = {
    "num_samples": 1 if True else 2,
    "config": {
    "steps": 3,  # evaluation times
     "lr":  tune.quniform(1e-10, 0.1,1e-10),
    "b1": tune.quniform(0.9, 1-1e-10,1e-10),
        "b2":tune.quniform(0.9, 1-1e-10,1e-10),
        "eps": tune.uniform(1e-10, 0.1),
         "weight_decay":tune.quniform(1e-10, 0.1,1e-10),
        "sigmoid_func":nn.ReLU()
    }
}

tune_kwargs = {
    "num_samples": NUM_TUNED if args.smoke_test else NUM_TUNED,
    "config": {
    "steps": ITERATIONS,  # evaluation times
     "lr":  tune.loguniform(1e-10, 0.1),
    "b1": tune.loguniform(0.9, 1-1e-10),
        "b2":tune.loguniform(0.9, 1-1e-10),
        "eps": tune.loguniform(1e-10, 0.1),
         "weight_decay":tune.loguniform(1e-10, 0.1)
    }
}
   
#i is in [0;1]
#We want all values between 0 and 1
def get_sigmoid_func(i):
    if(i<0.33):
        return nn.ReLU()
    elif(i<0.67):
        return nn.Tanh()
    else:
        return nn.Sigmoid()

    
optimizer_is_adam = True   
    
f = get_sigmoid_func(3)
print(f(torch.randn(2)))
import random
tune_kwargs = {
    "num_samples": NUM_TUNED if args.smoke_test else NUM_TUNED,
    "config": {
    "steps": ITERATIONS,  # evaluation times
     "lr":  tune.loguniform(1e-4, 0.1 ),#,1e-4), #*10
         "weight_decay":tune.loguniform(1e-4, 0.1),#,1e-4), #*10 et 0
        "sigmoid_func":tune.uniform(0,1),
        "hidden_dim":tune.loguniform(32.,256.),#,1), #log de 32 à 256
        "n_layer":tune.uniform(1,3),#,1), #from 1 to 3
        "droupout_prob":tune.uniform(0,0.5),#,0.1), #0.x pour x allant de 0 à 5     
        "adam":tune.uniform(0,1)
    }
}

tune_kwargs = {
    "num_samples": NUM_TUNED if args.smoke_test else NUM_TUNED,
    "config": {
    "steps": ITERATIONS,  # evaluation times
     "lr":  tune.uniform(1e-4, 0.1 ),#,1e-4), #*10
         "weight_decay":tune.uniform(1e-4, 0.1),#,1e-4), #*10 et 0
        "sigmoid_func":tune.uniform(0,1),
        "hidden_dim":tune.uniform(32.,256.),#,1), #log de 32 à 256
        "n_layer":tune.uniform(1,3),#,1), #from 1 to 3
        "droupout_prob":tune.uniform(0,0.5),#,0.1), #0.x pour x allant de 0 à 5     
        "adam":tune.uniform(0,1),
        "model":tune.uniform(0,1),
    }
}


x_all = [train_IMDB,  train_TREC, train_boston, train_diabetes, train_mnist, train_fashion_mnist]


 

tensor([0.2811, 0.5970])


In [54]:
#One shot

f_HyperOpt(train_TREC)

model_all = [Net,ConvNet]
for i in range(0,0):
    for j in range(0,1):
        x = x_all[i]
        f_HyperOpt(x)
        f_BayesOpt(x)
        f_AX(x)
        f_NeverGrad(x)
        f_BOHB(x)
        f_Random(x)
        f_ZOOpt(x)
        print("all worked with " + str(x)+  " !")
    for i in range(1,1):
        GAN_MNIST(i)

Trial name,status,loc,adam,droupout_prob,hidden_dim,lr,model,n_layer,sigmoid_func,steps,weight_decay
train_TREC_2fb91f8c,RUNNING,,0.837611,0.485956,61.8488,0.00452707,0.0566794,2.48344,0.706041,1,0.0301386




[2m[36m(pid=28808)[0m <generator object Dataset.__getattr__ at 0x7fcef4279150>


2020-11-17 16:47:27,623	ERROR trial_runner.py:567 -- Trial train_TREC_2fb91f8c: Error processing event.
Traceback (most recent call last):
  File "/home/antoine/anaconda3/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 515, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/antoine/anaconda3/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 488, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "/home/antoine/anaconda3/lib/python3.7/site-packages/ray/worker.py", line 1428, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(TuneError): [36mray::ImplicitFunc.train()[39m (pid=28808, ip=192.168.1.34)
  File "python/ray/_raylet.pyx", line 484, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 438, in ray._raylet.execute_task.function_executor
  File "/home/antoine/anaconda3/lib/python3.7/site-packages/ray/tune/trainable.py", line 336, in train
    res

[2m[36m(pid=28808)[0m <generator object Dataset.__getattr__ at 0x7fcef4279150>


[2m[36m(pid=28808)[0m 2020-11-17 16:47:27,586	ERROR function_runner.py:233 -- Runner Thread raised error.
[2m[36m(pid=28808)[0m Traceback (most recent call last):
[2m[36m(pid=28808)[0m   File "/home/antoine/anaconda3/lib/python3.7/site-packages/ray/tune/function_runner.py", line 227, in run
[2m[36m(pid=28808)[0m     self._entrypoint()
[2m[36m(pid=28808)[0m   File "/home/antoine/anaconda3/lib/python3.7/site-packages/ray/tune/function_runner.py", line 290, in entrypoint
[2m[36m(pid=28808)[0m     self._status_reporter.get_checkpoint())
[2m[36m(pid=28808)[0m   File "/home/antoine/anaconda3/lib/python3.7/site-packages/ray/tune/function_runner.py", line 497, in _trainable_func
[2m[36m(pid=28808)[0m     output = train_func(config)
[2m[36m(pid=28808)[0m   File "<ipython-input-52-5bd636ffe49d>", line 41, in train_TREC
[2m[36m(pid=28808)[0m   File "<__array_function__ internals>", line 6, in save
[2m[36m(pid=28808)[0m   File "/home/antoine/anaconda3/lib/python3.7

Trial name,status,loc,adam,droupout_prob,hidden_dim,lr,model,n_layer,sigmoid_func,steps,weight_decay
train_TREC_2fb91f8c,ERROR,,0.837611,0.485956,61.8488,0.00452707,0.0566794,2.48344,0.706041,1,0.0301386

Trial name,# failures,error file
train_TREC_2fb91f8c,1,"/home/antoine/ray_results/hyper/train_TREC_2fb91f8c_1_adam=0.83761,droupout_prob=0.48596,hidden_dim=61.849,lr=0.0045271,model=0.056679,n_layer=2.4834,sigmoid_func_2020-11-17_16-47-24/error.txt"


TuneError: ('Trials did not complete', [train_TREC_2fb91f8c])

In [None]:
#Small budget

ITERATIONS = 20
NUM_TUNED= 20


model_all = [Net,ConvNet]
optimizer_is_adam = True
if(0==1):
    for i in range(1,2):
        x = train_TREC
        f_BayesOpt(x)
        f_AX(x)
        f_NeverGrad(x)
        f_BOHB(x)
        f_Random(x)
        f_ZOOpt(x)
        print("all worked with " + str(x)+  " !")
    for i in range(1,1):
        GAN_MNIST(i)

In [11]:
def f_HyperOpt(dataset):
    
    scheduler = AsyncHyperBandScheduler(**experiment_metrics)
    bayesopt = HyperOptSearch(**experiment_metrics)
    tune.run(dataset, **tune_kwargs , scheduler = scheduler,  name="hyper", search_alg=bayesopt)

In [None]:
def f_BayesOpt(dataset):
    
    scheduler = AsyncHyperBandScheduler(**experiment_metrics)
    bayesopt = BayesOptSearch(**experiment_metrics)
    tune.run(dataset, **tune_kwargs , scheduler = scheduler, name="bayes",  search_alg=bayesopt)

In [None]:
def f_AX(dataset):
    
   
    if __name__ == "__main__":
                
        algo = AxSearch(
            max_concurrent=2, #was working with 2
            **experiment_metrics
        )
        scheduler = AsyncHyperBandScheduler(**experiment_metrics)
        tune.run(
            dataset,       name="ax",
            search_alg=algo,
            scheduler=scheduler,
            **tune_kwargs)

        
#        algo = AxSearch(
#            **experiment_metrics
#        )
#        algo = ConcurrencyLimiter(algo, max_concurrent=4)

        
#        scheduler = AsyncHyperBandScheduler()
#        tune.run(
#            dataset,
#            **experiment_metrics,
#            search_alg=algo,
#            scheduler=scheduler,
#            **tune_kwargs)


In [None]:

# TODO We are interested in multiple Population based algorithms from nevergrad, and certainly not in OnePlusOne. 
def f_NeverGrad(dataset):
    algo = NevergradSearch(
    optimizer=ng.optimizers.CMA
    # space=space,  # If you want to set the space manually
    )
    algo = ConcurrencyLimiter(algo, max_concurrent=8)

    scheduler = AsyncHyperBandScheduler()

    tune.run(
        dataset,
        **experiment_metrics,
      #  name="nevergrad",
        search_alg=algo, name="ng",
        scheduler=scheduler,
        **tune_kwargs) 
    

In [None]:
def f_BOHB(dataset):

    bohb_hyperband = HyperBandForBOHB(
        time_attr="training_iteration",
        max_t=100,
        reduction_factor=2,
        **experiment_metrics)

    bohb_search = TuneBOHB(
        # space=config_space, 
        max_concurrent=4,
        **experiment_metrics)

    tune.run(
        dataset,
       # config=config, 
        scheduler=bohb_hyperband,name="bohb",
        search_alg=bohb_search,       
         **tune_kwargs)
        #num_samples=NUM_TUNED,
       # stop={"training_iteration": 100})
    
    
    

In [None]:
def f_Random(dataset):
    
    algo = NevergradSearch(
    optimizer=ng.optimizers.RandomSearch,
    # space=space,  # If you want to set the space manually
    )
    algo = ConcurrencyLimiter(algo, max_concurrent=4)

    scheduler = AsyncHyperBandScheduler()

    tune.run(
        dataset,
        **experiment_metrics,
      #  name="nevergrad",
        search_alg=algo,   name="random",    
        scheduler=scheduler,
        **tune_kwargs) 
    

In [None]:
def f_ZOOpt(dataset):

    dim_dict = {
        "lr": (ValueType.CONTINUOUS, [0, 1], 1e-2),
        "momentum": (ValueType.CONTINUOUS, [0,1, 0.9], 1e-2)
    }

    zoopt_search_config = {
        "parallel_num": 8,  # how many workers to parallel
    }

    

    zoopt_search = ZOOptSearch(
    algo="Asracos",  # only support Asracos currently
    #dim_dict=dim_dict,
    budget=ITERATIONS,
    #dim_dict=dim_dict,
   #     **zoopt_search_config,
    **experiment_metrics)
    
    scheduler = AsyncHyperBandScheduler(**experiment_metrics)

   
    tune.run(dataset,
 #        config = config,
    search_alg=zoopt_search,
   # num_samples= ITERATIONS,
    scheduler=scheduler,       
    #         paralell_num=4,
    name="zoopt_search", 
              **tune_kwargs
    )

In [None]:
# IN THIS MODULE: IMPORTS, CNN, TRAIN, TEST, MNIS_FUNCTION, SPACE

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from hyperopt import hp
from ray.tune.suggest.hyperopt import HyperOptSearch
from ray.tune.suggest.skopt import SkOptSearch
from ray import tune
from ray.tune.suggest.bayesopt import BayesOptSearch
import time
import ray
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest.ax import AxSearch
import argparse
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest.nevergrad import NevergradSearch
import nevergrad as ng
import json
import os
from ray.tune import Trainable
from ray.tune.schedulers.hb_bohb import HyperBandForBOHB
from ray.tune.suggest.bohb import TuneBOHB
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest.zoopt import ZOOptSearch
from ray.tune.schedulers import AsyncHyperBandScheduler
from zoopt import ValueType
import torch

def GAN_MNIST(SA):
    import ray

    import os
    import torch
    import torch.nn as nn
    import torch.nn.parallel
    import torch.utils.data
    import torchvision.datasets as dset
    import torchvision.transforms as transforms
    import torchvision.utils as vutils
    import numpy as np

    import ray
    from ray import tune
    from ray.tune.trial import ExportFormat
    from ray.tune.schedulers import PopulationBasedTraining

    import argparse
    import os
    from filelock import FileLock
    import random
    import torch
    import torch.nn as nn
    import torch.nn.parallel
    import torch.optim as optim
    import torch.utils.data
    import numpy as np
    from ray.tune.suggest.bayesopt import BayesOptSearch
    from ray.tune.suggest.ax import AxSearch



    from torch.autograd import Variable
    from torch.nn import functional as F
    from scipy.stats import entropy

    import matplotlib.pyplot as plt
    import matplotlib.animation as animation


    # Training parameters
    dataroot = ray.utils.get_user_temp_dir() + os.sep
    workers = 2
    batch_size = 64
    image_size = 32

    # Number of channels in the training images. For color images this is 3
    nc = 1

    # Size of z latent vector (i.e. size of generator input)
    nz = 100

    # Size of feature maps in generator
    ngf = 32

    # Size of feature maps in discriminator
    ndf = 32

    # Beta1 hyperparam for Adam optimizers
    beta1 = 0.5

    # iterations of actual training in each Trainable _train
    train_iterations_per_step = 5

    MODEL_PATH = os.path.expanduser("~/.ray/models/mnist_cnn.pt")


    def get_data_loader():
        dataset = dset.MNIST(
            root=dataroot,
            download=True,
            transform=transforms.Compose([
                transforms.Resize(image_size),
                transforms.ToTensor(),
                transforms.Normalize((0.5, ), (0.5, )),
            ]))

        # Create the dataloader
        dataloader = torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=True, num_workers=workers)

        return dataloader


    # __GANmodel_begin__
    # custom weights initialization called on netG and netD
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find("Conv") != -1:
            nn.init.normal_(m.weight.data, 0.0, 0.02)
        elif classname.find("BatchNorm") != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0)


    # Generator Code
    class Generator(nn.Module):
        def __init__(self):
            super(Generator, self).__init__()
            self.main = nn.Sequential(
                # input is Z, going into a convolution
                nn.ConvTranspose2d(nz, ngf * 4, 4, 1, 0, bias=False),
                nn.BatchNorm2d(ngf * 4),
                nn.ReLU(True),
                nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
                nn.BatchNorm2d(ngf * 2),
                nn.ReLU(True),
                nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
                nn.BatchNorm2d(ngf),
                nn.ReLU(True),
                nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
                nn.Tanh())

        def forward(self, input):
            return self.main(input)


    class Discriminator(nn.Module):
        def __init__(self):
            super(Discriminator, self).__init__()
            self.main = nn.Sequential(
                nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
                nn.LeakyReLU(0.2, inplace=True),
                nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
                nn.BatchNorm2d(ndf * 2), nn.LeakyReLU(0.2, inplace=True),
                nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
                nn.BatchNorm2d(ndf * 4), nn.LeakyReLU(0.2, inplace=True),
                nn.Conv2d(ndf * 4, 1, 4, 1, 0, bias=False), nn.Sigmoid())

        def forward(self, input):
            return self.main(input)


    # __GANmodel_end__


    # __INCEPTION_SCORE_begin__
    class Net(nn.Module):
        """
        LeNet for MNist classification, used for inception_score
        """

        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
            self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
            self.conv2_drop = nn.Dropout2d()
            self.fc1 = nn.Linear(320, 50)
            self.fc2 = nn.Linear(50, 10)

        def forward(self, x):
            x = F.relu(F.max_pool2d(self.conv1(x), 2))
            x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
            x = x.view(-1, 320)
            x = F.relu(self.fc1(x))
            x = F.dropout(x, training=self.training)
            x = self.fc2(x)
            return F.log_softmax(x, dim=1)


    def inception_score(imgs, mnist_model_ref, batch_size=32, splits=1):
        N = len(imgs)
        dtype = torch.FloatTensor
        dataloader = torch.utils.data.DataLoader(imgs, batch_size=batch_size)
        cm = ray.get(mnist_model_ref)  # Get the mnist model from Ray object store.
        up = nn.Upsample(size=(28, 28), mode="bilinear").type(dtype)

        def get_pred(x):
            x = up(x)
            x = cm(x)
            return F.softmax(x).data.cpu().numpy()

        preds = np.zeros((N, 10))
        for i, batch in enumerate(dataloader, 0):
            batch = batch.type(dtype)
            batchv = Variable(batch)
            batch_size_i = batch.size()[0]
            preds[i * batch_size:i * batch_size + batch_size_i] = get_pred(batchv)

        # Now compute the mean kl-div
        split_scores = []
        for k in range(splits):
            part = preds[k * (N // splits):(k + 1) * (N // splits), :]
            py = np.mean(part, axis=0)
            scores = []
            for i in range(part.shape[0]):
                pyx = part[i, :]
                scores.append(entropy(pyx, py))
            split_scores.append(np.exp(np.mean(scores)))

        return np.mean(split_scores), np.std(split_scores)


    # __INCEPTION_SCORE_end__


    def train(netD, netG, optimG, optimD, criterion, dataloader, iteration, device,
              mnist_model_ref):
        real_label = 1
        fake_label = 0

        for i, data in enumerate(dataloader, 0):
            if i >= train_iterations_per_step:
                break

            netD.zero_grad()
            real_cpu = data[0].to(device)
            b_size = real_cpu.size(0)
            label = torch.full(
                (b_size, ), real_label, dtype=torch.float, device=device)
            output = netD(real_cpu).view(-1)
            errD_real = criterion(output, label)
            errD_real.backward()
            D_x = output.mean().item()

            noise = torch.randn(b_size, nz, 1, 1, device=device)
            fake = netG(noise)
            label.fill_(fake_label)
            output = netD(fake.detach()).view(-1)
            errD_fake = criterion(output, label)
            errD_fake.backward()
            D_G_z1 = output.mean().item()
            errD = errD_real + errD_fake
            optimD.step()

            netG.zero_grad()
            label.fill_(real_label)
            output = netD(fake).view(-1)
            errG = criterion(output, label)
            errG.backward()
            D_G_z2 = output.mean().item()
            optimG.step()

            is_score, is_std = inception_score(fake, mnist_model_ref)

            # Output training stats
            if iteration % 10 == 0:
                print("[%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z))"
                      ": %.4f / %.4f \tInception score: %.4f" %
                      (iteration, len(dataloader), errD.item(), errG.item(), D_x,
                       D_G_z1, D_G_z2, is_score))

        return errG.item(), errD.item(), is_score


    def plot_images(dataloader):
        # Plot some training images
        real_batch = next(iter(dataloader))
        plt.figure(figsize=(8, 8))
        plt.axis("off")
        plt.title("Original Images")
        plt.imshow(
            np.transpose(
                vutils.make_grid(real_batch[0][:64], padding=2,
                                 normalize=True).cpu(), (1, 2, 0)))

        plt.show()


    def demo_gan(checkpoint_paths):
        img_list = []
        fixed_noise = torch.randn(64, nz, 1, 1)
        for netG_path in checkpoint_paths:
            loadedG = Generator()
            loadedG.load_state_dict(torch.load(netG_path)["netGmodel"])
            with torch.no_grad():
                fake = loadedG(fixed_noise).detach().cpu()
            img_list.append(vutils.make_grid(fake, padding=2, normalize=True))

        fig = plt.figure(figsize=(8, 8))
        plt.axis("off")
        ims = [[plt.imshow(np.transpose(i, (1, 2, 0)), animated=True)]
               for i in img_list]
        ani = animation.ArtistAnimation(
            fig, ims, interval=1000, repeat_delay=1000, blit=True)
        ani.save("./generated.gif", writer="imagemagick", dpi=72)
        plt.show()




    # __Trainable_begin__
    class PytorchTrainable(tune.Trainable):
        def setup(self, config):
            use_cuda = config.get("use_gpu") and torch.cuda.is_available()
            self.device = torch.device("cuda" if use_cuda else "cpu")
            self.netD = Discriminator().to(self.device)
            self.netD.apply(weights_init)
            self.netG = Generator().to(self.device)
            self.netG.apply(weights_init)
            self.criterion = nn.BCELoss()
            self.optimizerD = optim.Adam(
                self.netD.parameters(),
                lr=config.get("lr", 0.01),
                betas=(beta1, 0.999))
            self.optimizerG = optim.Adam(
                self.netG.parameters(),
                lr=config.get("lr", 0.01),
                betas=(beta1, 0.999))
            with FileLock(os.path.expanduser("~/.data.lock")):
                self.dataloader = get_data_loader()
            self.mnist_model_ref = c["mnist_model_ref"]

        def step(self):
            lossG, lossD, is_score = train(self.netD, self.netG, self.optimizerG,
                                           self.optimizerD, self.criterion,
                                           self.dataloader, self._iteration,
                                           self.device, self.mnist_model_ref)
            return {"lossg": lossG, "lossd": lossD, "is_score": is_score}

        def save_checkpoint(self, checkpoint_dir):
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save({
                "netDmodel": self.netD.state_dict(),
                "netGmodel": self.netG.state_dict(),
                "optimD": self.optimizerD.state_dict(),
                "optimG": self.optimizerG.state_dict(),
            }, path)

            return checkpoint_dir

        def load_checkpoint(self, checkpoint_dir):
            path = os.path.join(checkpoint_dir, "checkpoint")
            checkpoint = torch.load(path)
            self.netD.load_state_dict(checkpoint["netDmodel"])
            self.netG.load_state_dict(checkpoint["netGmodel"])
            self.optimizerD.load_state_dict(checkpoint["optimD"])
            self.optimizerG.load_state_dict(checkpoint["optimG"])

        def reset_config(self, new_config):
            if "netD_lr" in new_config:
                for param_group in self.optimizerD.param_groups:
                    param_group["lr"] = new_config["netD_lr"]
            if "netG_lr" in new_config:
                for param_group in self.optimizerG.param_groups:
                    param_group["lr"] = new_config["netG_lr"]

            self.config = new_config
            return True

        def _export_model(self, export_formats, export_dir):
            if export_formats == [ExportFormat.MODEL]:
                path = os.path.join(export_dir, "exported_models")
                torch.save({
                    "netDmodel": self.netD.state_dict(),
                    "netGmodel": self.netG.state_dict()
                }, path)
                return {ExportFormat.MODEL: path}
            else:
                raise ValueError("unexpected formats: " + str(export_formats))



    import urllib.request
    # Download a pre-trained MNIST model for inception score calculation.
    # This is a tiny model (<100kb).
    if not os.path.exists(MODEL_PATH):
        print("downloading model")
        os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
        urllib.request.urlretrieve(
            "https://github.com/ray-project/ray/raw/master/python/ray/tune/"
            "examples/pbt_dcgan_mnist/mnist_cnn.pt", MODEL_PATH)

    dataloader = get_data_loader()
    if not args.smoke_test:
        plot_images(dataloader)

    # load the pretrained mnist classification model for inception_score
    mnist_cnn = Net()
    mnist_cnn.load_state_dict(torch.load(MODEL_PATH))
    mnist_cnn.eval()
    mnist_model_ref = ray.put(mnist_cnn)

    # __tune_begin__
    scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="is_score",
        mode="max",
        perturbation_interval=5,
        hyperparam_mutations={
            # distribution for resampling
            "netG_lr": lambda: np.random.uniform(1e-2, 1e-5),
            "netD_lr": lambda: np.random.uniform(1e-2, 1e-5),
        })


    experiment_metrics= dict(metric="is_score",
        mode="max")

   
    dim_dict = {
        "netG_lr": (ValueType.CONTINUOUS, [0, 0.1], 1e-2),
        "netD_lr": (ValueType.CONTINUOUS, [0, 0.1], 1e-2)
    }

    config =     {
            "netG_lr": tune.loguniform(1e-10, 0.1),
           "netD_lr": tune.loguniform(1e-10, 0.1)
        }

    algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)

    
    tune_iter = 5 if args.smoke_test else 1
    c={"mnist_model_ref" : mnist_model_ref}
    
    
    if(SA==1):
        algo =   BayesOptSearch(**experiment_metrics) 
        analysis = tune.run(
        PytorchTrainable,
        name="pbt_dcgan_mnist",
        scheduler=scheduler,
        reuse_actors=True,
        search_alg=algo,
        verbose=1,
        checkpoint_at_end=True,
        stop={
            "training_iteration": tune_iter,
        },
        num_samples=8,
        export_formats=[ExportFormat.MODEL],
        config={
            "netG_lr": tune.loguniform(1e-10, 0.1),
            "netD_lr": tune.loguniform(1e-10, 0.1)
        })
        
    if(SA==2):
        algo =  AxSearch(
            **experiment_metrics) 
        algo = ConcurrencyLimiter(algo, max_concurrent=4)

        analysis = tune.run(
        PytorchTrainable,
        name="pbt_dcgan_mnist",
        scheduler=scheduler,
        reuse_actors=True,
        search_alg=algo,
        verbose=1,
        checkpoint_at_end=True,
        stop={
            "training_iteration": tune_iter,
        },
        export_formats=[ExportFormat.MODEL],
        config={
            "netG_lr": tune.loguniform(1e-3, 0.1),
            "netD_lr": tune.loguniform(1e-3, 0.1)


        })
        
        
    if(SA==3):
        algo =   NevergradSearch(
    optimizer=ng.optimizers.OnePlusOne,**experiment_metrics) 
        analysis = tune.run(
        PytorchTrainable,
        name="pbt_dcgan_mnist",
        scheduler=scheduler,
        reuse_actors=True,
        search_alg=algo,
        verbose=1,
        checkpoint_at_end=True,
        stop={
            "training_iteration": tune_iter,
        },
        num_samples=8,
        export_formats=[ExportFormat.MODEL],
        config={
            "netG_lr": tune.loguniform(1e-10, 0.1),
            "netD_lr": tune.loguniform(1e-10, 0.1)
        })
        
     
    if(SA==4):
        bohb_hyperband = HyperBandForBOHB(
            time_attr="training_iteration",
            max_t=100,
            reduction_factor=4,
            **experiment_metrics)

        bohb_search = TuneBOHB(
            # space=config_space, 
            max_concurrent=4,
            **experiment_metrics)
        analysis = tune.run(
        PytorchTrainable,
        name="pbt_dcgan_mnist",
        scheduler=bohb_hyperband,
        reuse_actors=True,
        search_alg=bohb_search,
        verbose=1,
        checkpoint_at_end=True,
        stop={
            "training_iteration": tune_iter,
        },
        num_samples=8,
        export_formats=[ExportFormat.MODEL],
        config={
            "netG_lr": tune.loguniform(1e-10, 0.1),
            "netD_lr": tune.loguniform(1e-10, 0.1)
        })
        
        
        
    if(SA==5):
        algo =   NevergradSearch(
    optimizer=ng.optimizers.RandomSearch,**experiment_metrics) 
        analysis = tune.run(
        PytorchTrainable,
        name="pbt_dcgan_mnist",
        scheduler=scheduler,
        reuse_actors=True,
        search_alg=algo,
        verbose=1,
        checkpoint_at_end=True,
        stop={
            "training_iteration": tune_iter,
        },
        num_samples=8,
        export_formats=[ExportFormat.MODEL],
        config={
            "netG_lr": tune.loguniform(1e-10, 0.1),
            "netD_lr": tune.loguniform(1e-10, 0.1)
        })
        

        
    if(SA==6):
        algo=  ZOOptSearch(
                algo="Asracos",  # only support Asracos currently
                dim_dict=dim_dict,
                budget=10,
                #dim_dict=dim_dict,
                **experiment_metrics)
        analysis = tune.run(
            PytorchTrainable,
            name="pbt_dcgan_mnist",
            scheduler=scheduler,
            reuse_actors=True,
            search_alg=algo,
            verbose=1,
            checkpoint_at_end=True,
            stop={
                "training_iteration": tune_iter,
            },
            num_samples=8,
            export_formats=[ExportFormat.MODEL],
            config=dim_dict)



In [None]:
#Metrics

from sklearn.metrics import *

#Classification:

metrics.accuracy_score()
metrics.f1_score
metrics.log_loss
metrics.precision_score
metrics.recall_score

#Regression
mean_absolute_error
mean_squared_error
r2_score

In [2]:

#!/usr/bin/env python
"""
Example of training DCGAN on MNIST using PBT with Tune's function API.
"""
import ray
from ray import tune
from ray.tune.schedulers import PopulationBasedTraining

import argparse
import os
from filelock import FileLock
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
import numpy as np

from common import demo_gan, get_data_loader, plot_images, train, weights_init
from common import Discriminator, Generator, Net


# __Train_begin__
def dcgan_train(config, checkpoint_dir=None):
    step = 0
    use_cuda = config.get("use_gpu") and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    netD = Discriminator().to(device)
    netD.apply(weights_init)
    netG = Generator().to(device)
    netG.apply(weights_init)
    criterion = nn.BCELoss()
    optimizerD = optim.Adam(
        netD.parameters(), lr=config.get("lr", 0.01), betas=(beta1, 0.999))
    optimizerG = optim.Adam(
        netG.parameters(), lr=config.get("lr", 0.01), betas=(beta1, 0.999))
    with FileLock(os.path.expanduser("~/.data.lock")):
        dataloader = get_data_loader()

    if checkpoint_dir is not None:
        path = os.path.join(checkpoint_dir, "checkpoint")
        checkpoint = torch.load(path)
        netD.load_state_dict(checkpoint["netDmodel"])
        netG.load_state_dict(checkpoint["netGmodel"])
        optimizerD.load_state_dict(checkpoint["optimD"])
        optimizerG.load_state_dict(checkpoint["optimG"])
        step = checkpoint["step"]

        if "netD_lr" in config:
            for param_group in optimizerD.param_groups:
                param_group["lr"] = config["netD_lr"]
        if "netG_lr" in config:
            for param_group in optimizerG.param_groups:
                param_group["lr"] = config["netG_lr"]

    while True:
        lossG, lossD, is_score = train(netD, netG, optimizerG, optimizerD,
                                       criterion, dataloader, step, device,
                                       config["mnist_model_ref"])
        step += 1
        with tune.checkpoint_dir(step=step) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save({
                "netDmodel": netD.state_dict(),
                "netGmodel": netG.state_dict(),
                "optimD": optimizerD.state_dict(),
                "optimG": optimizerG.state_dict(),
                "step": step,
            }, path)
        tune.report(lossg=lossG, lossd=lossD, is_score=is_score)


# __Train_end__

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init()

    import urllib.request
    # Download a pre-trained MNIST model for inception score calculation.
    # This is a tiny model (<100kb).
    if not os.path.exists(MODEL_PATH):
        print("downloading model")
        os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
        urllib.request.urlretrieve(
            "https://github.com/ray-project/ray/raw/master/python/ray/tune/"
            "examples/pbt_dcgan_mnist/mnist_cnn.pt", MODEL_PATH)

    dataloader = get_data_loader()
    if not args.smoke_test:
        plot_images(dataloader)

    # __tune_begin__

    # load the pretrained mnist classification model for inception_score
    mnist_cnn = Net()
    mnist_cnn.load_state_dict(torch.load(MODEL_PATH))
    mnist_cnn.eval()
    # Put the model in Ray object store.
    mnist_model_ref = ray.put(mnist_cnn)

    scheduler = PopulationBasedTraining(
        perturbation_interval=5,
        hyperparam_mutations={
            # distribution for resampling
            "netG_lr": lambda: np.random.uniform(1e-2, 1e-5),
            "netD_lr": lambda: np.random.uniform(1e-2, 1e-5),
        })

    tune_iter = 5 if args.smoke_test else 300
    analysis = tune.run(
        dcgan_train,
        name="pbt_dcgan_mnist",
        scheduler=scheduler,
        verbose=1,
        stop={
            "training_iteration": tune_iter,
        },
        metric="is_score",
        mode="max",
        num_samples=8,
        config={
            "netG_lr": tune.choice([0.0001, 0.0002, 0.0005]),
            "netD_lr": tune.choice([0.0001, 0.0002, 0.0005]),
            "mnist_model_ref": mnist_model_ref
        })
    # __tune_end__

    # demo of the trained Generators
    if not args.smoke_test:
        all_trials = analysis.trials
        checkpoint_paths = [
            os.path.join(analysis.get_best_checkpoint(t), "checkpoint")
            for t in all_trials
        ]
        demo_gan(analysis, checkpoint_paths)

ImportError: cannot import name 'demo_gan' from 'common' (/home/antoine/anaconda3/lib/python3.7/site-packages/common/__init__.py)