In [1]:
# IN THIS MODULE: IMPORTS, CNN, TRAIN, TEST, MNIS_FUNCTION, SPACE

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from hyperopt import hp
from ray.tune.suggest.hyperopt import HyperOptSearch
from ray.tune.suggest.skopt import SkOptSearch
from ray import tune
from ray.tune.suggest.bayesopt import BayesOptSearch
import time
import ray
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest.ax import AxSearch
import argparse
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest.nevergrad import NevergradSearch
import nevergrad as ng
import json
import os
from ray.tune import Trainable
from ray.tune.schedulers.hb_bohb import HyperBandForBOHB
from ray.tune.suggest.bohb import TuneBOHB
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.schedulers import AsyncHyperBandScheduler
#from ray.tune.suggest.dragonfly import DragonflySearch
from ray.tune.suggest.zoopt import ZOOptSearch
from ray.tune.schedulers import AsyncHyperBandScheduler
from zoopt import ValueType
import torch
import adabelief_pytorch
global_checkpoint_period=np.inf

In [2]:
def train_TREC(config):
    import torch
    from torchtext import data
    from torchtext import datasets
    import random

    SEED = 1234
    savedPath = os.getcwd()
    os.chdir('/home/antoine/Projet/NovelTuning')
    
    
    #torch.manual_seed(SEED)
    #torch.backends.cudnn.deterministic = True

    TEXT = data.Field(tokenize = 'spacy')
    LABEL = data.LabelField()

    train_data, test_data = datasets.TREC.splits(TEXT, LABEL,root='data/trec', fine_grained=False)

    train_data, valid_data = train_data.split(random_state = random.seed(SEED))

    MAX_VOCAB_SIZE = 25_000


    sigmoid_func_uniq = get_sigmoid_func(config.get("sigmoid_func", 0))

    
    TEXT.build_vocab(train_data, 
                     max_size = MAX_VOCAB_SIZE, 
                     vectors = 'glove.6B.100d', 
                     unk_init = torch.Tensor.normal_)

    LABEL.build_vocab(train_data)

    os.chdir(savedPath)
    
    BATCH_SIZE = 64

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
        (train_data, valid_data, test_data), 
        batch_size = BATCH_SIZE,
        device = device)



   
    import torch.nn as nn
    import torch.nn.functional as F

    class CNN(nn.Module):
        def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                     dropout, pad_idx):

            super().__init__()

            self.embedding = nn.Embedding(vocab_size, embedding_dim)

            self.convs = nn.ModuleList([
                                        nn.Conv2d(in_channels = 1, 
                                                  out_channels = n_filters, 
                                                  kernel_size = (fs, embedding_dim)) 
                                        for fs in filter_sizes
                                        ])

            self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)

            self.dropout = nn.Dropout(dropout)

        def forward(self, text):

            #text = [sent len, batch size]

            text = text.permute(1, 0)

            #text = [batch size, sent len]

            embedded = self.embedding(text)

            #embedded = [batch size, sent len, emb dim]

            embedded = embedded.unsqueeze(1)

            #embedded = [batch size, 1, sent len, emb dim]

            conved = [sigmoid_func_uniq(conv(embedded)).squeeze(3) for conv in self.convs]

            #conv_n = [batch size, n_filters, sent len - filter_sizes[n]]

            pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]

            #pooled_n = [batch size, n_filters]

            cat = self.dropout(torch.cat(pooled, dim = 1))

            #cat = [batch size, n_filters * len(filter_sizes)]

            return self.fc(cat)
    INPUT_DIM = 7503
    EMBEDDING_DIM = 100
    N_FILTERS = 100
    FILTER_SIZES = [2,3,4]
    OUTPUT_DIM = len(LABEL.vocab)
    DROPOUT = 0.5
    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
    def count_parameters(model):
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

 #   print(f'The model has {count_parameters(model):,} trainable parameters')

    pretrained_embeddings = TEXT.vocab.vectors

    model.embedding.weight.data.copy_(pretrained_embeddings)

    UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

    model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
    model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

    import torch.optim as optim

    #optimizer = optim.Adam(model.parameters())
    if(optimizer_is_adam == True):
        optimizer = torch.optim.Adam(model.parameters(), lr=config.get("lr", 0.01), 
                                 betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
                                         eps=config.get("eps", 1e-08), weight_decay=config.get("weight_decay", 0), 
                                         amsgrad=True)
    else: 
        optimizer = adabelief_pytorch.AdaBelief(model.parameters(), lr=config.get("lr", 0.01), 
                                 betas=((config.get("b1", 0.999),config.get("b2", 0.9999))),
                                         eps=config.get("eps", 1e-08), weight_decay=config.get("weight_decay", 0))
    
    
    criterion = nn.CrossEntropyLoss()

    model = model.to(device)
    criterion = criterion.to(device)

    def categorical_accuracy(preds, y):
        """
        Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
        """
        max_preds = preds.argmax(dim = 1, keepdim = True) # get the index of the max probability
        correct = max_preds.squeeze(1).eq(y)
        return correct.sum() / torch.FloatTensor([y.shape[0]])


    def train(model, iterator, optimizer, criterion):

        epoch_loss = 0
        epoch_acc = 0

        model.train()

        for batch in iterator:

            optimizer.zero_grad()

            predictions = model(batch.text)

            loss = criterion(predictions, batch.label)

            acc = categorical_accuracy(predictions, batch.label)

            loss.backward()

            optimizer.step()

            epoch_loss += loss.item()
            epoch_acc += acc.item()

        return epoch_loss / len(iterator), epoch_acc / len(iterator)

    def evaluate(model, iterator, criterion):

        epoch_loss = 0
        epoch_acc = 0

        model.eval()

        with torch.no_grad():

            for batch in iterator:

                predictions = model(batch.text)

                loss = criterion(predictions, batch.label)

                acc = categorical_accuracy(predictions, batch.label)

                epoch_loss += loss.item()
                epoch_acc += acc.item()

        return epoch_loss / len(iterator), epoch_acc / len(iterator)

    import time

    def epoch_time(start_time, end_time):
        elapsed_time = end_time - start_time
        elapsed_mins = int(elapsed_time / 60)
        elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
        return elapsed_mins, elapsed_secs


    best_valid_loss = float('inf')

    for e in range(ITERATIONS):

        start_time = time.time()

        train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
        valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)


        tune.report(loss=valid_loss)

        if e % 5 == 0:
            # This saves the model to the trial directory
            torch.save(model.state_dict(), "./model.pth")

 #       print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
  #      print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
        





In [3]:
class SentimentRNN(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, output_size, n_layers,
             drop_prob, sigmoid , vocab_size):
        super(SentimentRNN, self).__init__()
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers,
                            dropout=drop_prob, batch_first=True)
        #self.lstm = nn.GRU(embedding_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)

        self.dropout = nn.Dropout(0.3)

        self.fc = nn.Linear(hidden_dim, output_size)
        self.sig = nn.Sigmoid()

    def forward(self, x, hidden):
        hidden= tuple([each.data for each in hidden])
        batch_size = x.size(0)
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds, hidden)

        # stack up lstm outputs
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)

        # dropout and fully connected layer
        out = self.dropout(lstm_out)
        out = self.fc(out)

        # sigmoid function
        sig_out = self.sig(out)

        # reshape to be batch_size first
        sig_out = sig_out.view(batch_size, -1)
        sig_out = sig_out[:, -1] # get last batch of labels

        # return last sigmoid output and hidden state
        return sig_out, hidden


    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                        weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())
        return hidden


class SentimentRNN1(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, output_size, n_layers,
             drop_prob, sigmoid , vocab_size):
        super(SentimentRNN1, self).__init__()
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        #self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers,
                        #    dropout=drop_prob, batch_first=True)
        self.lstm = nn.GRU(embedding_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)

        self.dropout = nn.Dropout(0.3)

        self.fc = nn.Linear(hidden_dim, output_size)
        self.sig = nn.Sigmoid()

    def forward(self, x, hidden):
        hidden = hidden.data
        batch_size = x.size(0)
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds, hidden)

        # stack up lstm outputs
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)

        # dropout and fully connected layer
        out = self.dropout(lstm_out)
        out = self.fc(out)

        # sigmoid function
        sig_out = self.sig(out)

        # reshape to be batch_size first
        sig_out = sig_out.view(batch_size, -1)
        sig_out = sig_out[:, -1] # get last batch of labels

        # return last sigmoid output and hidden state
        return sig_out, hidden


    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                        weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())
        (a,b) = hidden
        return a
    
class CNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                 dropout, pad_idx,sigmoid):

        super().__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        self.convs = nn.ModuleList([
                                    nn.Conv2d(in_channels = 1, 
                                              out_channels = n_filters, 
                                              kernel_size = (fs, embedding_dim)) 
                                    for fs in filter_sizes
                                    ])

        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)

        self.dropout = nn.Dropout(dropout)
        
        self.sigmoid = sigmoid
        self.sig = nn.Sigmoid()


    def forward(self, text):

        #text = [sent len, batch size]

       # text = text.permute(1, 0)
        #We want already have batch size, len for sentiment!!!!!
        #text = [batch size, sent len]

        embedded = self.embedding(text)

        #embedded = [batch size, sent len, emb dim]

        embedded = embedded.unsqueeze(1)

        #embedded = [batch size, 1, sent len, emb dim]

        conved = [self.sigmoid(conv(embedded)).squeeze(3) for conv in self.convs]

        #conv_n = [batch size, n_filters, sent len - filter_sizes[n]]

        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]

        #pooled_n = [batch size, n_filters]

        cat = self.dropout(torch.cat(pooled, dim = 1))

        #cat = [batch size, n_filters * len(filter_sizes)]
        sig_out = self.sig(self.fc(cat))

        # reshape to be batch_size first
        sig_out = sig_out.view(50, -1)
        #sig_out = sig_out[:, -1] # get last batch of labels

        # return last sigmoid output and hidden state
        return np.squeeze(sig_out)

In [4]:
   #https://colab.research.google.com/github/agungsantoso/deep-learning-v2-pytorch/blob/master/sentiment-rnn/Sentiment_RNN_Exercise.ipynb#scrollTo=AVzirwGqpmva
def train_IMDB(config):
    train_x = np.load('/home/antoine/Projet/NovelTuning/train_x.npy')
    train_y = np.load('/home/antoine/Projet/NovelTuning/train_y.npy')
    val_x = np.load('/home/antoine/Projet/NovelTuning/val_x.npy')
    val_y = np.load('/home/antoine/Projet/NovelTuning/val_y.npy')
    test_x = np.load('/home/antoine/Projet/NovelTuning/test_x.npy')
    test_y = np.load('/home/antoine/Projet/NovelTuning/test_y.npy')
    len_vocab_to_int = 74072
    ## print out the shapes of your resultant feature data
    from torch.utils.data import TensorDataset, DataLoader

    import torch

    # create Tensor datasets
    train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
    valid_data = TensorDataset(torch.from_numpy(val_x), torch.from_numpy(val_y))
    test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))

    # dataloaders
    batch_size = 50

    # make sure to SHUFFLE your data
    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
    valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size)
    test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)
    # obtain one batch of training data
    dataiter = iter(train_loader)
    sample_x, sample_y = dataiter.next()

    # First checking if GPU is available
    train_on_gpu=torch.cuda.is_available()


    sigmoid_func_uniq = get_sigmoid_func(config.get("sigmoid_func", 0))

        
   
        
        
        
    # Instantiate the model w/ hyperparams
    vocab_size = len_vocab_to_int + 1 # +1 for zero padding + our word tokens
    output_size = 1
    embedding_dim = int(config.get("embedding",600))
    hidden_dim = int(round(config.get("hidden_dim",64)))
    n_layers =  2+ int( round(config.get("n_layer",1)))


    INPUT_DIM = 7503
    EMBEDDING_DIM = 100
    N_FILTERS = 100
    FILTER_SIZES = [2,3,4]
   # OUTPUT_DIM = len(LABEL.vocab)
    DROPOUT = 0.5
    PAD_IDX = 4    
    cnn = 0 
    if(config.get("model",0)<1/3):

        net = SentimentRNN(embedding_dim, hidden_dim, output_size, n_layers,
                           config.get("droupout_prob",0.1),
                           sigmoid_func_uniq, vocab_size)    
    elif(config.get("model",0)<2/3):
        net = SentimentRNN1(embedding_dim, hidden_dim, output_size, n_layers,
                           config.get("droupout_prob",0.1),
                           sigmoid_func_uniq, vocab_size)
    else:
        cnn = 1;
        net = CNN(vocab_size, embedding_dim, hidden_dim, FILTER_SIZES, 
              output_size, config.get("droupout_prob",0.1), PAD_IDX,sigmoid_func_uniq)
    # loss and optimization functions

    criterion = nn.BCELoss()
   # print(*(n for n in net.parameters()))
    #optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    # training params
    if(config.get("adam",0)>0.5):
        optimizer = torch.optim.Adam(net.parameters(), lr=config.get("lr", 0.01), 
           weight_decay=config.get("weight_decay", 0), 
                                         amsgrad=True)
    else: 
        optimizer = adabelief_pytorch.AdaBelief(net.parameters(), lr=config.get("lr", 0.01), 
                              weight_decay=config.get("weight_decay", 0))
    
    
   

    counter = 0
    print_every = 1
    clip=5 # gradient clipping

    # move model to GPU, if available
    if(train_on_gpu):
        net.cuda()

    net.train()
    # train for some number of epochs
    EPOCH_SIZE = 32 *4 *8
    TEST_SIZE = 32 *2 * 4

    def train_rnn():
        
        h = net.init_hidden(batch_size)
        for batch_idx, (inputs, labels) in enumerate(train_loader):
            # We set this just for the example to run quickly.
            if batch_idx * len(inputs) > EPOCH_SIZE:
                return
            if(train_on_gpu):
                inputs, labels = inputs.cuda(), labels.cuda()

            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            h = tuple([each.data for each in h])

            # zero accumulated gradients
            net.zero_grad()

            # get the output from the model
            output, h = net(inputs, h)

            # calculate the loss and perform backprop

            loss = criterion(output.squeeze(), labels.float())
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            optimizer.step() 

    def train_cnn(model, optimizer ,func ,train_loader):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.train()
        #for (data, target) in train_loader:
        for batch_idx, (data, target) in enumerate(train_loader):
            # We set this just for the example to run quickly.
            if batch_idx * len(data) > EPOCH_SIZE:
               # print("hehe")
                return

            # We set this just for the example to run quickly.
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)

            loss = func(output.squeeze(), target.float())

            loss.backward()
            optimizer.step()

    
    
    if(train_on_gpu):
        net.cuda()
    net.train()

    for i in range(ITERATIONS):
        if(cnn==0):

            train_rnn()
        else:
            train_cnn(net,optimizer,criterion,train_loader)
        
        acc = test(net,criterion,valid_loader,3+cnn)

        tune.report(loss=acc)

        if i % 5 == 0:
            # This saves the model to the trial directory
            torch.save(net.state_dict(), "./model.pth")

        

In [5]:
#Configs
parser = argparse.ArgumentParser()
parser.add_argument(
    "--smoke-test", action="store_true", help="Finish quickly for testing")
args, _ = parser.parse_known_args()          
        
    
    
#experiment_metrics = dict(metric="mean_accuracy", mode="max")
experiment_metrics = dict(metric="loss", mode="min")


ITERATIONS = 20
NUM_TUNED= 5
    


#[nn.ReLU, nn.Softmax(), nn.Tanh(),nn.Sigmoid() ]

tune_kwargs = {
    "num_samples": 1 if True else 2,
    "config": {
    "steps": 3,  # evaluation times
     "lr":  tune.quniform(1e-10, 0.1,1e-10),
    "b1": tune.quniform(0.9, 1-1e-10,1e-10),
        "b2":tune.quniform(0.9, 1-1e-10,1e-10),
        "eps": tune.uniform(1e-10, 0.1),
         "weight_decay":tune.quniform(1e-10, 0.1,1e-10),
        "sigmoid_func":nn.ReLU()
    }
}

tune_kwargs = {
    "num_samples": NUM_TUNED if args.smoke_test else NUM_TUNED,
    "config": {
    "steps": ITERATIONS,  # evaluation times
     "lr":  tune.loguniform(1e-10, 0.1),
    "b1": tune.loguniform(0.9, 1-1e-10),
        "b2":tune.loguniform(0.9, 1-1e-10),
        "eps": tune.loguniform(1e-10, 0.1),
         "weight_decay":tune.loguniform(1e-10, 0.1)
    }
}
   
#i is in [0;1]
#We want all values between 0 and 1
def get_sigmoid_func(i):
    if(i<0.33):
        return nn.ReLU()
    elif(i<0.67):
        return nn.Tanh()
    else:
        return nn.Sigmoid()

    
optimizer_is_adam = True   
    
f = get_sigmoid_func(3)
print(f(torch.randn(2)))
import random
tune_kwargs = {
    "num_samples": NUM_TUNED if args.smoke_test else NUM_TUNED,
    "config": {
    "steps": ITERATIONS,  # evaluation times
     "lr":  tune.loguniform(1e-4, 0.1 ),#,1e-4), #*10
         "weight_decay":tune.loguniform(1e-4, 0.1),#,1e-4), #*10 et 0
        "sigmoid_func":tune.uniform(0,1),
        "hidden_dim":tune.loguniform(32.,256.),#,1), #log de 32 à 256
        "n_layer":tune.uniform(1,3),#,1), #from 1 to 3
        "droupout_prob":tune.uniform(0,0.5),#,0.1), #0.x pour x allant de 0 à 5     
        "adam":tune.uniform(0,1)
    }
}

tune_kwargs = {
    "num_samples": NUM_TUNED if args.smoke_test else NUM_TUNED,
    "config": {
    "steps": ITERATIONS,  # evaluation times
     "lr":  tune.uniform(1e-4, 0.1 ),#,1e-4), #*10
         "weight_decay":tune.uniform(1e-4, 0.1),#,1e-4), #*10 et 0
        "sigmoid_func":tune.uniform(0,1),
        "hidden_dim":tune.uniform(32.,256.),#,1), #log de 32 à 256
        "n_layer":tune.uniform(1,3),#,1), #from 1 to 3
        "droupout_prob":tune.uniform(0,0.5),#,0.1), #0.x pour x allant de 0 à 5     
        "adam":tune.uniform(0,1),
        "model":tune.uniform(0,1),
    }
}

tune_kwargs = {
    "num_samples": NUM_TUNED if args.smoke_test else NUM_TUNED,
    "config": {
    "steps": ITERATIONS,  
     "lr": 0.001# tune.uniform(1e-4, 0.1 ),
     ,    "embedding": 400#tune.uniform(64, 1024),

      ,   "weight_decay":tune.uniform(1e-4, 0.1),
        "sigmoid_func":tune.uniform(0,1),
        "hidden_dim":256#tune.uniform(32.,256.),
      ,  "n_layer":1 #tune.uniform(1,3),
      ,  "droupout_prob":0.5#tune.uniform(0,0.5),     
      ,  "adam":tune.uniform(0,1),
        "model":tune.uniform(0,1),
    }
}


#x_all = [train_IMDB,  train_TREC, train_boston, train_diabetes, train_mnist, train_fashion_mnist]

f_HyperOpt(train_IMDB1)

 

tensor([0.3456, 0.6481])


NameError: name 'f_HyperOpt' is not defined

In [None]:
#One shot

model_all = [Net,ConvNet]
for i in range(0,1):
    for j in range(0,1):
        x = x_all[i]
        f_HyperOpt(x)
        f_BayesOpt(x)
        f_AX(x)
        f_NeverGrad(x)
        f_BOHB(x)
        f_Random(x)
        f_ZOOpt(x)
        print("all worked with " + str(x)+  " !")
    for i in range(1,1):
        GAN_MNIST(i)

In [None]:
#Small budget

ITERATIONS = 20
NUM_TUNED= 20


model_all = [Net,ConvNet]
optimizer_is_adam = True
if(0==1):
    for i in range(1,2):
        x = train_TREC
        f_BayesOpt(x)
        f_AX(x)
        f_NeverGrad(x)
        f_BOHB(x)
        f_Random(x)
        f_ZOOpt(x)
        print("all worked with " + str(x)+  " !")
    for i in range(1,1):
        GAN_MNIST(i)

In [None]:
def f_HyperOpt(dataset):
    
    scheduler = AsyncHyperBandScheduler(**experiment_metrics)
    bayesopt = HyperOptSearch(**experiment_metrics)
    tune.run(dataset, **tune_kwargs , scheduler = scheduler,  name="hyper", search_alg=bayesopt)

In [None]:
def f_BayesOpt(dataset):
    
    scheduler = AsyncHyperBandScheduler(**experiment_metrics)
    bayesopt = BayesOptSearch(**experiment_metrics)
    tune.run(dataset, **tune_kwargs , scheduler = scheduler, name="bayes",  search_alg=bayesopt)

In [None]:
def f_AX(dataset):
    
   
    if __name__ == "__main__":
                
        algo = AxSearch(
            max_concurrent=2, #was working with 2
            **experiment_metrics
        )
        scheduler = AsyncHyperBandScheduler(**experiment_metrics)
        tune.run(
            dataset,       name="ax",
            search_alg=algo,
            scheduler=scheduler,
            **tune_kwargs)

        
#        algo = AxSearch(
#            **experiment_metrics
#        )
#        algo = ConcurrencyLimiter(algo, max_concurrent=4)

        
#        scheduler = AsyncHyperBandScheduler()
#        tune.run(
#            dataset,
#            **experiment_metrics,
#            search_alg=algo,
#            scheduler=scheduler,
#            **tune_kwargs)


In [None]:

# TODO We are interested in multiple Population based algorithms from nevergrad, and certainly not in OnePlusOne. 
def f_NeverGrad(dataset):
    algo = NevergradSearch(
    optimizer=ng.optimizers.CMA
    # space=space,  # If you want to set the space manually
    )
    algo = ConcurrencyLimiter(algo, max_concurrent=8)

    scheduler = AsyncHyperBandScheduler()

    tune.run(
        dataset,
        **experiment_metrics,
      #  name="nevergrad",
        search_alg=algo, name="ng",
        scheduler=scheduler,
        **tune_kwargs) 
    

In [None]:
def f_BOHB(dataset):

    bohb_hyperband = HyperBandForBOHB(
        time_attr="training_iteration",
        max_t=100,
        reduction_factor=2,
        **experiment_metrics)

    bohb_search = TuneBOHB(
        # space=config_space, 
        max_concurrent=4,
        **experiment_metrics)

    tune.run(
        dataset,
       # config=config, 
        scheduler=bohb_hyperband,name="bohb",
        search_alg=bohb_search,       
         **tune_kwargs)
        #num_samples=NUM_TUNED,
       # stop={"training_iteration": 100})
    
    
    

In [None]:
def f_Random(dataset):
    
    algo = NevergradSearch(
    optimizer=ng.optimizers.RandomSearch,
    # space=space,  # If you want to set the space manually
    )
    algo = ConcurrencyLimiter(algo, max_concurrent=4)

    scheduler = AsyncHyperBandScheduler()

    tune.run(
        dataset,
        **experiment_metrics,
      #  name="nevergrad",
        search_alg=algo,   name="random",    
        scheduler=scheduler,
        **tune_kwargs) 
    

In [None]:
def f_ZOOpt(dataset):

    dim_dict = {
        "lr": (ValueType.CONTINUOUS, [0, 1], 1e-2),
        "momentum": (ValueType.CONTINUOUS, [0,1, 0.9], 1e-2)
    }

    zoopt_search_config = {
        "parallel_num": 8,  # how many workers to parallel
    }

    

    zoopt_search = ZOOptSearch(
    algo="Asracos",  # only support Asracos currently
    #dim_dict=dim_dict,
    budget=ITERATIONS,
    #dim_dict=dim_dict,
   #     **zoopt_search_config,
    **experiment_metrics)
    
    scheduler = AsyncHyperBandScheduler(**experiment_metrics)

   
    tune.run(dataset,
 #        config = config,
    search_alg=zoopt_search,
   # num_samples= ITERATIONS,
    scheduler=scheduler,       
    #         paralell_num=4,
    name="zoopt_search", 
              **tune_kwargs
    )

In [None]:
import numpy as np

# read data from text files
with open('data/reviews.txt', 'r') as f:
    reviews = f.read()
with open('data/labels.txt', 'r') as f:
    labels = f.read()

from string import punctuation

print(punctuation)

# get rid of punctuation
reviews = reviews.lower() # lowercase, standardize
all_text = ''.join([c for c in reviews if c not in punctuation])

# split by new lines and spaces
reviews_split = all_text.split('\n')
all_text = ' '.join(reviews_split)

# create a list of words
words = all_text.split()

# feel free to use this import 
from collections import Counter

## Build a dictionary that maps words to integers
counts = Counter(words)
vocab = sorted(counts, key=counts.get, reverse=True)
vocab_to_int = {word: ii for ii, word in enumerate(vocab,1)} 

## use the dict to tokenize each review in reviews_split
## store the tokenized reviews in reviews_ints
reviews_ints = []
for review in reviews_split:
  reviews_ints.append([vocab_to_int[word] for word in review.split()])

# stats about vocabulary
print('Unique words: ', len((vocab_to_int)))  # should ~ 74000+
print()

# print tokens in first review
print('Tokenized review: \n', reviews_ints[:1])

# 1=positive, 0=negative label conversion
labels_split = labels.split('\n')
encoded_labels = np.array([1 if label == 'positive' else 0 for label in labels_split])

# outlier review stats
review_lens = Counter([len(x) for x in reviews_ints])
print("Zero-length reviews: {}".format(review_lens[0]))
print("Maximum review length: {}".format(max(review_lens)))



print('Number of reviews before removing outliers: ', len(reviews_ints))

## remove any reviews/labels with zero length from the reviews_ints list.

## get any indices of any reviews with length 0
non_zero_idx = [ii for ii, review in enumerate(reviews_ints) if len(review) != 0]

# remove 0-length review with their labels
reviews_ints = [reviews_ints[ii] for ii in non_zero_idx]
encoded_labels = np.array([encoded_labels[ii] for ii in non_zero_idx])

print('Number of reviews after removing outliers: ', len(reviews_ints))


def pad_features(reviews_ints, seq_length):
    ''' Return features of review_ints, where each review is padded with 0's 
        or truncated to the input seq_length.
    '''
    ## getting the correct rows x cols shape
    features = np.zeros((len(reviews_ints), seq_length), dtype=int)
    
    ## for each review, I grab that review
    for i, row in enumerate(reviews_ints):
      features[i, -len(row):] = np.array(row)[:seq_length]
    
    return features
# Test your implementation!

seq_length = 200

features = pad_features(reviews_ints, seq_length=seq_length)

## test statements - do not change - ##
assert len(features)==len(reviews_ints), "Your features should have as many rows as reviews."
assert len(features[0])==seq_length, "Each feature row should contain seq_length values."

# print first 10 values of the first 30 batches 
print(features[:30,:10])

split_frac = 0.8

## split data into training, validation, and test data (features and labels, x and y)
split_idx = int(len(features)*0.8)
train_x, remaining_x = features[:split_idx], features[split_idx:]
train_y, remaining_y = encoded_labels[:split_idx], encoded_labels[split_idx:]

test_idx = int(len(remaining_x)*0.5)
val_x, test_x = remaining_x[:test_idx], remaining_x[test_idx:]
val_y, test_y = remaining_y[:test_idx], remaining_y[test_idx:]

In [None]:
   #https://colab.research.google.com/github/agungsantoso/deep-learning-v2-pytorch/blob/master/sentiment-rnn/Sentiment_RNN_Exercise.ipynb#scrollTo=AVzirwGqpmva
def train_IMDB1(config):
    train_x = np.load('/home/antoine/Projet/NovelTuning/train_x.npy')
    train_y = np.load('/home/antoine/Projet/NovelTuning/train_y.npy')
    val_x = np.load('/home/antoine/Projet/NovelTuning/val_x.npy')
    val_y = np.load('/home/antoine/Projet/NovelTuning/val_y.npy')
    test_x = np.load('/home/antoine/Projet/NovelTuning/test_x.npy')
    test_y = np.load('/home/antoine/Projet/NovelTuning/test_y.npy')
    len_vocab_to_int = 74072
    ## print out the shapes of your resultant feature data
    from torch.utils.data import TensorDataset, DataLoader
    print(train_x.shape)
    import torch

    # create Tensor datasets
    train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
    valid_data = TensorDataset(torch.from_numpy(val_x), torch.from_numpy(val_y))
    test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))

    # dataloaders
    batch_size = 50

    # make sure to SHUFFLE your data
    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
    valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size)
    test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)
    # obtain one batch of training data
    dataiter = iter(train_loader)
    sample_x, sample_y = dataiter.next()

    # First checking if GPU is available
    train_on_gpu=torch.cuda.is_available()


    #sigmoid_func_uniq = get_sigmoid_func(config.get("sigmoid_func", 0))





        # Instantiate the model w/ hyperparams
    vocab_size = len_vocab_to_int + 1 # +1 for zero padding + our word tokens
    output_size = 1
    embedding_dim = 400 
    hidden_dim = 256
    n_layers = 2

    net = SentimentRNN(embedding_dim, hidden_dim, output_size, n_layers,
                           0.3,
                           1, vocab_size)  
    print(net)


        # loss and optimization functions
    lr=0.001

    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)

    counter = 0
    print_every = 1
    clip=5 # gradient clipping

    # move model to GPU, if available
    if(train_on_gpu):
        net.cuda()

    net.train()
    # train for some number of epochs
    EPOCH_SIZE = 32 *4 *8
    TEST_SIZE = 32 *2 * 4

        # training params

    epochs = 4 # 3-4 is approx where I noticed the validation loss stop decreasing

    counter = 0
    print_every = 10
    clip=5 # gradient clipping

    # move model to GPU, if available
    if(train_on_gpu):
        net.cuda()

    net.train()
    # train for some number of epochs
    for e in range(epochs):
        # initialize hidden state
        h = net.init_hidden(batch_size)

        # batch loop
        for inputs, labels in train_loader:
            counter += 1

            if(train_on_gpu):
                inputs, labels = inputs.cuda(), labels.cuda()

            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            h = tuple([each.data for each in h])

            # zero accumulated gradients
            net.zero_grad()

            # get the output from the model
            output, h = net(inputs, h)

            # calculate the loss and perform backprop
            loss = criterion(output.squeeze(), labels.float())
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            optimizer.step()

            # loss stats
            if counter % print_every == 0:
                # Get validation loss
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                for inputs, labels in valid_loader:

                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    val_h = tuple([each.data for each in val_h])

                    if(train_on_gpu):
                        inputs, labels = inputs.cuda(), labels.cuda()

                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(output.squeeze(), labels.float())

                    val_losses.append(val_loss.item())

                net.train()
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.6f}...".format(loss.item()),
                      "Val Loss: {:.6f}".format(np.mean(val_losses)))
    


train_IMDB1(4)

In [None]:
tune_kwargs = {
    "num_samples": 1 if args.smoke_test else 1,
    "config": {
    "steps": ITERATIONS,  
     "lr": 0.001# tune.uniform(1e-4, 0.1 ),
     ,    "embedding": 400#tune.uniform(64, 1024),

      ,   "weight_decay":tune.uniform(1e-4, 0.1),
        "sigmoid_func":tune.uniform(0,1),
        "hidden_dim":256#tune.uniform(32.,256.),
      ,  "n_layer":1 #tune.uniform(1,3),
      ,  "droupout_prob":0.5#tune.uniform(0,0.5),     
      ,  "adam":tune.uniform(0,1),
        "model":0 #tune.uniform(0,1),
    }
}


#x_all = [train_IMDB,  train_TREC, train_boston, train_diabetes, train_mnist, train_fashion_mnist]

f_HyperOpt(train_IMDB1)

In [7]:
   #https://colab.research.google.com/github/agungsantoso/deep-learning-v2-pytorch/blob/master/sentiment-rnn/Sentiment_RNN_Exercise.ipynb#scrollTo=AVzirwGqpmva
def train_IMDB1(config):
    train_x = np.load('/home/antoine/Projet/NovelTuning/train_x.npy')
    train_y = np.load('/home/antoine/Projet/NovelTuning/train_y.npy')
    val_x = np.load('/home/antoine/Projet/NovelTuning/val_x.npy')
    val_y = np.load('/home/antoine/Projet/NovelTuning/val_y.npy')
    test_x = np.load('/home/antoine/Projet/NovelTuning/test_x.npy')
    test_y = np.load('/home/antoine/Projet/NovelTuning/test_y.npy')
    len_vocab_to_int = 74072
    ## print out the shapes of your resultant feature data
    from torch.utils.data import TensorDataset, DataLoader
    print(train_x.shape)
    import torch

    # create Tensor datasets
    train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
    valid_data = TensorDataset(torch.from_numpy(val_x), torch.from_numpy(val_y))
    test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))

    # dataloaders
    batch_size = 50

    # make sure to SHUFFLE your data
    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
    valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size)
    test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)
    # obtain one batch of training data
    dataiter = iter(train_loader)
    sample_x, sample_y = dataiter.next()

    # First checking if GPU is available
    train_on_gpu=torch.cuda.is_available()


    #sigmoid_func_uniq = get_sigmoid_func(config.get("sigmoid_func", 0))





        # Instantiate the model w/ hyperparams
    vocab_size = len_vocab_to_int + 1 # +1 for zero padding + our word tokens
    output_size = 1
    embedding_dim = 400 
    hidden_dim = 256
    n_layers = 2

    net = SentimentRNN(embedding_dim, hidden_dim, output_size, n_layers,
                           0.3,
                           1, vocab_size)  
    print(net)
    class SentimentRNN2(nn.Module):
    """
    The RNN model that will be used to perform Sentiment analysis.
    """

    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, drop_prob=0.5):
        """
        Initialize the model by setting up the layers.
        """
        super(SentimentRNN2, self).__init__()

        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        
        # embedding and LSTM layers
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers,
                            dropout=drop_prob, batch_first=True)
        
        # dropout layer
        self.dropout = nn.Dropout(0.3)
        
        # linear and sigmoid layer
        self.fc = nn.Linear(hidden_dim, output_size)
        self.sig = nn.Sigmoid()

    def forward(self, x, hidden):
        """
        Perform a forward pass of our model on some input and hidden state.
        """
        batch_size = x.size(0)
        
        # embeddings and lstm_out
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds, hidden)
        
        # stack up lstm outputs
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
        
        # dropout and fully connected layer
        out = self.dropout(lstm_out)
        out = self.fc(out)
        
        # sigmoid function
        sig_out = self.sig(out)
        
        # reshape to be batch_size first
        sig_out = sig_out.view(batch_size, -1)
        sig_out = sig_out[:, -1] # get last batch of labels
        
        # return last sigmoid output and hidden state
        return sig_out, hidden
    
    
    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        
        if(train_on_gpu):
          hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda(),
                   weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda())
        else:
          hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                   weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())
        
        return hidden
        
    net = SentimentRNN2(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)


        # loss and optimization functions
    lr=0.001

    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)

    counter = 0
    print_every = 1
    clip=5 # gradient clipping

    # move model to GPU, if available
    if(train_on_gpu):
        net.cuda()

    net.train()
    # train for some number of epochs
    EPOCH_SIZE = 32 *4 *8
    TEST_SIZE = 32 *2 * 4

        # training params

    epochs = 4 # 3-4 is approx where I noticed the validation loss stop decreasing

    counter = 0
    print_every = 10
    clip=5 # gradient clipping

    # move model to GPU, if available
    if(train_on_gpu):
        net.cuda()

    net.train()
    # train for some number of epochs
    for e in range(epochs):
        # initialize hidden state
        h = net.init_hidden(batch_size)

        # batch loop
        for inputs, labels in train_loader:
            counter += 1

            if(train_on_gpu):
                inputs, labels = inputs.cuda(), labels.cuda()

            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            h = tuple([each.data for each in h])

            # zero accumulated gradients
            net.zero_grad()

            # get the output from the model
            output, h = net(inputs, h)

            # calculate the loss and perform backprop
            loss = criterion(output.squeeze(), labels.float())
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            optimizer.step()

            # loss stats
            if counter % print_every == 0:
                # Get validation loss
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                for inputs, labels in valid_loader:

                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    val_h = tuple([each.data for each in val_h])

                    if(train_on_gpu):
                        inputs, labels = inputs.cuda(), labels.cuda()

                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(output.squeeze(), labels.float())

                    val_losses.append(val_loss.item())

                net.train()
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.6f}...".format(loss.item()),
                      "Val Loss: {:.6f}".format(np.mean(val_losses)))
    


train_IMDB1(4)

IndentationError: expected an indented block (<ipython-input-7-7f57bd76a2d0>, line 55)