# Setup

In [2]:
%matplotlib inline

import torch
import random
import numpy as np
import pandas as pd
import seaborn as sns

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt

from torch.autograd import Variable
from torchvision import datasets, transforms

sns.set()

In [None]:
# (Scratch cell: a dangling `torch.nn.` tab-completion stub lived here. It was a
# SyntaxError that stopped "Restart & Run All", so it has been removed.)

# Hyperparameter space

In [3]:
# Search space for a single hidden layer.
# Numeric genes carry lower/upper bounds ('lb', 'ub'); categorical genes list
# their options under 'func'. 'mutate' is the probability that mutate_net
# re-draws that gene.
LAYER_SPACE = {
    'nb_units': {'lb': 128, 'ub': 1024, 'mutate': 0.15},
    'dropout_rate': {'lb': 0.0, 'ub': 0.7, 'mutate': 0.2},
    'activation': {
        'func': ['linear', 'tanh', 'relu', 'sigmoid', 'elu'],
        'mutate': 0.2,
    },
}

In [4]:
# Search space for network-level genes: depth plus optimizer settings.
# Same entry layout as the per-layer space: 'lb'/'ub' bounds for numeric genes,
# 'func' options for categorical genes, 'mutate' as the re-draw probability.
NET_SPACE = {
    'nb_layers': {'lb': 1, 'ub': 3, 'mutate': 0.15},
    'lr': {'lb': 0.001, 'ub': 0.1, 'mutate': 0.15},
    'weight_decay': {'lb': 0.00001, 'ub': 0.0004, 'mutate': 0.2},
    'optimizer': {
        'func': ['sgd', 'adam', 'adadelta', 'rmsprop'],
        'mutate': 0.2,
    },
}

# Randomise network

In [5]:
def random_value(space):
    """Draw one random value from a single search-space entry.

    Args:
        space: dict describing one gene. Either it has a 'func' key holding a
            list of categorical options, or it has numeric 'lb' and 'ub' keys.

    Returns:
        A random element of space['func'] for categorical genes; a random
        integer in [lb, ub] (both inclusive) when 'lb' is an int; otherwise a
        float drawn uniformly from [lb, ub).
    """
    if 'func' in space:
        # Categorical gene, e.g. optimizer or activation function.
        return random.sample(space['func'], 1)[0]

    low = space['lb']
    if isinstance(low, int):
        # Integer gene, e.g. number of units or layers.
        return random.randint(low, space['ub'])

    # Continuous gene, e.g. dropout rate, learning rate or weight decay.
    return random.random() * (space['ub'] - low) + low

In [6]:
def randomize_network(layer_space, net_space):
    """Sample a random network description.

    Args:
        layer_space: dict mapping each per-layer gene name to its search-space
            entry (as accepted by random_value).
        net_space: dict mapping each network-level gene name to its
            search-space entry. Must contain 'nb_layers'.

    Returns:
        dict with one sampled value per key of net_space plus a 'layers' list
        holding net['nb_layers'] dicts, each with one sampled value per key of
        layer_space.
    """
    # Network-level genes first, so 'nb_layers' is known before layers are drawn.
    net = {key: random_value(spec) for key, spec in net_space.items()}

    # Always set 'layers', even when nb_layers is 0; the original only assigned
    # it inside the loop body, so a zero-depth net had no 'layers' key at all.
    net['layers'] = [
        {key: random_value(spec) for key, spec in layer_space.items()}
        for _ in range(net['nb_layers'])
    ]

    return net

In [7]:
# Sanity check: draw one random architecture and display it.
randomize_network(LAYER_SPACE, NET_SPACE)

{'layers': [{'activation': 'tanh',
   'dropout_rate': 0.055676525173730444,
   'nb_units': 132},
  {'activation': 'relu', 'dropout_rate': 0.4407416076783757, 'nb_units': 668},
  {'activation': 'elu', 'dropout_rate': 0.3728920305155153, 'nb_units': 982}],
 'lr': 0.0854667753745106,
 'nb_layers': 3,
 'optimizer': 'adam',
 'weight_decay': 9.064557628116618e-05}

# Mutate network

In [42]:
def mutate_net(net, layer_space, net_space):
    """Randomly mutate a network description.

    NOTE: `net` is modified IN PLACE and the same object is returned. Callers
    that need to keep the parent intact must pass a copy.

    Each gene is re-drawn with the probability stored under 'mutate' in its
    search-space entry. The depth mutation either appends a freshly sampled
    layer or removes the last one.

    Args:
        net: network dict with 'lr', 'weight_decay', 'optimizer', 'nb_layers'
            and a 'layers' list of per-layer dicts.
        layer_space: search space for the per-layer genes.
        net_space: search space for the network-level genes; must contain
            'lr', 'weight_decay', 'optimizer' and 'nb_layers'.

    Returns:
        The (mutated) `net` object.
    """
    # Optimizer-level genes.
    for gene in ('lr', 'weight_decay', 'optimizer'):
        if random.random() < net_space[gene]['mutate']:
            net[gene] = random_value(net_space[gene])

    # Per-layer genes.
    for layer in net['layers']:
        for gene, spec in layer_space.items():
            if random.random() < spec['mutate']:
                layer[gene] = random_value(spec)

    # Depth: try to add a layer with probability 0.5, otherwise try to remove one.
    depth_spec = net_space['nb_layers']
    if random.random() < depth_spec['mutate']:
        max_layers = depth_spec['ub']
        # Honour the configured lower bound instead of a hard-coded 1, but
        # never drop below one layer (the floor the original code enforced).
        min_layers = max(1, depth_spec.get('lb', 1))

        # Nets already deeper than the upper bound are left untouched.
        if net['nb_layers'] <= max_layers:
            if random.random() < 0.5 and net['nb_layers'] < max_layers:
                net['layers'].append(
                    {gene: random_value(spec)
                     for gene, spec in layer_space.items()}
                )
            elif net['nb_layers'] > min_layers:
                net['layers'].pop()

            # Keep the depth gene in sync with the actual layer list.
            net['nb_layers'] = len(net['layers'])

    return net

# NetBuilder

In [77]:
class Flatten(nn.Module):
    """Collapse every non-batch dimension into a single one.

    An input of shape (batch, d1, d2, ...) comes out as (batch, d1*d2*...).
    Used as the first stage of NetFromBuildInfo.
    """

    def forward(self, x):
        batch_size = x.size(0)
        return x.view(batch_size, -1)

In [197]:
class NetFromBuildInfo(nn.Module):
    """Fully connected classifier assembled from a build_info dict.

    The network is Flatten -> [Linear -> Dropout -> activation] * n -> Linear,
    with a 28*28 input (MNIST images) and 10 output logits. forward() returns
    log-probabilities (log_softmax over dim 1).

    Args:
        build_info: dict with
            'layers': list of dicts, each holding 'nb_units', 'dropout_rate'
                and 'activation' ('linear', 'tanh', 'relu', 'sigmoid' or 'elu');
            'lr', 'weight_decay': passed to the optimizer;
            'optimizer': one of 'adam', 'rmsprop', 'adadelta', 'sgd'.

    Attributes:
        model: the nn.Sequential stack.
        optimizer: the optimizer selected by build_info['optimizer'], bound to
            the model's parameters.

    Raises:
        KeyError: on an unknown activation or optimizer name.
    """

    def __init__(self, build_info):
        super(NetFromBuildInfo, self).__init__()

        # These modules hold no parameters or state, so re-using one instance
        # in several layers is safe.
        self.activation_dict = {
            'tanh': nn.Tanh(),
            'relu': nn.ReLU(),
            'sigmoid': nn.Sigmoid(),
            'elu': nn.ELU()
            }

        # NETWORK DEFINITION

        # Width of the previous stage; starts at the flattened MNIST image size.
        self.previous_units = 28 * 28

        self.model = nn.Sequential()
        self.model.add_module('flatten', Flatten())

        for i, layer_info in enumerate(build_info['layers']):
            i = str(i)

            # Fix: the original read a bare `previous_units` here, which is
            # not defined anywhere and raised NameError on construction.
            self.model.add_module(
                'fc_' + i,
                nn.Linear(self.previous_units, layer_info['nb_units'])
                )

            self.previous_units = layer_info['nb_units']

            self.model.add_module(
                'dropout_' + i,
                nn.Dropout(p=layer_info['dropout_rate'])
                )

            # 'linear' means identity, so no activation module is added.
            if layer_info['activation'] != 'linear':
                self.model.add_module(
                    layer_info['activation'] + i,
                    self.activation_dict[layer_info['activation']])

        self.model.add_module(
            'logits',
            nn.Linear(self.previous_units, 10)  # 10 MNIST classes
            )

        # OPTIMIZER

        self.opt_args = {
            'weight_decay': build_info['weight_decay'],
            'lr': build_info['lr']
            }

        # All candidates are constructed up front; only the selected one is
        # kept as self.optimizer.
        self.optimizer_dict = {
            'adam': optim.Adam(self.model.parameters(), **self.opt_args),
            'rmsprop': optim.RMSprop(self.model.parameters(), **self.opt_args),
            'adadelta': optim.Adadelta(self.model.parameters(), **self.opt_args),
            # momentum to train faster
            'sgd': optim.SGD(self.model.parameters(), **self.opt_args, momentum=0.9)
            }

        self.optimizer = self.optimizer_dict[build_info['optimizer']]

    def forward(self, x):
        """Return log-probabilities over the 10 classes for a batch of inputs."""
        x = self.model(x)
        return F.log_softmax(x, dim=1)

In [210]:
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Only parameters with requires_grad set are counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [212]:
# NOTE(review): `test` is not defined in any visible cell -- presumably a
# NetFromBuildInfo instance left over from a deleted cell. Define it before
# this line so the notebook survives Restart & Run All.
count_parameters(test) #trainable params

1559583

# Evolution optimiser

In [None]:
from operator import itemgetter
import copy
from worker import CustomWorker, Scheduler
        

class TournamentOptimizer:
    """Evolve a population of candidates with elitism and tournament selection.

    Args:
        population_sz: number of individuals kept in the population.
        init_fn: zero-argument callable returning a fresh individual.
        mutate_fn: one-argument callable taking an individual and returning
            the mutated individual. It always receives a deep copy, so it may
            mutate its argument in place.
        nb_workers: number of worker ids handed to the Scheduler in evaluate().
        use_cuda: forwarded to the Scheduler in evaluate(). Defaults to False.

    Attributes:
        population: current list of individuals.
        evaluations: score per individual from the last evaluate() call.
        elite: (score, individual) pairs for the top performers of the last step.
        stats: per-step copies of the evaluation scores.
        history: per-step deep copies of the evaluated population.
    """

    def __init__(self, population_sz, init_fn, mutate_fn, nb_workers=2,
                 use_cuda=False):

        self.init_fn = init_fn
        self.mutate_fn = mutate_fn
        self.nb_workers = nb_workers
        # Fix: evaluate() reads self.use_cuda, which the original never set.
        self.use_cuda = use_cuda

        # population
        self.population_sz = population_sz
        self.population = [init_fn() for _ in range(population_sz)]
        self.evaluations = np.zeros(population_sz)

        # book keeping
        self.elite = []
        self.stats = []
        self.history = []

    def step(self):
        """Run one generation: evaluate, keep the elites, fill up by tournament.

        The two best individuals are carried over unchanged. Every remaining
        slot is filled by sampling three individuals, ranking them by score,
        picking one by rank with probabilities p, p*(1-p), and the remainder
        (p = 0.85), and mutating a copy of it.
        """
        print('\nPopulation sample:')
        for i in range(0, self.population_sz, 2):
            print(self.population[i]['nb_layers'],
                  self.population[i]['layers'][0]['nb_units'])

        self.evaluate()

        print('\nPopulation mean:{} max:{}'.format(
            np.mean(self.evaluations), np.max(self.evaluations)))

        n_elite = 2
        # Indices ordered from best to worst score.
        ranking = np.argsort(self.evaluations)[::-1]
        elite_idx = ranking[:n_elite]

        # Elites are always included, unchanged, in the next population.
        children = []
        self.elite = []
        print('\nTop performers:')
        for i, e in enumerate(elite_idx):
            self.elite.append((self.evaluations[e], self.population[e]))
            print("{}-score:{}".format(str(i), self.evaluations[e]))
            children.append(self.population[e])

        # Tournament probabilities by rank: p, p*(1-p), p*(1-p)^2, ...
        p = 0.85  # winner probability
        tournament_size = 3
        probs = [p * ((1 - p) ** i) for i in range(tournament_size - 1)]
        # The last rank takes the remainder so the probabilities sum to 1.0.
        probs.append(1 - np.sum(probs))

        candidates = range(len(self.population))
        scores = np.array(self.evaluations)

        while len(children) < self.population_sz:
            sel_k = random.sample(candidates, k=tournament_size)
            fitness_k = list(scores[sel_k])
            rank = sorted(zip(sel_k, fitness_k), key=itemgetter(1), reverse=True)
            pick = np.random.choice(tournament_size, size=1, p=probs)[0]
            best = rank[pick][0]
            # Fix: mutate a deep copy. An in-place mutate_fn would otherwise
            # alter the parent, which may be an elite already carried over or
            # a parent selected again later, and children would share objects.
            model = self.mutate_fn(copy.deepcopy(self.population[best]))
            children.append(model)

        self.population = children

    def evaluate(self):
        """Score the current population through the worker Scheduler.

        Replaces self.population and self.evaluations with the two values
        returned by Scheduler.start(), and appends deep copies of both to
        self.stats / self.history.
        """
        # NOTE(review): Scheduler comes from the project's `worker` module;
        # start() is assumed to return the population together with one score
        # per individual, in matching order -- confirm against worker.py.
        workerids = range(self.nb_workers)
        workerpool = Scheduler(workerids, self.use_cuda)
        self.population, returns = workerpool.start(self.population)

        self.evaluations = returns
        self.stats.append(copy.deepcopy(returns))
        self.history.append(copy.deepcopy(self.population))