In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from timeit import default_timer as tic

tf.logging.set_verbosity(tf.logging.ERROR)


  from ._conv import register_converters as _register_converters


In [2]:
def antifix(x):
    """Round away from zero to the nearest integer (the mirror image of numpy's ``fix``)."""
    magnitude = np.ceil(np.absolute(x))
    signed = np.sign(x) * magnitude
    return signed.astype(int)
    

def shift(count=None):
    """Draw integer steps from a standard normal, rounded away from 0 and limited to [-3, 3].

    count -- number (or shape) of draws; when None a single integer is returned,
             otherwise an integer array of that size.
    """
    draws = antifix(np.random.normal(size=count))
    # np.clip works element-wise, so the array form (count is not None)
    # is limited exactly like the single-value form.
    return np.clip(draws, -3, 3)

def either(p=None):
    """Weighted coin flip: returns 0 or 1, where ``p`` is [P(0), P(1)] (fair when p is None)."""
    outcome = np.random.choice(2, p=p)
    return outcome

def constrain(x, low, high, decimals=2):
    """Clamp x to the inclusive range [low, high] and round to `decimals` places.

    x        -- a scalar or an array-like; arrays are clamped element-wise
    low/high -- inclusive bounds
    decimals -- number of decimal places kept by the final rounding
    Returns a numpy scalar for scalar input, or an ndarray for array input.
    """
    # np.clip gives the same result as an if/elif chain for scalars and
    # additionally accepts arrays, where `x < low` could not be used as a condition.
    return np.around(np.clip(x, low, high), decimals)
    
def distrib(x):
    """Softmax, but doubles values to increase disparity.

    x -- array-like of scores
    Returns an array of the same shape whose entries sum to 1
    (an empty input yields an empty array).
    """
    scaled = np.asarray(x, dtype=float) * 2
    if scaled.size == 0:
        return scaled
    # Subtracting the maximum leaves the softmax unchanged mathematically but
    # keeps np.exp from overflowing to inf (and the result to nan) for large scores.
    exps = np.exp(scaled - scaled.max())
    return exps / exps.sum()

def init_drop(p=None):
    """Sample a starting dropout rate: uniform draw from [0, 0.5), rounded to 2 decimals.

    p -- unused.
    """
    raw_rate = np.random.uniform(0, 0.5)
    return np.around(raw_rate, 2)

# Candidate activation functions; layers store an index into this list
# (None means no activation is passed to the dense layer, i.e. linear).
activations = [None, tf.nn.tanh, tf.nn.relu, tf.nn.leaky_relu, tf.nn.sigmoid]
def init_act(p=None):
    """Pick a random index into `activations`, optionally weighted by the probabilities `p`."""
    candidate_ids = np.arange(len(activations))
    return np.random.choice(candidate_ids, p=p)
    
def init_hsize():
    """Sample a starting size exponent for a fully-connected layer.

    The returned integer x lies in [4, 10]; the hidden size will be 2^x.
    """
    low_exp, high_exp = 4, 10
    # randint's upper bound is exclusive, hence the + 1
    return np.random.randint(low_exp, high_exp + 1)

def init_lr():
    """Sample a starting learning rate: exponential draw (scale 0.09) kept within [0.001, 0.3], 4 decimals."""
    raw_lr = np.random.exponential(scale=0.09)
    return constrain(raw_lr, 0.001, 0.3, decimals=4)

In [3]:
"""V2.0 - Tree-like structure, but no branching yet"""
class Node(object):
    """One link in a chain of layers; each node has at most one child in this version."""

    def __init__(self):
        self.depth = 0
        self.child = None  # single successor only; no branching yet

    def __iadd__(self, node):
        """Append `node` to the end of the chain below this node (``self += node``).

        `node.depth` is incremented once for every occupied link it is handed past.
        """
        if not self.child:
            self.child = node
            return self
        node.depth += 1
        self.child += node
        return self

    def get(self, depth):
        """Return the descendant `depth` links below this node's child, or None if the chain ends first."""
        cursor = self.child
        remaining = depth
        # Walk down one link per step until the requested depth or the end of the chain.
        while cursor is not None and remaining != 0:
            cursor = cursor.child
            remaining -= 1
        return cursor
        
class DenseNode(Node):
    """A fully-connected layer in the chain, described by dropout, activation index and size exponent."""

    def __init__(self, config=None):
        """
        config -- optional [dropout, activation index, size exponent];
                  when omitted (or empty) each trait is drawn at random.
        """
        Node.__init__(self)

        if config:
            self.dropout = config[0]
            self.activation_func = config[1]
            self.size = config[2]
        else:
            self.dropout = init_drop()
            self.activation_func = init_act()
            self.size = init_hsize()

        self.type = "Dense"

    def show(self):
        """Print this layer's traits, then those of every layer below it."""
        # Display names, in the same order as the module-level `activations` list.
        acts = ['Linear','Tanh','ReLU','LeakyReLU','Sigmoid']
        print("\tSize: {}\tActivation: {}\tDropout: {}".format(2**self.size, acts[self.activation_func], self.dropout))
        if self.child:
            self.child.show()

    def __call__(self, x, mode):
        """Build this layer (and all layers below it) on top of tensor `x`.

        x    -- input tensor
        mode -- a tf.estimator.ModeKeys value; dropout is only active in TRAIN mode
        Returns the output tensor of the last layer in the chain.
        """
        dense = tf.layers.dense(inputs=x, units=2**self.size, activation=activations[self.activation_func])
        if self.dropout != 0:
            dense = tf.layers.dropout(inputs=dense, rate=self.dropout, training=mode == tf.estimator.ModeKeys.TRAIN)
        if self.child:
            return self.child(dense, mode)
        return dense

    def trim(self, depth):
        """Cut the chain `depth` links below this node: the node reached loses its child.

        If the chain ends before `depth` is reached there is nothing to cut,
        so the call is a no-op instead of failing on a missing child.
        """
        if depth == 0:
            self.child = None
        elif self.child is not None:
            self.child.trim(depth-1)

    def mutate(self, mutation_rate):
        """Independently perturb each trait with probability `mutation_rate`, then recurse down the chain."""
        if np.random.rand() < mutation_rate:
            self.size = constrain(self.size + shift(), 4, 10)

        if np.random.rand() < mutation_rate:
            self.dropout = constrain(self.dropout + np.random.normal(scale=0.05), 0, 0.5)

        if np.random.rand() < mutation_rate:
            self.activation_func = init_act()

        if self.child:
            self.child.mutate(mutation_rate)

    def crossover(self, other, p):
        """Mix this layer's traits with `other`'s.

        p -- probabilities [keep own trait, take other's trait] handed to `either`;
             None means an even chance.
        Returns a new [dropout, activation index, size exponent] list.
        """
        config = self.config()
        if either(p):
            config[0] = other.dropout
        if either(p):
            config[1] = other.activation_func
        if either(p):
            config[2] = other.size
        return config

    def config(self):
        """Return this layer's traits as [dropout, activation index, size exponent]."""
        return [self.dropout, self.activation_func, self.size]
    
    
class Tree(object):
    """An individual of the population: hyper-parameters plus a chain of DenseNode layers.

    Acts as the root as well: `self.child` is the first layer of the chain.
    """
    def __init__(self, name, mutation_rate=0.1, grow_prob=0.3, shrink_prob=0.15, num_nodes=0, load=False):
        """
        name          -- identifier; also used as the model directory ("./<name>") when `load` is True
        mutation_rate -- per-trait mutation probability used by mutate()
        grow_prob     -- probability that mutate() appends a layer
        shrink_prob   -- probability that mutate() removes the last layer
        num_nodes     -- number of randomly initialised layers to start with
        load          -- when True the estimator is pointed at "./<name>" as its model_dir
        """
        self.x = tf.placeholder(tf.float32, shape=[None, 784])
        self.child = None  # will only be 1 child for now
        self.name = name
        self.mode = None  # nothing assigns a mode yet; defined so get_mode() has something to return

        self.size = 0  # number of layers in the chain
        self.fitness = 0
        self.mutation_rate = mutation_rate
        self.grow_prob = grow_prob
        self.shrink_prob = shrink_prob
        self.age = 0  # Total number of training steps seen
        self.accuracy = -1
        self.loss = -1
        self.train_time = -1

        self.learning_rate = init_lr()

        for i in range(num_nodes):
            self += DenseNode()

        if load:
            self.classifier = tf.estimator.Estimator(
                model_fn=self.model_fn,
                model_dir="./"+self.name
            )
        else:
            self.classifier = tf.estimator.Estimator(
                model_fn=self.model_fn
            )

    def show(self):
        """Print the tree's name, learning rate, scores (once evaluated) and every layer."""
        print("---------------------------------------------")
        print(self.name+"\tlr:{}".format(self.learning_rate))
        if self.fitness != 0:
            print("\tFitness: {}\tAccuracy: {}\tLoss: {}".format(self.fitness, self.accuracy, self.loss))
        # A tree built with num_nodes=0 has no layers to list.
        if self.child is not None:
            self.child.show()
        print("---------------------------------------------")

    def update_fitness(self):
        """Fitness is simply the last measured accuracy."""
        self.fitness = self.accuracy

    def train(self, input_fn, verbose=False, steps=20000):
        """Train the estimator for `steps` steps and record the wall-clock time.

        input_fn -- estimator input function supplying training batches
        verbose  -- when True, log the softmax probabilities every 2000 iterations
        steps    -- number of training steps; also added to `self.age`
        """
        logging_hooks = None
        if verbose:
            logged_vars = {"probabilities": "softmax_tensor"}
            logging_hooks = [tf.train.LoggingTensorHook(tensors=logged_vars, every_n_iter=2000)]

        start = tic()
        self.classifier.train(
            input_fn=input_fn,
            steps=steps,
            hooks=logging_hooks
        )
        self.train_time = tic()-start
        self.age += steps
        print("Time to train model \"{}\": {:.4f}".format(self.name, self.train_time))

    def test(self, input_fn):
        """Evaluate the estimator, refresh accuracy/loss/age/fitness and return the raw results dict."""
        results = self.classifier.evaluate(input_fn)
        self.accuracy = results['accuracy']
        self.age = results['global_step']
        self.loss = results['loss']
        self.update_fitness()
        return results

    def model_fn(self, features, labels, mode):
        """Estimator model function: layer chain -> 10-way logits -> spec for the given mode."""
        input_layer = tf.reshape(features["x"], [-1, 784])

        # With no layers in the chain the logits are computed straight from the input.
        hidden = self.child(input_layer, mode) if self.child is not None else input_layer

        logits = tf.layers.dense(inputs=hidden, units=10)

        predictions = {
            "classes": tf.argmax(input=logits, axis=1),
            "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
        }
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)
            train_op = optimizer.minimize(
                loss=loss,
                global_step=tf.train.get_global_step()
            )
            return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

        eval_metric_ops = {
            "accuracy":tf.metrics.accuracy(
                labels=labels,
                predictions=predictions['classes']
            )
        }
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            eval_metric_ops=eval_metric_ops
        )

    def __call__(self):
        """Build the inference graph on the `self.x` placeholder.

        Returns a dict with the "classes" and "probabilities" tensors.
        """
        # Layers take the mode as a second argument; PREDICT keeps dropout inactive.
        mode = tf.estimator.ModeKeys.PREDICT
        result = self.child(self.x, mode) if self.child is not None else self.x
        logits = tf.layers.dense(inputs=result, units=10, name="logits")

        return {
            "classes": tf.argmax(input=logits, axis=1),
            "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
        }

    def __iadd__(self, node):  # +=
        """Append `node` to the end of the layer chain and bump the layer count."""
        self.size += 1
        if self.child:
            node.depth += 1
            self.child += node
        else:
            self.child = node
        return self

    def get_mode(self):
        """Return `self.mode` (None unless something has assigned it)."""
        return self.mode

    def __getitem__(self, depth):
        """Return the layer at position `depth` (0 = first layer); IndexError if there is none."""
        if self.child is None:
            raise IndexError("There is no layer at depth {}.".format(depth))
        if depth==0:
            return self.child
        else:
            result = self.child.get(depth-1)
            if result is None:
                raise IndexError("There is no layer at depth {}.".format(depth))
            return result

    def mutate(self):
        """Randomly perturb the learning rate, drop the last layer, append a layer, and mutate every layer."""
        if np.random.rand() < self.mutation_rate:
            self.learning_rate = constrain(np.exp(np.log(self.learning_rate) + shift()), 0.0001, 0.5, decimals=4)
        if either(np.array([1-self.shrink_prob, self.shrink_prob])):
            # Always keep at least one layer. Removing the last of `size` layers
            # means clearing the child of the layer at index size-2, which is
            # reached by trimming size-2 links below the first layer.
            if self.size > 1:
                self.child.trim(self.size-2)
                self.size -= 1
        if either(np.array([1-self.grow_prob, self.grow_prob])):
            self += DenseNode()
        if self.child:
            self.child.mutate(self.mutation_rate)

    def crossover(self, other, name, mode='even'):
        """Crossover to get offspring of two trees
        other -- the second parent
        name -- name given to the offspring
        mode -- either even or biased.
        even to have equal chance of using either parent for each trait or
        biased to weight the decision based on relative fitness
        (biased falls back to even when the parents' combined fitness is not positive)
        """
        # The offspring inherits this parent's mutation/grow/shrink settings.
        offspring = Tree(name, self.mutation_rate, self.grow_prob, self.shrink_prob)
        p = None
        if mode != 'even':
            total_fit = self.fitness + other.fitness
            # Relative weights are undefined when neither parent has been scored.
            if total_fit > 0:
                p = np.array([self.fitness / total_fit, other.fitness / total_fit])

        offspring.learning_rate = other.learning_rate if either(p) else self.learning_rate

        # Layers both parents have are mixed trait by trait.
        for i in range(min(self.size, other.size)):
            offspring += DenseNode(self[i].crossover(other[i], p))
        # The extra layers of the longer parent are copied only if that parent wins the draw.
        tail = either(p)
        if other.size > self.size and tail:
            for i in range(self.size, other.size):
                offspring += DenseNode(other[i].config())
        elif self.size > other.size and not tail:
            for i in range(other.size, self.size):
                offspring += DenseNode(self[i].config())
        return offspring
        
        
        

In [4]:
class Population(object):
    """A pool of Tree individuals evolved on MNIST by train / evaluate / breed cycles."""

    def __init__(self, pop_size=10, cross='even', mutation_rate=0.001):
        """
        pop_size      -- number of individuals in the pool
        cross         -- crossover mode passed to Tree.crossover ('even' or biased)
        mutation_rate -- mutation rate given to every initial individual
        """
        self.adult = 20000    # training steps after which an individual is evaluated and may be replaced
        self.elder = 40000    # training steps after which an individual is no longer trained
        self.era_len = 5000   # training steps per individual per era
        self.elites = 1
        self.deaths = 3       # individuals replaced by offspring per breeding round
        self.cross = cross
        self.mutation_rate = mutation_rate

        mnist = tf.contrib.learn.datasets.load_dataset("mnist")
        train_data = mnist.train.images
        train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
        # First half of the MNIST test split is used for fitness, second half for the final score.
        half = int(mnist.test.images.shape[0]*0.5)
        valid_data = mnist.test.images[:half]
        test_data = mnist.test.images[half:]
        valid_labels = np.asarray(mnist.test.labels[:half], dtype=np.int32)
        test_labels = np.asarray(mnist.test.labels[half:], dtype=np.int32)

        # Input for training
        self.train_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x":train_data},
            y=train_labels,
            batch_size=100,
            num_epochs=None,
            shuffle=True
        )

        # Input to get fitness of each individual
        self.test_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x":valid_data},
            y=valid_labels,
            num_epochs=1,
            shuffle=False
        )

        # Input for final testing after evolution
        self.results_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x":test_data},
            y=test_labels,
            num_epochs=1,
            shuffle=False
        )

        self.pop = []
        for i in range(pop_size):
            new_tree = Tree('g0-'+str(i), mutation_rate=mutation_rate, num_nodes=1)
            new_tree.show()
            self.pop.append(new_tree)

    def evolve(self, eras):
        """Run `eras` rounds of training, evaluation and breeding.

        Returns a list with one entry per era: the fitness values of the
        individuals evaluated in that era.
        """
        history = []
        start = tic()
        for e in range(eras):
            print("Starting Era {}...".format(e))
            era_hist = []
            ofage = 0
            for tree in self.pop:
                if tree.age < self.elder:
                    tree.train(self.train_fn, steps=self.era_len)
                if tree.age >= self.adult:
                    ofage+=1
                    tree.test(self.test_fn)
                    era_hist.append(tree.fitness)
            # Breed only when enough adults exist to both keep the elites and replace `deaths` individuals.
            if ofage >= self.elites+self.deaths:
                self.pop = sorted(self.pop, key=lambda x:x.fitness, reverse=True)
                best = [self.pop[0], self.pop[1]]
                count = 0
                # Walk up from the least fit end, replacing adults with mutated offspring of the top two.
                for i in range(len(self.pop)-1, -1, -1):
                    if self.pop[i].age >= self.adult:
                        self.pop[i] = best[0].crossover(best[1], 'g{}-{}'.format(e, count), mode=self.cross)
                        self.pop[i].mutate()
                        self.pop[i].show()
                        count+=1
                    if count >= self.deaths:
                        break
            # Report from this era's own evaluations, so the message never depends
            # on whether a breeding round (and its ranking) took place.
            era_best = max(era_hist) if era_hist else 0
            if era_best > 0:
                print("Best fitness for era {}: {:.4f}".format(e, era_best))
            history.append(era_hist)
        if not self.pop:
            return history
        # Pick the fittest individual explicitly; the list is only sorted in breeding rounds.
        champion = max(self.pop, key=lambda x:x.fitness)
        champion.test(self.results_fn)
        print("Final best fitness after {} eras: {} = {:.4f}".format(eras, champion.name, champion.fitness))
        print("Total training time: {:.4f}".format(tic()-start))
        for i in range(len(self.pop)):
            print("Tree #{}".format(i))
            self.pop[i].show()
        return history

In [None]:
# Build a population with the default settings and evolve it for 20 eras,
# keeping the per-era fitness history.
pool = Population()
hist = pool.evolve(eras=20)

Extracting MNIST-data/train-images-idx3-ubyte.gz
Extracting MNIST-data/train-labels-idx1-ubyte.gz
Extracting MNIST-data/t10k-images-idx3-ubyte.gz
Extracting MNIST-data/t10k-labels-idx1-ubyte.gz
---------------------------------------------
g0-0	lr:0.0638
	Size: 16	Activation: Tanh	Dropout: 0.04
---------------------------------------------
---------------------------------------------
g0-1	lr:0.2652
	Size: 256	Activation: Sigmoid	Dropout: 0.13
---------------------------------------------
---------------------------------------------
g0-2	lr:0.0418
	Size: 32	Activation: Tanh	Dropout: 0.08
---------------------------------------------
---------------------------------------------
g0-3	lr:0.0104
	Size: 16	Activation: Linear	Dropout: 0.01
---------------------------------------------
---------------------------------------------
g0-4	lr:0.2952
	Size: 32	Activation: Tanh	Dropout: 0.43
---------------------------------------------
---------------------------------------------
g0-5	lr:0.1026

Time to train model "g5-0": 9.5093
Time to train model "g9-2": 9.2941
Time to train model "g9-1": 8.5241
Time to train model "g9-0": 9.5736
Time to train model "g8-2": 8.6359
Time to train model "g8-1": 5.8188
Time to train model "g8-0": 6.7320
Time to train model "g7-2": 6.6860
Time to train model "g7-1": 9.0949
Time to train model "g7-0": 5.6355
Best fitness for era 10: 0.9730
Starting Era 11...
Time to train model "g5-0": 9.6515
Time to train model "g9-2": 9.1211


Results for the run with growth rate = 0.1, shrink rate = 0.1, mutation rate = 0.001:

Final best fitness after 20 eras: g13-1 0.9902
Total training time: 1671.30
    Tree #0
    ---------------------------------------------
    g13-1	lr:0.1779
        Fitness: 0.9901	Accuracy: 0.9902	Loss: 0.0593
        Size: 512	Activation: Tanh	Dropout: 0.01
        Size: 32	Activation: LeakyReLU	Dropout: 0.5
    ---------------------------------------------