In [None]:
# import modules
import numpy as np
from random import choice
# Fixed seed, reused below for NumPy's global generator and for train_test_split.
# np.random.seed only seeds NumPy's global generator; the stdlib `random`
# module imported above keeps its own, separately seeded state.
seed = 42 # go-to number
np.random.seed(seed)

from keras.datasets import mnist
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.utils import np_utils
from sklearn.model_selection import train_test_split

In [None]:
# load MNIST, then carve a dev split (1/6 of the training images) out of the training set
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
X_train, X_dev, Y_train, Y_dev = train_test_split(
    X_train, Y_train, test_size=1/6, random_state=seed)

# preprocess images: flatten every image to one float32 row and scale pixels by 1/255
num_of_pixels = X_train.shape[1]*X_train.shape[2]
X_train, X_dev, X_test = (
    images.reshape(images.shape[0], num_of_pixels).astype('float32') / 255
    for images in (X_train, X_dev, X_test)
)

# preprocess labels: one-hot encode; the class count is read off the encoded test labels
Y_train, Y_dev, Y_test = (
    np_utils.to_categorical(labels) for labels in (Y_train, Y_dev, Y_test)
)
num_classes = Y_test.shape[1]

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [None]:
# create hyperparameter grid

# training settings shared by every candidate model
batch_size, epochs = 50, 10

# candidate values for each searchable hyperparameter
neurons = np.arange(30, 151)
layers = np.arange(2, 5)
loss = ['categorical_crossentropy', 'categorical_hinge']
opt = ['sgd', 'rmsprop', 'adagrad', 'adadelta', 'adam', 'adamax', 'nadam']
act = ['relu', 'sigmoid']

# size of the full grid = product of the number of options per hyperparameter
number_of_models = int(np.prod([len(options) for options in (neurons, layers, loss, opt, act)]))

number_of_models # way too many to test, so we will take random subset of initial population to try

10164

In [None]:
# create initial population: sample 1% of the grid size, one random draw per hyperparameter
pop_size = number_of_models // 100
ch = np.random.choice # here for abbrev. purposes
pop = [tuple(ch(options) for options in (neurons, layers, loss, opt, act))
       for _ in range(pop_size)]

In [None]:
# Find most fit model using Genetic Algorithm

# translate hyperparameter tuple to actual model
def model_from_hyper(hyperparameters):
  """Build and compile a dense Keras classifier from a hyperparameter tuple.

  Parameters
  ----------
  hyperparameters : tuple
    ``(neurons, layers, loss, opt, act)`` where ``neurons`` is the total neuron
    budget (hidden layers plus the ``num_classes`` output units), ``layers`` is
    the number of Dense layers including the output layer, ``loss`` and ``opt``
    are passed to ``model.compile`` and ``act`` is the hidden-layer activation.

  Returns
  -------
  A compiled ``Sequential`` model with ``layers - 1`` hidden Dense layers
  (widest first) followed by a softmax layer of ``num_classes`` units.

  Raises
  ------
  ValueError
    If ``layers`` is smaller than 2, i.e. there would be no hidden layer.

  Notes
  -----
  The hidden neurons are split among the hidden layers at random (using
  NumPy's global generator), so two calls with the same tuple can produce
  different layer widths when there is more than one hidden layer.
  """
  neurons, layers, loss, opt, act = hyperparameters

  hidden_layers = int(layers) - 1
  if hidden_layers < 1:
    raise ValueError("layers must be at least 2 (one hidden layer plus the output layer)")

  # Each hidden layer is guaranteed one neuron (added back below as `+ 1`), so
  # only the remainder of the budget is distributed at random.  Clamp at zero
  # so that a too-small budget still yields one neuron per hidden layer.
  spare = max(int(neurons) - num_classes - hidden_layers, 0)

  # zeros are neurons, ones are the boundaries between consecutive hidden
  # layers; shuffling them produces a random split of `spare` neurons
  separator = [0]*spare + [1]*(hidden_layers - 1)
  np.random.shuffle(separator)
  neuron_list = [0]*hidden_layers
  i = 0
  for marker in separator:
    if marker:
      i += 1
    else:
      neuron_list[i] += 1
  # put the widest hidden layer first
  neuron_list = sorted(neuron_list, reverse=True)

  model = Sequential()
  model.add(Dense(neuron_list[0] + 1, input_dim=num_of_pixels, activation=act))
  # every remaining hidden layer (the original loop stopped one layer short)
  for width in neuron_list[1:]:
    model.add(Dense(width + 1, activation=act))
  model.add(Dense(num_classes, activation='softmax'))
  model.compile(loss=loss, optimizer=opt, metrics=['accuracy'])

  return model

# here to save time: maps a hyperparameter tuple to the dev accuracy measured for it
acc_dict = {}

# we will use accuracy as our fitness metric
def fitness(individual):
  """Return the dev-set accuracy of the model described by ``individual``.

  The result is memoised in ``acc_dict`` keyed by the hyperparameter tuple, so
  each distinct tuple is trained at most once.

  Parameters
  ----------
  individual : tuple
    Hashable hyperparameter tuple accepted by ``model_from_hyper``.

  Returns
  -------
  The accuracy reported by ``model.evaluate`` on ``(X_dev, Y_dev)``.
  """
  # get pre-saved accuracy if available
  acc = acc_dict.get(individual)

  # compare against None: a cached accuracy of 0.0 is a valid result and must
  # not trigger another training run
  if acc is None:
    model = model_from_hyper(individual)
    model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, verbose=0)

    # save accuracy (index 1 is the 'accuracy' metric; index 0 is the loss)
    acc = model.evaluate(X_dev, Y_dev, verbose=0)[1]
    acc_dict[individual] = acc

  return acc

# child will have the 'average' hyperparameter of parents
def crossover(p1,p2):
  """Combine two parent hyperparameter tuples into one child tuple.

  The numeric genes (positions 0 and 1) become the truncated mean of the
  parents' values; each categorical gene (positions 2, 3 and 4) is drawn with
  ``np.random.choice`` from the two parents' values.
  """
  mean_neurons, mean_layers = (int((p1[k] + p2[k]) / 2) for k in (0, 1))
  picked_loss, picked_opt, picked_act = (
      np.random.choice((p1[k], p2[k])) for k in (2, 3, 4)
  )
  return (mean_neurons, mean_layers, picked_loss, picked_opt, picked_act)

# mirror a categorical value to the option at the opposite end of its list
def _mirrored_option(options, value):
  return options[len(options) - options.index(value) - 1]

# adds 'random' noise to hyperparameters of child population
def mutation(population, child_indices, mutation_frac, neuron_step=5, layer_step=1):
  """Mutate a random fraction of the newly created children in place.

  Parameters
  ----------
  population : list of tuple
    Hyperparameter tuples ``(neurons, layers, loss, opt, act)``; modified in place.
  child_indices : mutable sequence of int
    Positions in ``population`` that hold new children.  Shuffled in place.
  mutation_frac : float
    Fraction of the children to mutate (``int(len * frac)`` of them).
  neuron_step, layer_step : int, optional
    Largest absolute change applied to the ``neurons`` / ``layers`` gene.

  Returns
  -------
  The same ``population`` list.

  Notes
  -----
  The numeric genes receive a uniform integer offset in ``[-step, +step]`` and
  are clamped to the range of the module-level ``neurons`` / ``layers`` grids.
  (The previous ``int(x + np.random.random())`` always truncated back to ``x``,
  so these genes never changed.)  Each categorical gene is, with probability
  one half, replaced by the option at the mirrored position of its list; an
  option in the exact middle of an odd-length list therefore maps to itself.
  """
  child_pop_size = len(child_indices)
  np.random.shuffle(child_indices)

  # valid ranges come from the search grid so mutants stay buildable
  neuron_lo, neuron_hi = int(np.min(neurons)), int(np.max(neurons))
  layer_lo, layer_hi = int(np.min(layers)), int(np.max(layers))

  # mutate fraction of children
  for i in child_indices[:int(child_pop_size*mutation_frac)]:
    old_neurons, old_layers, new_loss, new_opt, new_act = population[i]

    # randint's upper bound is exclusive, hence the `+ 1`
    shifted_neurons = int(old_neurons) + np.random.randint(-neuron_step, neuron_step + 1)
    new_neurons = int(min(max(shifted_neurons, neuron_lo), neuron_hi))
    shifted_layers = int(old_layers) + np.random.randint(-layer_step, layer_step + 1)
    new_layers = int(min(max(shifted_layers, layer_lo), layer_hi))

    if np.random.random() > 0.5:
      new_loss = _mirrored_option(loss, new_loss)
    if np.random.random() > 0.5:
      new_opt = _mirrored_option(opt, new_opt)
    if np.random.random() > 0.5:
      new_act = _mirrored_option(act, new_act)
    population[i] = (new_neurons,new_layers,new_loss,new_opt,new_act)

  return population

# pick `tournament_size` individuals (with replacement) and return the fittest
def _tournament_winner(population, tournament_size):
  # draw through NumPy so the selection is governed by np.random.seed
  picks = np.random.randint(len(population), size=tournament_size)
  return max((population[k] for k in picks), key=fitness)

# the fun part
def GA(population, tournament_size=10, max_generation_num=10,
       mutation_frac=0.25, survivers=.9):
  """Evolve ``population`` with tournament selection, crossover and mutation.

  Parameters
  ----------
  population : list of tuple
    Hyperparameter tuples; the list is modified in place.
  tournament_size : int
    Number of individuals sampled (with replacement) per parent tournament.
  max_generation_num : int
    Number of generations to run.
  mutation_frac : float
    Fraction of each generation's new children passed on to ``mutation``.
  survivers : float
    Fraction of the population kept each generation; the rest is replaced by
    children.

  Returns
  -------
  The evolved population (the same list object unless ``mutation`` returns a
  different one).

  Notes
  -----
  The population size is taken from ``population`` itself rather than from a
  module-level variable, all randomness goes through NumPy's global generator
  so that ``np.random.seed`` makes a run repeatable, and the number of
  replaced individuals is rounded so that float error in ``1 - survivers``
  cannot drop a child (e.g. 100 individuals at 0.9 gives 10, not 9).
  """
  size = len(population)
  if size == 0:
    return population

  # number of individuals replaced per generation, kept within [0, size]
  num_children = min(max(int(round(size*(1 - survivers))), 0), size)

  for _ in range(max_generation_num):
    # choose which individuals die this generation
    indices = np.arange(size)
    np.random.shuffle(indices)
    indices = indices[:num_children]
    for j in indices:
      p1 = _tournament_winner(population, tournament_size)
      p2 = _tournament_winner(population, tournament_size)

      population[j] = crossover(p1,p2) #replace dead individual
    # only mutate new individuals
    population = mutation(population, indices, mutation_frac)
  return population

# Run the GA on the initial population and keep the individual with the best
# fitness; `fitness` consults the `acc_dict` cache before training anything.
king_hyperparameters = max(GA(pop), key=fitness)
# NOTE(review): this builds a new, untrained model. model_from_hyper shuffles
# the split of neurons between hidden layers, so with more than one hidden
# layer the widths may differ from the network that was scored during the GA.
king = model_from_hyper(king_hyperparameters)

In [None]:
# test the king
# Train the freshly built king on the training split, then score it on the
# test split (fitness, by contrast, evaluates candidates on the dev split).
king.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, verbose=0)
scores = king.evaluate(X_test, Y_test, verbose=0)
# the model is compiled with metrics=['accuracy'], so scores[1] is the accuracy
print("Accuracy of the King: %.2f%%" % (scores[1]*100))
print("Hyperparameters of the King: ", king_hyperparameters)

Accuracy of the King: 97.68%
Hyperparameters of the King:  (128, 2, 'categorical_crossentropy', 'adam', 'relu')
