[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Neuvork/Engeneering-thesis/blob/master/results.ipynb)

# This notebook is an example of using our library to evolve neural networks

In [1]:
import matplotlib.pyplot as plt
from Engeneeringthesis.kernels import *
import numpy as np
import time
from IPython.display import clear_output
import copy
import cupy as cp
import tensorflow_datasets as tfds
import tensorflow as tf
from Engeneeringthesis.NeuralNetwork import Neural_Network
from Engeneeringthesis.Cma_es import CMA_ES
from Engeneeringthesis.Caged_CMA_ES import Caged_CMA_ES
from Engeneeringthesis.Logs import Logs
from tensorflow.keras.models import Model
from tensorflow.keras import layers, losses
from tensorflow.keras.datasets import mnist

In [2]:
# Handles to CuPy's default device memory pool and default pinned (host) memory pool.
mempool = cp.get_default_memory_pool()
pinned_mempool = cp.get_default_pinned_memory_pool()


def cuda_memory_clear():
    """Call `free_all_blocks()` on both default CuPy memory pools."""
    for pool in (mempool, pinned_mempool):
        pool.free_all_blocks()

## Dataset preparation

In [3]:
# Load the CIFAR-10 'train' and 'test' splits as (image, label) pairs
# (as_supervised=True) together with the dataset metadata (with_info=True).
(ds_train, ds_test), ds_info = tfds.load(
    'cifar10',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
)


def normalize_img(image, label):
  """Cast `image` to `float32` and divide it by 255; `label` is returned unchanged."""
  scaled = tf.cast(image, tf.float32) / 255.
  return scaled, label

# Training pipeline: normalise -> cache -> shuffle over the whole split ->
# batches of 32 -> prefetch.
ds_train = (
    ds_train
    .map(normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .cache()
    .shuffle(ds_info.splits['train'].num_examples)
    .batch(32)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

# Test pipeline: normalise -> batches of 32 -> cache -> prefetch (no shuffling).
ds_test = (
    ds_test
    .map(normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(32)
    .cache()
    .prefetch(tf.data.experimental.AUTOTUNE)
)


In [None]:
# Convert the very same (already normalised) TensorFlow datasets to CuPy arrays,
# so the evolutionary algorithm never sees an image that TensorFlow has not seen.
def _to_channels_first(image):
  """Return an image tensor with its last axis moved to the front.

  `image` must expose `.numpy()` and have three axes; axis 2 becomes axis 0
  (e.g. height x width x channels -> channels x height x width). Works for
  any number of channels.
  """
  return np.ascontiguousarray(np.transpose(image.numpy(), (2, 0, 1)))


def _collect_split(dataset, images, labels):
  """Append every (image, label) pair of a batched dataset to the given lists.

  Each element of `dataset` is a (batch_of_images, batch_of_labels) pair.
  Images are stored as channel-first float32 CuPy arrays; labels are stored
  as they come from the dataset.
  """
  for batch_images, batch_labels in dataset:
    for image, label in zip(batch_images, batch_labels):
      images.append(cp.array(_to_channels_first(image), dtype=cp.float32))
      labels.append(label)


images = []
labels = []

_collect_split(ds_train, images, labels)
# Number of training samples. evaluate_population reads this global to split
# the stacked arrays below into a training part and a validation part.
TRAINING_SIZE = len(images)
_collect_split(ds_test, images, labels)

# Training samples first, test samples after them.
images = cp.array(images, dtype = cp.float32)
labels = cp.array(labels)
print(images.shape)
# NOTE: despite its name, this dictionary holds the CIFAR-10 data loaded above.
train_ds_mnist = {"image" : images, "label" : labels }

## Training network with TensorFlow
Of course, there is no need to pretrain the network. In particular, when the evaluation function is not differentiable, gradient-based pretraining is impossible.

In [None]:
# Reference network: one 4-filter 3x3 tanh convolution, max pooling, flatten
# and a 10-unit dense layer. No layer uses a bias.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(filters=4, kernel_size=3, activation='tanh', use_bias=False))
model.add(tf.keras.layers.MaxPool2D())
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(10, use_bias=False))

# The dense layer emits raw logits, hence from_logits=True in the loss.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(0.0008),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

# Train for 20 epochs, validating on the test pipeline after each epoch.
model.fit(ds_train, epochs=20, validation_data=ds_test)

## Allocating population
We tested our algorithm on a network with no bias after the convolutional layer and with the following structure:



1.   Convolutional layer with 4 filters
2.   Dense layer

### Important
Do not use `copy_conv_layer` or `copy_linear_layer` with neural networks built from anything other than a combination of conv and linear layers, or linear layers only: these parsers are not adapted to copy other kinds of neural networks from TensorFlow. Also set `use_bias` to `False` when creating the population.


In [None]:
# Number of individuals (candidate networks) in the population.
POPULATION_SIZE = 2048
# Shape of a single image taken from the prepared dataset.
input_size = train_ds_mnist['image'][0].shape
# Population of networks mirroring the Keras model: one 'conv' layer described
# by (4, 3, 3) followed by a 'linear' layer with 10 outputs, without biases.
# NOTE(review): the meaning of the trailing [1.,1.] pair of each layer is
# defined by Neural_Network and is not visible here.
population = Neural_Network(POPULATION_SIZE,  input_size, 
                            [
                             ['conv', (4, 3, 3), [1.,1.]],
                             ['linear', 10, [1.,1.]]
                             ],
                             use_bias=False)

## Measure of individuals in population for algorithm

In [7]:
def _count_correct(population, images, labels, progress_label):
    """Count, for every individual, how many samples it labels correctly.

    Prints a progress line every 10 samples, prefixed with `progress_label`.
    Returns a uint32 CuPy array of length `population.population_size`.
    """
    scores = cp.zeros(population.population_size, dtype = cp.uint32)
    total = len(images)
    for index, (image, label) in enumerate(zip(images, labels)):
        if index % 10 == 0:
            clear_output()
            # Report a real percentage of the split that is being processed.
            print(progress_label + " dataset done in: " + str(100 * index / total) + "%")
        # The result of forward() is compared element-wise with the label;
        # presumably it holds one predicted class per individual - TODO confirm.
        scores += population.forward(image) == label
    return scores


def evaluate_population(population, train_ds):
    """Measure the accuracy of every individual on the train and validation parts.

    The first TRAINING_SIZE samples of `train_ds` (module-level global) form
    the training part; all remaining samples form the validation part.

    Parameters:
        population: object exposing `population_size` and `forward(image)`.
        train_ds: dict with 'image' and 'label' entries of equal length.

    Returns:
        (train_accuracy, validation_accuracy): two arrays with one value per
        individual, each being the number of correct answers divided by the
        number of samples in the respective part.

    NOTE: when no sample lies past TRAINING_SIZE the validation ratio is a
    division by zero.
    """
    # cp.asarray reuses data that is already a CuPy array instead of copying it.
    train_images = cp.asarray(train_ds['image'][:TRAINING_SIZE])
    train_labels = cp.asarray(train_ds['label'][:TRAINING_SIZE])
    validation_images = cp.asarray(train_ds['image'][TRAINING_SIZE:])
    validation_labels = cp.asarray(train_ds['label'][TRAINING_SIZE:])

    train_scores = _count_correct(population, train_images, train_labels, "Train")
    validation_scores = _count_correct(population, validation_images, validation_labels, "Validation")

    return train_scores/len(train_images), validation_scores/len(validation_images)

# Parsers of layers from TensorFlow to population
Due to differences in the implementation of convolutional layers, we needed to implement methods that transfer layers from one object to another. For the tests we used only one convolutional layer with 4 filters and a dense layer.

In [8]:
def copy_conv_layer(model_layer_num, population_layer_num, individual_num=0):
  """Copy a convolution kernel of the Keras model into one individual.

  Reads the module-level `model` and writes into the module-level `population`.

  Parameters:
    model_layer_num: index of the source layer in `model.layers`.
    population_layer_num: index of the target layer in `population.layers`.
    individual_num: index of the individual that receives the weights.

  The Keras kernel is indexed as [:, :, input_filter, output_filter], while the
  population stores it as [individual, output_filter, input_filter, :, :].
  """
  kernel = model.layers[model_layer_num].weights[0]
  target = population.layers[population_layer_num][1]
  output_filters = target.shape[1]
  input_filters = target.shape[2]
  for out_idx in range(output_filters):
    for in_idx in range(input_filters):
      filter_weights = kernel[:, :, in_idx, out_idx].numpy()
      target[individual_num, out_idx, in_idx, :, :] = cp.array(filter_weights, dtype=cp.float32)

In [9]:
def inv(perm):
    """Return the inverse of a permutation as a list.

    For every position `source` holding value `target` in `perm`, the result
    holds `source` at position `target`.
    """
    slots = [0 for _ in range(len(perm))]
    for source, target in enumerate(perm):
        slots[target] = source
    return slots


def copy_linear_layer(model_layer_num, population_layer_num, individual_num=0):
  """Copy a dense-layer weight matrix of the Keras model into one individual.

  Reads the module-level `model` and writes into the module-level `population`.
  The rows of the Keras matrix follow the flattening order of a
  (height, width, channels) feature map, whereas the population flattens the
  same feature map as (channels, height, width); rows are reordered accordingly.

  Parameters:
    model_layer_num: index of the source dense layer in `model.layers`.
    population_layer_num: index of the target layer in `population.layers`.
    individual_num: index of the individual that receives the weights.

  Raises:
    ValueError: if the feature-map size does not match the number of rows of
      the Keras weight matrix.
  """
  model_layer = model.layers[model_layer_num].weights[0].numpy()
  # Entries 1..3 of the shape are used as (channels, height, width).
  prev_shape = population.input_sizes[population_layer_num - 1]
  channels, height, width = int(prev_shape[1]), int(prev_shape[2]), int(prev_shape[3])
  n_inputs = channels * height * width
  if n_inputs != model_layer.shape[0]:
    raise ValueError(
        "feature map %dx%dx%d has %d values but the dense layer expects %d inputs"
        % (channels, height, width, n_inputs, model_layer.shape[0]))

  # source_rows[c*height*width + h*width + w] == h*width*channels + w*channels + c,
  # i.e. for every channel-first position the matching channel-last row.
  # Using `width` (not `height`) in the row stride keeps this a valid
  # permutation for non-square feature maps as well.
  source_rows = (np.arange(n_inputs)
                 .reshape(height, width, channels)
                 .transpose(2, 0, 1)
                 .ravel())

  population.layers[population_layer_num][1][individual_num] = cp.array(
      model_layer[source_rows, :], dtype=cp.float32)

In [10]:
# Seed individuals 0..19 with the trained TensorFlow weights: Keras layer 0
# (Conv2D) goes to population layer 0 and Keras layer 3 (Dense) goes to
# population layer 2.
# NOTE(review): population layer index 2 presumably accounts for a layer that
# Neural_Network inserts between 'conv' and 'linear' - confirm in the library.
for i in range(20):
  copy_conv_layer(0, 0, i)
  copy_linear_layer(3, 2, i)

## Ensuring that results in both models are the same (test of parsers)

In [None]:
# Reference result: metrics of the Keras model on the training pipeline.
model.evaluate(
    ds_train
)

In [None]:
# evaluate_population returns (train accuracies, validation accuracies), one
# value per individual; the cell output is the best training accuracy.
population_score = evaluate_population(population,train_ds_mnist)
cp.max(population_score[0])

### Important
Sometimes pyplot changes the scale of the plots. For example, if after a few iterations the covariance matrix values are in the range $[-0.0001, 0.0001]$ and then suddenly seem to jump to the range $[-10, 10]$, check whether the scale of the plot has changed.

In [None]:
# Quantities to log, given as (kind, name) pairs.
# NOTE(review): the names are presumably matched by the logging code inside
# CMA_ES.fit, so they are kept verbatim (including 'mena_prev') - confirm.
logs = Logs([('matrix','covariance'),('population', 'population'),('number','sigma'),
                      ('vector','isotropic'),('vector','anisotropic'),('vector','mean'),
                      ('number','best-train-score'), ('number','best-validation-score'),
                       ('vector', 'mean_act - mena_prev')])

## Racing with Tensorflow

### Important
We do not recommend changing the hyperparameters of the CMA_ES constructor and of the `fit` call. We picked them by running numerous experiments so that they show good results, and we cannot promise that other hyperparameters will also yield good results.

In [None]:
# Optimiser over the seeded population, scored by evaluate_population.
# NOTE(review): .01 is presumably the initial step size (sigma) - confirm
# against CMA_ES.__init__.
classifier = CMA_ES(population, .01, evaluate_population, logs, hp_loops_number=4)

In [None]:
# NOTE(review): the arguments after the dataset are presumably the number of
# selected parents, the number of offspring and the number of iterations -
# confirm against CMA_ES.fit.
classifier.fit(train_ds_mnist, POPULATION_SIZE//64, POPULATION_SIZE, 100)

## MNIST from zero

In [13]:
# Load each MNIST split as one in-memory batch (batch_size=-1).
train_ds_mnist = tfds.load("mnist", split = "train", shuffle_files=True, batch_size=-1)
test_ds_mnist = tfds.load("mnist", split = "test", shuffle_files=True, batch_size=-1)

train_ds_mnist = tfds.as_numpy(train_ds_mnist)
test_ds_mnist = tfds.as_numpy(test_ds_mnist)

# Scale pixel values by 1/255 and move the data to the GPU as float32.
train_ds_mnist = {"image" : cp.array(train_ds_mnist["image"]/255., dtype=cp.float32), "label" : cp.array(train_ds_mnist["label"]) }
test_ds_mnist = {"image" : cp.array(test_ds_mnist["image"]/255., dtype=cp.float32), "label" : cp.array(test_ds_mnist["label"]) }
# Channel-first layout for both splits. The sample count is inferred (-1)
# instead of being hard-coded; with a single channel this reshape only moves
# the channel axis and does not reorder pixels.
train_ds_mnist['image'] = train_ds_mnist['image'].reshape((-1, 1, 28, 28))
test_ds_mnist['image'] = test_ds_mnist['image'].reshape((-1, 1, 28, 28))
# NOTE(review): evaluate_population still splits the data at the global
# TRAINING_SIZE computed in the CIFAR-10 cell above; the MNIST samples past
# that index are therefore used as the validation part.

In [None]:
# Number of individuals (candidate networks) in the population.
POPULATION_SIZE = 2048
# Shape of a single MNIST image after the reshape above.
input_size = train_ds_mnist['image'][0].shape
# Population of networks made of a single 'linear' layer with 10 outputs and
# no bias.
# NOTE(review): the meaning of the trailing [1.,1.] pair is defined by
# Neural_Network and is not visible here.
population = Neural_Network(POPULATION_SIZE,  input_size, 
                            [
                             ['linear', 10, [1.,1.]]
                             ],
                             use_bias=False)

We do not recommend changing the logs. Of course you can do it, but then you will need to change the logging in the CMA-ES file (inside the `fit` function). Also, please be aware that after changing a library file in Colaboratory you will need to reset the runtime so that the file is imported correctly.

In [None]:
# Fresh log container for the MNIST run, with the same (kind, name) pairs as
# in the CIFAR-10 experiment above.
logs = Logs([('matrix','covariance'),('population', 'population'),('number','sigma'),
                      ('vector','isotropic'),('vector','anisotropic'),('vector','mean'),
                      ('number','best-train-score'), ('number','best-validation-score'),
                       ('vector', 'mean_act - mena_prev')])

In [16]:
# Optimiser for the MNIST population, scored by evaluate_population.
# NOTE(review): .11 is presumably the initial step size (sigma), and the
# meaning of param_dimensionality = 21000 is not visible here - confirm
# against CMA_ES.__init__.
classifier = CMA_ES(population, .11, evaluate_population, logs, param_dimensionality = 21000)

Be patient when waiting for the plots: MNIST should run relatively fast, but CIFAR10 is slow. Expect one iteration of the algorithm to take a few minutes. Plots are generated after each iteration; you need to scroll the output down to see all of them.

In [None]:
# NOTE(review): the arguments after the dataset are presumably the number of
# selected parents, the number of offspring and the number of iterations -
# confirm against CMA_ES.fit.
classifier.fit(train_ds_mnist, POPULATION_SIZE//64, POPULATION_SIZE, 300)