In [1]:
import tensorflow as tf
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd

from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model, clone_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import SGD

In [2]:
# XOR problem

x = [[0,0], [1,0], [0,1], [1,1]]
y = [0,1,1,0]

x = np.asarray(x)
y = np.asarray(y)
y = to_categorical(y)

x, y

(array([[0, 0],
        [1, 0],
        [0, 1],
        [1, 1]]), array([[1., 0.],
        [0., 1.],
        [0., 1.],
        [1., 0.]], dtype=float32))

In [None]:
# ===== Functions =====

In [3]:
def create_random_model(): # change architecture manually
    x_in = Input(shape=(2,))
    s = Dense(5, activation='relu')(x_in)
    s = Dense(5, activation='relu')(s)
    s = Dense(2, activation='softmax')(s)

    model = Model(inputs=x_in, outputs=s)
    return model

In [4]:
def perturb_weights(weights, mag=0.5):
    for i in range(len(weights)):
        random_perturbation = (np.random.rand(*weights[i].shape) - 0.5)*2*mag
        weights[i] += random_perturbation
    return weights

In [5]:
def find_losses(models): # list/array of models,
    losses = []

    for i in range(len(models)):
        y_pred = models[i].predict(x)
        loss = categorical_crossentropy(y, y_pred, label_smoothing=0.)
        losses.append(loss.numpy())

    l = np.array(losses)
    l = np.sum(l, axis=1)
    return l   # numpy array

In [6]:
def find_best_models(models, num=5): # list/array of models, int
    l = find_losses(models)
    model_scores = pd.DataFrame(data={"Model": models, "Loss": l})
    top_performers = model_scores.sort_values("Loss").index[0:num]
    
    print("best loss: %f" %(find_losses([models[top_performers[0]]])))
    
    top_models = []
    for i in range(len(models)):
        if i in top_performers:
            top_models.append(models[i])
            
    return top_models  # list of models

In [7]:
def mutate_models(top_models, multiplicity, mag=0.005): # list of models, int, float
    new_models = []
    for i in range(len(top_models)):
        for _ in range(multiplicity):
            new_model = clone_model(top_models[i])
            w = perturb_weights(top_models[i].get_weights(), mag=mag)
            new_model.set_weights(w)
            new_models.append(new_model)

    return new_models  # list of models

In [None]:
# ===== Reference model w/ gradient descent =====

In [9]:
reference_model = create_random_model()
optim = SGD(lr=0.05)
reference_model.compile(optimizer=optim, loss='categorical_crossentropy')
reference_model.fit(x,y, epochs=500, batch_size=10, verbose=False)

<tensorflow.python.keras.callbacks.History at 0x1fa9d48edd8>

In [10]:
reference_model.predict(x)

array([[0.97700924, 0.02299074],
       [0.00708226, 0.9929178 ],
       [0.00915171, 0.9908483 ],
       [0.9770055 , 0.02299451]], dtype=float32)

In [11]:
find_losses([reference_model])

array([0.06282344], dtype=float32)

In [12]:
mutated_reference = mutate_models([reference_model], 20, mag=0.005)

In [13]:
find_losses(mutated_reference)

array([0.06258045, 0.06443436, 0.06343748, 0.06387063, 0.06430197,
       0.0642468 , 0.06339432, 0.063348  , 0.06574213, 0.06341061,
       0.06344171, 0.0634529 , 0.06316434, 0.06321329, 0.06371651,
       0.06334789, 0.06403564, 0.06209774, 0.06423651, 0.0633956 ],
      dtype=float32)

In [None]:
# ===== Models de novo =====

In [16]:
models = [create_random_model() for _ in range(50)]

In [17]:
history = []

for i in range(25):
    best = find_best_models(models)
    
    history.append(find_losses(best))
    models = mutate_models(best, 10, mag=0.05)

best loss: 2.646842
best loss: 2.621670
best loss: 2.560894
best loss: 2.510812
best loss: 2.431355
best loss: 2.360946
best loss: 2.311098
best loss: 2.234711
best loss: 2.210665
best loss: 2.157472
best loss: 2.096515
best loss: 2.065003
best loss: 1.955508
best loss: 1.909362
best loss: 1.869195
best loss: 1.819166
best loss: 1.788042
best loss: 1.749300
best loss: 1.710319
best loss: 1.681564
best loss: 1.640366
best loss: 1.622774
best loss: 1.604389
best loss: 1.561597
best loss: 1.494629


In [15]:
best[0].predict(x)

array([[0.6150469 , 0.38495305],
       [0.42090976, 0.57909024],
       [0.28721654, 0.7127835 ],
       [0.5790243 , 0.42097563]], dtype=float32)