In [106]:
import keras
import keras.layers as layers
import numpy as np
import copy

## Genetic Algorithm implementation

In [92]:
from typing import List, Tuple, Dict

class MLPSearchSpace:
    """Bounds of the architecture search: depth, layer width and activations.

    Attributes are plain lists so they can be unpacked or sampled directly.
    """
    # Activation names this search space accepts.
    _SUPPORTED_ACTIVATIONS = ('relu', 'sigmoid', 'tanh', 'softplus', 'leaky_relu')

    # [min, max] number of hidden layers.
    num_hidden_range: Tuple[int, int]
    # Candidate activation function names.
    activation_funcs: List[str]
    # [min, max] number of units in a hidden layer.
    layer_size_range: Tuple[int, int]

    def __init__(self, num_hidden_range=[1, 4], activation_funcs=['relu', 'sigmoid', 'tanh', 'softplus', 'leaky_relu'], layer_size_range=[8, 64]):
        """Validate and store the search bounds.

        Args:
            num_hidden_range: two ints ``[low, high]`` with ``low <= high``.
            activation_funcs: non-empty collection of supported activation names.
            layer_size_range: two ints ``[low, high]`` with ``low <= high``.

        Raises:
            AssertionError: if any argument violates the constraints above.
        """
        assert (len(num_hidden_range) == 2
                and isinstance(num_hidden_range[0], int)
                and isinstance(num_hidden_range[1], int)
                and num_hidden_range[0] <= num_hidden_range[1]), \
            "num_hidden_range must be two ints [low, high] with low <= high"
        # Membership is tested per name; wrapping the test in a list (as before)
        # made every item truthy and disabled the check entirely.
        assert (len(activation_funcs) != 0
                and all(func in self._SUPPORTED_ACTIVATIONS for func in activation_funcs)), \
            "activation_funcs must be a non-empty subset of %s" % (self._SUPPORTED_ACTIVATIONS,)
        assert (len(layer_size_range) == 2
                and isinstance(layer_size_range[0], int)
                and isinstance(layer_size_range[1], int)
                and layer_size_range[0] <= layer_size_range[1]), \
            "layer_size_range must be two ints [low, high] with low <= high"

        # Copy the inputs so the mutable default lists (and caller-owned lists)
        # are never shared between instances.
        self.num_hidden_range = list(num_hidden_range)
        self.activation_funcs = list(activation_funcs)
        self.layer_size_range = list(layer_size_range)

In [4]:
class Dataset:
    """Plain holder for the four arrays of a train/test split.

    The arrays are stored by reference; nothing is copied or validated.
    """
    X_train: np.array
    Y_train: np.array
    X_test: np.array
    Y_test: np.array

    def __init__(self, X_train: np.array, Y_train: np.array, X_test: np.array, Y_test: np.array):
        """Keep references to the training and test inputs/targets."""
        self.X_train, self.Y_train = X_train, Y_train
        self.X_test, self.Y_test = X_test, Y_test

In [91]:
class MLPOptimizer:
    """Training hyper-parameters applied to every candidate network."""
    # Mini-batch size passed to ``fit``.
    batch_size: int
    # Number of training epochs passed to ``fit``.
    epochs: int
    # Fraction of the training data passed to ``fit`` as ``validation_split``.
    validation_split: float
    # Loss identifier passed to ``compile``.
    loss: str
    # Optimizer identifier passed to ``compile``.
    optimizer: str
    # Metric identifiers passed to ``compile``.
    metrics: List[str]

    def __init__(self, batch_size: int = 128, epochs: int = 20, validation_split: float = .1, loss: str = 'categorical_crossentropy', optimizer: str = 'adam', metrics: List[str] = ['accuracy']):
        """Store the training configuration.

        ``metrics`` is shallow-copied so that neither the shared default list
        nor a caller-owned list can be mutated through this instance.
        """
        self.batch_size = batch_size
        self.epochs = epochs
        self.validation_split = validation_split
        self.loss = loss
        self.optimizer = optimizer
        self.metrics = list(metrics)

In [93]:
class DNN(keras.Model):
    """Feed-forward model: a list of hidden Dense layers followed by one output Dense layer."""
    # Hidden layers, applied in list order.
    hidden_layers: List[layers.Dense]
    # Final layer created from ``output_layer_config``.
    output_layer: layers.Dense
    # ``(units, activation)`` used to create the output layer.
    output_layer_config: Tuple[int, str]

    def __init__(self, hidden_layers, output_layer_config):
        """Keep a shallow copy of ``hidden_layers`` and create the output layer."""
        super().__init__()
        out_units = output_layer_config[0]
        out_activation = output_layer_config[1]
        self.hidden_layers = hidden_layers.copy()
        self.output_layer = layers.Dense(out_units, activation=out_activation)
        self.output_layer_config = output_layer_config

    def build(self, input_shape):
        """Build every layer in order, threading each layer's output shape into the next."""
        current_shape = input_shape
        for hidden in self.hidden_layers:
            hidden.build(current_shape)
            current_shape = hidden.compute_output_shape(current_shape)
        self.output_layer.build(current_shape)
        self.built = True

    def __copy__(self):
        """Return a new DNN whose hidden layers are re-created from the current layers' configs.

        Only layer configuration is carried over; the output layer is created
        afresh by the constructor from ``output_layer_config``.
        """
        rebuilt_layers = [
            layers.Dense.from_config(hidden.get_config())
            for hidden in self.hidden_layers
        ]
        return DNN(rebuilt_layers, self.output_layer_config)

    def call(self, inputs):
        """Apply the hidden layers in sequence, then the output layer."""
        activations = inputs
        for hidden in self.hidden_layers:
            activations = hidden(activations)
        return self.output_layer(activations)

In [8]:
def softmax(arr: np.array) -> np.array:
    """Return the softmax of ``arr`` taken over all of its elements.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but keeps ``exp`` from overflowing, which
    previously produced ``nan`` for large inputs.

    Args:
        arr: array-like of numbers (lists are accepted).

    Returns:
        Array of the same shape whose entries are positive and sum to 1.
        An empty input yields an empty array.
    """
    values = np.asarray(arr)
    if values.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(values)
    exps = np.exp(values - np.max(values))
    return exps / np.sum(exps)

In [94]:
class GeneticMLP:
    """Genetic search over MLP architectures (depth, layer widths, activations).

    Individuals are untrained ``DNN`` objects. An individual's fitness is
    obtained by training a fresh copy of it and taking the second value
    returned by ``evaluate`` on ``dataset.X_test`` / ``dataset.Y_test``
    (the first compiled metric -- accuracy with the default ``MLPOptimizer``).

    NOTE(review): fitness is measured on the test split, so ``best_fitness``
    is an optimistic estimate; consider scoring on a separate validation split.
    """
    # Per-layer probability that mutation re-draws the activation function.
    _PM: float = .2

    dataset: Dataset
    population: List[DNN]
    fitness: List[float]
    # Stored for reference; not read by any method of this class.
    input_dim: int
    output_layer_config: Tuple[int, str]
    optimizer: MLPOptimizer
    search_space: MLPSearchSpace
    population_size: int
    rnd: np.random.RandomState
    # Untrained copy of the best architecture seen so far (None before the first generation).
    best_solution: DNN
    best_fitness: float

    def __init__(self,
                 dataset: Dataset,
                 input_dim: int = 784,
                 output_layer_config: Tuple[int, str] = (10, 'softmax'),
                 optimizer: MLPOptimizer = None,
                 search_space: MLPSearchSpace = None,
                 population_size: int = 10,
                 rnd=None):
        """Create the initial random population.

        Args:
            dataset: train/test arrays used to score individuals.
            input_dim: number of input features (stored only).
            output_layer_config: ``(units, activation)`` of the output layer.
            optimizer: training settings; a default ``MLPOptimizer()`` if None.
            search_space: architecture bounds; a default ``MLPSearchSpace()`` if None.
            population_size: number of individuals kept between generations.
            rnd: ``np.random.RandomState`` to draw from; a fresh one if None.

        Raises:
            ValueError: if ``population_size`` is below 2 (parent and tournament
                sampling both draw two distinct individuals).
        """
        if population_size < 2:
            raise ValueError("population_size must be at least 2")
        self.dataset = dataset
        self.input_dim = input_dim
        self.output_layer_config = output_layer_config
        # Defaults are created per instance; building them in the signature
        # would share one optimizer/search-space/RNG object across all instances.
        self.optimizer = optimizer if optimizer is not None else MLPOptimizer()
        self.search_space = search_space if search_space is not None else MLPSearchSpace()
        self.population_size = population_size
        self.rnd = rnd if rnd is not None else np.random.RandomState()
        self.population = []
        self.fitness = [0.0] * self.population_size
        self.best_solution = None
        self.best_fitness = 0.0
        for _ in range(population_size):
            self.population.append(self._generate_model())

    def _generate_model(self) -> DNN:
        """Sample a random architecture from the search space."""
        min_layers, max_layers = self.search_space.num_hidden_range
        min_units, max_units = self.search_space.layer_size_range
        # RandomState.randint excludes its upper bound; +1 makes both ranges
        # inclusive and keeps a degenerate [k, k] range from raising.
        num_hidden = self.rnd.randint(min_layers, max_layers + 1)
        hidden_layers = []
        for _ in range(num_hidden):
            layer_size = self.rnd.randint(min_units, max_units + 1)
            activation = self.rnd.choice(self.search_space.activation_funcs)
            hidden_layers.append(layers.Dense(layer_size, activation=activation))
        return DNN(hidden_layers=hidden_layers, output_layer_config=self.output_layer_config)

    def _cross(self, model1: DNN, model2: DNN) -> DNN:
        """Return a copy of ``model1`` in which each hidden layer is, with
        probability 0.5, replaced by a randomly chosen hidden layer of ``model2``."""
        child: DNN = copy.copy(model1)
        if len(model2.hidden_layers) == 0:
            # Nothing to borrow; randint(0, 0) would raise.
            return child
        for i in range(len(model1.hidden_layers)):
            if self.rnd.random() < .5:
                j: int = self.rnd.randint(0, len(model2.hidden_layers))
                child.hidden_layers[i] = model2.hidden_layers[j]
        return child

    def _mutate(self, model: DNN) -> None:
        """In place: with probability ``_PM`` per hidden layer, replace the layer
        by a new one that has the same config except for a re-drawn activation."""
        for i, layer in enumerate(model.hidden_layers):
            if self.rnd.random() < self._PM:
                # Must use the instance RNG; a bare `rnd` is not defined here.
                new_activation: str = self.rnd.choice(self.search_space.activation_funcs)
                cfg = layer.get_config()
                cfg['activation'] = new_activation
                model.hidden_layers[i] = layers.Dense.from_config(cfg)

    def evaluate(self, population: List[DNN]) -> List[float]:
        """Train a fresh copy of every model and return one score per model.

        The score is the second value returned by ``Model.evaluate`` on the
        test arrays, i.e. the first entry of ``optimizer.metrics``.
        """
        fitness = [0] * len(population)
        for i, model in enumerate(population):
            # Work on a copy so the individual kept in the population stays untrained.
            eval_model = copy.copy(model)
            eval_model.compile(loss=self.optimizer.loss, optimizer=self.optimizer.optimizer, metrics=self.optimizer.metrics)
            eval_model.fit(self.dataset.X_train, self.dataset.Y_train, batch_size=self.optimizer.batch_size, epochs=self.optimizer.epochs, validation_split=self.optimizer.validation_split, verbose=False)
            _, test_accuracy, *_ = eval_model.evaluate(self.dataset.X_test, self.dataset.Y_test, verbose=0)
            fitness[i] = test_accuracy
        return fitness

    def generate(self) -> None:
        """Run one generation: evaluate, cross over, mutate, record the best, select."""

        # Evaluate population
        self.fitness = self.evaluate(self.population)

        # Crossover: two distinct parents drawn with softmax(fitness) probabilities
        parent_probs = softmax(self.fitness)
        children: List[DNN] = []
        for _ in range(self.population_size // 2):
            parent1_id, parent2_id = self.rnd.choice(len(self.population), size=2, replace=False, p=parent_probs)
            child = self._cross(self.population[parent1_id], self.population[parent2_id])
            children.append(child)

        # Mutation
        for child in children:
            self._mutate(child)

        # Evaluate the children and pool them with their parents
        child_fitness = self.evaluate(children)
        self.population.extend(children)
        self.fitness.extend(child_fitness)

        # Track the best individual seen so far (stored as an untrained copy)
        best_idx = np.argmax(self.fitness)
        best_individual = self.population[best_idx]

        if self.fitness[best_idx] > self.best_fitness:
            self.best_solution = copy.copy(best_individual)
            self.best_fitness = self.fitness[best_idx]

        # Tournament selection for the next iteration
        pool_size = len(self.population)
        selection_probs = softmax(self.fitness)
        winner_ids = []
        for _ in range(self.population_size):
            candidate1_id, candidate2_id = self.rnd.choice(pool_size, size=2, replace=False, p=selection_probs)
            winner_id = candidate1_id if (self.fitness[candidate1_id] > self.fitness[candidate2_id]) else candidate2_id
            winner_ids.append(winner_id)
        # Keep population and fitness aligned index-for-index after selection.
        self.population = [self.population[w] for w in winner_ids]
        self.fitness = [self.fitness[w] for w in winner_ids]

    def run(self, generations: int = 1, verbose=False) -> None:
        """Run ``generations`` generations; print progress only when ``verbose`` is true."""
        for i in range(generations):
            if verbose:
                print(f"Generation {i+1} started")
            self.generate()
            if verbose:
                print(f"End of generation {i+1}. Best individual score: {self.best_fitness}.")


In [95]:
# Model / data parameters
num_classes = 10
input_shape = (784,)

# Fetch the MNIST arrays through Keras (downloads on first use).
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Flatten every image into a 784-vector, cast to float32 and divide by 255.
x_train = x_train.reshape(60000, 784).astype('float32') / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255

# One-hot encode the labels.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

In [96]:
# Set up the genetic search on the prepared arrays; all other settings use their defaults.
gp = GeneticMLP(
    dataset=Dataset(
        X_train=x_train,
        Y_train=y_train,
        X_test=x_test,
        Y_test=y_test,
    )
)

In [97]:
# Run two generations of the search (every individual is trained, so this is slow).
gp.run(
    generations=2,
    verbose=True,
)

Generation 1 started
End of generation 1. Best individual score: 0.9710000157356262.
Generation 2 started
End of generation 2. Best individual score: 0.9710000157356262.
Generation 3 started


KeyboardInterrupt: 

In [99]:
# Inspect the best architecture found by the search.
model = gp.best_solution
# Keras `build` takes the full input shape including the batch axis,
# so the 784 features are preceded by None (unspecified batch size).
model.build((None, 784))
model.summary()

In [105]:
# Print the activation name recorded in each hidden layer's config.
for layer in model.hidden_layers:
    layer_config = layer.get_config()
    print(layer_config['activation'])

relu
leaky_relu
leaky_relu


<h2> MNIST dataset </h2>

In [None]:
# Model / data parameters
num_classes = 10
input_shape = (784,)

# Fetch the MNIST arrays through Keras (downloads on first use).
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Flatten every image into a 784-vector, cast to float32 and divide by 255.
x_train = x_train.reshape(60000, 784).astype('float32') / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255

# One-hot encode the labels.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


<h2> Keras Sequential Layers </h2>

In [114]:
# Same architecture as the GA result printed above: relu(45) -> leaky_relu(44) -> leaky_relu(59) -> softmax.
model_sequential = keras.Sequential(
    [keras.Input(shape=input_shape)]
    + [
        layers.Dense(units, activation=activation)
        for units, activation in ((45, 'relu'), (44, 'leaky_relu'), (59, 'leaky_relu'))
    ]
    + [layers.Dense(num_classes, activation='softmax')]
)
model_sequential.summary()

In [115]:
# Training settings reused by the fit cell below.
batch_size, epochs = 128, 20

model_sequential.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', 'recall'],
)

In [116]:
# Train with the last 10% of the training arrays held out for validation.
model_sequential.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)

Epoch 1/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.7556 - loss: 0.8315 - recall: 0.6011 - val_accuracy: 0.9520 - val_loss: 0.1731 - val_recall: 0.9412
Epoch 2/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9417 - loss: 0.2043 - recall: 0.9289 - val_accuracy: 0.9608 - val_loss: 0.1433 - val_recall: 0.9537
Epoch 3/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.9539 - loss: 0.1520 - recall: 0.9471 - val_accuracy: 0.9650 - val_loss: 0.1208 - val_recall: 0.9608
Epoch 4/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.9652 - loss: 0.1180 - recall: 0.9598 - val_accuracy: 0.9677 - val_loss: 0.1157 - val_recall: 0.9638
Epoch 5/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9703 - loss: 0.0974 - recall: 0.9657 - val_accuracy: 0.9670 - val_loss: 0.1088 - val_recall: 0.9637


<keras.src.callbacks.history.History at 0x7a44ebc00390>

In [117]:
# Score on the test arrays: index 0 is the loss, index 1 the first compiled metric.
score = model_sequential.evaluate(x=x_test, y=y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

Test loss: 0.11296997964382172
Test accuracy: 0.9747999906539917


<h2> Keras Functional API Implementation </h2>

In [118]:
# Functional-API version of the same network: each layer is called on the previous tensor.
inputs = keras.Input(shape=input_shape)

x = layers.Dense(45, activation='relu')(inputs)
x = layers.Dense(44, activation='leaky_relu')(x)
x = layers.Dense(59, activation='leaky_relu')(x)
outputs = layers.Dense(10, activation='softmax')(x)

# Tie the input and output tensors together into a trainable model.
model_func = keras.Model(inputs=inputs, outputs=outputs)

model_func.summary()

In [119]:
# Training settings reused by the fit cell below.
batch_size, epochs = 128, 20

model_func.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', 'recall'],
)

In [120]:
# Train with the last 10% of the training arrays held out for validation.
model_func.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)

Epoch 1/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.7657 - loss: 0.8048 - recall: 0.6115 - val_accuracy: 0.9480 - val_loss: 0.1820 - val_recall: 0.9365
Epoch 2/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.9387 - loss: 0.2110 - recall: 0.9262 - val_accuracy: 0.9580 - val_loss: 0.1480 - val_recall: 0.9528
Epoch 3/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9557 - loss: 0.1505 - recall: 0.9480 - val_accuracy: 0.9637 - val_loss: 0.1315 - val_recall: 0.9577
Epoch 4/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9620 - loss: 0.1292 - recall: 0.9559 - val_accuracy: 0.9678 - val_loss: 0.1184 - val_recall: 0.9630
Epoch 5/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9702 - loss: 0.1008 - recall: 0.9656 - val_accuracy: 0.9685 - val_loss: 0.1092 - val_recall: 0.9647


<keras.src.callbacks.history.History at 0x7a44eb957290>

In [121]:
# Score on the test arrays: index 0 is the loss, index 1 the first compiled metric.
score = model_func.evaluate(x=x_test, y=y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

Test loss: 0.11916504055261612
Test accuracy: 0.9706000089645386


<h2> Keras Model Subclassing Implementation </h2>

In [122]:
# NOTE: this re-binds the name `DNN` used by GeneticMLP earlier in the notebook.
# GeneticMLP looks `DNN` up at call time and passes `hidden_layers=` /
# `output_layer_config=`, which this class does not accept, so re-running the
# genetic-algorithm cells after this cell requires re-running the earlier DNN cell first.
class DNN(keras.Model):
    """Fixed three-hidden-layer MLP written with Keras model subclassing.

    Layers: Dense(45, relu) -> Dense(44, leaky_relu) -> Dense(59, leaky_relu)
    -> Dense(10, softmax).
    """

    def __init__(self, input_dim=784):
        """Create the layers.

        Args:
            input_dim: accepted for backward compatibility only. It used to be
                forwarded to the first Dense layer as ``input_shape``, which
                Keras warns against and which is not needed in a subclassed
                model; layer weights are created when the model is first
                called or built.
        """
        super().__init__()
        self.dense1 = layers.Dense(45, activation='relu')
        self.dense2 = layers.Dense(44, activation='leaky_relu')
        self.dense3 = layers.Dense(59, activation='leaky_relu')
        self.dense4 = layers.Dense(10, activation='softmax')

    def call(self, inputs):
        """Forward pass: apply the four Dense layers in order."""
        x = self.dense1(inputs)
        x = self.dense2(x)
        x = self.dense3(x)
        x = self.dense4(x)
        return x

In [123]:
# Instantiate the subclassed model with its default input size and print its summary.
model_class = DNN(input_dim=784)
model_class.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [124]:
# Training settings reused by the fit cell below (15 epochs for this model).
batch_size, epochs = 128, 15

model_class.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', 'recall'],
)

In [125]:
# Train with the last 10% of the training arrays held out for validation.
model_class.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)

Epoch 1/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.7712 - loss: 0.7797 - recall: 0.6293 - val_accuracy: 0.9540 - val_loss: 0.1647 - val_recall: 0.9430
Epoch 2/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.9425 - loss: 0.1926 - recall: 0.9325 - val_accuracy: 0.9615 - val_loss: 0.1391 - val_recall: 0.9560
Epoch 3/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9565 - loss: 0.1481 - recall: 0.9489 - val_accuracy: 0.9667 - val_loss: 0.1172 - val_recall: 0.9608
Epoch 4/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9662 - loss: 0.1175 - recall: 0.9609 - val_accuracy: 0.9643 - val_loss: 0.1151 - val_recall: 0.9607
Epoch 5/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9721 - loss: 0.0955 - recall: 0.9677 - val_accuracy: 0.9710 - val_loss: 0.1010 - val_recall: 0.9685


<keras.src.callbacks.history.History at 0x7a44e1b4c490>

In [126]:
# Score on the test arrays: index 0 is the loss, index 1 the first compiled metric.
score = model_class.evaluate(x=x_test, y=y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

Test loss: 0.1086619421839714
Test accuracy: 0.9700999855995178
