In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

# Setup train and test splits
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = datasets.MNIST(root='../data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='../data', train=False, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Get some random training images
dataiter = iter(train_loader)
images, labels = next(dataiter)
print("Training label shape: ", labels.shape)  # (32,) -- 32 numbers (all 0-9)
print("First 5 training labels: ", labels[:5])  # [5, 0, 4, 1, 9]

# Convert to "one-hot" vectors
num_classes = 10
y_train = torch.nn.functional.one_hot(labels, num_classes=num_classes)
print(y_train[:5])

# splitting dataset for faster learning and debugging
X_train, y_train = images[:10000], y_train[:10000]
X_test, y_test = next(iter(test_loader))

# input and output shape
input_shape = X_train[1].shape
num_classes = 10
# Flatten the images
image_vector_size = 28 * 28
print(image_vector_size)
X_train = X_train.view(X_train.size(0), -1)
X_test = X_test.view(X_test.size(0), -1)
print(y_train.shape)
print(X_train.shape)

# DNA[0] = depth
# DNA[1] = neurons_per_layer
# DNA[2] = activations
# DNA[3] = optimizer
# DNA[4] = losses
DNA_parameter = [[5, 6, 7, 8, 9, 10],
                 [16, 32, 64, 128, 256, 512, 1024],
                 ["tanh", "softmax", "relu", "sigmoid", "elu", "selu", "softplus", "softsign", "hard_sigmoid", "linear"],
                 ["sgd", "rmsprop", "adagrad", "adadelta", "adam", "adamax", "nadam"],
                 ["mse_loss", "l1_loss", "cross_entropy", "nll_loss", "poisson_nll_loss", "kl_div_loss", "bce_loss", "bce_with_logits_loss"]
                ]


Training label shape:  torch.Size([32])
First 5 training labels:  tensor([4, 3, 3, 9, 5])
tensor([[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]])
784
torch.Size([32, 10])
torch.Size([32, 784])


In [16]:
import torch
import numpy as np

import torch.nn as nn
import torch.optim as optim

class Network(nn.Module):
    def __init__(self, input_shape, classes, DNA_param, epochs):
        super(Network, self).__init__()
        
        self.architecture_DNA = []  # to save current parameters
        self.fitness = []
        self.acc_history = []
        self.input_shape = input_shape
        self.classes = classes
        self.epochs = epochs

        # unfold DNA_parameters:
        depth = DNA_param[0]
        neurons_per_layer = DNA_param[1]
        activations = DNA_param[2]
        optimizers = DNA_param[3]
        losses = DNA_param[4]

        layers = []
        network_depth = np.random.choice(depth)
        self.architecture_DNA.append(network_depth)

        for i in range(network_depth):
            if i == 0:
                neurons = np.random.choice(neurons_per_layer)
                activation = np.random.choice(activations)
                self.architecture_DNA.append([neurons, activation])
                layers.append(nn.Linear(self.input_shape, neurons))
                layers.append(self.get_activation(activation))
            elif i == network_depth - 1:
                activation = np.random.choice(activations)
                self.architecture_DNA.append(activation)
                layers.append(nn.Linear(neurons, self.classes))
                layers.append(self.get_activation(activation))
            else:
                neurons = np.random.choice(neurons_per_layer)
                activation = np.random.choice(activations)
                self.architecture_DNA.append([neurons, activation])
                layers.append(nn.Linear(neurons, neurons))
                layers.append(self.get_activation(activation))

        self.model = nn.Sequential(*layers)

        loss = np.random.choice(losses)
        optimizer = np.random.choice(optimizers)
        self.architecture_DNA.append([loss, optimizer])
        self.loss_fn = self.get_loss(loss)
        self.optimizer = self.get_optimizer(optimizer, self.model.parameters())

    def get_activation(self, activation):
        activations = {
            'tanh': nn.Tanh(),
            'softmax': nn.Softmax(dim=1),
            'relu': nn.ReLU(),
            'sigmoid': nn.Sigmoid(),
            'elu': nn.ELU(),
            'selu': nn.SELU(),
            'softplus': nn.Softplus(),
            'softsign': nn.Softsign(),
            'hard_sigmoid': nn.Hardsigmoid(),
            'linear': nn.Identity()
        }
        return activations[activation]

    def get_loss(self, loss):
        losses = {
            'mse_loss': nn.MSELoss(),
            'l1_loss': nn.L1Loss(),
            'cross_entropy': nn.CrossEntropyLoss(),
            'nll_loss': nn.NLLLoss(),
            'poisson_nll_loss': nn.PoissonNLLLoss(),
            'kl_div_loss': nn.KLDivLoss(),
            'bce_loss': nn.BCELoss(),
            'bce_with_logits_loss': nn.BCEWithLogitsLoss()
        }
        return losses[loss]

    def get_optimizer(self, optimizer, parameters):
        optimizers = {
            'sgd': optim.SGD(parameters, lr=0.01),
            'rmsprop': optim.RMSprop(parameters, lr=0.01),
            'adagrad': optim.Adagrad(parameters, lr=0.01),
            'adadelta': optim.Adadelta(parameters, lr=0.01),
            'adam': optim.Adam(parameters, lr=0.01),
            'adamax': optim.Adamax(parameters, lr=0.01),
            'nadam': optim.NAdam(parameters, lr=0.01)
        }
        return optimizers[optimizer]

    def create_children(self, children_DNA):
        layers = []
        children_depth = children_DNA[0]
        for i in range(children_depth):
            if i == 0:
                layers.append(nn.Linear(self.input_shape, children_DNA[1][0]))
                layers.append(self.get_activation(children_DNA[1][1]))
            elif i == children_depth - 1:
                layers.append(nn.Linear(children_DNA[i][0], self.classes))
                layers.append(self.get_activation(children_DNA[children_depth]))
            else:
                layers.append(nn.Linear(children_DNA[i][0], children_DNA[i+1][0]))
                layers.append(self.get_activation(children_DNA[i+1][1]))

        self.model = nn.Sequential(*layers)
        self.loss_fn = self.get_loss(children_DNA[-1][0])
        self.optimizer = self.get_optimizer(children_DNA[-1][1], self.model.parameters())
        self.architecture_DNA = children_DNA

    def give_fitness(self):
        return self.fitness

    def train(self, X_train, y_train):
        self.model.train()
        for epoch in range(self.epochs):
            self.optimizer.zero_grad()
            outputs = self.model(X_train)
            loss = self.loss_fn(outputs, y_train)
            loss.backward()
            self.optimizer.step()

    def test(self, X_test, y_test):
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(X_test)
            _, predicted = torch.max(outputs.data, 1)
            correct = (predicted == y_test).sum().item()
            self.fitness = correct / len(y_test)
            self.acc_history.append(self.fitness)

    def give_DNA(self):
        return self.architecture_DNA

    def architecture(self):
        print(self.model)

In [17]:
class GeneticAlgorithm:
    def __init__(self, population_size, mutation_rate, generations = 50, Epochs = 2):
        self.population_size = population_size
        self.mutation_rate = mutation_rate
        self.generations = generations
        self.training_epochs = Epochs
        self.population = None
        self.children_population_DNA = []
        self.acces = []
        self.norm_acces = []
        
    def create_population(self):
        self.population = [Network(image_vector_size, num_classes, DNA_parameter,self.training_epochs) for i in range(self.population_size)]
    
    def train_generation(self):
        for member in self.population:
                member.train()
                
    def predict(self):
        for member in self.population:
                member.test()
                self.acc.append(member.give_fitness())
    
    def normalize(self):
        sum_ = sum(self.acc)
        self.norm_acc = [i/sum_ for i in self.acc] 
        #print("\nNormalization sum: ",sum(self.norm_acc))
        #assert sum(self.norm_acc) == 1
        
    def clear_losses(self):
        self.norm_acc = []
        self.acc = []
        
    def mutate(self):
        for child_DNA in self.children_population_DNA:
            for i in range(len(child_DNA)):
                if np.random.random() < self.mutation_rate:
                    print("\nMutation!")
                    if i == 0:
                        new_depth = np.random.choice(DNA_parameter[0])
                        child_DNA[0] = new_depth
                    
                    if i == len(child_DNA)-2:
                        new_output_activation = np.random.choice(DNA_parameter[2])
                        child_DNA[-2] = new_output_activation
                    
                    if i == len(child_DNA)-1:
                        # random flip if loss or activation shall be changed
                        if np.random.random() < 0.5:
                            new_loss = np.random.choice(DNA_parameter[4])
                            child_DNA[-1][0] = new_loss
                        else:
                            new_optimizer = np.random.choice(DNA_parameter[3])
                            child_DNA[-1][1] = new_optimizer
                    if i != 0 and i !=len(child_DNA)-2 and i != len(child_DNA)-1:
                    #else:
                        # 3/2 flif if number of neurons or activation function mutates:
                        #print(child_DNA)
                        if np.random.random() < 0.33:
                            #print(child_DNA[i][1])
                            new_activation = np.random.choice(DNA_parameter[2])
                            #print(new_activation)
                            child_DNA[i][1] = new_activation
                        else:
                            #print(child_DNA[i][0])
                            new_neuron_count = np.random.choice(DNA_parameter[1])
                            child_DNA[i][0] = new_neuron_count
                            #print(new_neuron_count)
                    #print("After mutation ", child_DNA)

    def reproduction(self):
        """ 
        Reproduction through midpoint crossover method 
        """
        population_idx = [i for i in range(len(self.population))]
        for i in range(len(self.population)):
        #selects two parents probabilistic accroding to the fitness score
            if sum(self.norm_acc) != 0:
                parent1 = np.random.choice(population_idx, p = self.norm_acc)
                parent2 = np.random.choice(population_idx, p = self.norm_acc)
            else:
              # if there are no "best" parents choose randomly 
                parent1 = np.random.choice(population_idx)
                parent2 = np.random.choice(population_idx)

            # picking random midpoint for crossing over name/DNA
            parent1_DNA = self.population[parent1].give_DNA()
            parent2_DNA = self.population[parent2].give_DNA()
            #print(parent1_DNA)
            
            mid_point_1 = np.random.choice([i for i in range(2,len(parent1_DNA)-2)])
            mid_point_2 = np.random.choice([i for i in range(2,len(parent2_DNA)-2)])
            # adding DNA-Sequences of the parents to final DNA
            child_DNA = parent1_DNA[:mid_point_1] + parent2_DNA[mid_point_2:]
            new_nn_depth = len(child_DNA)-2 # minus 2 because of depth parameter[0] and loss parameter[-1]
            child_DNA[0] = new_nn_depth
            self.children_population_DNA.append(child_DNA)
        # old population gets the new and proper weights
        self.mutate()
        # keras.backend.clear_session() ## delete old models to free memory
        for i in range(len(self.population)):
            self.population[i].create_children(self.children_population_DNA[i])
        
        
    
    def run_evolution(self):
        for episode in range(self.generations):
            print("\n--- Generation {} ---".format(episode))
            self.clear_losses()
            self.train_generation()
            self.predict()
            if episode != self.generations -1:
                self.normalize()
                self.reproduction()
                
            else:
                pass
            self.children_population_DNA = []
        # plotting history:
        for a in range(self.generations):
            for member in self.population:
                plt.plot(member.acc_history)
        plt.xlabel("Generations")
        plt.ylabel("Accuracy")
        plt.show()

In [18]:
GA = GeneticAlgorithm(population_size = 4,mutation_rate = 0.03, generations = 6,Epochs=1)
GA.create_population()
GA.run_evolution()

ValueError: optimizer got an empty parameter list