In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
mkdir /data/data/datasets

mkdir: cannot create directory ‘/data/data/datasets’: File exists


In [3]:
import copy
import sys
sys.path.append("/data/libs/Neuvol/")

import numpy as np
from tqdm.notebook import tqdm_notebook

In [4]:
import torch
import neuvol

# Initialize neuvol lib

In [5]:
# distribution - probability manager of parameters and layers type, use to select randomly new layer and its parameters
# or to select new mutation
distribution = neuvol.Distribution()

# use to add mutation to the individ
mutator = neuvol.MutatorBase

# used to cross two invidids
crossed = neuvol.crossing.Crosser()

# options of input data. Only classification head supported now
# set memory limit as the size of your gpu memory - this option limits the complexity of architectures
options = {'classes': 10, 'shape': (None, 3, 32, 32), 'memory_limit': 14000}

# classification head
fin = neuvol.layer.Layer('dense', distribution, options={'input_rank': 3})
fin.config['units'] = 10
fin.config['activation'] = 'softmax'
fin.config['input_rank'] = 2

# you can set layers to appear in random selection
# it makes sense to use only 2d convolutions and max-pooling for images
# and lstm for textual data, but not required
distribution.set_layer_status('cnn2', active=True)
distribution.set_layer_status('max_pool2', active=True)
distribution.set_layer_status('lstm', active=False)
distribution.set_layer_status('max_pool', active=False)
distribution.set_layer_status('cnn', active=False)
distribution.set_layer_status('dense', active=True)
distribution.set_layer_status('decnn2', active=False)
distribution.set_layer_status('dropout', active=True)

# Load data

In [6]:
import torchvision
import torchvision.transforms as transforms

In [7]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [8]:
batch_size = 8

trainset = torchvision.datasets.CIFAR10(root='/data/data/datasets/', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='/data/data/datasets/', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [9]:
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [10]:
dataiter = iter(trainloader)
images, labels = dataiter.next()

In [11]:
images.shape

torch.Size([8, 3, 32, 32])

# Loss, optimizers

In [12]:
import torch.optim as optim
from sklearn.metrics import accuracy_score


criterion = torch.nn.CrossEntropyLoss()

def optimizer(net):
    optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    return optimizer

# Evaluation function

In [13]:
def evaluation(net, device='cpu', limit_train_part=0.1):
    net.to(device)

    opti = optimizer(net)
    for epoch in range(3):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            if i >= int(len(trainloader) * limit_train_part):
                break
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            opti.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs.to(device))
            loss = criterion(outputs, labels.to(device))
            loss.backward()
            opti.step()
            
    net.eval()
    dataiter = iter(testloader)
    images, labels = dataiter.next()
    
    correct = 0
    total = 0
    outputs = []
    real = []
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs.extend(net(images.to(device)).argmax(-1).cpu().numpy())
            real.extend(labels.cpu().numpy())
            
    acc = accuracy_score(real, outputs)
    torch.cuda.ipc_collect()
    torch.cuda.empty_cache()
    return acc

# Evolution

In [16]:
population_size = 20

# number of best individs, which are used for crossing
best_N = 5

# dont apply mutations for the best_N individs
keep_best_N_unchanged = True

# create at least crossing_part * population_size new individs with crossing
crossing_part = 0.5
crossing_part = int((population_size - best_N * int(keep_best_N_unchanged)) * crossing_part)
mutation_probability = 1.0

# add at least 5 layers for the first population
initial_grown = 5
evolution_epochs = 10
device = 'cuda:0'

# use only this part of training data to reduce evaluation time
train_part = 0.3

In [17]:
# initialization of population
population = [neuvol.individs.individ_image.IndividImage(0, options, fin, distribution=distribution) for i in range(population_size)]

In [18]:
# add layers for the first population
for i in range(initial_grown):
    for individ in population:
        mutator.grown(individ, distribution)

In [19]:
# initial fit assessment
for individ in population:
    try:
        individ_graph = individ.init_net()
    except MemoryError:
#             print('Network is too big for the memory')
        individ.result = 0.0
    except KeyboardInterrupt:
        raise
    except Exception as e:
        print(e)
        individ.result = 0.0
    else:
        accuracy = evaluation(individ_graph, device, train_part)
        individ_graph.cpu()
        del(individ_graph)
        individ.result = accuracy

In [20]:
for epoch in range(0, evolution_epochs+1):
    print('Epoch: {}/{}'.format(epoch, evolution_epochs))
    print('Evaluation step')            
    best_N_individs = sorted(population, key=lambda x: -x.result)[:best_N]
    print('Current best: {}'.format(' ,'.join([str(ind.result) for ind in best_N_individs])))
    
    new_population = population
    
    print('Crossing')
    for j in range(crossing_part):
        best_first = best_N_individs[np.random.randint(best_N)]
        best_second = best_N_individs[np.random.randint(best_N)]
        try:
            # select depth of subgraph to extract
            depth = np.random.randint(1, min(best_first.matrix.shape[0] - 2, best_second.matrix.shape[0] - 2))
            # select start point of graph to parse
            start_point = np.random.randint(1, (min(best_first.matrix.shape[0] - 2, best_second.matrix.shape[0] - 2)))
            new_individ = crossed.cross(copy.deepcopy(best_first), copy.deepcopy(best_second), start_point, depth)
        
            if new_individ is not None:
                new_population.append(new_individ)
        except KeyboardInterrupt:
            raise
        except:
            pass
        else:
            print('Crossed')
    
    print('Mutation')
    for individ in new_population:
        try:
            mutator.mutate(individ, distribution)
            individ.architecture.freeze_state()
        except KeyboardInterrupt:
            raise
        except:
            pass

    if keep_best_N_unchanged:
        new_population.extend(best_N_individs)
    
    print('Evaluation')
    for j, individ in enumerate(new_population):
        print('Ind {}/{}'.format(j, len(new_population)))
        try:
            individ_graph = individ.init_net()
            accuracy = evaluation(individ_graph, device, train_part)
            individ_graph.cpu()
            del(individ_graph)
        except MemoryError:
    #             print('Network is too big for the memory')
            individ.result = 0.0
        except KeyboardInterrupt:
            raise
        except Exception as e:
            print(e)
            individ.result = 0.0
        else:
            individ.result = accuracy
            
    population = sorted(new_population, key=lambda x: -x.result)[:population_size]

Epoch: 0/10
Evaluation step
Current best: 0.4818 ,0.4515 ,0.4488 ,0.4456 ,0.4395
Crossing
Crossed
Crossed
Crossed
Crossed
Crossed
Crossed
Crossed
Mutation
Evaluation
Ind 0/28
Ind 1/28
Ind 2/28
Ind 3/28
Ind 4/28
Ind 5/28
Ind 6/28
Ind 7/28
Ind 8/28
Ind 9/28
Ind 10/28
Ind 11/28
Ind 12/28
Ind 13/28
Ind 14/28
Ind 15/28
Ind 16/28
Ind 17/28
Ind 18/28
Ind 19/28
Ind 20/28
Ind 21/28
Ind 22/28
'input_units'
Ind 23/28
Ind 24/28
Ind 25/28
Ind 26/28
Ind 27/28
Epoch: 1/10
Evaluation step
Current best: 0.4911 ,0.4911 ,0.4715 ,0.4715 ,0.4604
Crossing
Crossed
Crossed
Crossed
Crossed
Crossed
Crossed
Crossed
Mutation
Evaluation
Ind 0/29
Ind 1/29
Ind 2/29
Ind 3/29
Ind 4/29
Ind 5/29
Ind 6/29
Ind 7/29
Ind 8/29
Ind 9/29
Ind 10/29
Ind 11/29
Ind 12/29
Ind 13/29
Ind 14/29
Ind 15/29
Ind 16/29
'input_units'
Ind 17/29
Ind 18/29
Ind 19/29
Ind 20/29
list index out of range
Ind 21/29
Ind 22/29
Ind 23/29
Ind 24/29
Ind 25/29
Ind 26/29
Ind 27/29
Ind 28/29
Epoch: 2/10
Evaluation step
Current best: 0.4685 ,0.4641 ,0.4641 ,

# Print resulting accuracy of population

In [24]:
for i in new_population:
    print(i.result)

0.4578
0.4578
0.4537
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.4493
0.4519
0.4573
0.4713
0.4552
0.451
0.4631
0.4468
0.0
0.0
0.0
0.4578
0.4578
0.4537
0.0
0.0


# Save resulting population

In [None]:
for ind in population:
    _ = ind.matrix # we need to call obj.matrix attribute to be sure that all mutations are applied
    tmp = ind.dump() # serialize individ architecture and other meta information
    neuvol.utils.dump(tmp, 'population_dump/{}/epoch{}_name{}_result{}.json'.format(epoch, epoch, ind.name, ind.result))