In [1]:

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchsummary import summary
import numpy as np

# Pick the device used by every later cell: the GPU when CUDA is usable, else the CPU.
print("==> Check devices..")
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Current device: ",device)

# The current GPU id only exists when CUDA is usable; torch.cuda.current_device()
# raises on a machine without CUDA, so only query it on the 'cuda' path.
if device == 'cuda':
    print("Our selected device: ", torch.cuda.current_device())
# device_count() is safe everywhere and simply reports 0 without a GPU.
print(torch.cuda.device_count(), " GPUs is available")

==> Check devices..
Current device:  cuda
Our selected device:  0
1  GPUs is available


In [2]:
# Values handed to transforms.Normalize as (mean, std); shared by train and test.
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2023, 0.1994, 0.2010)

# Dataset location and loader settings used for both splits.
DATA_ROOT = 'D:\\dataset\\cifar10'
BATCH_SIZE = 32
NUM_WORKERS = 2

# Training pipeline: random 32x32 crop (after 4px padding) and random horizontal
# flip for augmentation, then tensor conversion and normalisation.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Test pipeline: no augmentation, only tensor conversion and normalisation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# CIFAR10 train / test splits read from DATA_ROOT (download=False: the files
# must already be present there).
trainset = torchvision.datasets.CIFAR10(
    root=DATA_ROOT, train=True, download=False, transform=transform_train)
testset = torchvision.datasets.CIFAR10(
    root=DATA_ROOT, train=False, download=False, transform=transform_test)

# Only the training loader shuffles; the test loader keeps dataset order.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

# Human-readable names for the ten class indices.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [3]:
class Net(nn.Module):
    """Small CNN: two conv+ReLU+max-pool stages followed by three linear layers.

    ``fc1`` takes 16 * 5 * 5 features, which is what the convolutional part
    produces for 3x32x32 inputs (32 -> 28 -> 14 -> 10 -> 5 per side). The
    final layer emits 10 raw scores per sample (no softmax).
    """

    def __init__(self):
        """Create the layers; parameters use PyTorch's default initialisation."""
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of images ``(N, 3, 32, 32)`` to class scores ``(N, 10)``.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not yield
                16 * 5 * 5 features per sample.
        """
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 16 * 5 * 5), this never folds extra spatial positions into
        # the batch dimension, so a wrong-sized input fails loudly at fc1.
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [4]:
# Initial population: two separately constructed networks.
models = [Net() for _ in range(2)]

In [5]:
# Loss shared by evaluation and training.
criterion = nn.CrossEntropyLoss()

# One SGD optimiser per individual; optimizer[k] updates models[k].
optimizer = [
    optim.SGD(models[k].parameters(), lr=0.001, momentum=0.9)
    for k in range(2)
]

In [6]:
def testInd(ind):
    ind.eval()
    ind = ind.to(device)
    correct = 0
    running_loss = 0.0
    iter_count = 0
    class_correct = [0 for i in range(10)]
    class_total = [0 for i in range(10)]
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images = images.to(device) 
            labels = labels.to(device)
            outputs = ind(images)
            _, pred = outputs.max(1)
            correct += pred.eq(labels).sum().item()
            c_eachlabel = pred.eq(labels).squeeze()
            loss = criterion(outputs, labels)
            iter_count += 1
            running_loss += loss.item()
            for i in range(len(labels)):# 32 is batch size
                cur_label = labels[i]
                class_correct[cur_label] += c_eachlabel[i].item()
                class_total[cur_label] += 1
    ind = ind.to('cpu')
    return 100 * correct/len(testset)

In [7]:
# Fitness of the initial population: testInd's accuracy (in %) per individual.
with torch.no_grad():
    fitness = [testInd(individual) for individual in models]
    print(fitness)

[10.01, 9.76]


In [8]:
# Crossover: build two children by exchanging whole layers between the two parents.
#   mask[k] == 1 -> child1 takes layer k from models[0], child2 from models[1]
#   mask[k] == 0 -> child1 takes layer k from models[1], child2 from models[0]
CROSSOVER_LAYERS = ('conv1', 'conv2', 'fc1', 'fc2', 'fc3')
mask = np.array([0,1,0,1,0])

child1 = Net()
child2 = Net()
with torch.no_grad():
    for keep_order, layer_name in zip(mask, CROSSOVER_LAYERS):
        if keep_order:
            donor1, donor2 = models[0], models[1]
        else:
            donor1, donor2 = models[1], models[0]
        # load_state_dict copies the weight and bias *values* into the child's
        # own parameters. Assigning the parent's nn.Parameter objects directly
        # would alias them, so training a child would also modify any model
        # still holding that parameter.
        getattr(child1, layer_name).load_state_dict(getattr(donor1, layer_name).state_dict())
        getattr(child2, layer_name).load_state_dict(getattr(donor2, layer_name).state_dict())

children = [child1, child2]

# The children replace the parents as the new population. Optimisers are
# rebuilt because the old ones reference the parents' parameters.
models = children
torch.cuda.empty_cache()
optimizer = [optim.SGD(child.parameters(), lr=0.001, momentum=0.9) for child in models]

In [9]:
## Before Mutation
# Re-score the population produced by crossover (accuracy in % per individual).
with torch.no_grad():
    fitness = [testInd(individual) for individual in models]
    print(fitness)

[9.88, 9.99]


In [10]:
# Mutation: train every individual for one pass over trainloader with its own optimiser.
for idx, net in enumerate(models):
    opt = optimizer[idx]
    running_loss = 0.0
    correct = 0

    net.to(device)
    net.train()
    for step, (inputs, labels) in enumerate(trainloader, start=1):
        inputs, labels = inputs.to(device), labels.to(device)

        # Standard SGD step: clear grads, forward, loss, backward, update.
        opt.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        opt.step()

        # Running training accuracy, measured on the pre-update predictions.
        pred = outputs.max(1)[1]
        correct += pred.eq(labels).sum().item()

        # Report the mean loss of the last 200 batches, then reset the window.
        running_loss += loss.item()
        if step % 200 == 0:
            print('[{}, {:5d}] loss: {:.3f}'.format(idx, step, running_loss / 200))
            running_loss = 0.0

    print('Model {} training accuracy: {:.4f}'.format(idx, 100.*correct/len(trainset)))

    # Move the model off the device and release cached GPU memory before the next one.
    net.to('cpu')
    torch.cuda.empty_cache()

[0,   200] loss: 2.300
[0,   400] loss: 2.275
[0,   600] loss: 2.212
[0,   800] loss: 2.101
[0,  1000] loss: 1.995
[0,  1200] loss: 1.925
[0,  1400] loss: 1.880
Model 0 training accuracy: 22.6620
[1,   200] loss: 2.296
[1,   400] loss: 2.243
[1,   600] loss: 2.123
[1,   800] loss: 2.034
[1,  1000] loss: 1.987
[1,  1200] loss: 1.918
[1,  1400] loss: 1.863
Model 1 training accuracy: 23.3000


In [11]:
## After mutation
# Re-score the population after the training pass (accuracy in % per individual).
with torch.no_grad():
    fitness = [testInd(individual) for individual in models]
    print(fitness)

[33.18, 35.52]
