### PairWise Combinations Structure
These elements are the neural net's building blocks. Using the parameters N and F and two specific layers (chosen from those allowed in the ConvNet), this unit transforms its
inputs of size 2*x*y into an output of size 2*x*y*F.

        I_1 ---> C_1 -> F*O_1  |
                               |-> 2*F*x*y
        I_2 ---> C_2 -> F*O_2  |

Embedding N elements, one can compute the cell's input and output dimensions, dynamically build a pairwise combination unit (which is itself a simple net), and combine cells to build
nets as described in the paper.



In [142]:
import torch, torch.nn as nn, torch.nn.functional as F, copy, random,math,torch.optim as optim,torch,torchvision,torchvision.transforms as transforms
# Largest operation type id that is drawn at random for one side of a pairwise
# combination; MAX_PS_TYPE + 1 is the id cells pass to `operation` for their
# stride-2 reduction pooling.
MAX_PS_TYPE = 17
# NOTE(review): not referenced in the visible code; presumably the number of
# cell stacks in a NASNet (3 normal + 2 reduce) -- confirm.
NAS_NET_LAYERS = 5
# Inclusive bounds for the random number of computed states in a generated cell.
MIN_NUM_OF_STATES = 2
MAX_NUM_OF_STATES = 4

In [150]:

def load_and_normalize_cifar10():
    """Create CIFAR-10 train and test loaders plus the class-name tuple.

    Images are converted to tensors and normalized per channel with mean 0.5
    and standard deviation 0.5. The dataset is stored under ``./data`` and is
    downloaded when missing. Both loaders use a batch size of 4 and two worker
    processes; only the training loader shuffles.

    Returns:
        ``(trainloader, testloader, classes)``.
    """
    batch_size = 4
    channel_mean = (0.5, 0.5, 0.5)
    channel_std = (0.5, 0.5, 0.5)
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])

    def make_loader(is_train):
        # The training split is shuffled, the test split is read in order.
        dataset = torchvision.datasets.CIFAR10(
            root='./data', train=is_train, download=True, transform=preprocess)
        return torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=is_train, num_workers=2)

    trainloader = make_loader(True)
    testloader = make_loader(False)

    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    return trainloader, testloader, classes

def train_network(net,trainloader,log=False):
    """Train ``net`` in place for two passes over ``trainloader``.

    Optimizes cross-entropy loss with SGD (lr=0.001, momentum=0.9). The
    network's train/eval mode is left as the caller set it.

    Args:
        net: Callable module mapping a batch of inputs to class scores.
        trainloader: Iterable yielding ``(inputs, labels)`` pairs.
        log: When true, print the mean loss every 2000 mini-batches and a
            final completion message.

    Returns:
        None.
    """
    loss_fn = nn.CrossEntropyLoss()
    sgd = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    for epoch in range(2):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(trainloader):
            # Clear stale gradients, then forward, backward and update.
            sgd.zero_grad()
            batch_loss = loss_fn(net(inputs), labels)
            batch_loss.backward()
            sgd.step()

            running_loss += batch_loss.item()
            if (i + 1) % 2000 != 0:
                continue
            # Every 2000th mini-batch: report the window average and restart it.
            if log:
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

    if log:
        print('Finished Training')

def test_network(net,testloader,log=False):
    """Measure the classification accuracy of ``net`` over ``testloader``.

    Args:
        net: Callable module mapping a batch of images to class scores.
        testloader: Iterable yielding ``(images, labels)`` pairs.
        log: When true, print the accuracy as an integer percentage.

    Returns:
        Fraction of samples whose highest-scoring class equals the label.
    """
    correct, total = 0, 0
    # Evaluation only, so no gradients need to be tracked.
    with torch.no_grad():
        for images, labels in testloader:
            scores = net(images)
            # The index of the largest score along dim 1 is the predicted class.
            predicted = torch.max(scores.data, 1)[1]
            matches = predicted == labels
            total += labels.size(0)
            correct += matches.sum().item()

    if log:
        print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')
    return correct / total

def operation(arch):
    """Build the layer selected by one side of a pairwise combination.

    Args:
        arch: Indexable ``(I, F, T)`` -- input channels, output channels and
            the operation type id.

    Returns:
        A newly constructed ``nn.Module`` chosen by ``T``:

        * ``0``: ``nn.Conv2d(I, F)`` with a kernel size drawn at random from
          3, 5 and 7, stride 1 and padding ``kernel // 2``.
        * ``1..5``: 3x3 average pooling, stride 1, padding 1.
        * ``6..9``: 3x3 max pooling, stride 1, padding 1.
        * ``10..13``: identity.
        * ``14..17``: ``nn.BatchNorm2d(I)``.
        * ``18``: 3x3 max pooling with stride 2 and padding 1.

    Raises:
        ValueError: If ``T`` is none of the ids above. Returning ``None`` for
            an unknown id would only fail later, when the missing layer is
            called.
    """
    op_type = arch[2]

    if op_type == 0:
        # A single draw selects the kernel (3, 5 or 7); padding of
        # kernel // 2 with stride 1 leaves height and width unchanged.
        kernel = 3 + 2 * random.randint(0, 2)
        return nn.Conv2d(arch[0], arch[1], kernel_size=kernel, stride=1, padding=kernel // 2)

    # The ranges are disjoint on purpose: the boundary ids 5, 9 and 13 belong
    # to the lower group, so every id maps to exactly one operation.
    if op_type in range(1, 6):
        return nn.AvgPool2d(kernel_size=3, stride=1, padding=1)
    if op_type in range(6, 10):
        return nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
    if op_type in range(10, 14):
        return nn.Identity()
    if op_type in range(14, 18):
        return nn.BatchNorm2d(num_features=arch[0])
    if op_type == 18:
        return nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    raise ValueError(f'unknown operation type id: {op_type!r}')
    
class PairwiseCombination(nn.Module):
    """Two operations applied to two inputs and joined along the channel axis.

    ``ARCH`` holds two ``(IC, FC, T)`` descriptors, one per input; each is
    turned into a layer by ``operation``.
    """

    def __init__(self,ARCH=[]):
        super().__init__()
        left_arch, right_arch = ARCH[0], ARCH[1]
        self.op1 = operation(left_arch)
        self.op2 = operation(right_arch)

    def forward(self, x1, x2):
        """Apply op1 to ``x1`` and op2 to ``x2``, ReLU both, concatenate on dim 1."""
        activated = (F.relu(self.op1(x1)), F.relu(self.op2(x2)))
        return torch.cat(tensors=activated, dim=1)

# DAG structure
# [[-1^2],[-1^2],[(0|1)^2],[(0|1|2)^2],[(0|1|2|3)^2],[(0|1|2|3|4)^2],[(0|1|2|3|4|5)^2]]... where [n] is number of nodes
# normal_cell_set = {[pc_list, dag]...[pc_list, dag]}
# reduce_cell_set = {[pc_list, dag]...[pc_list, dag]}

class Cell(nn.Module):
    """A small DAG of pairwise combination units.

    Args:
        ps_ARCH_list: ``[-1, -1, arch_2, arch_3, ...]`` where each ``arch_k``
            is the two-descriptor list accepted by ``PairwiseCombination``.
            The two leading placeholders stand for the cell's input states.
        dag_list: ``[-1, -1, [a, b], ..., final]``. Entry ``k`` (k >= 2) names
            the two earlier states fed to unit ``k``; the last entry lists the
            states whose outputs are concatenated to form the cell output.
        reduce: When true, the cell output is passed through the stride-2
            pooling returned by ``operation`` for type ``MAX_PS_TYPE + 1``.
    """

    def __init__(self, ps_ARCH_list, dag_list, reduce=False):
        super().__init__()
        # ps_list stays a plain Python list so existing list-style uses keep
        # working ...
        self.ps_list = [PairwiseCombination(ARCH) for ARCH in ps_ARCH_list[2:]]
        # ... while the ModuleList registers the same units as submodules.
        # Modules held only in a plain list are invisible to parameters(),
        # state_dict() and .to(), so their weights would never be optimized.
        self.ps_modules = nn.ModuleList(self.ps_list)
        self.reduce = reduce
        self.dag_list = dag_list
        self.ps_ARCH_list = ps_ARCH_list
        # Parameter-free pooling, built once rather than on every forward pass;
        # it is applied only when self.reduce is true.
        self.reduction = operation(arch=[-1, -1, MAX_PS_TYPE + 1])

    # ps_list = [ps_net1, ps_net2, ...]; unit i produces state i + 2
    def forward(self, x):
        """Evaluate the DAG on ``x`` and return the concatenated final states."""
        # States 0 and 1 are both the cell input.
        s = [x, x]
        for into, frm in enumerate(self.dag_list[2:-1]):
            s.append(self.ps_list[into](s[frm[0]], s[frm[1]]))
        # Concatenate the selected states along the channel axis.
        out = torch.cat(tensors=[s[state] for state in self.dag_list[-1]], dim=1)
        if self.reduce:
            return self.reduction(out)
        return out

def generate_cell(f,x0, reduce=False,log=False,random_dag_list=True,random_ps_list=True,dag_list=[],arch_list=[]):
    """Create a ``Cell`` whose channel sizes fit the example input ``x0``.

    Args:
        f: Multiplier recorded as the output-channel count (``f * IC``) of
            every operation descriptor.
        x0: Example input tensor; it serves as states 0 and 1 of the cell and
            is pushed through throw-away units to learn each state's channels.
        reduce: Forwarded to ``Cell``.
        log: When true, print the DAG and one line per unit.
        random_dag_list: Draw a random DAG; otherwise use ``dag_list`` as is.
        random_ps_list: Draw random operation type ids; otherwise reuse the
            type ids stored in ``arch_list``.
        dag_list: DAG to use when ``random_dag_list`` is false.
        arch_list: Architecture list supplying type ids when
            ``random_ps_list`` is false.

    Returns:
        A ``Cell`` built from the recorded architecture list and the DAG.
    """
    # Drawn unconditionally, even when a DAG is supplied.
    state_count = random.randint(MIN_NUM_OF_STATES, MAX_NUM_OF_STATES)

    if not random_dag_list:
        DAG = dag_list
    else:
        # State k may read from any earlier state, inputs 0 and 1 included.
        DAG = [-1, -1]
        for max_state in range(state_count):
            first_source = random.randint(0, 1 + max_state)
            second_source = random.randint(0, 1 + max_state)
            DAG.append([first_source, second_source])
        # Final node: every computed state that no other state consumes.
        final_nodes = list(range(2, state_count + 2))
        if log:
            print(final_nodes)
        for s_pair in DAG[2:]:
            final_nodes = list(set(final_nodes) - set(s_pair))
        DAG.append(final_nodes)
    if log:
        print(DAG)

    # The two placeholders line the list up with the DAG's state indices.
    ARCH_list = [-1, -1]
    # Example outputs of every state so far, used to read channel counts.
    state_outputs = [x0, x0]

    for index, ps in enumerate(DAG[2:-1]):
        left_input = state_outputs[ps[0]]
        right_input = state_outputs[ps[1]]
        IC1 = left_input.shape[1]
        IC2 = right_input.shape[1]
        if random_ps_list:
            T1 = random.randint(0, MAX_PS_TYPE)
            T2 = random.randint(0, MAX_PS_TYPE)
        else:
            recorded = arch_list[index + 2]
            T1, T2 = recorded[0][2], recorded[1][2]
        if log:
            print('ps={}, IC1={}, IC2={}, T_1={}, T_2={}'.format(ps,IC1,IC2,T1,T2))
        left_arch = (IC1, f * IC1, T1)
        right_arch = (IC2, f * IC2, T2)
        # This unit exists only to produce an example output for shape
        # inference; the returned Cell is built from ARCH_list.
        probe = PairwiseCombination(ARCH=[left_arch, right_arch])
        ARCH_list.append([left_arch, right_arch])
        state_outputs.append(probe.forward(left_input, right_input))

    if log:
        print('----------------------------end-of-cell-log-----')
    return Cell(ps_ARCH_list=ARCH_list, dag_list=DAG, reduce=reduce)

# Each final network is made of 3 stacks of normal cells and 2 stacks of reduce cells
# N = number of cells in each stack
# F = number of conv output filters
# normal_stack_1
# reduce_stack_1
#      ⋮
# normal_stack_3
# fully connected network
# soft-max layer
# 
#            | n1_1 -> r1_1 -> n2_1 -> r2_1 -> n3_1  |           |
#   input => | n1_2 -> r1_2 -> n2_2 -> r2_2 -> n3_2  |   fully   |   soft-max  | => output
#            |  ⋮        ⋮       ⋮       ⋮        ⋮    | connected |  multiclass 
#            | n1_N -> r1_N -> n2_N -> r2_N -> n3_N  |           |
#       
class NASNet(nn.Module):
    """N parallel branches of normal/reduce cells followed by a classifier.

    Each branch chains five randomly generated cells (normal, reduce, normal,
    reduce, normal). The outputs of all branches are concatenated along the
    channel axis, flattened and fed to three fully connected layers ending in
    10 outputs.

    Args:
        x: Example input tensor used only to infer channel and feature sizes.
        N: Number of parallel branches.
        f: Filter multiplier forwarded to ``generate_cell``.
        log: When true, cell generation and ``forward`` print shape details.
    """

    def __init__(self,x,N=1,f=1,log=False):
        super().__init__()
        self.N = N
        self.log = log
        # The example forwards below only discover shapes, so no autograd
        # graph is recorded for them.
        with torch.no_grad():
            # Stacks are ModuleLists so the cells are registered as submodules
            # (visible to parameters(), state_dict(), .to()), and replacing an
            # item keeps the registration in sync.
            self.normal_cell_stack_1 = nn.ModuleList([generate_cell(f,x,log=log) for _ in range(N)])
            self.reduce_cell_stack_1_input = [cell(x) for cell in self.normal_cell_stack_1]
            self.reduce_cell_stack_1 = nn.ModuleList([generate_cell(f,i,reduce=True,log=log) for i in self.reduce_cell_stack_1_input])
            self.normal_cell_stack_2_input = [cell(i) for cell, i in zip(self.reduce_cell_stack_1, self.reduce_cell_stack_1_input)]
            self.normal_cell_stack_2 = nn.ModuleList([generate_cell(f,i,log=log) for i in self.normal_cell_stack_2_input])
            self.reduce_cell_stack_2_input = [cell(i) for cell, i in zip(self.normal_cell_stack_2, self.normal_cell_stack_2_input)]
            self.reduce_cell_stack_2 = nn.ModuleList([generate_cell(f,i,reduce=True,log=log) for i in self.reduce_cell_stack_2_input])
            self.normal_cell_stack_3_input = [cell(i) for cell, i in zip(self.reduce_cell_stack_2, self.reduce_cell_stack_2_input)]
            self.normal_cell_stack_3 = nn.ModuleList([generate_cell(f,i,log=log) for i in self.normal_cell_stack_3_input])
            classifier_probe = torch.cat(
                tensors=[cell(i) for cell, i in zip(self.normal_cell_stack_3, self.normal_cell_stack_3_input)],
                dim=1)
        # The entries are the very same stack objects used by forward().
        self.layers =  [
                        self.normal_cell_stack_1,
                        self.reduce_cell_stack_1,
                        self.normal_cell_stack_2,
                        self.reduce_cell_stack_2,
                        self.normal_cell_stack_3
                       ]
        # Fully connected classifier. The batch dimension is excluded because
        # forward() flattens from dim 1; including it would size fc1 wrongly
        # whenever the example batch holds more than one sample.
        classifierNetDim = math.prod(classifier_probe.size()[1:])
        self.fc1 = nn.Linear(classifierNetDim, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return class scores of shape ``(batch, 10)`` for the batch ``x``."""
        # Every branch starts from the same input.
        branch_out = [x for _ in range(self.N)]
        for stack in (self.normal_cell_stack_1,
                      self.reduce_cell_stack_1,
                      self.normal_cell_stack_2,
                      self.reduce_cell_stack_2,
                      self.normal_cell_stack_3):
            branch_out = [cell(signal) for cell, signal in zip(stack, branch_out)]
        layer_5_out = branch_out
        if self.log: print([t.size() for t in layer_5_out])
        if self.log: print(torch.cat(tensors=layer_5_out ,dim=1).size())
        classifier_in = torch.flatten(torch.cat(tensors=layer_5_out ,dim=1), 1)
        if self.log: print(classifier_in.size())
        x = F.relu(self.fc1(classifier_in))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

class NASAlgo():
    """Evolutionary architecture search over ``NASNet`` networks.

    Args:
        x0: Example input tensor, used only for shape inference when networks
            and cells are built.
        trainloader: Iterable of ``(inputs, labels)`` used to train candidates.
        testloader: Iterable of ``(inputs, labels)`` used to score candidates.
        N: Number of parallel branches per network.
        f: Filter multiplier forwarded to ``NASNet`` / ``generate_cell``.
    """

    def __init__(self,x0,trainloader, testloader,N=1,f=1):
        super().__init__()
        self.N = N
        self.f = f
        self.x0 = x0
        self.trainloader = trainloader
        self.testloader = testloader

    def _pick_cell(self, nasnet):
        """Return ``(layer_index, cell_index, cell)`` for a random cell."""
        layer_index = random.randint(0, len(nasnet.layers)-1)
        cell_index = random.randint(0, len(nasnet.layers[layer_index])-1)
        return layer_index, cell_index, nasnet.layers[layer_index][cell_index]

    def _rebuild(self, nasnet, layer_index, cell_index, dag_list, arch_list):
        """Build an untrained copy of ``nasnet`` with one cell's DAG/ops replaced.

        Changing one cell can change the channel count seen by every later
        cell of its branch and by the classifier. All cells are therefore
        regenerated from their recorded DAG and operation type ids
        (``generate_cell`` recomputes channel sizes from the actual input),
        and ``fc1`` is resized. All weights are freshly initialized.
        """
        branches = len(nasnet.layers[0])
        # A freshly generated network serves as the container; its cells and
        # fc1 are overwritten below. NASNet.forward reads the same stack
        # objects that `layers` holds, so in-place replacement is enough.
        child = NASNet(self.x0, N=branches, f=self.f)
        branch_outputs = []
        with torch.no_grad():  # shape inference only
            for branch in range(branches):
                signal = self.x0
                for depth, parent_layer in enumerate(nasnet.layers):
                    parent_cell = parent_layer[branch]
                    if depth == layer_index and branch == cell_index:
                        dag, arch = dag_list, arch_list
                    else:
                        dag, arch = parent_cell.dag_list, parent_cell.ps_ARCH_list
                    new_cell = generate_cell(self.f, signal, reduce=parent_cell.reduce, log=False,
                                             random_dag_list=False, random_ps_list=False,
                                             dag_list=copy.deepcopy(dag), arch_list=copy.deepcopy(arch))
                    child.layers[depth][branch] = new_cell
                    signal = new_cell.forward(signal)
                branch_outputs.append(signal)
        in_features = torch.flatten(torch.cat(tensors=branch_outputs, dim=1), 1).shape[1]
        child.fc1 = nn.Linear(in_features, child.fc1.out_features)
        return child

    def μ1(self, nasnet):
        """Mutate the DAG of one random cell; return the resulting child network."""
        layer_index, cell_index, cell = self._pick_cell(nasnet)
        DAG = copy.deepcopy(cell.dag_list)
        # Unit entries occupy indices 2 .. len(DAG) - 2; the last entry is the
        # final-node list.
        psI = random.randint(2, len(DAG)-2)
        # A unit may only read from states computed before it.
        DAG[psI][random.randint(0,1)] = random.randint(0,psI-1)
        # Rewiring changes which states are consumed, so the final node is
        # recomputed as "every computed state nobody reads". The last state
        # can never be read, so this list is never empty.
        consumed = {state for pair in DAG[2:-1] for state in pair}
        DAG[-1] = [state for state in range(2, len(DAG)-1) if state not in consumed]
        return self._rebuild(nasnet, layer_index, cell_index, DAG, cell.ps_ARCH_list)

    def μ2(self, nasnet):
        """Mutate one operation type of one random cell; return the child network."""
        layer_index, cell_index, cell = self._pick_cell(nasnet)
        ARCH_LIST = copy.deepcopy(cell.ps_ARCH_list)
        unit = random.randint(2, len(ARCH_LIST)-1)
        side = random.randint(0, 1)
        # Descriptors are immutable (IC, FC, T) tuples with the type id at
        # index 2, so the tuple is rebuilt rather than assigned into.
        in_channels, out_channels, _ = ARCH_LIST[unit][side]
        ARCH_LIST[unit][side] = (in_channels, out_channels, random.randint(0, MAX_PS_TYPE))
        return self._rebuild(nasnet, layer_index, cell_index, cell.dag_list, ARCH_LIST)

    def μ(self, nasnet):
        """Randomly apply either μ1 or μ2 to ``nasnet``."""
        if random.randint(0,1)==1:
            return self.μ1(nasnet)
        else:
            return self.μ2(nasnet)

    def φ(self, nasnet):
        """Train ``nasnet`` and return its test accuracy as the fitness."""
        # first train the network
        train_network(nasnet, self.trainloader)
        # then test the network
        # add other fitness parameters if required
        return test_network(nasnet, self.testloader)

    def EA(self, population_size, sample_size, iteration):
        """Run the evolutionary search.

        Args:
            population_size: Number of random networks trained up front.
            sample_size: Number of individuals drawn per iteration; the
                fittest of them becomes the parent.
            iteration: Number of mutate/evaluate steps.

        Returns:
            ``(network, fitness)`` of the fittest individual in the final
            population.

        Raises:
            ValueError: If ``population_size < 1`` or ``sample_size`` is not
                in ``1..population_size``.
        """
        if population_size < 1:
            raise ValueError('population_size must be at least 1')
        if not 1 <= sample_size <= population_size:
            raise ValueError('sample_size must be between 1 and population_size')

        def fitness_of(individual):
            return individual[1]

        # The initial population is built and scored once, not per iteration.
        population = []
        for _ in range(population_size):
            candidate = NASNet(self.x0, N=self.N, f=self.f)
            population.append((candidate, self.φ(candidate)))

        for _ in range(iteration):
            sample = random.sample(population, sample_size)
            # Fitness is accuracy, so the parent is the highest-scoring sample.
            parent_net, _ = max(sample, key=fitness_of)
            child = self.μ(parent_net)
            population.append((child, self.φ(child)))
            # NOTE(review): the oldest individual is discarded (aging
            # evolution); switch to removing the weakest if that is what the
            # referenced paper prescribes.
            population.pop(0)

        return max(population, key=fitness_of)


In [None]:
# load data
trainloader,testloader,_ = load_and_normalize_cifar10()
# A DataLoader cannot be indexed, so one batch is drawn through its iterator.
# Only the images are needed, and a single-sample batch is enough for the
# shape inference the example input is used for.
example_images = next(iter(trainloader))[0]
# init algorithm
NAS = NASAlgo(x0=example_images[:1],trainloader=trainloader, testloader=testloader,N=1,f=1)
# perform algorithm
best_net = NAS.EA(population_size=50, sample_size=10, iteration=100)

In [159]:

# Smoke test: build one random NASNet and push a random input through it.
# Example inputs laid out as (samples, channels, height, width); x2 is only
# used by the commented-out pairwise-combination experiment below.
x1 = torch.rand((1, 1, 28, 28))
x2 = torch.rand((1, 3, 28, 28))

# Disabled experiment: a single pairwise combination applied to x1 and x2.
# f = 3
# x = PairwiseCombination(ARCH=[(x1.shape[1],x1.shape[1]*f,0),(x2.shape[1],x2.shape[1]*f,4)])
# # image:(S, C, H, W)
# y  = x.forward(x1,x2)

# Disabled experiment: a single randomly generated cell applied to x1.
# nc1 = generate_cell(f=2,x0=x1, reduce=False,log=True,random_dag_list=True,random_ps_list=True,dag_list=[],arch_list=[])
# y1 = nc1.forward(x1)

# Build a one-branch network sized for x1 (log=True prints the generated
# DAGs and per-unit channel counts) and run it once.
net1 = NASNet(x1,N=1,f=1,log=True)
y = net1.forward(x1)
print(y.shape)

# Disabled experiment: apply the DAG mutation and run the result.
# algo = NASAlgo(x0=x1,trainloader=None, testloader=None,N=1,f=1)
# net1 = algo.μ1(net1)
# y = net1.forward(x1)

# Input shape followed by output shape.
print(x1.shape)
# print(x2.shape)
# print(y1.shape)

# print(x1.shape)
print(y.shape)

# print(math.prod(x1.size()))





[2, 3]
[-1, -1, [0, 1], [2, 2], [3]]
ps=[0, 1], IC1=1, IC2=1, T_1=1, T_2=0
ps=[2, 2], IC1=2, IC2=2, T_1=16, T_2=16
----------------------------end-of-cell-log-----
[2, 3]
[-1, -1, [0, 1], [0, 2], [3]]
ps=[0, 1], IC1=4, IC2=4, T_1=13, T_2=3
ps=[0, 2], IC1=4, IC2=8, T_1=15, T_2=6
----------------------------end-of-cell-log-----
[2, 3, 4]
[-1, -1, [1, 1], [1, 0], [3, 3], [2, 4]]
ps=[1, 1], IC1=12, IC2=12, T_1=11, T_2=17
ps=[1, 0], IC1=12, IC2=12, T_1=12, T_2=16
ps=[3, 3], IC1=24, IC2=24, T_1=15, T_2=6
----------------------------end-of-cell-log-----
[2, 3, 4]
[-1, -1, [1, 0], [1, 2], [2, 3], [4]]
ps=[1, 0], IC1=72, IC2=72, T_1=2, T_2=8
ps=[1, 2], IC1=72, IC2=144, T_1=15, T_2=13
ps=[2, 3], IC1=144, IC2=216, T_1=2, T_2=3
----------------------------end-of-cell-log-----
[2, 3, 4]
[-1, -1, [0, 1], [0, 1], [3, 2], [4]]
ps=[0, 1], IC1=360, IC2=360, T_1=11, T_2=1
ps=[0, 1], IC1=360, IC2=360, T_1=6, T_2=1
ps=[3, 2], IC1=720, IC2=720, T_1=8, T_2=17
----------------------------end-of-cell-log-----
