### Macro Architecture 
First, we define the ResNet-like Macro architecture. 

**The goal of this macro-architecture is to represent all resnet-like architectures, including Wide-Resnets and ResNext.**

The Macro-architecture is defined as illustrated in the following figure. 

<img src="resnet_like.png" alt="macro-architecture" width="80%"/>

Search Space size: >8000000

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import operator
import functools

In [2]:
class ResidualBranch(nn.Module):
    def __init__(self, in_channels, out_channels, filter_size, stride, branch_index):
        super(ResidualBranch, self).__init__()

        self.residual_branch = nn.Sequential()

        self.residual_branch.add_module('Branch_{}:ReLU_1'.format(branch_index),
                                        nn.ReLU(inplace=False))
        self.residual_branch.add_module('Branch_{}:Conv_1'.format(branch_index),
                                        nn.Conv2d(in_channels,
                                                  out_channels,
                                                  kernel_size=filter_size,
                                                  stride=stride,
                                                  padding=round(filter_size / 3),
                                                  bias=False))
        self.residual_branch.add_module('Branch_{}:BN_1'.format(branch_index),
                                        nn.BatchNorm2d(out_channels))
        self.residual_branch.add_module('Branch_{}:ReLU_2'.format(branch_index),
                                        nn.ReLU(inplace=False))
        self.residual_branch.add_module('Branch_{}:Conv_2'.format(branch_index),
                                        nn.Conv2d(out_channels,
                                                  out_channels,
                                                  kernel_size=filter_size,
                                                  stride=1,
                                                  padding=round(filter_size / 3),
                                                  bias=False))
        self.residual_branch.add_module('Branch_{}:BN_2'.format(branch_index),
                                        nn.BatchNorm2d(out_channels))

    def forward(self, x):
        return self.residual_branch(x)

In [24]:
class SkipConnection(nn.Module):
    def __init__(self, in_channels, out_channels, stride):
        super(SkipConnection, self).__init__()

        self.s1 = nn.Sequential()
        self.s1.add_module('Skip_1_AvgPool',
                           nn.AvgPool2d(1, stride=stride))
        self.s1.add_module('Skip_1_Conv',
                           nn.Conv2d(in_channels,
                                     int(out_channels / 2),
                                     kernel_size=1,
                                     stride=1,
                                     padding=0,
                                     bias=False))

        self.s2 = nn.Sequential()
        self.s2.add_module('Skip_2_AvgPool',
                           nn.AvgPool2d(1, stride=stride))
        self.s2.add_module('Skip_2_Conv',
                           nn.Conv2d(in_channels,
                                     int(out_channels / 2) if out_channels % 2 == 0 else int(out_channels / 2) + 1,
                                     kernel_size=1,
                                     stride=1,
                                     padding=0,
                                     bias=False))

        self.batch_norm = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        out1 = F.relu(x, inplace=False)
        out1 = self.s1(out1)

        out2 = F.pad(x[:, :, 1:, 1:], (0, 1, 0, 1))
        out2 = self.s2(out2)

        out = torch.cat([out1, out2], dim=1)
        out = self.batch_norm(out)

        return out


class BasicBlock(nn.Module):
    def __init__(self, n_input_plane, n_output_plane, filter_size, res_branches, stride):
        super(BasicBlock, self).__init__()

        self.branches = nn.ModuleList([ResidualBranch(n_input_plane, n_output_plane, filter_size, stride, branch + 1) for branch in range(res_branches)])

        self.skip = nn.Sequential()
        if n_input_plane != n_output_plane or stride != 1:
            self.skip.add_module('Skip_connection',
                                 SkipConnection(n_input_plane, n_output_plane, stride))
                                 

    def forward(self, x):
        out = sum([self.branches[i](x) for i in range(len(self.branches))])
        return out + self.skip(x)


class ResidualGroup(nn.Module):
    def __init__(self, block, n_input_plane, n_output_plane, n_blocks, filter_size, res_branches, stride):
        super(ResidualGroup, self).__init__()
        self.group = nn.Sequential()
        self.n_blocks = n_blocks

        self.group.add_module('Block_1',
                              block(n_input_plane,
                                    n_output_plane,
                                    filter_size,
                                    res_branches,
                                    stride=1))

        # The following residual block do not perform any downsampling (stride=1)
        for block_index in range(2, n_blocks + 1):
            block_name = 'Block_{}'.format(block_index)
            self.group.add_module(block_name,
                                  block(n_output_plane,
                                        n_output_plane,
                                        filter_size,
                                        res_branches,
                                        stride=1))

    def forward(self, x):
        return self.group(x)


class Network(nn.Module):
    def __init__(self, config):
        super(Network, self).__init__()

        self.M = config["M"]
        self.residual_blocks = {'Group_1': config["R1"],
                                'Group_2': config["R2"],
                                'Group_3': config["R3"],
                                'Group_4': config["R4"],
                                'Group_5': config["R5"]
                                }
 
        self.widen_factors = {'Group_1': config["widenfact1"],
                              'Group_2': config["widenfact2"],
                              'Group_3': config["widenfact3"],
                              'Group_4': config["widenfact4"],
                              'Group_5': config["widenfact5"]
                              }

        self.res_branches = {'Group_1': config["B1"],
                             'Group_2': config["B2"],
                             'Group_3': config["B3"],
                             'Group_4': config["B4"],
                             'Group_5': config["B5"]
                             }

        self.conv_blocks =  {'Group_1': config["convblock1"],
                             'Group_2': config["convblock2"],
                             'Group_3': config["convblock3"],
                             'Group_4': config["convblock4"],
                             'Group_5': config["convblock5"]
                             }
        
        self.filters_size = {'Group_1': 3,
                             'Group_2': 3,
                             'Group_3': 3,
                             'Group_4': 3,
                             'Group_5': 3
                             }
        
        self.model = nn.Sequential()
        block = BasicBlock
        self.blocks = nn.Sequential()
        self.blocks.add_module('Conv_0',
                              nn.Conv2d(3,
                                        config["out_channel0"],
                                        kernel_size=3,
                                        stride=1,
                                        padding=1,
                                        bias=False))
        
        self.blocks.add_module('BN_0',
                              nn.BatchNorm2d(config["out_channel0"]))

        feature_maps_in = int(round(config["out_channel0"] // self.widen_factors['Group_1']))
        
        self.blocks.add_module('Group_1',
                              ResidualGroup(block, 
                                            config["out_channel0"], 
                                            feature_maps_in, 
                                            self.residual_blocks['Group_1'], 
                                            self.filters_size['Group_1'],
                                            self.res_branches['Group_1'],
                                            1))
        feature_maps_out = int(round(feature_maps_in // self.widen_factors['Group_2']))
        for m in range(2, self.M + 1):
            feature_maps_out = int(round(feature_maps_in // self.widen_factors['Group_{}'.format(m)]))
            self.blocks.add_module('Group_{}'.format(m),
                                  ResidualGroup(block, 
                                                feature_maps_in, 
                                                feature_maps_out, 
                                                self.residual_blocks['Group_{}'.format(m)],
                                                self.filters_size['Group_{}'.format(m)],
                                                self.res_branches['Group_{}'.format(m)],
                                                2 if m in (self.M, self.M - 1) else 1))
            feature_maps_in = feature_maps_out
        
        self.feature_maps_out = feature_maps_out
        self.blocks.add_module('ReLU_0',
                              nn.ReLU(inplace=True))
        self.blocks.add_module('AveragePool',
                              nn.AvgPool2d(8, stride=1))
        
        self.model.add_module("Main_blocks" ,self.blocks)
        input_dim = (3,100,100)
        self.fc_len  = functools.reduce(operator.mul, list(self.blocks(torch.rand(1, *input_dim)).shape))
        
        self.fc = nn.Linear(self.fc_len, 1)

    def forward(self, x):
        x = self.model(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

In [25]:
from config_space import ConfigSpace
CS = ConfigSpace("VWW") 
config = CS.sample_arch()
network = Network(config)

print(config)

{'out_channel0': 8, 'M': 2, 'R1': 15, 'R2': 9, 'R3': 14, 'R4': 7, 'R5': 8, 'convblock1': 2, 'widenfact1': 0.5417221212839691, 'B1': 3, 'convblock2': 2, 'widenfact2': 0.6772060325149005, 'B2': 2, 'convblock3': 2, 'widenfact3': 0.6308970789587962, 'B3': 2, 'convblock4': 2, 'widenfact4': 0.6156845773812882, 'B4': 2, 'convblock5': 1, 'widenfact5': 0.5212398403701596, 'B5': 1}


In [26]:
from torchinfo import summary

batch_size = 32
summary(network, input_size=(batch_size, 3, 100, 100))

Layer (type:depth-idx)                                            Output Shape              Param #
Network                                                           --                        --
├─Sequential: 1-1                                                 [32, 20, 93, 93]          --
│    └─Sequential: 2-1                                            [32, 20, 93, 93]          --
├─Sequential: 1-2                                                 [32, 20, 93, 93]          (recursive)
├─Sequential: 1                                                   --                        --
│    └─Sequential: 2                                              --                        --
│    │    └─Conv2d: 3-1                                           [32, 8, 100, 100]         216
├─Sequential: 1                                                   --                        --
│    └─Conv2d: 2-2                                                [32, 8, 100, 100]         (recursive)
├─Sequential: 1           

In [27]:
network

Network(
  (model): Sequential(
    (Main_blocks): Sequential(
      (Conv_0): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (BN_0): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (Group_1): ResidualGroup(
        (group): Sequential(
          (Block_1): BasicBlock(
            (branches): ModuleList(
              (0): ResidualBranch(
                (residual_branch): Sequential(
                  (Branch_1:ReLU_1): ReLU()
                  (Branch_1:Conv_1): Conv2d(8, 14, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                  (Branch_1:BN_1): BatchNorm2d(14, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                  (Branch_1:ReLU_2): ReLU()
                  (Branch_1:Conv_2): Conv2d(14, 14, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                  (Branch_1:BN_2): BatchNorm2d(14, eps=1e-05, momentum=0.1, affine=True, track_running_stats

In [18]:
# test model 
dummy_input = torch.randn([32, 3, 100, 100])
network.eval()
np_torch_out = network(dummy_input).data.numpy()
print(np_torch_out)
print(np_torch_out.shape)

[[0.05744118]
 [0.05584985]
 [0.0652326 ]
 [0.05277881]
 [0.07493815]
 [0.06976712]
 [0.06408133]
 [0.05842937]
 [0.0481573 ]
 [0.06557176]
 [0.05069201]
 [0.06370313]
 [0.06594738]
 [0.05834076]
 [0.06269496]
 [0.05391512]
 [0.063798  ]
 [0.06442809]
 [0.06060513]
 [0.07303403]
 [0.05658451]
 [0.05539465]
 [0.0764723 ]
 [0.05637769]
 [0.06180061]
 [0.0570996 ]
 [0.07486883]
 [0.07805192]
 [0.06270741]
 [0.06700943]
 [0.0730772 ]
 [0.06493098]]
(32, 1)


In [19]:
import pandas as pd

In [20]:
# Sample Architectures from search space 

configs =  []
for i in range(1000):
    config = CS.sample_arch()
    configs.append(config)
    
ss = pd.DataFrame(configs)
    
ss.head()

Unnamed: 0,out_channel0,M,R1,R2,R3,R4,R5,convblock1,widenfact1,B1,...,B2,convblock3,widenfact3,B3,convblock4,widenfact4,B4,convblock5,widenfact5,B5
0,8,1,10,7,13,11,8,2,0.782263,4,...,1,1,0.654444,4,2,0.698971,1,2,0.772416,1
1,8,4,13,9,11,15,9,2,0.797842,1,...,1,2,0.517445,2,1,0.799832,2,2,0.544205,4
2,12,3,9,1,1,6,12,1,0.752592,2,...,3,2,0.658073,1,2,0.719918,2,2,0.706077,3
3,8,1,7,8,10,0,4,2,0.588439,2,...,3,2,0.731988,3,2,0.54539,2,1,0.575489,1
4,64,2,11,10,5,9,13,2,0.587691,1,...,2,1,0.620929,2,2,0.798232,3,1,0.625117,1


In [41]:
pd.to_csv(ss)

AttributeError: module 'pandas' has no attribute 'to_csv'