In [2]:
#export
from torch import Tensor, randint
import torch.nn as nn
from ..layer import Flatten
from ..function import ceil_power_of_two

import numpy as np
from math import ceil

In [1]:
from torch import Tensor, randint
import torch.nn as nn
from include.layer import Flatten
from include.function import ceil_power_of_two

import numpy as np
from math import ceil

In [2]:
#export
# Total width of the uniform noise added by `add_noise`, expressed as a
# fraction of the weight's peak-to-peak range. `BatchNormEdge.set_identical`
# also uses it as the half-width around 1.0 when initialising the scale.
NOISE_RATIO = 1e-4
# Share of an output channel's parameters handed to its copy when
# `ConvEdge.updated_dest` duplicates a channel; the origin keeps `1 - COPY_RATIO`.
COPY_RATIO = 1e-1

In [70]:
#export
class Edge:
    """Base class for a directed connection between two nodes of the graph.

    An edge optionally wraps a layer.  The base implementation is shape
    preserving: `calculate_output` returns the input shape unchanged and the
    `updated_*` hooks forward a shape change from one end of the edge to the
    other without touching the expansion list.

    Args:
        src: Identifier of the source node.
        dest: Identifier of the destination node.
        layer: Layer applied along the edge; ignored when `identical` is True.
        identical: True when the edge is a pure pass-through without a layer.
    """

    def __init__(self, src, dest, layer=None, identical=False):
        self.src = src
        self.dest = dest
        self.identical = identical
        # Always define `layer` so that `as_layer()` is safe on identity
        # edges as well (previously the attribute was missing for them).
        self.layer = None if identical else layer

    def as_layer(self):
        """Return the wrapped layer, or None for an identity edge."""
        return self.layer

    def updated_src(self, in_shape, out_shape, expanded=None):
        """React to a change of the source shape.

        Args:
            in_shape: New shape at the source end.
            out_shape: Current shape at the destination end (unused here).
            expanded: Channel expansion list, passed through unchanged.

        Returns:
            Tuple `(new_out_shape, expanded)`.  The edge preserves shapes, so
            the destination has to adopt `in_shape`.
        """
        # A fresh list per call avoids sharing one mutable default object.
        expanded = [] if expanded is None else expanded
        return in_shape, expanded

    def updated_dest(self, in_shape, out_shape, expanded=None):
        """React to a change of the destination shape.

        Args:
            in_shape: Current shape at the source end (unused here).
            out_shape: New shape at the destination end.
            expanded: Channel expansion list, passed through unchanged.

        Returns:
            Tuple `(new_in_shape, expanded)`.  The edge preserves shapes, so
            the source has to adopt `out_shape`.
        """
        expanded = [] if expanded is None else expanded
        return out_shape, expanded

    def calculate_output(self, in_shape):
        """Return the output shape produced for `in_shape` (identity here)."""
        return in_shape

    def verify_output(self, in_shape, out_shape):
        """Check that `out_shape` is compatible with what the edge produces.

        Dimensions that are not strictly positive on either side are treated
        as wildcards and skipped.

        Returns:
            False when `calculate_output` rejects `in_shape` (returns None),
            when the ranks differ, or when a pair of positive dimensions
            disagrees; True otherwise.
        """
        expected_output = self.calculate_output(in_shape)
        if expected_output is None:
            return False
        if len(expected_output) != len(out_shape):
            return False
        return all(e == o
                   for e, o in zip(expected_output, out_shape)
                   if e > 0 and o > 0)

In [57]:
#export
def add_noise(weight):
    """Perturb `weight` with small zero-centred uniform noise.

    The noise interval has a total width of `NOISE_RATIO` times the
    peak-to-peak range of `weight`, so a constant input receives no noise.

    Returns:
        The element-wise sum of `weight` and the noise, converted with
        `.float()`.
    """
    half_width = np.ptp(weight.flatten()) * NOISE_RATIO / 2.0
    jitter = np.random.uniform(low=-half_width, high=half_width, size=weight.shape)
    noisy = np.add(weight, jitter)
    return noisy.float()

In [58]:
#export
class FlattenEdge(Edge):
    """Edge that collapses its whole input shape into a single dimension."""

    def __init__(self, src, dest):
        super(FlattenEdge, self).__init__(src, dest, layer=Flatten(), identical=False)

    def updated_src(self, in_shape, out_shape, expanded=None):
        """Recompute the flattened shape and translate the expansion list.

        Args:
            in_shape: New shape at the source end; `in_shape[0]` is the
                channel count the expansion pairs refer to.
            out_shape: Ignored; the flattened shape is derived from `in_shape`.
            expanded: Iterable of `(origin, copy)` channel index pairs.  A
                negative origin marks a channel without an origin.

        Returns:
            Tuple `(out_shape, new_expanded)` where every channel pair has
            been unrolled into one pair per flattened feature of the channel.
        """
        out_shape = self.calculate_output(in_shape)
        # Number of flattened features contributed by a single channel.
        feature_size = out_shape[0] // in_shape[0]

        new_expanded = []
        for origin_ch, copied_ch in (expanded or ()):
            if origin_ch < 0:
                # Keep the "no origin" marker for every unrolled feature.
                origin = (-1,) * feature_size
            else:
                origin = range(origin_ch * feature_size, (origin_ch + 1) * feature_size)
            copied = range(copied_ch * feature_size, (copied_ch + 1) * feature_size)
            new_expanded.extend(zip(origin, copied))

        return out_shape, new_expanded

    def calculate_output(self, in_shape):
        """Return a 1-tuple holding the product of all input dimensions."""
        total = 1
        for dim in in_shape:
            total *= dim
        return (total,)

In [59]:
#export
class BatchNormEdge(Edge):
    """Edge wrapping an `nn.BatchNorm2d` whose width can follow its neighbours."""

    def __init__(self, src, dest, num_features):
        self.args = {
            'num_features': num_features,
        }
        super(BatchNormEdge, self).__init__(src,
                                            dest,
                                            layer=nn.BatchNorm2d(**self.args),
                                            identical=False)

    def set_zeros(self):
        """Set the scale (`weight`) of every channel to zero."""
        nn.init.constant_(self.layer.weight, 0)

    def set_identical(self):
        """Reset the layer to a near-identity affine transform.

        Running statistics and parameters are reset, the bias is zeroed and
        the scale is drawn uniformly from `1 +/- NOISE_RATIO`.
        """
        self.layer.reset_running_stats()
        self.layer.reset_parameters()
        nn.init.constant_(self.layer.bias, 0)
        nn.init.uniform_(self.layer.weight, a=1.0-NOISE_RATIO, b=1.0+NOISE_RATIO)

    def _resize(self, nf, expanded):
        """Rebuild the layer with `nf` features, carrying over its state.

        Existing channels are copied, every `(origin, copy)` pair in
        `expanded` duplicates channel `origin` into channel `copy` (or zeroes
        `copy` when `origin` is negative) and `add_noise` is applied to the
        resulting parameters.
        """
        old_layer = self.layer

        # Update argument
        self.args['num_features'] = nf
        new_layer = nn.BatchNorm2d(**self.args)

        for key, param in old_layer.named_parameters():
            original = param.data
            # `min` keeps the copy valid even if the layer is asked to shrink.
            keep = min(len(original), nf)
            # Zero-initialised so channels that are neither kept nor listed in
            # `expanded` hold defined values instead of uninitialised memory.
            extended = original.new_zeros(nf)
            extended[:keep] = original[:keep]
            for o, c in expanded:
                if o < 0:
                    extended[c] = 0.
                else:
                    extended[c] = extended[o]
            getattr(new_layer, key).data = add_noise(extended)

        # Carry the running statistics over as well; otherwise the rebuilt
        # layer would normalise differently in eval mode.
        for key in ('running_mean', 'running_var'):
            old_stat = getattr(old_layer, key, None)
            new_stat = getattr(new_layer, key, None)
            if old_stat is None or new_stat is None:
                continue
            keep = min(len(old_stat), nf)
            new_stat[:keep] = old_stat[:keep]
            for o, c in expanded:
                if o >= 0:
                    new_stat[c] = new_stat[o]
        old_count = getattr(old_layer, 'num_batches_tracked', None)
        new_count = getattr(new_layer, 'num_batches_tracked', None)
        if old_count is not None and new_count is not None:
            new_count.copy_(old_count)

        # Update the layer
        self.layer = new_layer

    def updated_src(self, in_shape, out_shape, expanded=None):
        """Resize the layer to the new source width.

        Args:
            in_shape: New shape at the source end; `in_shape[0]` becomes the
                number of features.
            out_shape: Ignored; the layer preserves shapes.
            expanded: List of `(origin, copy)` channel index pairs.

        Returns:
            Tuple `(in_shape, expanded)`: the new output shape and the
            expansion list passed on unchanged.
        """
        expanded = [] if expanded is None else expanded
        self._resize(in_shape[0], expanded)

        # Generate new output shape
        return in_shape, expanded

    def updated_dest(self, in_shape, out_shape, expanded=None):
        """Resize the layer to the new destination width.

        Args:
            in_shape: Ignored; the layer preserves shapes.
            out_shape: New shape at the destination end; `out_shape[0]`
                becomes the number of features.
            expanded: List of `(origin, copy)` channel index pairs.

        Returns:
            Tuple `(out_shape, expanded)`: the new input shape and the
            expansion list passed on unchanged.
        """
        expanded = [] if expanded is None else expanded
        self._resize(out_shape[0], expanded)

        # Generate new input shape
        return out_shape, expanded

    def calculate_output(self, in_shape):
        """Return `in_shape`, or None when its channel count does not match."""
        if in_shape[0] != self.args['num_features']: return None
        else: return in_shape

In [60]:
#export
class ReluEdge(Edge):
    """Edge that applies an `nn.ReLU` layer; it takes no layer arguments."""

    def __init__(self, src, dest):
        # Kept for symmetry with the other edges, which store their
        # constructor arguments in `self.args`.
        self.args = dict()
        activation = nn.ReLU()
        super(ReluEdge, self).__init__(src, dest, layer=activation, identical=False)

In [61]:
#export
class PoolingEdge(Edge):
    """Base class for 2-D pooling edges with non-overlapping windows.

    The stride always equals the kernel size, padding is zero and
    `ceil_mode` is enabled.  Subclasses choose the concrete layer class by
    overriding `_get_layer`.
    """

    def _get_layer(self):
        """Return the pooling layer class (not an instance) to instantiate."""
        # Fail with a clear message instead of "'NoneType' is not callable".
        raise NotImplementedError('PoolingEdge subclasses must implement _get_layer()')

    def __init__(self, src, dest, kernel_size):
        kernel_size = (kernel_size,) * 2 if isinstance(kernel_size, int) else kernel_size
        stride = kernel_size
        padding = (0,) * 2

        self.args = {
            'kernel_size': kernel_size,
            'stride': stride,
            'padding': padding,
            'ceil_mode': True
        }

        super(PoolingEdge, self).__init__(src,
                                          dest,
                                          layer=self._get_layer()(**self.args),
                                          identical=False)

    def updated_src(self, in_shape, out_shape, expanded=None):
        """Return the pooled shape for the new `in_shape`; `expanded` passes through."""
        expanded = [] if expanded is None else expanded
        out_shape = self.calculate_output(in_shape)

        return out_shape, expanded

    def updated_dest(self, in_shape, out_shape, expanded=None):
        """Adapt the pooling window so that `in_shape` maps onto `out_shape`.

        When the spatial dimensions no longer match, kernel size and stride
        are recomputed with `ceil_power_of_two` from the input/output ratio
        and the layer is rebuilt.

        Returns:
            Tuple `(new_in_shape, expanded)` where the new input shape takes
            its channel count from `out_shape` and its spatial dimensions
            from `in_shape`.
        """
        expanded = [] if expanded is None else expanded
        exp_shape = self.calculate_output(in_shape)
        if out_shape[1:] != exp_shape[1:]:
            stride = tuple(ceil_power_of_two(i/o) for i, o in zip(in_shape[1:], out_shape[1:]))
            self.args['kernel_size'] = stride
            self.args['padding'] = (0,) * 2
            self.args['stride'] = stride

            self.layer = self._get_layer()(**self.args)

        # Pooling keeps the channel count, so the source follows the destination.
        in_shape = (out_shape[0],) + in_shape[1:]

        return in_shape, expanded

    def calculate_output(self, in_shape):
        """Return the pooled shape, rounding partial windows up (ceil mode)."""
        nf = (in_shape[0],)
        attr = zip(in_shape[1:],
                   self.args['kernel_size'],
                   self.args['stride'],
                   self.args['padding'])
        ch = tuple([ceil((x + 2 * pd - ks) / st) + 1 for x, ks, st, pd in attr])
        out_shape = nf + ch
        return out_shape

In [62]:
#export
class MaxPoolingEdge(PoolingEdge):
    """Pooling edge backed by `nn.MaxPool2d`."""

    def _get_layer(self):
        # The class itself is returned; PoolingEdge instantiates it.
        layer_cls = nn.MaxPool2d
        return layer_cls

In [63]:
#export
class AvgPoolingEdge(PoolingEdge):
    """Pooling edge backed by `nn.AvgPool2d`."""

    def _get_layer(self):
        # The class itself is returned; PoolingEdge instantiates it.
        layer_cls = nn.AvgPool2d
        return layer_cls

In [64]:
#export
class AdaptivePoolingEdge(Edge):
    """Base class for adaptive 2-D pooling edges with a fixed output size."""

    def __init__(self, src, dest, output_size):
        # Intentionally empty: the concrete subclasses build their layer and
        # call Edge.__init__ directly, bypassing this constructor.
        pass

    def _set_args(self, output_size):
        """Store the layer arguments used to build the pooling layer."""
        self.args={
            'output_size': output_size
        }

    def updated_src(self, in_shape, out_shape=None, expanded=None):
        """Return the pooled shape for the new `in_shape`.

        The signature follows `Edge.updated_src` so the edge can be driven
        with `(in_shape, out_shape, expanded)` like every other edge;
        `out_shape` is ignored because the spatial size is fixed.

        Returns:
            Tuple `(out_shape, expanded)`; `expanded` passes through.
        """
        expanded = [] if expanded is None else expanded
        return self.calculate_output(in_shape), expanded

    def updated_dest(self, in_shape, out_shape, expanded=None):
        """Propagate a channel change of the destination to the source.

        Only the channel count is taken from `out_shape`; the spatial
        dimensions of the input are unconstrained by adaptive pooling and are
        kept from `in_shape`.
        """
        expanded = [] if expanded is None else expanded
        return (out_shape[0],) + tuple(in_shape[1:]), expanded

    def calculate_output(self, in_shape):
        """Return `(channels,) + spatial_output_size` for `in_shape`.

        `output_size` may be a single int (applied to every spatial
        dimension) or a sequence; a None entry keeps the input size of that
        dimension.
        """
        spatial_in = tuple(in_shape[1:])
        size = self.args['output_size']
        if size is None or isinstance(size, int):
            size = (size,) * len(spatial_in)
        spatial_out = tuple(x if s is None else s for s, x in zip(size, spatial_in))
        # Adaptive pooling leaves the channel dimension untouched.
        return (in_shape[0],) + spatial_out

In [13]:
#export
class AdaptiveMaxPoolingEdge(AdaptivePoolingEdge):
    """Adaptive pooling edge backed by `nn.AdaptiveMaxPool2d`."""

    def __init__(self, src, dest, output_size):
        # `self.args` has to exist first: the layer is built from it.
        self._set_args(output_size)
        pool = nn.AdaptiveMaxPool2d(**self.args)
        # The lookup starts after AdaptivePoolingEdge in the MRO, so that
        # class's own (empty) constructor is skipped.
        super(AdaptivePoolingEdge, self).__init__(src, dest, layer=pool, identical=False)

In [14]:
#export
class AdaptiveAvgPoolingEdge(AdaptivePoolingEdge):
    """Adaptive pooling edge backed by `nn.AdaptiveAvgPool2d`."""

    def __init__(self, src, dest, output_size):
        # `self.args` has to exist first: the layer is built from it.
        self._set_args(output_size)
        pool = nn.AdaptiveAvgPool2d(**self.args)
        # The lookup starts after AdaptivePoolingEdge in the MRO, so that
        # class's own (empty) constructor is skipped.
        super(AdaptivePoolingEdge, self).__init__(src, dest, layer=pool, identical=False)

In [17]:
#export
class LinearEdge(Edge):
    """Edge wrapping an `nn.Linear` whose input width can be expanded."""

    def __init__(self, src, dest, in_features, out_features, bias=True):
        self.args = {
            'in_features': in_features,
            'out_features': out_features,
            'bias': bias
        }
        super(LinearEdge, self).__init__(src,
                                         dest,
                                         layer=nn.Linear(**self.args),
                                         identical=False)

    def set_identical(self):
        """Initialise the layer as a (slightly noisy) identity mapping.

        Raises:
            Exception: If the input and output widths differ.
        """
        if self.args['in_features'] != self.args['out_features']:
            raise Exception('Cannot set identical weight')
        nn.init.eye_(self.layer.weight)
        # The layer has no bias tensor when it was built with bias=False.
        if self.layer.bias is not None:
            nn.init.constant_(self.layer.bias, 0.)

        self.layer.weight.data = add_noise(self.layer.weight.data)

    def updated_src(self, in_shape, out_shape, expanded=None):
        """Rebuild the layer for a new input width.

        Args:
            in_shape: New shape at the source end; `in_shape[0]` becomes
                `in_features`.
            out_shape: Ignored; the output width does not change.
            expanded: Iterable of `(origin, copy)` input index pairs.  Column
                `copy` receives the original weights of column `origin`, or
                zeros when `origin` is negative.

        Returns:
            Tuple `(out_shape, [])`; the expansion stops at this edge.
        """
        ni = in_shape[0]
        prev_ni = self.args['in_features']

        # Get original parameters
        weight = self.layer.weight.data
        bias = self.layer.bias.data if self.layer.bias is not None else None

        # Expand original parameters.  Zero-initialised so columns that are
        # neither kept nor listed in `expanded` hold defined values.
        keep = min(prev_ni, ni)
        expanded_weight = weight.new_zeros(self.args['out_features'], ni)
        expanded_weight[:, :keep] = weight[:, :keep]
        for o, c in (expanded or ()):
            if o < 0:
                expanded_weight[:, c] = 0.
            else:
                expanded_weight[:, c] = weight[:, o]
        expanded_weight = add_noise(expanded_weight)

        # Update the args
        self.args['in_features'] = ni

        # Update the layer
        new_layer = nn.Linear(**self.args)
        new_layer.weight.data = expanded_weight
        if bias is not None:
            new_layer.bias.data = bias
        self.layer = new_layer

        # Generate new output shape
        out_shape = self.calculate_output(in_shape)
        return out_shape, []

    def updated_dest(self, in_shape, out_shape=None, expanded=None):
        """Not supported: the output width of a linear edge cannot change yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError('unimplemented')

    def calculate_output(self, in_shape):
        """Return `(out_features,)`, or None when the input width mismatches."""
        if in_shape[0] != self.args['in_features']: return None
        return (self.args['out_features'],)

In [16]:
#export
class ConvEdge(Edge):
    """Edge wrapping an `nn.Conv2d` whose channel counts can be expanded.

    Padding is fixed to half the kernel size in each dimension.
    """

    def __init__(self, src, dest,
                 in_channels, out_channels, kernel_size, stride=1, bias=True):
        kernel_size = (kernel_size,) * 2 if isinstance(kernel_size, int) else kernel_size
        padding = tuple(ks//2 for ks in kernel_size)
        stride = (stride,) * 2 if isinstance(stride, int) else stride

        self.args = {
            'in_channels': in_channels,
            'out_channels': out_channels,
            'kernel_size': kernel_size,
            'stride': stride,
            'padding': padding,
            'bias': bias
        }
        super(ConvEdge, self).__init__(src,
                                       dest,
                                       layer=nn.Conv2d(**self.args),
                                       identical=False)

    def set_identical(self):
        """Initialise the kernel as a (slightly noisy) identity mapping.

        Every output channel gets a single 1 at the kernel centre of the
        input channel with the same index; the bias, if any, is zeroed.

        Raises:
            Exception: If the input and output channel counts differ.
        """
        if self.args['in_channels'] != self.args['out_channels']:
            raise Exception('Cannot set identical weight')
        center = tuple(i // 2 for i in self.args['kernel_size'])
        nf = self.args['in_channels']

        nn.init.constant_(self.layer.weight, 0.)
        if self.args['bias']: nn.init.constant_(self.layer.bias, 0.)
        for i in range(nf):
            self.layer.weight.data[i,i,center[0],center[1]] = 1.

        self.layer.weight.data = add_noise(self.layer.weight.data)

    def _adjust_stride(self, in_shape, out_shape):
        """Recompute the stride when the spatial output no longer matches.

        The stride is left alone when the expected shape cannot be computed
        or when `out_shape` contains non-positive (wildcard) spatial
        dimensions.
        """
        exp_shape = self.calculate_output(in_shape)
        if exp_shape is None:
            return
        spatial_in = tuple(in_shape[1:])
        spatial_out = tuple(out_shape[1:])
        if tuple(exp_shape[1:]) == spatial_out:
            return
        if len(spatial_out) != len(spatial_in) or any(o <= 0 for o in spatial_out):
            return
        # NOTE(review): assumes ceil_power_of_two takes the input/output ratio
        # as its single argument, as PoolingEdge calls it - confirm.
        self.args['stride'] = tuple(ceil_power_of_two(i / o)
                                    for i, o in zip(spatial_in, spatial_out))

    def updated_src(self, in_shape, out_shape, expanded=None):
        """Rebuild the layer for a new number of input channels.

        Args:
            in_shape: New shape at the source end; `in_shape[0]` becomes
                `in_channels`.
            out_shape: Current destination shape; its spatial dimensions are
                used to re-derive the stride when they no longer match.
            expanded: Iterable of `(origin, copy)` input channel pairs.
                Channel `copy` receives the original kernels of channel
                `origin`, or zeros when `origin` is negative.

        Returns:
            Tuple `(out_shape, [])`; the expansion stops at this edge.
        """
        ni = in_shape[0]
        nf = self.args['out_channels']
        # Remember the old width before overwriting it: it bounds the slice
        # of channels that can be copied from the existing kernel.
        prev_ni = self.args['in_channels']

        # Update the args
        self.args['in_channels'] = ni

        # Adjust stride
        self._adjust_stride(in_shape, out_shape)

        # Get original parameters
        weight = self.layer.weight.data

        # Expand original parameters.  Zero-initialised so channels that are
        # neither kept nor listed in `expanded` hold defined values.
        keep = min(prev_ni, ni)
        expanded_weight = weight.new_zeros(nf, ni, *self.args['kernel_size'])
        expanded_weight[:, :keep, ...] = weight[:, :keep, ...]
        for o, c in (expanded or ()):
            if o < 0:
                expanded_weight[:, c, ...] = 0.
            else:
                expanded_weight[:, c, ...] = weight[:, o, ...]
        expanded_weight = add_noise(expanded_weight)

        # Update the layer
        new_layer = nn.Conv2d(**self.args)
        new_layer.weight.data = expanded_weight
        if self.args['bias']: new_layer.bias.data = self.layer.bias.data
        self.layer = new_layer

        # Generate new output shape
        out_shape = self.calculate_output(in_shape)
        return out_shape, []

    def updated_dest(self, in_shape, out_shape, expanded=None):
        """Rebuild the layer for a new number of output channels.

        Args:
            in_shape: Current shape at the source end.
            out_shape: New destination shape.  A non-positive `out_shape[0]`
                keeps the current number of output channels.
            expanded: Iterable of `(origin, copy)` output channel pairs.
                Channel `copy` receives `COPY_RATIO` of channel `origin`'s
                parameters and `origin` keeps the remainder; a negative
                origin zeroes `copy` instead.

        Returns:
            Tuple `(in_shape, [])`; the source side is not affected.
        """
        ni = self.args['in_channels']
        prev_nf = self.args['out_channels']
        nf = out_shape[0] if out_shape[0] > 0 else prev_nf

        # Update the args
        self.args['out_channels'] = nf

        # Adjust stride
        self._adjust_stride(in_shape, out_shape)

        # Expand original parameters (weight, plus bias when present)
        keep = min(prev_nf, nf)
        expanded_params = {}
        for key, param in self.layer.named_parameters():
            original = param.data
            grown = original.new_zeros((nf,) + tuple(original.shape[1:]))
            grown[:keep, ...] = original[:keep, ...]

            for o, c in (expanded or ()):
                if o < 0:
                    grown[c, ...] = 0.
                else:
                    grown[c, ...] = grown[o, ...] * COPY_RATIO
                    grown[o, ...] = grown[o, ...] * (1 - COPY_RATIO)
            # add_noise returns a new tensor, so its result has to be kept.
            expanded_params[key] = add_noise(grown)

        # Update the layer
        new_layer = nn.Conv2d(**self.args)
        new_layer.weight.data = expanded_params['weight']
        if self.args['bias']: new_layer.bias.data = expanded_params['bias']
        self.layer = new_layer

        # Generate new input shape
        in_shape = (ni,) + tuple(in_shape[1:])
        return in_shape, []

    def calculate_output(self, in_shape):
        """Return the convolution output shape, or None on a channel mismatch."""
        if(in_shape[0] != self.args['in_channels']): return None
        nf = self.args['out_channels']
        attr = zip(in_shape[1:],
                   self.args['kernel_size'],
                   self.args['stride'],
                   self.args['padding'])
        ch = tuple([(x + 2 * pd - ks) // st + 1 for x, ks, st, pd in attr])
        out_shape = (nf,) + ch
        return out_shape

In [94]:
# export
class IdenticalEdge(Edge):
    """Layer-less edge that is flagged as an identity connection."""

    def __init__(self, src, dest):
        # No layer is attached; the `identical` flag marks the pass-through.
        super(IdenticalEdge, self).__init__(src, dest, layer=None, identical=True)

In [72]:
!python nb2py.py edges.ipynb

Converted edges.ipynb to exp/nb_edges.py


# Test

Flatten

In [65]:
e = FlattenEdge(1, 2)

In [66]:
e.updated_src((1, 32, 64), (256)) # expected: 1*32*64=2048

((2048,), [])

BatchNorm

In [22]:
e = BatchNormEdge(1, 2, num_features=4)
dict(e.layer.named_parameters())

{'weight': Parameter containing:
 tensor([0.4726, 0.2227, 0.4838, 0.9731], requires_grad=True),
 'bias': Parameter containing:
 tensor([0., 0., 0., 0.], requires_grad=True)}

In [23]:
e.updated_src((8, 16, 16), (16, 16, 16), expanded=[(0, 4), (0, 5), (1, 6), (2, 7)])
# expected: ((8, 16, 16), [(0, 4), (0, 5), (1, 6), (2, 7)])

((8, 16, 16), [(0, 4), (0, 5), (1, 6), (2, 7)])

In [24]:
dict(e.layer.named_parameters())

{'weight': Parameter containing:
 tensor([0.4726, 0.2227, 0.4838, 0.9731, 0.4725, 0.4726, 0.2227, 0.4838],
        requires_grad=True), 'bias': Parameter containing:
 tensor([0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)}

In [25]:
e = BatchNormEdge(1, 2, num_features=4)
dict(e.layer.named_parameters())

{'weight': Parameter containing:
 tensor([0.8387, 0.7745, 0.4892, 0.8002], requires_grad=True),
 'bias': Parameter containing:
 tensor([0., 0., 0., 0.], requires_grad=True)}

In [27]:
e.updated_dest((4, 16, 16), (8, 16, 16), expanded=[(0, 4), (0, 5), (1, 6), (2, 7)])
# expected: ((8, 16, 16), [(0, 4), (0, 5), (1, 6), (2, 7)])

((8, 16, 16), [(0, 4), (0, 5), (1, 6), (2, 7)])

In [28]:
dict(e.layer.named_parameters())

{'weight': Parameter containing:
 tensor([0.8388, 0.7745, 0.4893, 0.8002, 0.8388, 0.8387, 0.7745, 0.4892],
        requires_grad=True), 'bias': Parameter containing:
 tensor([0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)}

ReLU

In [29]:
e = ReluEdge(1, 2)

In [30]:
e.updated_src((4, 32, 32), (8, 32, 32)) # expected: ((4, 32, 32), [])

((4, 32, 32), [])

In [31]:
e.updated_dest((4, 32, 32), (8, 32, 32)) # expected: ((8, 32, 32), [])

((8, 32, 32), [])

Conv2d

In [79]:
e = ConvEdge(1, 2, in_channels=1, out_channels=2, kernel_size=3)
dict(e.layer.named_parameters())

{'weight': Parameter containing:
 tensor([[[[ 0.1053, -0.1115,  0.2315],
           [ 0.2143, -0.0862, -0.0453],
           [-0.1009,  0.2997,  0.1473]]],
 
 
         [[[-0.2723, -0.1759, -0.1692],
           [-0.1313,  0.2518, -0.0463],
           [-0.0440, -0.3183,  0.0468]]]], requires_grad=True),
 'bias': Parameter containing:
 tensor([0.0240, 0.0457], requires_grad=True)}

In [80]:
e.updated_src((3, 64, 64), (2, 32, 32), expanded=[(0, 1), (0, 2)])
# expected: ((2, 32, 32), []) -- ConvEdge.updated_src absorbs the new input channels and always returns an empty expansion list

((2, 32, 32), [])

In [81]:
dict(e.layer.named_parameters())

{'weight': Parameter containing:
 tensor([[[[ 0.1053, -0.1115,  0.2315],
           [ 0.2143, -0.0862, -0.0453],
           [-0.1008,  0.2997,  0.1473]],
 
          [[ 0.1053, -0.1115,  0.2316],
           [ 0.2143, -0.0862, -0.0453],
           [-0.1009,  0.2997,  0.1473]],
 
          [[ 0.1053, -0.1115,  0.2315],
           [ 0.2143, -0.0862, -0.0453],
           [-0.1009,  0.2997,  0.1473]]],
 
 
         [[[-0.2722, -0.1759, -0.1692],
           [-0.1313,  0.2517, -0.0464],
           [-0.0441, -0.3183,  0.0468]],
 
          [[-0.2723, -0.1759, -0.1692],
           [-0.1313,  0.2517, -0.0463],
           [-0.0440, -0.3183,  0.0468]],
 
          [[-0.2723, -0.1759, -0.1692],
           [-0.1312,  0.2517, -0.0464],
           [-0.0440, -0.3183,  0.0468]]]], requires_grad=True),
 'bias': Parameter containing:
 tensor([0.0240, 0.0457], requires_grad=True)}

In [68]:
e = ConvEdge(1, 2, in_channels=1, out_channels=2, kernel_size=3)
dict(e.layer.named_parameters())

{'weight': Parameter containing:
 tensor([[[[-0.1795, -0.1808, -0.0885],
           [ 0.1792, -0.0700,  0.0561],
           [ 0.2513,  0.2168,  0.2726]]],
 
 
         [[[-0.2957, -0.0678, -0.0680],
           [ 0.2473,  0.2039, -0.2607],
           [ 0.1112, -0.0219,  0.2473]]]], requires_grad=True),
 'bias': Parameter containing:
 tensor([0.1575, 0.1056], requires_grad=True)}

In [72]:
e.updated_dest((1, 32, 32), (3, 32, 32), expanded=[(0, 1), (0, 2)])
# expected: ((1, 32, 32), [])

((1, 32, 32), [])

In [73]:
dict(e.layer.named_parameters())

{'weight': Parameter containing:
 tensor([[[[-0.1178, -0.1186, -0.0581],
           [ 0.1176, -0.0459,  0.0368],
           [ 0.1649,  0.1423,  0.1788]]],
 
 
         [[[-0.0145, -0.0146, -0.0072],
           [ 0.0145, -0.0057,  0.0045],
           [ 0.0204,  0.0176,  0.0221]]],
 
 
         [[[-0.0131, -0.0132, -0.0065],
           [ 0.0131, -0.0051,  0.0041],
           [ 0.0183,  0.0158,  0.0199]]]], requires_grad=True),
 'bias': Parameter containing:
 tensor([0.1033, 0.0128, 0.0115], requires_grad=True)}

Linear

In [67]:
e = LinearEdge(1, 2, in_features=8, out_features=4)
dict(e.layer.named_parameters())

{'weight': Parameter containing:
 tensor([[ 0.2308, -0.0498, -0.1879,  0.2135, -0.3400,  0.3394,  0.2523,  0.1279],
         [-0.2935, -0.0145, -0.3510, -0.1728,  0.1548,  0.0996,  0.1312,  0.2018],
         [ 0.1953, -0.0719, -0.0529,  0.0881, -0.1222,  0.2442, -0.1137, -0.0499],
         [-0.2452,  0.3248, -0.2244,  0.0757, -0.2155, -0.1099, -0.2374,  0.0080]],
        requires_grad=True), 'bias': Parameter containing:
 tensor([-0.0986,  0.0423, -0.3287,  0.2617], requires_grad=True)}

In [50]:
e.updated_src((12,), (4,), expanded=[(0, 8), (1, 9), (2, 10), (3, 11)])
#expected output: ((4,), [])

((4,), [])

In [51]:
dict(e.layer.named_parameters())

{'weight': Parameter containing:
 tensor([[-3.3364e-01,  2.1472e-02, -8.7307e-02, -1.9922e-01, -1.4765e-01,
          -2.5906e-04,  2.6748e-01,  2.1161e-01, -3.3358e-01,  2.1466e-02,
          -8.7265e-02, -1.9922e-01],
         [-2.6385e-01, -2.4632e-01, -1.8650e-01,  3.0797e-01,  1.4379e-01,
          -2.5802e-02,  1.2031e-02,  2.5626e-01, -2.6388e-01, -2.4634e-01,
          -1.8650e-01,  3.0802e-01],
         [-3.2306e-01, -3.2754e-01,  1.6925e-01, -2.9471e-01, -2.1961e-01,
           3.2708e-01,  1.8376e-01,  2.1620e-01, -3.2307e-01, -3.2758e-01,
           1.6929e-01, -2.9472e-01],
         [ 2.1766e-01,  2.3562e-02,  1.3202e-01, -3.4360e-01,  2.4459e-01,
          -3.4941e-01, -2.6102e-01,  1.4828e-01,  2.1764e-01,  2.3578e-02,
           1.3202e-01, -3.4361e-01]], requires_grad=True),
 'bias': Parameter containing:
 tensor([-0.1665, -0.2551, -0.3307, -0.0511], requires_grad=True)}

In [56]:
x = (3, 5)

In [57]:
x = (2, 5, *x); x

(2, 5, 3, 5)