In [64]:
import torch
from torchsummary import summary
from exp.nb_layer import *
from math import ceil, sqrt
import graphviz
import numpy as np

In [65]:
#export
import torch
from math import ceil
from exp.nb_layer import *
from exp.nb_graph_transformer import *
from graphviz import Digraph

# Graph

In [49]:
#export
# Width of the uniform noise band applied by add_noise, expressed as a
# fraction of the weight's peak-to-peak value range.
NOISE_RATIO = 1e-4

In [50]:
#export
class Graph:
    """Directed graph of tensor nodes connected by layer edges.

    Nodes and edges are stored in dictionaries keyed by integer ids that
    start at 1 and grow by one per insertion. The constructor creates the
    input node first and the output node second; their ids are kept in
    `self.input` and `self.output`.
    """

    def __init__(self, input_shape, output_shape):
        """Create a graph holding only an input node and an output node."""
        self.nodes = {}
        self.edges = {}
        self._node_index = 0
        self._edge_index = 0

        self.input = self.add_node(input_shape)
        self.output = self.add_node(output_shape)

    def add_node(self, shape, multi_input=False, layer=None):
        """Register a new node and return its id."""
        node = Node(shape, multi_input, layer)
        self._node_index = self._node_index + 1
        self.nodes[self._node_index] = node
        return self._node_index

    def insert_node(self, src, multi_input=False, layer=None):
        """Insert a node directly after `src` and return the new node's id.

        Every edge that used to leave `src` is re-rooted on the new node, and
        a single new edge `src -> new node` is added:

        * `multi_input=True`: the new node carries `layer` itself and the
          connecting edge is an identity edge.
        * `multi_input=False`: the new node is a plain node and the connecting
          edge carries `layer`.
        """
        shape = self.nodes[src].shape
        if multi_input:
            new_node = self.add_node(shape, multi_input, layer)
        else:
            new_node = self.add_node(shape=shape)

        # Hand the outgoing edges of `src` over to the new node.
        self.nodes[new_node].out_edge = self.nodes[src].out_edge
        for edge_id in self.nodes[new_node].out_edge:
            self.edges[edge_id].src = new_node
        self.nodes[src].out_edge = []

        # Connect `src` to the new node.
        if multi_input:
            self.add_edge(src, new_node, identical=True)
        else:
            self.add_edge(src, new_node, layer=layer, identical=False)

        return new_node

    def add_edge(self, src, dest, layer=None, identical=False):
        """Register an edge `src -> dest`, link it to both nodes, return its id."""
        edge = Edge(src, dest, layer, identical)

        self._edge_index = self._edge_index + 1
        self.edges[self._edge_index] = edge

        self.nodes[src].add_out_edge(self._edge_index)
        self.nodes[dest].add_in_edge(self._edge_index)

        return self._edge_index

    def visualize(self, filename, path):
        """Render the graph as an SVG named `filename` in directory `path`.

        Nodes are labelled with their id and shape; edges are labelled "id"
        when identical, otherwise with the string form of their layer.
        """
        digraph = Digraph(comment="Model")
        for node_id in self.nodes:
            digraph.node(str(node_id), label=str(node_id) + str(self.nodes[node_id].shape))
        for edge in self.edges.values():
            label = "id" if edge.identical else str(edge.as_layer())
            digraph.edge(str(edge.src), str(edge.dest), label=label)

        digraph.format = 'svg'
        digraph.filename = filename
        digraph.directory = path
        digraph.render(view=False)

    def _node_as_layer(self, id):
        """Return `(layer, input node ids, node id)` for a multi-input node.

        `node.layer` is called with no arguments, so it must be a callable
        that produces the layer.
        """
        node = self.nodes[id]
        layer = node.layer()
        inputs = [self.edges[edge_id].src for edge_id in node.in_edge]
        return (layer, inputs, id)

    def _reverse_traversal(self, id, visited):
        """Depth-first walk from node `id` back along incoming edges.

        Returns a dependencies-first list of steps: `(layer, src, dest)` for
        every non-identical edge and `(layer, inputs, node id)` for every
        multi-input node. `visited` maps node id -> bool and is updated in
        place.
        """
        ts = []
        visited[id] = True
        for edge_id in self.nodes[id].in_edge:
            edge = self.edges[edge_id]
            if not visited[edge.src]:
                ts.extend(self._reverse_traversal(edge.src, visited))
            if not edge.identical:
                ts.append((edge.as_layer(), edge.src, edge.dest))
        if self.nodes[id].multi_input:
            ts.append(self._node_as_layer(id))
        return ts

    def generate_model(self):
        """Build a `Generator` from the steps that lead to the output node."""
        visited = dict.fromkeys(self.nodes, False)
        # Start from the recorded output node rather than a hard-coded id.
        ts = self._reverse_traversal(self.output, visited)
        return Generator(ts)

In [51]:
#export
class Node:
    """A vertex of the graph: a tensor shape plus the ids of its edges.

    Attributes set by the constructor:
      shape       -- shape of the tensor at this node
      multi_input -- flag stored as given (defaults to False)
      in_edge     -- ids of the edges arriving at this node
      out_edge    -- ids of the edges leaving this node
      layer       -- optional object stored as given (defaults to None)
    """

    def __init__(self, shape, multi_input=False, layer=None):
        self.shape = shape
        self.multi_input = multi_input
        self.in_edge = list()
        self.out_edge = list()
        self.layer = layer

    def add_in_edge(self, edge):
        """Record `edge` as an incoming edge id."""
        incoming = self.in_edge
        incoming.append(edge)

    def add_out_edge(self, edge):
        """Record `edge` as an outgoing edge id."""
        outgoing = self.out_edge
        outgoing.append(edge)

    def num_output(self):
        """Return how many outgoing edges are recorded."""
        count = len(self.out_edge)
        return count

    def set_shape(self, shape):
        """Replace the stored shape."""
        self.shape = shape

### TODO on Edges:
    After a layer is modified, its edge should return the (output shape) and the (expanded features).
    The output shape is used to decide whether the next or previous layer must also be modified.
    The expanded features are used to work out which channels must be duplicated or halved.

### OR:
    Fall back to zero-padding when the approach above is not possible.

expanded: a list of (original, copied) index pairs, one pair per duplicated feature.

In [66]:
#export
class Edge:
    """A directed connection `src -> dest` between two node ids.

    An edge either carries a layer or is "identical", i.e. it passes its
    input through unchanged and carries no layer.
    """

    def __init__(self, src, dest, layer, identical=False):
        """Store the endpoints and the layer.

        For an identical edge the `layer` argument is ignored and `self.layer`
        is set to None, so the attribute exists on every edge.
        """
        self.src = src
        self.dest = dest
        self.identical = identical
        self.layer = None if identical else layer

    def as_layer(self):
        """Return the layer carried by this edge (None for identical edges)."""
        return self.layer

    # NOTE: the `expanded=[]` defaults below are shared list objects; they are
    # kept for interface compatibility and must never be mutated.
    def updated_src(self, in_shape, expanded=[]):
        """Hook for reacting to a changed input shape; the base class does nothing."""
        pass

    def updated_dest(self, out_shape, expanded=[]):
        """Hook for reacting to a changed output shape; the base class does nothing."""
        pass

In [125]:
#export
def add_noise(weight):
    """Return `weight` plus small uniform noise.

    The noise is drawn uniformly from a band centred on zero whose total
    width is the peak-to-peak range of `weight` times NOISE_RATIO. `weight`
    must provide `flatten()` and `shape`; callers in this file pass torch
    tensors and call `.float()` on the result.
    """
    half_width = np.ptp(weight.flatten()) * NOISE_RATIO / 2.0
    jitter = np.random.uniform(-half_width, half_width, weight.shape)
    return np.add(weight, jitter)

In [126]:
#export
class FlattenEdge(Edge):
    """Edge carrying `Flatten`: collapses a multi-dimensional shape into one axis."""

    def __init__(self, src, dest):
        super().__init__(src, dest, layer=Flatten, identical=False)

    def updated_src(self, in_shape, expanded=[]):
        """Recompute the flattened shape and translate channel pairs to flat indices.

        `in_shape[0]` is taken as the channel count. Each `(original, copied)`
        channel pair in `expanded` becomes one pair per element of that
        channel in the flattened vector.

        Returns `((total,), flat_pairs)`.
        """
        channels = in_shape[0]
        n_elements = 1
        for dim in in_shape:
            n_elements = n_elements * dim
        per_channel = n_elements // channels

        flat_pairs = []
        for origin_ch, copy_ch in expanded:
            origin_start = origin_ch * per_channel
            copy_start = copy_ch * per_channel
            for offset in range(per_channel):
                flat_pairs.append((origin_start + offset, copy_start + offset))

        return (n_elements,), flat_pairs

In [127]:
#export
class BatchNormEdge(Edge):
    """Edge carrying a `torch.nn.BatchNorm2d` layer."""

    def __init__(self, src, dest, num_features):
        # Keyword arguments used to (re)build the layer.
        self.args = {
            'num_features': num_features,
        }
        super(BatchNormEdge, self).__init__(src,
                                            dest,
                                            layer=torch.nn.BatchNorm2d(**self.args),
                                            identical=False)

    def updated_src(self, in_shape, expanded=[]):
        """Rebuild the layer for `in_shape[0]` channels, keeping learned parameters.

        Existing weight/bias values keep their positions. For each
        `(original, copied)` pair in `expanded`, channel `copied` receives the
        values of channel `original`. Any remaining new channel starts from
        the BatchNorm defaults (weight 1, bias 0). Noise is then added via
        `add_noise`.

        Returns `(in_shape, expanded)`: batch norm changes neither.
        """
        nf = in_shape[0]
        # Start from BatchNorm's default affine parameters instead of
        # uninitialised memory, so channels not covered below hold defined
        # values and the noise range computed by add_noise stays meaningful.
        extended_params = {
            'weight': torch.ones(nf),
            'bias': torch.zeros(nf)
        }

        # Get original parameters.
        # NOTE(review): named_parameters() yields only the learnable
        # parameters; running statistics of the old layer are not carried
        # over to the new one. Confirm this is intended.
        params = dict((name, value.data) for (name, value) in self.layer.named_parameters())

        # Expand original parameters
        for key in params.keys():
            extended_params[key][:len(params[key])] = params[key]
            for o, c in expanded:
                extended_params[key][c] = extended_params[key][o]
            extended_params[key] = add_noise(extended_params[key]).float()

        # Update argument
        self.args['num_features'] = nf

        # Update the layer
        new_layer = torch.nn.BatchNorm2d(**self.args)
        new_layer.weight.data = extended_params['weight']
        new_layer.bias.data = extended_params['bias']
        self.layer = new_layer

        # Generate new output shape
        out_shape = in_shape
        return out_shape, expanded

    def updated_dest(self, out_shape, expanded=[]):
        """Pass the shape and pairs through unchanged."""
        return out_shape, expanded
        

In [81]:
#export
class ReluEdge(Edge):
    """Edge carrying a `torch.nn.ReLU` layer; shapes pass through untouched."""

    def __init__(self, src, dest, num_features):
        # `num_features` is accepted but not used by this edge.
        self.args = dict()
        activation = torch.nn.ReLU()
        super().__init__(src, dest, layer=activation, identical=False)

    def updated_src(self, in_shape, expanded=[]):
        """Return the input shape and the expanded pairs unchanged."""
        result = (in_shape, expanded)
        return result

    def updated_dest(self, out_shape, expanded=[]):
        """Return the output shape and the expanded pairs unchanged."""
        result = (out_shape, expanded)
        return result
        

In [128]:
#export
class PoolingEdge(Edge):
    """Shared behaviour for 2-D pooling edges built with `ceil_mode=True`.

    Subclasses call `_set_args` and then initialise the `Edge` part
    themselves; this class's own `__init__` deliberately does nothing.
    """

    def __init__(self):
        pass

    def _set_args(self, kernel_size, stride, padding):
        """Normalise the pooling arguments to 2-tuples and store them in `self.args`.

        A `stride` of None falls back to `kernel_size`.
        """
        stride = kernel_size if stride is None else stride
        stride = (stride, stride) if isinstance(stride, int) else stride
        kernel_size = (kernel_size, kernel_size) if isinstance(kernel_size, int) else kernel_size
        padding = (padding, padding) if isinstance(padding, int) else padding

        self.args = {
            'kernel_size': kernel_size,
            'stride': stride,
            'padding': padding,
            'ceil_mode': True
        }

    def updated_src(self, in_shape, expanded=[]):
        """Compute the pooled shape for a 3-element `in_shape`.

        The first entry is kept as is; the other two are pooled. `expanded`
        is returned unchanged.

        Raises:
            Exception: if `in_shape` does not have exactly three entries.
        """
        if len(in_shape) != 3: raise Exception('Unsuitable input shape')

        attr = zip(in_shape[1:],
                   self.args['kernel_size'],
                   self.args['stride'],
                   self.args['padding'])
        out_shape = (in_shape[0],) + tuple(self.calculate_output(x, ks, st, pd)
                                           for x, ks, st, pd in attr)

        return out_shape, expanded

    def calculate_output(self, value, ks, st, pd):
        """Return the pooled length of one spatial axis under `ceil_mode=True`.

        value -- input length, ks -- kernel size, st -- stride, pd -- padding.
        """
        # Integer ceiling of (value + 2*pd - ks) / st, plus one.
        out = -(-(value + 2 * pd - ks) // st) + 1
        # In ceil mode PyTorch drops a last window that would start in the
        # right padding, i.e. beyond the input plus the left padding.
        if (out - 1) * st >= value + pd:
            out -= 1
        return out

In [129]:
#export
class MaxPoolingEdge(PoolingEdge):
    """Pooling edge carrying a `torch.nn.MaxPool2d` layer."""

    def __init__(self, src, dest, kernel_size, stride=None, padding=0):
        self._set_args(kernel_size, stride, padding)
        pool = torch.nn.MaxPool2d(**self.args)
        # Resume the lookup after PoolingEdge, whose __init__ takes no
        # arguments, so the next initialiser in the MRO receives them.
        super(PoolingEdge, self).__init__(src, dest, layer=pool, identical=False)

In [130]:
#export
class AvgPoolingEdge(PoolingEdge):
    """Pooling edge carrying a `torch.nn.AvgPool2d` layer."""

    def __init__(self, src, dest, kernel_size, stride=None, padding=0):
        self._set_args(kernel_size, stride, padding)
        pool = torch.nn.AvgPool2d(**self.args)
        # Resume the lookup after PoolingEdge, whose __init__ takes no
        # arguments, so the next initialiser in the MRO receives them.
        super(PoolingEdge, self).__init__(src, dest, layer=pool, identical=False)

In [133]:
#export
class LinearEdge(Edge):
    """Edge carrying a `torch.nn.Linear` layer."""

    def __init__(self, src, dest, in_features, out_features, bias=True):
        # Keyword arguments used to (re)build the layer.
        self.args = {
            'in_features': in_features,
            'out_features': out_features,
            'bias': bias
        }
        super(LinearEdge, self).__init__(src,
                                         dest,
                                         layer=torch.nn.Linear(**self.args),
                                         identical=False)

    def updated_src(self, in_shape, expanded=[]):
        """Rebuild the layer for `in_shape[0]` input features, keeping its weights.

        Existing weight columns keep their positions. For each
        `(original, copied)` pair in `expanded`, column `copied` receives the
        original layer's column `original`. Any other new column starts at
        zero. Noise is then added via `add_noise`. The bias, when the layer
        has one, is reused unchanged.

        Returns `((out_features,), [])`.
        """
        ni = in_shape[0]

        # Get original parameters
        params = dict((name, value.data) for (name, value) in self.layer.named_parameters())
        old_weight = params['weight']

        # Expand original parameters. Zero-initialise so that input columns
        # not covered by the old weights or by `expanded` hold defined values
        # rather than uninitialised memory.
        expanded_weight = torch.zeros(self.args['out_features'], ni)
        expanded_weight[:, :self.args['in_features']] = old_weight
        for o, c in expanded:
            expanded_weight[:, c] = old_weight[:, o]
        expanded_weight = add_noise(expanded_weight).float()

        # Update the args
        self.args['in_features'] = ni

        # Update the layer
        new_layer = torch.nn.Linear(**self.args)
        new_layer.weight.data = expanded_weight
        # A layer built with bias=False has no 'bias' parameter to carry over.
        if 'bias' in params:
            new_layer.bias.data = params['bias']
        self.layer = new_layer

        # Generate new output shape
        out_shape = (self.args['out_features'],)
        return out_shape, []

In [134]:
class ConvEdge(Edge):
    """Edge carrying a `torch.nn.Conv2d` layer that can be widened in place."""

    def __init__(self, src, dest,
                 in_channels, out_channels, kernel_size, stride=1, padding=0, bias=True):
        """Normalise the convolution arguments to 2-tuples and build the layer.

        A `stride` of None falls back to `kernel_size`.
        """
        stride = kernel_size if stride is None else stride
        stride = (stride, stride) if isinstance(stride, int) else stride
        kernel_size = (kernel_size, kernel_size) if isinstance(kernel_size, int) else kernel_size
        padding = (padding, padding) if isinstance(padding, int) else padding

        # Keyword arguments used to (re)build the layer.
        self.args = {
            'in_channels': in_channels,
            'out_channels': out_channels,
            'kernel_size': kernel_size,
            'stride': stride,
            'padding': padding,
            'bias': bias
        }
        super(ConvEdge, self).__init__(src,
                                       dest,
                                       layer=torch.nn.Conv2d(**self.args),
                                       identical=False)

    def updated_src(self, in_shape, expanded=[]):
        """Rebuild the layer for `in_shape[0]` input channels, keeping its weights.

        Existing input-channel slices keep their positions. For each
        `(original, copied)` pair in `expanded`, input channel `copied`
        receives the original layer's slice for channel `original`. Any other
        new input channel starts at zero. Noise is then added via `add_noise`.
        The bias, when the layer has one, is reused unchanged.

        Returns `(out_shape, [])` where `out_shape` is the output channel
        count followed by the convolved spatial sizes.
        """
        ni = in_shape[0]
        nf = self.args['out_channels']
        kernel_h, kernel_w = self.args['kernel_size']

        # Get original parameters
        params = dict((name, value.data) for (name, value) in self.layer.named_parameters())
        old_weight = params['weight']

        # Expand original parameters. Zero-initialise so that input channels
        # not covered by the old weights or by `expanded` hold defined values
        # rather than uninitialised memory.
        expanded_weight = torch.zeros(nf, ni, kernel_h, kernel_w)
        expanded_weight[:, :self.args['in_channels'], :, :] = old_weight
        for o, c in expanded:
            expanded_weight[:, c, :, :] = old_weight[:, o, :, :]
        expanded_weight = add_noise(expanded_weight).float()

        # Update the args
        self.args['in_channels'] = ni

        # Update the layer
        new_layer = torch.nn.Conv2d(**self.args)
        new_layer.weight.data = expanded_weight
        # A layer built with bias=False has no 'bias' parameter to carry over.
        if 'bias' in params:
            new_layer.bias.data = params['bias']
        self.layer = new_layer

        # Generate new output shape
        attr = zip(in_shape[1:],
                   self.args['kernel_size'],
                   self.args['stride'],
                   self.args['padding'])
        out_shape = (nf,) + tuple(self.calculate_output(x, ks, st, pd)
                                  for x, ks, st, pd in attr)
        return out_shape, []

    def updated_dest(self, out_shape, expanded=[]):
        """Rebuild the layer with `out_shape[0]` output channels.

        Each new output channel is paired with a randomly chosen existing
        "teacher" channel: the new channel receives 0.1 times the teacher's
        current parameters and the teacher is scaled to 0.9 times, so the two
        together sum to the teacher's previous values. Noise is then added
        via `add_noise`.

        `expanded` is accepted only to match `Edge.updated_dest`; it is not
        read, because this method generates the pairs itself.

        Returns `(out_shape, pairs)` where `pairs` lists one
        `(teacher index, new index)` tuple per added channel.
        """
        ni = self.args['in_channels']
        nf = out_shape[0]
        prev_nf = self.args['out_channels']
        kernel_h, kernel_w = self.args['kernel_size']

        # Get original parameters
        params = dict((name, value.data) for (name, value) in self.layer.named_parameters())

        # Allocate the widened parameters; 'bias' only when the layer has one.
        expanded_params = {'weight': torch.zeros(nf, ni, kernel_h, kernel_w)}
        if 'bias' in params:
            expanded_params['bias'] = torch.zeros(nf)

        # One teacher index per new channel, shared by weight and bias.
        teacher_ids = torch.randint(prev_nf, (nf - prev_nf,)).tolist()
        for key in params.keys():
            expanded_params[key][:prev_nf, ...] = params[key]

            for offset, teacher_id in enumerate(teacher_ids):
                new_id = prev_nf + offset
                expanded_params[key][new_id, ...] = expanded_params[key][teacher_id, ...] * 0.1
                expanded_params[key][teacher_id, ...] = expanded_params[key][teacher_id, ...] * 0.9

            # add_noise returns a new object, so its result must be kept.
            expanded_params[key] = add_noise(expanded_params[key]).float()

        # Generate expanded
        new_expanded = [(teacher_id, prev_nf + offset)
                        for offset, teacher_id in enumerate(teacher_ids)]

        # Update the args
        self.args['out_channels'] = nf

        # Update the layer
        new_layer = torch.nn.Conv2d(**self.args)
        new_layer.weight.data = expanded_params['weight']
        if 'bias' in expanded_params:
            new_layer.bias.data = expanded_params['bias']
        self.layer = new_layer

        # Generate new output shape
        out_shape = (nf,) + tuple(out_shape[1:])
        return out_shape, new_expanded

    def calculate_output(self, x, ks, st, pd):
        """Return the convolved length of one spatial axis (floor division).

        x -- input length, ks -- kernel size, st -- stride, pd -- padding.
        """
        return (x + 2 * pd - ks)//st + 1

In [120]:
m = ConvEdge(1, 2, 2, 4, 3)
dict(m.layer.named_parameters())

{'weight': Parameter containing:
 tensor([[[[-1.6480e-01, -4.2588e-02,  8.8091e-02],
           [ 9.7219e-02,  1.4172e-01, -7.5192e-02],
           [ 4.3262e-02, -2.0708e-01, -8.1371e-02]],
 
          [[ 2.9443e-02,  2.2053e-01,  4.1755e-03],
           [ 8.0220e-02,  2.0411e-01,  7.8606e-02],
           [ 2.0686e-01,  1.9042e-02,  2.2630e-01]]],
 
 
         [[[-2.0151e-02,  9.1225e-02, -2.3376e-01],
           [ 1.5859e-01, -1.6063e-01,  2.8137e-02],
           [-1.0477e-01, -6.6642e-02,  6.4592e-02]],
 
          [[-3.4223e-02, -1.6036e-01,  1.6803e-01],
           [-1.6788e-01,  2.2060e-04,  2.1492e-01],
           [ 1.1733e-02,  6.4486e-02, -1.5470e-01]]],
 
 
         [[[ 1.6696e-01,  1.6720e-01,  1.0265e-01],
           [-3.8589e-02, -1.9566e-01,  1.6071e-01],
           [-1.6307e-01,  1.8633e-01, -1.7462e-01]],
 
          [[-1.6424e-01, -1.6820e-01,  1.1688e-01],
           [-7.1783e-02,  1.7585e-01, -1.8183e-01],
           [-2.0020e-01,  1.7580e-01, -8.9867e-02]]],
 
 
    

In [123]:
expanded = m.updated_dest((16, 3, 3))
dict(m.layer.named_parameters())

{'weight': Parameter containing:
 tensor([[[[-1.2014e-01, -3.1046e-02,  6.4219e-02],
           [ 7.0873e-02,  1.0331e-01, -5.4815e-02],
           [ 3.1538e-02, -1.5096e-01, -5.9319e-02]],
 
          [[ 2.1464e-02,  1.6077e-01,  3.0440e-03],
           [ 5.8481e-02,  1.4879e-01,  5.7304e-02],
           [ 1.5080e-01,  1.3881e-02,  1.6497e-01]]],
 
 
         [[[-1.4690e-02,  6.6503e-02, -1.7041e-01],
           [ 1.1562e-01, -1.1710e-01,  2.0512e-02],
           [-7.6378e-02, -4.8582e-02,  4.7087e-02]],
 
          [[-2.4948e-02, -1.1690e-01,  1.2250e-01],
           [-1.2239e-01,  1.6082e-04,  1.5668e-01],
           [ 8.5534e-03,  4.7010e-02, -1.1277e-01]]],
 
 
         [[[ 1.3524e-01,  1.3543e-01,  8.3145e-02],
           [-3.1257e-02, -1.5849e-01,  1.3017e-01],
           [-1.3208e-01,  1.5093e-01, -1.4144e-01]],
 
          [[-1.3304e-01, -1.3624e-01,  9.4672e-02],
           [-5.8144e-02,  1.4244e-01, -1.4728e-01],
           [-1.6216e-01,  1.4240e-01, -7.2793e-02]]],
 
 
    

In [124]:
expanded

((16, 3, 3),
 [(0, 5),
  (2, 6),
  (4, 7),
  (1, 8),
  (4, 9),
  (0, 10),
  (1, 11),
  (2, 12),
  (1, 13),
  (0, 14),
  (3, 15)])

# Export

In [135]:
!python nb2py.py graph.ipynb

Converted graph.ipynb to exp/nb_graph.py


# Test

In [7]:
from fastai.vision import *

## Vanilla CNN

In [8]:
path = untar_data(URLs.MNIST)

In [9]:
# MNIST as single-channel ("L") images, split by the training/testing folders.
data = (
    ImageList.from_folder(path, convert_mode="L")
    .split_by_folder(train="training", valid="testing")
    .label_from_folder()
    .databunch(bs=128)
)

In [10]:
gr = Graph(input_shape=(1, 28, 28), output_shape=(10,))

In [11]:
next = add_conv_block(gr, gr.input, nf=16, ks=3, pd=1, st=2)

In [12]:
next = add_conv_block(gr, next, nf=32, ks=3, pd=1, st=2)

In [13]:
next = add_flatten_layer(gr, next)

In [14]:
next = add_linear_layer(gr, next, no=4*7*7)

In [15]:
next = add_linear_layer(gr, next, gr.output, no=10)

In [16]:
gr.visualize('cnn', './')

In [17]:
module = gr.generate_model()

In [18]:
learn = Learner(data, module, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)

In [19]:
learn.summary()

Generator
Layer (type)         Output Shape         Param #    Trainable 
Conv2d               [16, 14, 14]         160        True      
______________________________________________________________________
BatchNorm2d          [16, 14, 14]         32         True      
______________________________________________________________________
ReLU                 [16, 14, 14]         0          False     
______________________________________________________________________
Conv2d               [32, 7, 7]           4,640      True      
______________________________________________________________________
BatchNorm2d          [32, 7, 7]           64         True      
______________________________________________________________________
ReLU                 [32, 7, 7]           0          False     
______________________________________________________________________
Flatten              [1568]               0          False     
____________________________________________________

In [None]:
learn.fit_one_cycle(5)

## ResNet CNN

In [20]:
path_cifar = untar_data(URLs.CIFAR)

In [21]:
# CIFAR images, split by the train/test folders.
data_cifar = (
    ImageList.from_folder(path_cifar)
    .split_by_folder(train="train", valid="test")
    .label_from_folder()
    .databunch(bs=128)
)

In [22]:
gr_cifar = Graph((3, 32, 32), (10,))

In [23]:
add_res_net(gr_cifar, gr_cifar.input, gr_cifar.output, 1, [2, 2, 2, 2])

2

In [24]:
gr_cifar.visualize('res', './')

In [25]:
module = gr_cifar.generate_model()

In [26]:
learn = Learner(data_cifar, module, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)

In [27]:
learn.summary()

Generator
Layer (type)         Output Shape         Param #    Trainable 
Conv2d               [32, 16, 16]         896        True      
______________________________________________________________________
Conv2d               [64, 16, 16]         18,496     True      
______________________________________________________________________
Conv2d               [64, 16, 16]         36,928     True      
______________________________________________________________________
MaxPool2d            [64, 9, 9]           0          False     
______________________________________________________________________
Conv2d               [64, 9, 9]           36,928     True      
______________________________________________________________________
BatchNorm2d          [64, 9, 9]           128        True      
______________________________________________________________________
ReLU                 [64, 9, 9]           0          False     
____________________________________________________

In [None]:
learn.fit_one_cycle(5, )

## DenseNet CNN

In [28]:
path_cifar = untar_data(URLs.CIFAR)

In [29]:
# CIFAR images, split by the train/test folders.
data_cifar = (
    ImageList.from_folder(path_cifar)
    .split_by_folder(train="train", valid="test")
    .label_from_folder()
    .databunch(bs=128)
)

In [30]:
gr_cifar = Graph((3, 32, 32), (10,))

In [31]:
add_dense_net(gr_cifar, gr_cifar.input, gr_cifar.output)

2

In [32]:
gr_cifar.visualize('dense', './')

In [33]:
module = gr_cifar.generate_model()

In [34]:
learn = Learner(data_cifar, module, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)

In [35]:
learn.summary()

Generator
Layer (type)         Output Shape         Param #    Trainable 
Conv2d               [64, 16, 16]         9,408      True      
______________________________________________________________________
MaxPool2d            [64, 9, 9]           0          False     
______________________________________________________________________
BatchNorm2d          [64, 9, 9]           128        True      
______________________________________________________________________
ReLU                 [64, 9, 9]           0          False     
______________________________________________________________________
Conv2d               [128, 9, 9]          8,192      True      
______________________________________________________________________
BatchNorm2d          [128, 9, 9]          256        True      
______________________________________________________________________
ReLU                 [128, 9, 9]          0          False     
____________________________________________________

In [41]:
learn.fit_one_cycle(20,)

epoch,train_loss,valid_loss,accuracy,time
0,1.344585,1.439104,0.5004,01:02
1,1.181719,1.440325,0.5249,01:01
2,1.686903,92.664459,0.3047,01:06
3,1.739275,1.716761,0.4661,01:05
4,1.514331,13.61487,0.5096,01:03
5,1.277187,1.193492,0.57,01:04
6,0.985121,1.080485,0.6174,00:59
7,0.849237,0.920321,0.6767,01:07
8,0.743322,1.363522,0.533,01:03
9,0.642034,1.113415,0.6477,00:55


In [45]:
learn.fit_one_cycle(20, max_lr=1e-4)

epoch,train_loss,valid_loss,accuracy,time
0,0.000843,1.462713,0.7891,01:02
1,0.000762,1.493414,0.7903,01:01
2,0.000672,1.538346,0.7891,01:02
3,0.000426,1.614513,0.7887,00:57
4,0.000733,1.678316,0.7855,01:04
5,0.000371,1.738605,0.7867,00:59


KeyboardInterrupt: 

In [8]:
torch.randint(10, (10,)).float() / 1e1

tensor([0.5000, 0.4000, 0.4000, 0.7000, 0.3000, 0.4000, 0.3000, 0.7000, 0.6000,
        0.0000])

In [92]:
dict(torch.nn.Conv2d(3, 16, 3).named_parameters())

{'weight': Parameter containing:
 tensor([[[[ 5.1411e-02, -4.0102e-02,  1.4054e-01],
           [ 1.0954e-01,  4.5523e-02,  1.6302e-01],
           [ 3.5304e-02,  2.5018e-04,  1.1435e-01]],
 
          [[ 1.5500e-01,  8.9932e-02,  8.3834e-03],
           [ 8.7250e-02,  1.2378e-01,  1.8330e-02],
           [ 1.5120e-03,  5.6584e-02, -1.3523e-02]],
 
          [[-1.4683e-01,  5.8540e-02, -1.8603e-01],
           [-1.7999e-01, -6.4203e-02,  5.0209e-02],
           [ 1.8808e-01,  4.4955e-03,  6.7790e-02]]],
 
 
         [[[-1.7515e-01, -2.5866e-02, -5.1076e-02],
           [ 9.5340e-02, -4.2842e-02,  7.4488e-02],
           [ 1.0444e-01, -1.7367e-01, -2.0583e-02]],
 
          [[-3.8579e-03,  2.0767e-02, -1.1771e-01],
           [-1.5477e-01,  5.6127e-02, -1.2078e-01],
           [ 9.2482e-02, -5.3869e-02,  1.4729e-02]],
 
          [[-1.3150e-01,  1.7922e-02,  1.4004e-01],
           [-1.1202e-01, -1.8952e-01, -1.2195e-01],
           [ 1.2893e-01, -1.9195e-01, -6.3861e-02]]],
 
 
       