In [1]:
import torch
from torchsummary import summary
from exp.nb_layer import *
from math import ceil
import graphviz

In [2]:
#export
import torch
from math import ceil
from exp.nb_layer import *
from exp.nb_graph_transformer import *
from graphviz import Digraph

# Graph

In [101]:
#export
# Fraction of a weight tensor's peak-to-peak range used by add_noise as the
# total width of the uniform noise it adds (noise lies in +/- range * ratio / 2).
NOISE_RATIO = 1e-4

In [40]:
#export
class Graph:
    """Directed graph of shaped nodes connected by layer-carrying edges.

    Nodes and edges are stored in dictionaries keyed by integer ids that are
    handed out sequentially starting at 1. The constructor creates the input
    node first and the output node second and remembers their ids in
    ``self.input`` and ``self.output``.
    """

    def __init__(self, input_shape, output_shape):
        """Create an empty graph holding only an input and an output node.

        Args:
            input_shape: shape stored on the input node.
            output_shape: shape stored on the output node.
        """
        self.nodes = {}
        self.edges = {}
        self._node_index = 0
        self._edge_index = 0

        self.input = self.add_node(input_shape)
        self.output = self.add_node(output_shape)

    def add_node(self, shape, multi_input=False, layer=None):
        """Register a new node and return its id.

        Args:
            shape: shape stored on the node.
            multi_input: flag stored on the node; such nodes are emitted as a
                layer of their own by ``_reverse_traversal``.
            layer: callable stored on the node; ``_node_as_layer`` calls it
                with no arguments to obtain the node's layer.

        Returns:
            The integer id of the new node.
        """
        self._node_index += 1
        self.nodes[self._node_index] = Node(shape, multi_input, layer)
        return self._node_index

    def insert_node(self, src, multi_input=False, layer=None):
        """Insert a new node directly after ``src`` and return its id.

        All edges that used to leave ``src`` are re-rooted on the new node, and
        a single new edge ``src -> new node`` is added. The new node copies the
        shape of ``src``.

        Args:
            src: id of the node after which to insert.
            multi_input: when True the new node carries ``layer`` itself and is
                linked to ``src`` by an identical edge; when False the new node
                is plain and ``layer`` is placed on the connecting edge.
            layer: the layer for the node (multi-input) or for the edge.

        Returns:
            The integer id of the inserted node.
        """
        src_node = self.nodes[src]
        if multi_input:
            new_node = self.add_node(src_node.shape, multi_input, layer)
        else:
            new_node = self.add_node(shape=src_node.shape)

        # Hand the outgoing edges of ``src`` over to the new node and point
        # each of them at its new source.
        moved_edges = src_node.out_edge
        self.nodes[new_node].out_edge = moved_edges
        for edge_id in moved_edges:
            self.edges[edge_id].src = new_node
        src_node.out_edge = []

        # Reconnect ``src`` to the new node.
        if multi_input:
            self.add_edge(src, new_node, identical=True)
        else:
            self.add_edge(src, new_node, layer=layer, identical=False)

        return new_node

    def add_edge(self, src, dest, layer=None, identical=False):
        """Create an edge from ``src`` to ``dest`` and return its id.

        The edge id is appended to the outgoing list of ``src`` and to the
        incoming list of ``dest``.
        """
        self._edge_index += 1
        self.edges[self._edge_index] = Edge(src, dest, layer, identical)

        self.nodes[src].add_out_edge(self._edge_index)
        self.nodes[dest].add_in_edge(self._edge_index)

        return self._edge_index

    def visualize(self, filename, path):
        """Render the graph as an SVG file via graphviz.

        Nodes are labelled with their id followed by their shape; edges are
        labelled ``"id"`` when identical, otherwise with ``str()`` of their
        layer.

        Args:
            filename: value assigned to the Digraph's ``filename``.
            path: value assigned to the Digraph's ``directory``.
        """
        digraph = Digraph(comment="Model")
        for node_id in self.nodes:
            digraph.node(str(node_id), label=str(node_id) + str(self.nodes[node_id].shape))
        for edge_id in self.edges:
            edge = self.edges[edge_id]
            label = "id" if edge.identical else str(edge.as_layer())
            digraph.edge(str(edge.src), str(edge.dest), label=label)

        digraph.format = 'svg'
        digraph.filename = filename
        digraph.directory = path
        digraph.render(view=False)

    def _node_as_layer(self, id):
        """Return ``(layer, input node ids, node id)`` for node ``id``.

        The layer is obtained by calling the callable stored on the node.
        """
        node = self.nodes[id]
        layer = node.layer()
        inputs = [self.edges[edge_id].src for edge_id in node.in_edge]
        return (layer, inputs, id)

    def _reverse_traversal(self, id, visited):
        """Collect layer tuples for everything that feeds node ``id``.

        Walks incoming edges depth-first, so the tuples of a source node are
        emitted before the tuple of the edge leaving it.

        Args:
            id: node id to start from.
            visited: dict mapping node id to bool; updated in place.

        Returns:
            A list containing ``(layer, src, dest)`` for every non-identical
            edge and ``(layer, inputs, node id)`` for every multi-input node.
        """
        ts = []
        visited[id] = True
        for edge_id in self.nodes[id].in_edge:
            edge = self.edges[edge_id]
            if not visited[edge.src]:
                ts.extend(self._reverse_traversal(edge.src, visited))
            if not edge.identical:
                ts.append((edge.as_layer(), edge.src, edge.dest))
        if self.nodes[id].multi_input:
            ts.append(self._node_as_layer(id))
        return ts

    def generate_model(self):
        """Build a ``Generator`` from the layers that lead to the output node."""
        visited = {key: False for key in self.nodes}
        # Start from the recorded output node rather than a hard-coded id.
        ts = self._reverse_traversal(self.output, visited)
        return Generator(ts)

In [204]:
#export
class Node:
    """A vertex of the graph: a shape plus the ids of its attached edges.

    Attributes:
        shape: shape recorded for this node.
        multi_input: flag given at construction.
        layer: object given at construction (``None`` by default).
        in_edge: list of ids of edges entering the node.
        out_edge: list of ids of edges leaving the node.
    """

    def __init__(self, shape, multi_input=False, layer=None):
        self.layer = layer
        self.multi_input = multi_input
        self.shape = shape
        # Two independent lists; they must not share an object.
        self.in_edge, self.out_edge = [], []

    def add_in_edge(self, edge):
        """Record ``edge`` as entering this node."""
        self.in_edge += [edge]

    def add_out_edge(self, edge):
        """Record ``edge`` as leaving this node."""
        self.out_edge += [edge]

    def num_output(self):
        """Return how many edges leave this node."""
        fan_out = len(self.out_edge)
        return fan_out

    def set_shape(self, shape):
        """Replace the stored shape."""
        self.shape = shape

### TODO on Edges:
    After a layer is modified, its edge should return both the new output shape and the list of expanded features.
    The output shape will be used to decide whether the next or previous layer must also be modified.
    The expanded features will be used to determine which channels are to be doubled or halved.

### OR:
    Fall back to zero-padding when the approach above is not possible.

In [182]:
#export
class Edge:
    """Directed connection between two nodes, optionally carrying a layer.

    Attributes:
        src: id of the node the edge leaves.
        dest: id of the node the edge enters.
        identical: True when the edge is a pass-through that carries no layer.
        layer: the layer given at construction for a non-identical edge,
            ``None`` for an identical edge.
    """

    def __init__(self, src, dest, layer, identical=False):
        self.src = src
        self.dest = dest
        self.identical = identical
        # Always define the attribute so as_layer() returns None for identical
        # edges instead of raising AttributeError. The layer argument is still
        # ignored for identical edges.
        self.layer = None if identical else layer

    def as_layer(self):
        """Return the layer carried by this edge (``None`` when identical)."""
        return self.layer

    def updated_src(self, in_shape, expanded=[]):
        """Hook called when the source side changes; the base does nothing.

        Args:
            in_shape: new input shape.
            expanded: expanded feature indices; never mutated here.

        Returns:
            None. Subclasses in this file return the resulting output shape.
        """
        return None

    def updated_dest(self, out_shape, expanded=[]):
        """Hook called when the destination side changes; the base does nothing.

        Args:
            out_shape: new output shape.
            expanded: expanded feature indices; never mutated here.

        Returns:
            None.
        """
        return None

In [183]:
def add_noise(weight):
    """Return ``weight`` plus small uniform noise scaled to its value range.

    The noise is drawn uniformly from ``[-r/2, r/2]`` where ``r`` is the
    peak-to-peak range of ``weight`` multiplied by ``NOISE_RATIO``.

    Args:
        weight: array-like of numbers with a ``flatten`` method and a
            ``shape``. NOTE(review): BatchNormEdge passes torch tensors here;
            the scratch cells at the end of the notebook show ``np.ptp`` and
            ``np.add`` returning tensors for tensor input, but that depends
            on the installed numpy/torch versions - confirm.

    Returns:
        The element-wise sum of ``weight`` and the noise, as produced by
        ``np.add``.

    Raises:
        ValueError: raised by ``np.ptp`` when ``weight`` is empty.
    """
    # No cell visible before this one imports numpy explicitly, so import it
    # here rather than relying on a star import to provide ``np``.
    import numpy as np

    noise_range = np.ptp(weight.flatten()) * NOISE_RATIO
    half_range = noise_range / 2.0
    noise = np.random.uniform(-half_range, half_range, weight.shape)
    return np.add(weight, noise)
    

In [184]:
class FlattenEdge(Edge):
    """Edge that flattens its input into a one-dimensional shape."""

    def __init__(self, src, dest):
        # NOTE(review): the Flatten class itself is passed here, whereas the
        # other edges in this file pass layer instances - confirm which form
        # the model generator expects.
        super(FlattenEdge, self).__init__(src, dest, layer=Flatten, identical=False)

    def updated_src(self, in_shape, expanded=[]):
        """Return the flattened output shape for ``in_shape``.

        Args:
            in_shape: iterable of dimension sizes.
            expanded: accepted so the signature matches ``Edge.updated_src``
                and the sibling edges; unused and never mutated.

        Returns:
            A 1-tuple holding the product of all entries of ``in_shape``
            (``(1,)`` for an empty shape).
        """
        total = 1
        for dim in in_shape:
            total *= dim
        return (total,)

In [203]:
class BatchNormEdge(Edge):
    """Edge carrying a ``torch.nn.BatchNorm2d`` layer that can be widened.

    Attributes:
        args: keyword arguments describing the current layer
            (``num_features``).
    """

    def __init__(self, src, dest, num_features):
        self.args = {
            'num_features': num_features,
        }
        super(BatchNormEdge, self).__init__(src,
                                            dest,
                                            layer=torch.nn.BatchNorm2d(**self.args),
                                            identical=False)

    def updated_src(self, in_shape, expanded=[]):
        """Widen the layer to ``in_shape[0]`` features and return the new shape.

        The new channels are copies of the channels listed in ``expanded``:
        their weight and bias get a small amount of noise via ``add_noise``,
        and their running statistics are copied unchanged.

        Args:
            in_shape: new input shape; ``in_shape[0]`` is the channel count.
            expanded: indices of existing channels to duplicate, in the order
                the duplicates are appended. Never mutated.

        Returns:
            ``in_shape`` - batch norm does not change the shape.

        Raises:
            Exception: when the current feature count plus ``len(expanded)``
                differs from ``in_shape[0]``.
        """
        # The number of duplicated channels must account for the whole growth.
        if self.args['num_features'] + len(expanded) != in_shape[0]:
            raise Exception('Unsuitable expanded lines')

        # Nothing to widen: skip the rebuild. add_noise cannot handle an empty
        # selection because np.ptp raises on empty input.
        if len(expanded) == 0:
            return in_shape

        old_layer = self.layer

        # Current learnable parameters (weight and bias) as raw tensors.
        params = dict((name, value.data) for (name, value) in old_layer.named_parameters())

        # Append a noised copy of each selected channel.
        for key in params.keys():
            noised_params = add_noise(params[key][expanded]).float()
            params[key] = torch.cat([params[key], noised_params])

        # Build the wider layer and load the expanded parameters into it.
        new_layer = torch.nn.BatchNorm2d(in_shape[0])
        new_layer.weight.data = params['weight']
        new_layer.bias.data = params['bias']

        # Carry the running statistics across as well, so the existing
        # channels keep normalising as before and each duplicate starts from
        # the statistics of its source channel.
        if old_layer.running_mean is not None and old_layer.running_var is not None:
            old_mean = old_layer.running_mean.data
            old_var = old_layer.running_var.data
            new_layer.running_mean.data = torch.cat([old_mean, old_mean[expanded]])
            new_layer.running_var.data = torch.cat([old_var, old_var[expanded]])

        self.layer = new_layer
        self.args['num_features'] = in_shape[0]

        return in_shape
        

In [188]:
class ReluEdge(Edge):
    """Edge carrying a ``torch.nn.ReLU`` layer; shapes pass through unchanged."""

    def __init__(self, src, dest, num_features):
        # num_features is accepted but not used: ReLU takes no size argument.
        self.args = dict()
        activation = torch.nn.ReLU()
        super(ReluEdge, self).__init__(src, dest, layer=activation, identical=False)

    def updated_src(self, in_shape, expanded=[]):
        """Return the output shape, which equals ``in_shape``."""
        out_shape = in_shape
        return out_shape

    def updated_dest(self, out_shape, expanded=[]):
        """Return the input shape, which equals ``out_shape``."""
        in_shape = out_shape
        return in_shape
        

In [195]:
class PoolingEdge(Edge):
    """Shared argument handling and shape inference for 2-D pooling edges.

    Subclasses call ``_set_args`` and then initialise the ``Edge`` base with a
    concrete pooling layer built from ``self.args``.
    """

    def __init__(self):
        # Intentionally empty: the subclasses in this file bypass this
        # initialiser with super(PoolingEdge, self).__init__(...).
        pass

    def _set_args(self, kernel_size, stride, padding):
        """Normalise the pooling arguments into ``self.args``.

        Integer arguments are expanded to ``(value, value)`` pairs; a ``None``
        stride defaults to the kernel size. ``ceil_mode`` is always True.
        """
        if stride is None:
            stride = kernel_size
        if isinstance(stride, int):
            stride = (stride, stride)
        if isinstance(kernel_size, int):
            kernel_size = (kernel_size, kernel_size)
        if isinstance(padding, int):
            padding = (padding, padding)

        self.args = {
            'kernel_size': kernel_size,
            'stride': stride,
            'padding': padding,
            'ceil_mode': True
        }

    def updated_src(self, in_shape, expanded=[]):
        """Return the output shape produced for a 3-entry ``in_shape``.

        Args:
            in_shape: 3-entry shape; the first entry is kept as-is and the
                remaining two are pooled.
            expanded: unused; never mutated.

        Raises:
            Exception: when ``in_shape`` does not have exactly three entries.
        """
        if len(in_shape) != 3:
            raise Exception('Unsuitable input shape')

        per_axis = zip(in_shape[1:],
                       self.args['kernel_size'],
                       self.args['stride'],
                       self.args['padding'])
        pooled = tuple(self.calculate_output(x, ks, st, pd) for x, ks, st, pd in per_axis)
        return (in_shape[0],) + pooled

    def calculate_output(self, value, ks, st, pd):
        """Return the pooled length of one axis under ``ceil_mode=True``.

        Args:
            value: input length of the axis.
            ks: kernel size along the axis.
            st: stride along the axis.
            pd: padding applied on each side of the axis.
        """
        out = ceil((value + 2 * pd - ks) / st + 1)
        # In ceil mode PyTorch ignores a last window that would start in the
        # right padding region, i.e. at or beyond input length + left padding.
        if (out - 1) * st >= value + pd:
            out -= 1
        return out

In [196]:
class MaxPoolingEdge(PoolingEdge):
    """Pooling edge carrying a ``torch.nn.MaxPool2d`` layer."""

    def __init__(self, src, dest, kernel_size, stride=None, padding=0):
        self._set_args(kernel_size, stride, padding)
        pool = torch.nn.MaxPool2d(**self.args)
        # super(PoolingEdge, self) starts the lookup after PoolingEdge, whose
        # own __init__ accepts no arguments.
        super(PoolingEdge, self).__init__(src, dest, layer=pool, identical=False)

In [197]:
class AvgPoolingEdge(PoolingEdge):
    """Pooling edge carrying a ``torch.nn.AvgPool2d`` layer."""

    def __init__(self, src, dest, kernel_size, stride=None, padding=0):
        self._set_args(kernel_size, stride, padding)
        pool = torch.nn.AvgPool2d(**self.args)
        # super(PoolingEdge, self) starts the lookup after PoolingEdge, whose
        # own __init__ accepts no arguments.
        super(PoolingEdge, self).__init__(src, dest, layer=pool, identical=False)

In [None]:
class LinearEdge(Edge):
    """Edge carrying a ``torch.nn.Linear`` layer.

    Attributes:
        args: keyword arguments the layer was built with
            (``in_features``, ``out_features``, ``bias``).
    """

    def __init__(self, src, dest, in_features, out_features, bias=True):
        self.args = {
            'in_features': in_features,
            'out_features': out_features,
            'bias': bias
        }
        super(LinearEdge, self).__init__(src,
                                         dest,
                                         layer=torch.nn.Linear(**self.args),
                                         identical=False)

    def updated_src(self, in_shape, expanded=[]):
        """Placeholder for widening the layer's input; not implemented yet.

        Args:
            in_shape: new input shape.
            expanded: expanded feature indices; never mutated.

        Returns:
            None, like the ``Edge`` base hook.
        """
        # TODO: expand the weight columns for the features listed in
        # ``expanded`` and return the resulting output shape.
        return None

In [None]:
class ConvEdge(Edge):
    """Edge carrying a ``torch.nn.Conv2d`` layer.

    Attributes:
        args: keyword arguments the layer was built with; ``kernel_size``,
            ``stride`` and ``padding`` are stored as pairs when given as ints.
    """

    def __init__(self, src, dest, in_channels, out_channels, kernel_size, stride=1, padding=0, bias=True):
        # Normalise scalar arguments to (height, width) pairs; an explicit
        # stride of None falls back to the kernel size.
        if stride is None:
            stride = kernel_size
        if isinstance(stride, int):
            stride = (stride, stride)
        if isinstance(kernel_size, int):
            kernel_size = (kernel_size, kernel_size)
        if isinstance(padding, int):
            padding = (padding, padding)

        self.args = {
            'in_channels': in_channels,
            'out_channels': out_channels,
            'kernel_size': kernel_size,
            'stride': stride,
            'padding': padding,
            'bias': bias
        }
        # Initialise through this class. Naming MaxPoolingEdge here raised
        # TypeError because a ConvEdge is not a MaxPoolingEdge instance.
        super(ConvEdge, self).__init__(src,
                                       dest,
                                       layer=torch.nn.Conv2d(**self.args),
                                       identical=False)

    def updated_src(self, in_shape, expanded=[]):
        """Placeholder for widening the input channels; not implemented yet.

        Returns:
            None.
        """
        # TODO: expand the input channels listed in ``expanded``.
        return

    def updated_dest(self, out_shape, expanded=[]):
        """Placeholder for widening the output channels; not implemented yet.

        Returns:
            None.
        """
        # TODO: expand the output channels listed in ``expanded``.
        return

# Export

In [42]:
!python nb2py.py graph.ipynb

Converted graph.ipynb to exp/nb_graph.py


# Test

In [7]:
from fastai.vision import *

## Vanilla CNN

In [8]:
path = untar_data(URLs.MNIST)

In [9]:
data = ImageList.from_folder(path, convert_mode="L").split_by_folder(train="training", valid="testing").label_from_folder().databunch(bs=128)

In [10]:
gr = Graph(input_shape=(1, 28, 28), output_shape=(10,))

In [11]:
next = add_conv_block(gr, gr.input, nf=16, ks=3, pd=1, st=2)

In [12]:
next = add_conv_block(gr, next, nf=32, ks=3, pd=1, st=2)

In [13]:
next = add_flatten_layer(gr, next)

In [14]:
next = add_linear_layer(gr, next, no=4*7*7)

In [15]:
next = add_linear_layer(gr, next, gr.output, no=10)

In [16]:
gr.visualize('cnn', './')

In [17]:
module = gr.generate_model()

In [18]:
learn = Learner(data, module, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)

In [19]:
learn.summary()

Generator
Layer (type)         Output Shape         Param #    Trainable 
Conv2d               [16, 14, 14]         160        True      
______________________________________________________________________
BatchNorm2d          [16, 14, 14]         32         True      
______________________________________________________________________
ReLU                 [16, 14, 14]         0          False     
______________________________________________________________________
Conv2d               [32, 7, 7]           4,640      True      
______________________________________________________________________
BatchNorm2d          [32, 7, 7]           64         True      
______________________________________________________________________
ReLU                 [32, 7, 7]           0          False     
______________________________________________________________________
Flatten              [1568]               0          False     
____________________________________________________

In [None]:
learn.fit_one_cycle(5)

## ResNet CNN

In [20]:
path_cifar = untar_data(URLs.CIFAR)

In [21]:
data_cifar = ImageList.from_folder(path_cifar).split_by_folder(train="train", valid="test").label_from_folder().databunch(bs=128)

In [22]:
gr_cifar = Graph((3, 32, 32), (10,))

In [23]:
add_res_net(gr_cifar, gr_cifar.input, gr_cifar.output, 1, [2, 2, 2, 2])

2

In [24]:
gr_cifar.visualize('res', './')

In [25]:
module = gr_cifar.generate_model()

In [26]:
learn = Learner(data_cifar, module, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)

In [27]:
learn.summary()

Generator
Layer (type)         Output Shape         Param #    Trainable 
Conv2d               [32, 16, 16]         896        True      
______________________________________________________________________
Conv2d               [64, 16, 16]         18,496     True      
______________________________________________________________________
Conv2d               [64, 16, 16]         36,928     True      
______________________________________________________________________
MaxPool2d            [64, 9, 9]           0          False     
______________________________________________________________________
Conv2d               [64, 9, 9]           36,928     True      
______________________________________________________________________
BatchNorm2d          [64, 9, 9]           128        True      
______________________________________________________________________
ReLU                 [64, 9, 9]           0          False     
____________________________________________________

In [None]:
learn.fit_one_cycle(5, )

## DenseNet CNN

In [28]:
path_cifar = untar_data(URLs.CIFAR)

In [29]:
data_cifar = ImageList.from_folder(path_cifar).split_by_folder(train="train", valid="test").label_from_folder().databunch(bs=128)

In [30]:
gr_cifar = Graph((3, 32, 32), (10,))

In [31]:
add_dense_net(gr_cifar, gr_cifar.input, gr_cifar.output)

2

In [32]:
gr_cifar.visualize('dense', './')

In [33]:
module = gr_cifar.generate_model()

In [34]:
learn = Learner(data_cifar, module, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)

In [35]:
learn.summary()

Generator
Layer (type)         Output Shape         Param #    Trainable 
Conv2d               [64, 16, 16]         9,408      True      
______________________________________________________________________
MaxPool2d            [64, 9, 9]           0          False     
______________________________________________________________________
BatchNorm2d          [64, 9, 9]           128        True      
______________________________________________________________________
ReLU                 [64, 9, 9]           0          False     
______________________________________________________________________
Conv2d               [128, 9, 9]          8,192      True      
______________________________________________________________________
BatchNorm2d          [128, 9, 9]          256        True      
______________________________________________________________________
ReLU                 [128, 9, 9]          0          False     
____________________________________________________

In [41]:
learn.fit_one_cycle(20,)

epoch,train_loss,valid_loss,accuracy,time
0,1.344585,1.439104,0.5004,01:02
1,1.181719,1.440325,0.5249,01:01
2,1.686903,92.664459,0.3047,01:06
3,1.739275,1.716761,0.4661,01:05
4,1.514331,13.61487,0.5096,01:03
5,1.277187,1.193492,0.57,01:04
6,0.985121,1.080485,0.6174,00:59
7,0.849237,0.920321,0.6767,01:07
8,0.743322,1.363522,0.533,01:03
9,0.642034,1.113415,0.6477,00:55


In [45]:
learn.fit_one_cycle(20, max_lr=1e-4)

epoch,train_loss,valid_loss,accuracy,time
0,0.000843,1.462713,0.7891,01:02
1,0.000762,1.493414,0.7903,01:01
2,0.000672,1.538346,0.7891,01:02
3,0.000426,1.614513,0.7887,00:57
4,0.000733,1.678316,0.7855,01:04
5,0.000371,1.738605,0.7867,00:59


KeyboardInterrupt: 

In [64]:
m = torch.nn.BatchNorm2d(16)

In [73]:
a = dict((name, value) for (name, value) in m.named_parameters())

In [74]:
a

{'weight': Parameter containing:
 tensor([0.3405, 0.8395, 0.8966, 0.1832, 0.9338, 0.4375, 0.2854, 0.5059, 0.2966,
         0.3649, 0.3178, 0.4626, 0.5304, 0.4742, 0.1519, 0.8145],
        requires_grad=True), 'bias': Parameter containing:
 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        requires_grad=True)}

In [96]:
a = torch.Tensor([5, 4, 3])

In [97]:
torch.cat([a, a[[0, 2, 1]]])

tensor([5., 4., 3., 5., 3., 4.])

In [98]:
a[[0,2,1]]

tensor([5., 3., 4.])

In [99]:
np.add(a, a[[2, 1, 0]])

tensor([8., 8., 8.])

In [103]:
a = torch.Tensor([[5, 4, 3], [6, 5, 4]])

In [104]:
np.ptp(a)

tensor(3.)