# Package

In [1]:
#Torch related package
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import torchviz

In [2]:
#cuda related package
import torch.cuda
import torch.backends.cudnn as cudnn
print(torch.backends.cudnn.version())

None


In [3]:
# Other package
import time
import random

In [6]:
#%run Annexe.ipynb #Good trick to launch another notebook

# Commentaire pour la suite (TODO)

Le batch normalisation, il y en existe plusieurs, regarder exactement lequel on veut.

On peut enlever le biais dans les convolutions avant la normalisation

Le diminution de la taille des images, pas clair cette division par deux....

# Entrainement 

# CityscapesLoader

In [4]:
class color():
    convolution        = "darkgoldenrod1"
    subSampling        = "darkgoldenrod" 
    fullConvolution    = "firebrick1"
    upSampling         = "firebrick"
    batchNormalization = "deepskyblue3"
    relu               = "darkolivegreen3"
    add                = "bisque3"
    dropout            = "darkviolet"

In [46]:
classes = ['road', 'sidewalk','building', 'wall', 'fence', 'pole', 'traffic light', 'traffic sign',
           'vegetation', 'terrain', 'sky', 'person', 'rider', 'car', 'truck', 'bus','train', 'motorcycle', 'bicycle']
number_classes = len(classes)

#  GridNet

In [5]:
class firstConv(nn.Module):
    
    """
    (1) = nInputs : number of features map for the input
    (2) = nOutputs : number of features map for the output
    This is the first convolution used to enter into the grid.
    """
    def __init__(self,nInputs,nOutputs):
        super(firstConv, self).__init__()

        self.conv1 = nn.Conv2d(in_channels = nInputs, out_channels = nOutputs,
                            kernel_size = (3,3), stride=(1,1), padding=(1,1), dilation=1, groups=1, bias=True)
        
        self.batch1 = nn.BatchNorm2d(num_features = nOutputs, eps=1e-05, momentum=0.1,affine=True)
        
        self.ReLU1 = nn.ReLU()
        
        self.conv2 = nn.Conv2d(in_channels = nOutputs, out_channels = nOutputs,
                            kernel_size = (3,3), stride=(1,1), padding=(1,1), dilation=1, groups=1, bias=True)
        
        self.batch2 = nn.BatchNorm2d(num_features = nOutputs, eps=1e-05, momentum=0.1,affine=True)
                
        self.ReLU2 = nn.ReLU()
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.batch1(x)
        x = self.ReLU1(x)
        x = self.conv2(x)
        x = self.batch2(x)
        x = self.ReLU2(x)
        return x

    
net = firstConv(nInputs = 3,nOutputs = 3)
print(net)

firstConv (
  (conv1): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batch1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
  (ReLU1): ReLU ()
  (conv2): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batch2): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
  (ReLU2): ReLU ()
)


In [6]:
class convSequence(nn.Module):
    
    """
    (1) = nInput : number of features map for the input
    (2) = nOutput : number of features map for the output
    (3) = dropFactor : Total Dropout on the entire Sequence, there is a probability p = dropFactor that
        the residual is deleted.
    This class reprensent a residual bloc that doesn't change number nor the size of the features maps
    """
    def __init__(self,nInputs,nOutputs,dropFactor):
        super(convSequence, self).__init__()
        self.dropFactor = dropFactor
        self.batch1 = nn.BatchNorm2d(num_features = nInputs, eps=1e-05, momentum=0.1,affine=True)

        self.conv1 = nn.Conv2d(in_channels = nInputs, out_channels = nOutputs,
                            kernel_size = (3,3), stride=(1,1), padding=(1,1), dilation=1, groups=1, bias=True)
        
        self.ReLU1 = nn.ReLU()
        
        self.batch2 = nn.BatchNorm2d(num_features = nOutputs, eps=1e-05, momentum=0.1,affine=True)
        
        self.conv2 = nn.Conv2d(in_channels = nOutputs, out_channels = nOutputs,
                            kernel_size = (3,3), stride=(1,1), padding=(1,1), dilation=1, groups=1, bias=True)
        
        self.ReLU2 = nn.ReLU()
        
        
    def forward(self, x_init):
        x = self.batch1(x_init)
        x = self.conv1(x)
        x = self.ReLU1(x)
        x = self.batch2(x)
        x = self.conv2(x)
        x = self.ReLU2(x)
        #Small trick : *1 is usefull to transforme Boolean into Integer
        x = ((random.random() > self.dropFactor)*1)*x
        x = x_init + x
        return x

    
net = convSequence(nInputs = 3,nOutputs = 3,dropFactor = 0.1)
print(net)

convSequence (
  (batch1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
  (conv1): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (ReLU1): ReLU ()
  (batch2): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
  (conv2): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (ReLU2): ReLU ()
)


In [7]:
class subSamplingSequence(nn.Module):
    
    """
    (1) = nInput : number of features map for the input
    (2) = nOutput : number of features map for the output
    This class represente a bloc that reduce the resolution of each feature map(factor2)
    """
    def __init__(self, nInputs, nOutputs):
        super(subSamplingSequence, self).__init__()
        
        self.batch1 = nn.BatchNorm2d(num_features = nInputs, eps=1e-05, momentum=0.1,affine=True)

        self.conv1 = nn.Conv2d(in_channels = nInputs, out_channels = nOutputs,
                            kernel_size = (3,3), stride=(2,2), padding=(1,1), dilation=1, groups=1, bias=True)
        
        self.ReLU1 = nn.ReLU()
        
        self.batch2 = nn.BatchNorm2d(num_features = nOutputs, eps=1e-05, momentum=0.1,affine=True)
        
        self.conv2 = nn.Conv2d(in_channels = nOutputs, out_channels = nOutputs,
                            kernel_size = (3,3), stride=(1,1), padding=(1,1), dilation=1, groups=1, bias=True)

        self.ReLU2 = nn.ReLU()

    def forward(self, x):
        x = self.batch1(x)
        x = self.conv1(x)
        x = self.ReLU1(x)
        x = self.batch2(x)
        x = self.conv2(x)
        x = self.ReLU2(x)
        return x

    
network = subSamplingSequence(nInputs = 3,nOutputs = 6)
print(network)
a = torch.randn(2, 3, 7, 7)
inputs = Variable(a)
out = network(inputs)
print(out.size())

subSamplingSequence (
  (batch1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
  (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (ReLU1): ReLU ()
  (batch2): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True)
  (conv2): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (ReLU2): ReLU ()
)
torch.Size([2, 6, 4, 4])


In [8]:
class upSamplingSequence(nn.Module):
    
    """
    (1) = nInput : number of features map for the input
    (2) = nOutput : number of features map for the output
    This class represente a bloc that increase the resolution of each feature map(factor2)
    """
    def __init__(self, nInputs, nOutputs):
        super(upSamplingSequence, self).__init__()
        
        self.batch1 = nn.BatchNorm2d(num_features = nInputs, eps=1e-05, momentum=0.1,affine=True)

        self.convTranspose1 = nn.ConvTranspose2d(in_channels = nInputs, out_channels = nOutputs,
                            kernel_size = (3,3), stride=(2,2), padding=(1,1), dilation=1, groups=1, bias=True)
        
        self.ReLU1 = nn.ReLU()
        
        self.batch2 = nn.BatchNorm2d(num_features = nOutputs, eps=1e-05, momentum=0.1,affine=True)
        
        self.conv2 = nn.Conv2d(in_channels = nOutputs, out_channels = nOutputs,
                            kernel_size = (3,3), stride=(1,1), padding=(1,1), dilation=1, groups=1, bias=True)

        self.ReLU2 = nn.ReLU()

        
    def forward(self, x):
        x = self.batch1(x)
        x = self.convTranspose1(x)
        x = self.ReLU1(x)
        x = self.batch2(x)
        x = self.conv2(x)
        x = self.ReLU2(x)
        return x


network = upSamplingSequence(nInputs = 6,nOutputs = 3)
print(network)
a = torch.randn(2, 6, 16, 16)
inputs = Variable(a)
out = network(inputs)
print(out.size())

#

upSamplingSequence (
  (batch1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True)
  (convTranspose1): ConvTranspose2d(6, 3, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (ReLU1): ReLU ()
  (batch2): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
  (conv2): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (ReLU2): ReLU ()
)
torch.Size([2, 3, 31, 31])


In [9]:
class lastConv(nn.Module):
    
    """
    (1) = nInputs : number of features map for the input
    (2) = nOutputs : number of features map for the output
    This class represente the last Convolution of the network before the prediction
    """
    def __init__(self,nInputs,nOutputs):
        super(lastConv, self).__init__()

        self.conv1 = nn.Conv2d(in_channels = nInputs, out_channels = nOutputs,
                            kernel_size = (3,3), stride=(1,1), padding=(1,1), dilation=1, groups=1, bias=True)
        
        self.batch1 = nn.BatchNorm2d(num_features = nOutputs, eps=1e-05, momentum=0.1,affine=True)
        
        self.ReLU1 = nn.ReLU()
        
        self.conv2 = nn.Conv2d(in_channels = nOutputs, out_channels = nOutputs,
                            kernel_size = (3,3), stride=(1,1), padding=(1,1), dilation=1, groups=1, bias=True)
        
        self.batch2 = nn.BatchNorm2d(num_features = nOutputs, eps=1e-05, momentum=0.1,affine=True)
                
        self.ReLU2 = nn.ReLU()

        
    def forward(self, x):
        x = self.conv1(x)
        x = self.batch1(x)
        x = self.ReLU1(x)
        x = self.conv2(x)
        x = self.batch2(x)
        x = self.ReLU2(x)
        return x

    
net = lastConv(nInputs = 3,nOutputs = 3)
print(net)


lastConv (
  (conv1): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batch1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
  (ReLU1): ReLU ()
  (conv2): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batch2): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
  (ReLU2): ReLU ()
)


In [111]:
class gridNet(nn.Module):
    
    """
    (1) = nInput : number of features maps for the input
    (2) = nOutput : number of features maps for the output
    (3) = nColumns : number of columns of the gridNet, this number should be divisible by two.
    It count the number of bloc +1
    (4) = nFeatMaps : number of feature at each row of the gridNet
    (5) = dropFactor : factor witch control the dropout of an entire bloc 
    """
    def __init__(self,nInputs, nOutputs, nColumns, nFeatMaps, dropFactor):
        super(gridNet, self).__init__()
        
        #Define some parameters as an attribut of the class
        len_nfeatureMaps = len(nFeatMaps)
        self.nColumns = nColumns
        self.nFeatMaps = nFeatMaps
        self.len_nfeatureMaps = len_nfeatureMaps
        
        # A normalisation before any computation
        self.batchNormInitial = nn.BatchNorm2d(num_features = nInputs, eps=1e-05, momentum=0.1,affine=True)

        # The first convolution before entering into the grid.
        self.firstConv = firstConv(nInputs = nInputs, nOutputs = nFeatMaps[0])
        
        
        # We create the Grid. We will creat conv and sub/up sequences with different name.
        # The name is : "sequenceName" + starting position of the sequence(i,j) + "to" + ending position (k,l)
        for i in range(len(nFeatMaps)):
            for j in range(nColumns):
                #We don t creat a residual bloc on the last column
                if(j < (nColumns - 1)):
                    setattr(self, "convSequence" + str(i) + "_" + str(j) + "to" + str(i) + "_" + str(j + 1),
                            convSequence(nFeatMaps[i], nFeatMaps[i],dropFactor))
                
                #We creat subSampling only on half of the grid and not in the last row
                if(j < (nColumns // 2) and i < (len(nFeatMaps)-1)):
                    setattr(self, "subSamplingSequence" + str(i) + "_" + str(j) + "to" + str(i + 1) + "_" + str(j),
                            subSamplingSequence(nFeatMaps[i], nFeatMaps[i+1]))
                
                #Welook a the other half but not the first row
                if(j >= (nColumns // 2) and i > 0):
                    setattr(self, "upSamplingSequence" + str(i) + "_" + str(j) + "to" + str(i - 1) + "_" + str(j),
                            upSamplingSequence(nFeatMaps[i], nFeatMaps[i-1]))

        # The last convolution before the result.
        self.lastConv = lastConv(nInputs = nFeatMaps[0], nOutputs = nOutputs)    
    
        self.batchNormFinal = nn.BatchNorm2d(num_features = nInputs, eps=1e-05, momentum=0,affine=True)
    
    """This function return the fusion of the actual value on (i,j) and the new data which come from the sampling
    (1) = X_i_j : The value on the grid a the position (i,j)
    (2) = SamplingSequence : The sampling that should be added to the point (i,j)
    """
    def addTransform(self,X_i_j,SamplingSequence):
        return(X_i_j + SamplingSequence)
    
    
    
    def forward(self, x):

        # A normalisation before any computation
        x = self.batchNormInitial(x)
        # The first convolution before entering into the grid.
        x = self.firstConv(x)
        
        # X is the matrix that represente the values of the features maps at the point (i,j) in the grid.
        X = [[0 for i in range(self.nColumns)] for j in range(self.len_nfeatureMaps)]
        #The input of the grid is on (0,0)
        X[0][0] = x
        
        # Looking on half of the grid, with sumsampling and convolution sequence
        for j in range(self.nColumns//2):
            for i in range(self.len_nfeatureMaps):
                #For the first column, there is only subsampling
                if(j > 0):
                    #This syntaxe call self.conSequencei_(j-1)toi_j(X[i][j-1])
                    X[i][j] = getattr(self,"convSequence"
                                      + str(i) + "_" + str(j-1) + "to" + str(i) + "_" + str(j))(X[i][j-1])
                
                # For the first row, there is only ConvSequence (residual bloc)
                if(i > 0):
                    X[i][j] = self.addTransform(X[i][j] , getattr(self,"subSamplingSequence"
                                                + str(i-1) + "_" + str(j) + "to" + str(i) + "_" + str(j))(X[i-1][j]))

        # Looking on the other half of the grid
        for j in range(self.nColumns//2,self.nColumns):
            for i in range(self.len_nfeatureMaps-1,-1,-1):
                X[i][j] = getattr(self,"convSequence" +
                                      str(i) + "_" + str(j-1) + "to" + str(i) + "_" + str(j))(X[i][j-1])

                
                # There is no upSampling on the last row
                if(i < (self.len_nfeatureMaps - 1)):
                    X[i][j] = self.addTransform(X[i][j], getattr(self,"upSamplingSequence"
                                                + str(i+1) + "_" + str(j) + "to" + str(i) + "_" + str(j))(X[i+1][j]))

        x_final = self.lastConv(X[0][self.nColumns - 1])
        #x_final = self.batchNormFinal(x_final)
        if(False):
            print("Size of different X_i_j")
            for i1,i2 in enumerate(X):
                for j1,j2 in enumerate(i2):
                    print("Dim(X(" + str(i1) + ")(" + str(j1) + ")) : ",j2.size())
                    
        
        return x_final


network = gridNet(nInputs = 3,nOutputs = 3, nColumns = 4, nFeatMaps = [3,6,12,24], dropFactor = 0.1)
print(network)


gridNet (
  (batchNormInitial): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
  (firstConv): firstConv (
    (conv1): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (batch1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
    (ReLU1): ReLU ()
    (conv2): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (batch2): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
    (ReLU2): ReLU ()
  )
  (convSequence0_0to0_1): convSequence (
    (batch1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
    (conv1): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (ReLU1): ReLU ()
    (batch2): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
    (conv2): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (ReLU2): ReLU ()
  )
  (subSamplingSequence0_0to1_0): subSamplingSequence (
    (batch1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
    (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(2, 

# Train

In [42]:
class Parameters():
    def __init__(self,nColumns, nFeatMaps, dropFactor,learning_rate):
        super(Parameters, self).__init__()
        self.nColumns = nColumns
        self.nFeatMaps = nFeatMaps
        self.dropFactor = dropFactor
        self.learning_rate = learning_rate

In [72]:
target = torch.LongTensor([1, 0, 4])
print(target)


 1
 0
 4
[torch.LongTensor of size 3]



In [102]:
def CrossEntropyMultiClass(y_train_estimated, y_train):
    #print(torch.log(y_train_estimated))
    return torch.sum(y_train * torch.log(y_train_estimated))


In [106]:
"""train return
    (0) = parameters : list of parameters of the network
    (1) = x_train : inputs of the training set
    (2) = y_train : outputs of the training set
    (3) = x_validation : inputs of the validation set
    (4) = y_validation : outputs of the validation set
"""
def train(parameters,x_train,y_train):
    
    nFeatureMaps_init = len(x_train[0,:,0,0])
    print(nFeatureMaps_init)
    network = gridNet(nInputs = nFeatureMaps_init,nOutputs = nFeatureMaps_init, nColumns = 4,
                      nFeatMaps = [3,6,12,24], dropFactor = 0.1)

    #criterion = nn.MSELoss()
    criterion = nn.CrossEntropyLoss()
    
    # create your optimizer
    optimizer = optim.SGD(network.parameters(), lr=parameters.learning_rate)


    optimizer.zero_grad()   # zero the gradient buffers
    for i in range(1):
        y_train_estimated = network(x_train)
        #print(y_train_estimated)
        print(torch.sum(y_train_estimated,1))
        #y_train_estimated = y_train_estimated.view(number_classes,10,33,33)

        #loss = criterion(y_train_estimated, y_train)
        loss = CrossEntropyMultiClass(y_train_estimated, y_train)
        loss.backward()
        optimizer.step()    # Does the update
        print(loss)


In [93]:
print(a.size())

torch.Size([10, 19, 33, 33])


In [92]:
print(torch.mean(a,1).size())

torch.Size([10, 1, 33, 33])


In [117]:
a = torch.randn(3,2,4,4)
print(a)
print(torch.norm(a, p=2, dim=1,keepDim = True).size())
print(torch.norm(a, p=2, dim=1,keepDim = True))


(0 ,0 ,.,.) = 
 -0.5060  0.3355 -0.0926 -1.1888
 -0.0631 -0.6041  0.7762  1.3932
  0.8904  0.7731 -1.3304  0.0306
 -1.2425 -0.5525 -0.7996  2.3915

(0 ,1 ,.,.) = 
 -0.1181 -0.0935 -0.6731 -0.2692
  1.0446 -0.4981  0.4258 -0.9175
  0.2209 -0.7444 -0.8104  0.5128
 -0.2682  1.7114  1.2250 -0.0911

(1 ,0 ,.,.) = 
 -0.7856  0.0196 -0.4976  1.3871
 -0.8296 -0.7159  0.6417 -0.5197
  1.0163 -1.2439 -1.3027 -0.4422
  0.3081  1.2636 -0.9944  2.0035

(1 ,1 ,.,.) = 
 -0.6033  0.4444  0.5580 -0.7985
  1.0743  1.5524 -0.5163 -0.3325
  0.4065 -1.7210  0.7766  1.5179
  1.4348  0.8710 -0.6408 -0.9367

(2 ,0 ,.,.) = 
 -0.8687 -1.5544 -0.1717 -0.6499
 -1.9172  0.4544 -1.4561 -1.4480
 -0.1442 -0.0672 -0.3175  0.4960
 -0.6759  1.6532  1.0313  0.1143

(2 ,1 ,.,.) = 
 -0.9479  1.3732  2.0844 -1.2347
 -0.9895 -0.7566  0.1692 -0.3346
  0.7447 -0.2999  0.3205  1.9147
 -0.8564  0.7971 -2.7102  0.5783
[torch.FloatTensor of size 3x2x4x4]



TypeError: torch.norm received an invalid combination of arguments - got (torch.FloatTensor, dim=int, keepDim=bool, p=int), but expected one of:
 * (torch.FloatTensor source)
 * (torch.FloatTensor source, float p)
 * (torch.FloatTensor source, float p, int dim)


In [110]:
a = torch.randn(10, number_classes, 33, 33)
b = torch.randn(10, number_classes, 33,33)*0
x_train = Variable(a)
y_train = Variable(b)

x_validation = Variable(b)
y_validation = Variable(b)
parameters = Parameters( nColumns = 4, learning_rate=0.01, nFeatMaps = [3,6,12,24,48], dropFactor = 0.1)
train(parameters = parameters, x_train=x_train, y_train=y_train)

19
Variable containing:
(0 ,0 ,.,.) = 
  7.4741e-01  1.2831e+00 -8.4799e-02  ...   8.4326e-01  2.0882e+00  2.5320e-01
 -7.7042e-01  5.3256e+00  5.2431e-01  ...   2.3777e+00 -2.5513e+00 -2.3114e+00
  2.3667e+00 -1.1097e-01 -6.5999e-02  ...  -1.3028e+00 -1.3827e+00 -3.2789e+00
                 ...                   ⋱                   ...                
  1.0834e+00  4.3420e+00  4.3725e-01  ...   3.1149e+00  1.9649e+00  2.2383e-01
 -1.9089e-01 -1.7947e+00  1.6055e+00  ...  -2.7149e+00 -2.6868e+00 -5.1451e+00
 -2.9211e-01 -8.3545e-01 -2.3369e+00  ...  -3.7019e+00 -3.0372e+00 -3.5281e+00
     ⋮ 

(1 ,0 ,.,.) = 
 -2.5753e+00  5.3419e-01 -4.0559e+00  ...  -1.6778e+00  1.3057e+00 -2.1984e+00
 -6.1441e-01 -1.2188e+00 -3.4997e+00  ...  -5.1200e+00 -3.5176e+00 -1.1166e+00
 -3.3343e+00 -8.5797e-01 -9.6186e-02  ...  -3.6769e+00 -4.2383e+00 -2.4403e+00
                 ...                   ⋱                   ...                
  1.4940e+00  2.4392e+00 -2.9094e+00  ...  -6.4245e+00 -3.9802e+00 -

# Trainer

# ZeroTarget

# Graphical

In [46]:
import torch
import torch.nn as nn
from torch.nn import Parameter
from torch.autograd import Variable, Function
from collections import defaultdict
import graphviz

"""
This is a rather distorted implementation of graph visualization in PyTorch.
This implementation is distorted because PyTorch's autograd is undergoing refactoring right now.
- neither func.next_functions nor func.previous_functions can be relied upon
- BatchNorm's C backend does not follow the python Function interface
- I'm not even sure whether to use var.creator or var.grad_fn (apparently the source tree and wheel builds use different
  interface now)
As a result, we are forced to manually trace the graph, using 2 redundant mechanisms:
- Function.__call__: this allows us to trace all Function creations. Function corresponds to Op in TF
- Module.forward_hook: this is needed because the above method doesn't work for BatchNorm, as the current C backend does
  not follow the Python Function interface. 
To do graph visualization, follow these steps:
1. register hooks on model: register_vis_hooks(model)
2. pass data through model: output = model(input)
3. remove hooks           : remove_vis_hooks()
4. perform visualization  : save_visualization(name, format='svg') # name is a string without extension
"""


old_function__call__ = Function.__call__

def register_creator(inputs, creator, output):
    """
    In the forward pass, our Function.__call__ and BatchNorm.forward_hook both call this method to register the creators
    inputs: list of input variables
    creator: one of
        - Function
        - BatchNorm module
    output: a single output variable
    """
    cid = id(creator)
    oid = id(output)
    if oid in vars: 
        return
    # connect creator to input
    for input in inputs:
        iid = id(input)
        func_trace[cid][iid] = input
        # register input
        vars[iid] = input
    # connect output to creator
    assert type(output) not in [tuple, list, dict]
    var_trace[oid][cid] = creator
    # register creator and output and all inputs
    vars[oid] = output
    funcs[cid] = creator

hooks = []

def register_vis_hooks(model):
    global var_trace, func_trace, vars, funcs
    remove_vis_hooks()
    var_trace  = defaultdict(lambda: {})     # map oid to {cid:creator}
    func_trace = defaultdict(lambda: {})     # map cid to {iid:input}
    vars  = {}                               # map vid to Variable/Parameter
    funcs = {}                               # map cid to Function/BatchNorm module
    hooks = []                               # contains the forward hooks, needed for hook removal

    def hook_func(module, inputs, output):
        assert 'BatchNorm' in mod.__class__.__name__        # batchnorms don't have shared superclass
        inputs = list(inputs)
        for p in [module.weight, module.bias]:
            if p is not None:
                inputs.append(p)
        register_creator(inputs, module, output)

    for mod in model.modules():
        if 'BatchNorm' in mod.__class__.__name__:           # batchnorms don't have shared superclass
            hook = mod.register_forward_hook(hook_func)
            hooks.append(hook)

    def new_function__call__(self, *args, **kwargs):
        inputs =  [a for a in args            if isinstance(a, Variable)]
        inputs += [a for a in kwargs.values() if isinstance(a, Variable)]
        output = old_function__call__(self, *args, **kwargs)
        register_creator(inputs, self, output)
        return output

    Function.__call__ = new_function__call__


def remove_vis_hooks():
    for hook in hooks:
        hook.remove()

    Function.__call__ = old_function__call__


def save_visualization(name, format='svg'):
    g = graphviz.Digraph(format=format)
    def sizestr(var):
        size = [int(i) for i in list(var.size())]
        return str(size)
    # add variable nodes
    for vid, var in vars.iteritems():
        if isinstance(var, nn.Parameter):
            g.node(str(vid), label=sizestr(var), shape='ellipse', style='filled', fillcolor='red')
        elif isinstance(var, Variable):
            g.node(str(vid), label=sizestr(var), shape='ellipse', style='filled', fillcolor='lightblue')
        else:
            assert False, var.__class__
    # add creator nodes
    for cid in func_trace:
        creator = funcs[cid]
        g.node(str(cid), label=str(creator.__class__.__name__), shape='rectangle', style='filled', fillcolor='orange')
    # add edges between creator and inputs
    for cid in func_trace:
        for iid in func_trace[cid]:
            g.edge(str(iid), str(cid))
    # add edges between outputs and creators
    for oid in var_trace:
        for cid in var_trace[oid]:
            g.edge(str(cid), str(oid))
    g.render(name)
class subSamplingSequence(nn.Module):
    
    """
    (1) = nInput : number of features map for the input
    (2) = nOutput : number of features map for the output
    This class represente a bloc that reduce the resolution of each feature map(factor2)
    """
    def __init__(self, nInputs, nOutputs):
        super(subSamplingSequence, self).__init__()
        self.Seque
        self.batch1 = nn.BatchNorm2d(num_features = nInputs, eps=1e-05, momentum=0.1,affine=True)

        self.conv1 = nn.Conv2d(in_channels = nInputs, out_channels = nOutputs,
                            kernel_size = (3,3), stride=(2,2), padding=(1,1), dilation=1, groups=1, bias=True)
        
        self.ReLU1 = nn.ReLU()
        
        self.batch2 = nn.BatchNorm2d(num_features = nOutputs, eps=1e-05, momentum=0.1,affine=True)
        
        self.conv2 = nn.Conv2d(in_channels = nOutputs, out_channels = nOutputs,
                            kernel_size = (3,3), stride=(1,1), padding=(1,1), dilation=1, groups=1, bias=True)

        self.ReLU2 = nn.ReLU()

    def forward(self, x):
        x = self.batch1(x)
        x = self.conv1(x)
        x = self.ReLU1(x)
        x = self.batch2(x)
        x = self.conv2(x)
        x = self.ReLU2(x)
        return x

    
network = subSamplingSequence(nInputs = 3,nOutputs = 6)
print(network)
a = torch.randn(2, 3, 7, 7)
inputs = Variable(a)
out = network(inputs)
print(out.size())

def visualize(a,network):
    global recon
    inputs = Variable(a)
    register_vis_hooks(network)
    recon = network(inputs)
    remove_vis_hooks()
    save_visualization('pytorch_model', 'png')
    
visualize(a,network)
#resnet18 = models.resnet18()
#y = resnet18(inputs)
# print(y)

g = make_dot(out)
g.view()

AttributeError: 'subSamplingSequence' object has no attribute 'Seque'

In [None]:
from graphviz import Digraph
import re
import torchvision.models as models


def make_dot(var):
    node_attr = dict(style='filled',
                     shape='box',
                     align='left',
                     fontsize='12',
                     ranksep='0.1',
                     height='0.2')
    dot = Digraph(node_attr=node_attr, graph_attr=dict(size="12,12"))
    seen = set()

    def add_nodes(var):
        if var not in seen:
            if isinstance(var, Variable):
                value = '('+(', ').join(['%d'% v for v in var.size()])+')'
                dot.node(str(id(var)), str(value), fillcolor='lightblue')
            else:
                dot.node(str(id(var)), str(type(var).__name__))
            seen.add(var)
            if hasattr(var, 'previous_functions'):
                for u in var.previous_functions:
                    dot.edge(str(id(u[0])), str(id(var)))
                    add_nodes(u[0])
    add_nodes(var.creator)
    return dot