In [2]:
# The architecture of our CNN is given in Figure 1. The structure
# can be summarized as 28×28×1−26×26×4−100−M,
# where M is the number of classes. The input is a grayscale
# image patch. The size of the image patch is 28×28 pixels. Our
# CNN architecture contains only one convolution layer which
# consists of 4 kernels. The size of each kernel is 3 × 3 pixels.
# Unlike traditional CNN architectures, no pooling layer is
# used in our architecture. One fully connected layer
# of 100 neurons follows the convolution layer. The last layer
# consists of a logistic regression with softmax which outputs
# the probability of each class, such that
#   P(y = i | x) = exp(z_i) / sum_{j=1}^{M} exp(z_j),
# where z_i is the i-th output of the last layer.

In [4]:
from __future__ import print_function
import torch
import numpy as np
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [12]:
class Net(nn.Module):
    """Small CNN: one 3x3 convolution (4 kernels, no pooling), one fully
    connected layer of 100 units, and a softmax output layer.

    With the default arguments the layer sizes are
    10x10x1 - 8x8x4 - 100 - 2, exactly as before; other square patch sizes
    (e.g. 28) and class counts can be requested through the constructor.

    Args:
        input_size: side length, in pixels, of the square single-channel
            input patch. Must be at least the kernel size (3).
        num_classes: number of output classes (size of the last layer).

    Raises:
        ValueError: if ``input_size`` is smaller than the 3x3 kernel or
            ``num_classes`` is not positive.
    """

    def __init__(self, input_size=10, num_classes=2):
        super(Net, self).__init__()
        num_kernels = 4
        kernel_size = 3
        hidden_units = 100

        if input_size < kernel_size:
            raise ValueError(
                "input_size must be >= %d, got %r" % (kernel_size, input_size))
        if num_classes < 1:
            raise ValueError(
                "num_classes must be positive, got %r" % (num_classes,))

        # 1 input image channel, 4 output channels, 3x3 square convolution.
        self.conv1 = nn.Conv2d(1, num_kernels, kernel_size)
        # An unpadded, stride-1 convolution shrinks each side by
        # kernel_size - 1 pixels (10 -> 8, 28 -> 26).
        conv_size = input_size - kernel_size + 1
        # Affine operations: y = Wx + b
        self.fc1 = nn.Linear(num_kernels * conv_size * conv_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return per-class probabilities of shape (batch, num_classes).

        ``x`` is a batch of single-channel patches shaped
        (batch, 1, input_size, input_size). Note that the result is already
        softmax-normalised; losses that expect raw scores (for example
        ``nn.CrossEntropyLoss``) would normalise it a second time.
        """
        x = F.relu(self.conv1(x))
        # Flatten every feature map into one vector per sample.
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        # dim=1 is the class axis; naming it explicitly avoids the deprecated
        # implicit-dimension behaviour of softmax.
        return F.softmax(self.fc2(x), dim=1)

    def num_flat_features(self, x):
        """Return the number of elements per sample in ``x``, i.e. the
        product of all dimensions except the leading batch dimension."""
        num_features = 1
        for s in x.size()[1:]:
            num_features *= s
        return num_features

In [13]:
# Instantiate the network and print its layer summary.
net = Net()
print(net)

Net (
  (conv1): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear (256 -> 100)
  (fc2): Linear (100 -> 2)
)


In [14]:
# Number of learnable parameter *tensors* (not individual scalar weights):
# conv1, fc1 and fc2 each contribute one weight tensor and one bias tensor.
params = list(net.parameters())
print(len(params))
# The first tensor is conv1's weight: (out_channels, in_channels, height, width).
print(params[0].size())

6
torch.Size([4, 1, 3, 3])


In [15]:
# Smoke-test the forward pass with one random 10x10 single-channel patch.
# Note: the name `input` shadows the Python builtin within this notebook.
patch_shape = (1, 1, 10, 10)  # (batch, channels, height, width)
input = Variable(torch.randn(*patch_shape))
out = net(input)
# One row per sample and one column per label: 1x2 for a single patch
# and two labels.
print(out)

Variable containing:
 0.5082  0.4918
[torch.FloatTensor of size 1x2]



In [17]:
# Clear the gradient buffers of all parameters, then backpropagate a random
# upstream gradient with the same 1x2 shape as `out`.
net.zero_grad()
random_grad = torch.randn(1, 2)
out.backward(random_grad)

In [6]:
# Loss function used for training.
# NOTE(review): nn.CrossEntropyLoss applies log-softmax to its input itself,
# while Net.forward already returns softmax probabilities -- confirm whether
# the network should emit raw scores when trained with this criterion.
criterion = nn.CrossEntropyLoss()
# Stochastic gradient descent with momentum over all parameters of `net`.
optimizer = optim.SGD(
    params=net.parameters(),
    lr=0.001,
    momentum=0.9,
)