In [2]:
# The architecture of our CNN is given in Figure 1. The structure
# can be summarized as 10×10x1−26×26×4−100−M,
# where M is the number of classes. The input is a grayscale
# image patch. The size of the image patch is 28×28 pixels. Our
# CNN architecture contains only one convolution layer which
# consists of 4 kernels. The size of each kernel is 3 × 3 pixels.
# Unlike other traditional CNN architecture, the pooling layer is
# not used in our architecture. Then one fully connected layer
# of 100 neurons follows the convolution layer. The last layer
# consists of a logistic regression with softmax which outputs
# the probability of each class, such that

In [66]:
from __future__ import print_function
import torch
import numpy as np
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pickle
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import torchvision

In [211]:
'''
Get train data
'''
training_data = pickle.load(open('training_data.pkl','r'))

training_data = np.array(training_data)
training_data = training_data #Test on first 2000 image segments

X_Train = training_data[:,0]
y_Train = training_data[:,1]

training_data.shape

(21643, 2)

In [225]:
'''
Get Validation data
'''
validation_data = pickle.load(open('validation_data.pkl','r'))

validation_data = np.array(validation_data)
validation_data = validation_data 

X_Train = validation_data[:,0]
y_Train = validation_data[:,1]

validation_data.shape

(1810, 2)

In [212]:
class Net(nn.Module):

    def __init__(self): # DO NOT HARDCODE
        super(Net, self).__init__()
        # 1 input image channel 10x10, 4 output channels, 3x3 square convolution
        self.conv1 = nn.Conv2d(1, 4, 3)
        self.dropout = nn.Dropout(0.5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(4 * 8 * 8, 100)
        self.fc2 = nn.Linear(100, 2) #Number of classes = 'text'
#         self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = x.view(-1, self.num_flat_features(x))
        #         dropout with 0.5
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        #out = self.sigmoid(x)
#         x = F.log_softmax(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [213]:
net = Net()
print(net)

Net (
  (conv1): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1))
  (dropout): Dropout (p = 0.5)
  (fc1): Linear (256 -> 100)
  (fc2): Linear (100 -> 2)
)


In [214]:
# Total number of learnable parameters
params = list(net.parameters())
print(len(params))
print(params[0].size())

6
torch.Size([4, 1, 3, 3])


In [226]:
# Preparing the data
trainloader = DataLoader(training_data.tolist(), batch_size=1, shuffle=True)
validloader = DataLoader(training_data.tolist())

In [218]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1)

In [232]:
def train_function(trainloader, net, optimizer, criterion):
    running_loss = 0.0
    net.train()
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs.unsqueeze(0).float()), Variable(labels.long())

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.data[0]
#         if i % 2000 == 1999:    # print every 2000 mini-batches
#             print('[%d, %5d] loss: %.3f' %
#                   (epoch + 1, i + 1, running_loss / 2000))
#             running_loss = 0.0
    

In [257]:
def validation_function(validloader, net, optimizer, criterion):
    running_loss = 0.0
    correct = 0
    total = 0
    net.eval()
    for i, data in enumerate(validloader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels_var = Variable(inputs.unsqueeze(0).float(), volatile=True), Variable(labels.long(), volatile=True)

#         # zero the parameter gradients
#         optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels_var)
#         loss.backward()
#         optimizer.step()

        # print statistics
        running_loss += loss.data[0]
#         if i % 200 == 199:    # print every 2000 mini-batches
#             print('[%d, %5d] loss: %.3f' %
#                   (epoch + 1, i + 1, running_loss / 200))
#             running_loss = 0.0
        
        _, predicted = torch.max(outputs.data, 1)
#         print ('predicted: %d' % (predicted))
        total += labels.size(0)
        correct += (predicted == labels).sum()

    print('Accuracy of the network on the 1810 test images: %d %%' % (100 * correct / total))
    

In [256]:
# Training Phase
for epoch in range(5):  # loop over the dataset multiple times
    train_function(trainloader, net, optimizer, criterion)
    running_loss = 0.0
    correct = 0
    total = 0
    net.eval()
    for i, data in enumerate(validloader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels_var = Variable(inputs.unsqueeze(0).float(), volatile=True), Variable(labels.long(), volatile=True)

#         # zero the parameter gradients
#         optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels_var)
#         loss.backward()
#         optimizer.step()

        # print statistics
        running_loss += loss.data[0]
        if i % 200 == 199:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0
        
        _, predicted = torch.max(outputs.data, 1)
#         print ('predicted: %d' % (predicted))
        total += labels.size(0)
        correct += (predicted == labels).sum()

    print('Accuracy of the network on the 1810 test images: %d %%' % (100 * correct / total))

[1,   200] loss: 0.658
[1,   400] loss: 0.743
[1,   600] loss: 0.729
[1,   800] loss: 0.567
[1,  1000] loss: 0.567
[1,  1200] loss: 0.782
[1,  1400] loss: 0.771
[1,  1600] loss: 0.588
[1,  1800] loss: 0.567
[1,  2000] loss: 0.631
[1,  2200] loss: 0.740
[1,  2400] loss: 0.757
[1,  2600] loss: 0.567
[1,  2800] loss: 0.567
[1,  3000] loss: 0.752
[1,  3200] loss: 0.775
[1,  3400] loss: 0.656
[1,  3600] loss: 0.567
[1,  3800] loss: 0.641
[1,  4000] loss: 0.760
[1,  4200] loss: 0.749
[1,  4400] loss: 0.569
[1,  4600] loss: 0.567
[1,  4800] loss: 0.759
[1,  5000] loss: 0.792
[1,  5200] loss: 0.656
[1,  5400] loss: 0.567
[1,  5600] loss: 0.729
[1,  5800] loss: 0.766
[1,  6000] loss: 0.658
[1,  6200] loss: 0.567
[1,  6400] loss: 0.567
[1,  6600] loss: 0.778
[1,  6800] loss: 0.764
[1,  7000] loss: 0.627
[1,  7200] loss: 0.567
[1,  7400] loss: 0.657
[1,  7600] loss: 0.774
[1,  7800] loss: 0.739
[1,  8000] loss: 0.567
[1,  8200] loss: 0.588
[1,  8400] loss: 0.778
[1,  8600] loss: 0.782
[1,  8800] 

[4,  5800] loss: 1.004
[4,  6000] loss: 0.643
[4,  6200] loss: 0.340
[4,  6400] loss: 0.340
[4,  6600] loss: 1.045
[4,  6800] loss: 1.000
[4,  7000] loss: 0.539
[4,  7200] loss: 0.340
[4,  7400] loss: 0.672
[4,  7600] loss: 1.031
[4,  7800] loss: 0.914
[4,  8000] loss: 0.340
[4,  8200] loss: 0.408
[4,  8400] loss: 1.045
[4,  8600] loss: 1.058
[4,  8800] loss: 0.440
[4,  9000] loss: 0.340
[4,  9200] loss: 0.643
[4,  9400] loss: 1.031
[4,  9600] loss: 0.841
[4,  9800] loss: 0.340
[4, 10000] loss: 0.316
[4, 10200] loss: 1.054
[4, 10400] loss: 1.031
[4, 10600] loss: 0.485
[4, 10800] loss: 0.340
[4, 11000] loss: 0.686
[4, 11200] loss: 0.972
[4, 11400] loss: 0.801
[4, 11600] loss: 0.340
[4, 11800] loss: 0.333
[4, 12000] loss: 1.072
[4, 12200] loss: 1.040
[4, 12400] loss: 0.480
[4, 12600] loss: 0.340
[4, 12800] loss: 0.646
[4, 13000] loss: 1.013
[4, 13200] loss: 0.900
[4, 13400] loss: 0.340
[4, 13600] loss: 0.340
[4, 13800] loss: 1.045
[4, 14000] loss: 1.140
[4, 14200] loss: 0.602
[4, 14400] 

In [151]:
inputs.size()

torch.Size([1, 1, 10, 10])

In [254]:
outputs.data


 0.2920 -0.2586
[torch.FloatTensor of size 1x2]

In [103]:
# Input to CNN is 10x10

input = Variable(torch.randn(1, 1, 10, 10)) # Here random input is given
out = net(input)
print(out)
# output size is 1x2 because there are 2 labels

Variable containing:
 0.4865  0.5135
[torch.FloatTensor of size 1x2]



In [17]:
# Zero the gradient buffers of all parameters and backprops with random gradients
net.zero_grad()
out.backward(torch.randn(1, 2))

In [223]:
'''
Get test data
'''
test_data = pickle.load(open('test_data.pkl','r'))

test_data = np.array(test_data)
test_data = test_data #Test on first 2000 image segments

X_Test = test_data[:,0]
y_Test = test_data[:,1]

test_data.shape

(10853, 2)

In [26]:
# DUMMY CODE
# out = net(input)
target = Variable(torch.arange(0, 2))  # a dummy target, for example
criterion = nn.MSELoss()

loss = criterion(out, target)
print(loss)

Variable containing:
 0.2909
[torch.FloatTensor of size 1]



In [83]:
training_data = pickle.load(open('training_data.pkl','r'))

training_data = np.array(training_data)

X_Train = training_data[:,0]
y_Train = training_data[:,1]

N = len(X_Train)
H = X_Train[0].shape[0]
W = X_Train[0].shape[1]
trainTensor = torch.LongTensor(N, H, W)
for i in range(N):
    trainTensor[i] = torch.LongTensor(X_Train[i].tolist())
trainTensor = trainTensor.unsqueeze(1)

In [74]:
X_Train[0]

array([[109, 106, 105, 100,  98,  98,  98,  94,  95,  90],
       [ 38,  48,  44,  32,  38,  35,  36,  29,  33,  29],
       [ 24,  22,  24,  24,  21,  22,  22,  20,  19,  21],
       [  8,   8,   7,   8,  10,  10,   8,   8,   9,   8],
       [ 23,  21,  21,   8,  19,   3,  24,  30,  26,  22],
       [  7,   7,   5,   4,   7,   9,   8,   7,   6,   7],
       [  1,   0,   0,   0,   1,   7,   2,   2,   3,   1],
       [ 13,   7,   3,   4,   2,   7,  11,   4,   9,   9],
       [ 18,  23,  36,  18,  28,  28,  37,  47,  39,  50],
       [ 24,  35,  31,  51,  10,  55,  66,  77,  67,  61]], dtype=uint8)

In [84]:
y_Train[0]

0

In [71]:
dataiter = iter(trainloader)
images, labels = dataiter.next()

In [85]:

dataiter = iter(trainloader)
images, label = dataiter.next()
images


(0 ,.,.) = 
  109  106  105  100   98   98   98   94   95   90
   38   48   44   32   38   35   36   29   33   29
   24   22   24   24   21   22   22   20   19   21
    8    8    7    8   10   10    8    8    9    8
   23   21   21    8   19    3   24   30   26   22
    7    7    5    4    7    9    8    7    6    7
    1    0    0    0    1    7    2    2    3    1
   13    7    3    4    2    7   11    4    9    9
   18   23   36   18   28   28   37   47   39   50
   24   35   31   51   10   55   66   77   67   61
[torch.ByteTensor of size 1x10x10]

In [119]:
inputs

Variable containing:
(0 ,0 ,.,.) = 
  244  247  238  233  232  218  229  228  214  231
  229  239  245  212  216  228  212  216  210  222
  226  225  233  219  236  222  233  235  234  229
  238  233  242  232  235  227  233  229  230  232
  240  237  226  229  226  234  208  234  218  231
  231  226  232  231  214  219  228  238  223  217
  191  212  191  226  214  159  186  220  211  213
  236  232  227  226  217  185  223  208  216  208
  229  224  228  233  227  219  226  208  217  208
  221  230  217  221  232  225  232  220  202  215
[torch.FloatTensor of size 1x1x10x10]

In [149]:
Variable(torch.arange(0, 2))

Variable containing:
 0
 1
[torch.FloatTensor of size 2]

In [153]:
input_t= Variable(torch.randn(3, 5), requires_grad=True)

In [154]:
input_t

Variable containing:
 1.4050 -0.0642  0.6156 -1.4410  0.9281
-0.1144 -0.0128  2.6492 -1.2253 -1.1597
 0.5336  2.3839  1.5902 -1.5398  0.1211
[torch.FloatTensor of size 3x5]

In [156]:
target_t = Variable(torch.LongTensor(3).random_(5))

In [157]:
target_t

Variable containing:
 1
 0
 2
[torch.LongTensor of size 3]

In [162]:
labels.unsqueeze(2).size()

RuntimeError: dimension out of range (expected to be in range of [-2, 1], but got 2)