In [1]:
import torch
from warpctc_pytorch import CTCLoss
import numpy
# Smoke test for warp-ctc: score one two-step sequence against the labels [1, 2]
# and back-propagate through the loss.
ctc_loss = CTCLoss()

# Targets and length tensors; the batch built below holds a single sequence.
labels = torch.IntTensor([1, 2])
label_sizes = torch.IntTensor([2])
probs_sizes = torch.IntTensor([2])

# The literal is laid out as (batch, seq, alphabet); swapping the first two axes
# yields the expected seqLength x batchSize x alphabet_size shape.
probs = torch.FloatTensor([[[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]]]).transpose(0, 1).contiguous()
probs.requires_grad_(True)  # have autograd track gradients with respect to probs

cost = ctc_loss(probs, labels, probs_sizes, label_sizes)
cost.backward()

In [2]:
import torch
from torch.autograd import Variable
from torch.nn.parameter import Parameter
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.nn.init as init

from torch.nn import Linear, Conv2d, BatchNorm2d, MaxPool2d, Dropout2d, Conv1d
from torch.nn.functional import relu, elu, relu6, sigmoid, tanh, softmax

In [3]:
# Hyperparameters of the model.

# Input geometry: forward() receives tensors shaped [batch, channel, height, width].
channels = 1
height = 5
width = 2

# Convolution layer. kernel_size_conv1 and stride_conv1 are plain ints that are
# handed to Conv2d and also used by the output-size helpers below.
num_filters_conv1 = 16
kernel_size_conv1 = 1
stride_conv1 = 1
padding_conv1 = 0
dilation = 1

# Pooling (only referenced by the output-size helpers; the pooling layer in Net
# is commented out).
kernel_size_pool1 = 1
stride_pool1 = 1

# Dense layers: hidden width and number of output classes.
num_l1 = 100
num_classes = 5

def compute_conv_dim(dim_size):
    """Return the convolution output length along one axis.

    Evaluates ``(dim_size - kernel + 2 * padding) / stride + 1`` using the
    module-level ``kernel_size_conv1``, ``padding_conv1`` and ``stride_conv1``,
    then truncates the result with ``int``.
    """
    padded_span = dim_size - kernel_size_conv1 + 2 * padding_conv1
    strided_steps = padded_span / stride_conv1
    return int(strided_steps + 1)

def compute_maxPool_dim(dim_size):
    """Return the max-pool output length along one axis.

    Evaluates ``(dim_size - kernel + 2 * padding) / stride + 1`` using the
    module-level ``kernel_size_pool1`` and ``stride_pool1``, then truncates the
    result with ``int``.

    NOTE(review): the padding term is ``padding_conv1`` (the convolution's
    padding), not a pooling-specific value -- confirm this is intended.
    """
    padded_span = dim_size - kernel_size_pool1 + 2 * padding_conv1
    strided_steps = padded_span / stride_pool1
    return int(strided_steps + 1)

def comput_conv_1d_dim(dim_size):
    """Return the dilated-convolution output length along one axis.

    Evaluates ``(dim_size + 2 * padding - dilation * (kernel - 1) - 1) / stride + 1``
    using the module-level ``padding_conv1``, ``dilation``,
    ``kernel_size_conv1`` and ``stride_conv1``, then truncates with ``int``.
    """
    dilated_reach = dilation * (kernel_size_conv1 - 1)
    usable_span = dim_size + 2 * padding_conv1 - dilated_reach - 1
    return int(usable_span / stride_conv1 + 1)
    
def comput_maxPool_dim(dim_size):
    """Return the dilated max-pool output length along one axis.

    Evaluates ``(dim_size + 2 * padding - dilation * (kernel - 1) - 1) / stride + 1``
    using the module-level ``padding_conv1``, ``dilation``,
    ``kernel_size_pool1`` and ``stride_pool1``, then truncates with ``int``.
    """
    dilated_reach = dilation * (kernel_size_pool1 - 1)
    usable_span = dim_size + 2 * padding_conv1 - dilated_reach - 1
    return int(usable_span / stride_pool1 + 1)

# define network
class Net(nn.Module):
    """Convolution followed by two dense layers, applied row by row.

    Layer sizes are read from the notebook-level hyperparameters (``channels``,
    ``height``, ``width``, ``num_filters_conv1``, ``kernel_size_conv1``,
    ``stride_conv1``, ``padding_conv1``, ``num_l1``, ``num_classes``) at
    construction time.
    """

    def __init__(self):
        super(Net, self).__init__()
        # out_dim = (input_dim - filter_dim + 2 * padding) / stride + 1
        # padding is passed explicitly so the layer agrees with the padding
        # that compute_conv_dim assumes when sizing the dense layer.
        self.conv_1 = Conv2d(in_channels=channels,
                             out_channels=num_filters_conv1,
                             kernel_size=kernel_size_conv1,
                             stride=stride_conv1,
                             padding=padding_conv1)

        self.conv_out_height = compute_conv_dim(height)
        self.conv_out_width = compute_conv_dim(width)

        # Every output row is flattened over (filters, width) before l_1.
        self.l1_in_features = num_filters_conv1 * self.conv_out_width

        self.l_1 = Linear(in_features=self.l1_in_features,
                          out_features=num_l1,
                          bias=True)
        self.l_out = Linear(in_features=num_l1,
                            out_features=num_classes,
                            bias=False)

    def forward(self, x):
        """Return per-row class probabilities.

        Args:
            x: tensor of size [batch, channel, height, width].

        Returns:
            Tensor of size [batch, conv_out_height, num_classes]; the values
            along the last axis are softmax-normalised and sum to 1.
        """
        x = relu(self.conv_1(x))
        # NCHW -> NHCW so each row keeps its filters and columns together.
        x = x.permute(0, 2, 1, 3).contiguous()
        # Take the batch size from the input instead of hard-coding 1, so
        # batches with more than one sample can be reshaped as well.
        x = x.view(x.size(0), self.conv_out_height, self.l1_in_features)
        x = relu(self.l_1(x))
        # Normalise over the class axis (last), not over the row axis.
        return softmax(self.l_out(x), dim=-1)


# Build the network from the hyperparameters defined above and print its
# layer summary.
net = Net()
print(net)

Net(
  (conv_1): Conv2d(1, 16, kernel_size=(1, 1), stride=(1, 1))
  (l_1): Linear(in_features=32, out_features=100, bias=True)
  (l_out): Linear(in_features=100, out_features=5, bias=False)
)


In [8]:
# Uninitialised buffer for 5 samples of shape (1, 5, 5); it is filled in a later cell.
sam = numpy.empty((5, 1, 5, 5))

In [14]:
# Fill the single channel of every sample with uniform random values in [0, 1)
# and echo it. Iterating `sam` yields views, so the assignment writes into `sam`.
for i, sample in enumerate(sam):
    sample[0] = numpy.random.rand(5,5)
    print(sample[0])

[[0.64715801 0.25938183 0.19669304 0.86327342 0.89617698]
 [0.01161764 0.65876866 0.68804475 0.46002251 0.49834358]
 [0.59020645 0.6079038  0.68491672 0.93689378 0.36714086]
 [0.88934532 0.92976987 0.25906369 0.8573994  0.09596173]
 [0.3108552  0.45216214 0.04994604 0.30259031 0.93478389]]
[[0.21192095 0.83646819 0.69787761 0.87380731 0.38436348]
 [0.38722338 0.42302554 0.68732456 0.93009401 0.96046503]
 [0.47771811 0.01868009 0.43962558 0.98811545 0.85404804]
 [0.59922576 0.60232594 0.61542878 0.90748531 0.7052207 ]
 [0.53838507 0.42913368 0.74481268 0.67253602 0.27737164]]
[[0.65192154 0.34374388 0.09167979 0.86410905 0.02437801]
 [0.13978028 0.43741097 0.67711351 0.72087997 0.91150312]
 [0.2188677  0.62445679 0.8601249  0.85243001 0.23723294]
 [0.55991007 0.75165778 0.69800039 0.00878664 0.77735312]
 [0.48648694 0.38013087 0.12926345 0.87434348 0.68915826]]
[[0.11159564 0.39338661 0.8687816  0.71663989 0.36544792]
 [0.70465212 0.98791416 0.53141428 0.09129472 0.84194843]
 [0.3593178

In [15]:
# Uninitialised 5 x 5 buffer for the labels; it is meant to be filled in a later cell.
labels = numpy.empty((5, 5))

In [17]:
# Fill every row of `labels` with 5 random integers drawn from {0, 1, 2, 3}.
# The slice assignment writes through the row view into `labels`; a plain
# `lab = ...` would only rebind the loop variable and leave `labels` holding
# uninitialised memory. Values are stored in the dtype `labels` already has.
for lab in labels:
    lab[:] = numpy.random.randint(0, 4, 5)
    print(lab)

[2 2 3 0 1]
[0 1 1 0 2]
[1 1 3 1 3]
[1 2 0 1 0]
[0 3 0 3 2]
