In [1]:
import torch
import torch.nn as nn
import numpy as np
from matplotlib import pyplot as plt

In [2]:
import sys
sys.path.append("../model_utils/")
from custom_layers import Conv_bn_mask

In [3]:
from custom_gru import BidirRNNLayer, GRUCell, RNNLayer

## Extract PaddlePaddle weights

In [4]:
def load_parameter(file_name, header_size=16):
    """Read a raw parameter file into a flat float32 NumPy array.

    Args:
        file_name: Path of the binary parameter file.
        header_size: Number of leading header bytes to skip before the
            float32 payload starts. Defaults to 16, the value this notebook
            has always used.

    Returns:
        A 1-D ``np.float32`` array holding everything after the header.
    """
    with open(file_name, 'rb') as f:
        f.read(header_size)  # skip header.
        # np.fromfile continues from the current position of the open file.
        return np.fromfile(f, dtype=np.float32)
    
def compute_difference(paddle_outputs, torch_outputs):
    """Return the root-mean-square difference between two arrays.

    Args:
        paddle_outputs: Reference values.
        torch_outputs: Values to compare against the reference.

    Returns:
        The square root of the mean squared element-wise difference.
    """
    residual = paddle_outputs - torch_outputs
    mean_squared = np.mean(residual ** 2.0)
    return np.sqrt(mean_squared)

In [5]:
class cbmX2_bigru_layer(nn.Module):
    """Two ``Conv_bn_mask`` blocks followed by one bidirectional GRU layer.

    The forward pass runs the input through both convolution blocks, reshapes
    the result into rows of ``RNN_INPUT_SIZE`` features, packs it with the
    given lengths, runs the bidirectional GRU and returns the padded result.
    """

    # Feature width of each GRU input row (the conv output is viewed with this last dim).
    RNN_INPUT_SIZE = 41 * 32
    # Hidden size of each GRU direction.
    RNN_HIDDEN_SIZE = 1024

    def __init__(self):
        super(cbmX2_bigru_layer, self).__init__()
        self.conv_bn_mask0 = Conv_bn_mask(ichannel=1,
                                          ochannel=32,
                                          kernel_size=(11, 41),
                                          stride=(3, 2),
                                          padding=(5, 20),
                                          bias=False,
                                          track_running_stats=True)

        self.conv_bn_mask1 = Conv_bn_mask(ichannel=32,
                                          ochannel=32,
                                          kernel_size=(11, 21),
                                          stride=(1, 2),
                                          padding=(5, 10),
                                          bias=False,
                                          track_running_stats=True)

        self.bigru0 = BidirRNNLayer(GRUCell,
                                    input_size=self.RNN_INPUT_SIZE,
                                    hidden_size=self.RNN_HIDDEN_SIZE,
                                    gate_act="relu",
                                    state_act="tanh")
        # self.bigru1 = BidirRNNLayer(GRUCell, input_size=2048, hidden_size=1024, gate_act="relu", state_act="tanh")
        # self.bigru2 = BidirRNNLayer(GRUCell, input_size=2048, hidden_size=1024, gate_act="relu", state_act="tanh")

        # 28 of char + 1 blank
        # vocab list:  ["'", ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
        # self.bottleneck = nn.Linear(2048, 28 + 1)
        # self.softmax = nn.Softmax(dim=0)

    def forward(self, input, length):
        """Run the conv blocks and the bidirectional GRU.

        Args:
            input: Batch tensor handed to the first ``Conv_bn_mask`` block;
                its first dimension is used as the batch size.
            length: Tensor of per-sample sequence lengths. It is passed to
                both conv blocks and, flattened, to ``pack_padded_sequence``.

        Returns:
            The tuple produced by ``pad_packed_sequence(batch_first=True)``:
            the padded data ``[batch_size, length of sample, feature size]``
            and the valid length of each sample.
        """
        x = input
        seq_length = length
        batch_size = x.shape[0]
        x = self.conv_bn_mask0(x, seq_length)
        x = self.conv_bn_mask1(x, seq_length)

        # NOTE(review): view() keeps the memory order of the conv output; if
        # Conv_bn_mask returns (batch, channel, time, freq) a permute would be
        # needed before flattening -- confirm against custom_layers.
        flattened_x = x.view(batch_size, -1, self.RNN_INPUT_SIZE)
        # pack_padded_sequence needs the lengths on the CPU even when the data is not.
        flattened_x = nn.utils.rnn.pack_padded_sequence(flattened_x,
                                                        seq_length.flatten().cpu(),
                                                        batch_first=True)
        # Build the zero initial state next to the activations so the module
        # also works after being moved to another device or dtype.
        initial_state = torch.zeros((2, batch_size, self.RNN_HIDDEN_SIZE),
                                    dtype=x.dtype,
                                    device=x.device)
        flattened_x, _ = self.bigru0(flattened_x, (initial_state,))

        bottleneck_data, batch_sizes, _, _ = flattened_x
        # bottleneck_result = self.bottleneck(bottleneck_data)
        # output = self.softmax(bottleneck_result)
        output = nn.utils.rnn.PackedSequence(bottleneck_data, batch_sizes)
        # this is a special request for use CTC loss in pytorch
        output = nn.utils.rnn.pad_packed_sequence(output, batch_first=True)
        # include both padded data and valid length for each sample
        # data: [batch_size, length of sample, feature size]
        # valid length: [valid length for each sample]
        return output


In [6]:
# Build the network and put it straight into evaluation mode
# (nn.Module.eval() returns the module itself).
cbmX2_bigru_test = cbmX2_bigru_layer().eval()
# Echo the module so the cell still displays the layer summary.
cbmX2_bigru_test

# TODO, double check the normalization part

cbmX2_bigru_layer(
  (conv_bn_mask0): Conv_bn_mask(
    (conv): Conv2d(1, 32, kernel_size=(11, 41), stride=(3, 2), padding=(5, 20), bias=False)
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation): BReLU()
    (mask): Mask()
  )
  (conv_bn_mask1): Conv_bn_mask(
    (conv): Conv2d(32, 32, kernel_size=(11, 21), stride=(1, 2), padding=(5, 10), bias=False)
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation): BReLU()
    (mask): Mask()
  )
  (bigru0): BidirRNNLayer(
    original_name=BidirRNNLayer
    (directions): _ConstModuleList(
      original_name=_ConstModuleList
      (0): RNNLayer(
        original_name=RNNLayer
        (cell): GRUCell(
          original_name=GRUCell
          (bn): BatchNorm1d(3072, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (1): ReverseRNNLayer(
        original_name=ReverseRNNLayer
        (cell): GRUCell(
    

In [7]:
# List the parameter/buffer names of the PyTorch model; these are the keys
# used for the `pretrained_weights` mapping in a later cell.
cbmX2_bigru_test.state_dict().keys()
# TODO: load parameters

odict_keys(['conv_bn_mask0.conv.weight', 'conv_bn_mask0.bn.weight', 'conv_bn_mask0.bn.bias', 'conv_bn_mask0.bn.running_mean', 'conv_bn_mask0.bn.running_var', 'conv_bn_mask0.bn.num_batches_tracked', 'conv_bn_mask1.conv.weight', 'conv_bn_mask1.bn.weight', 'conv_bn_mask1.bn.bias', 'conv_bn_mask1.bn.running_mean', 'conv_bn_mask1.bn.running_var', 'conv_bn_mask1.bn.num_batches_tracked', 'bigru0.directions.0.cell.weight_i', 'bigru0.directions.0.cell.weight_h', 'bigru0.directions.0.cell.bias', 'bigru0.directions.0.cell.bn.weight', 'bigru0.directions.0.cell.bn.bias', 'bigru0.directions.0.cell.bn.running_mean', 'bigru0.directions.0.cell.bn.running_var', 'bigru0.directions.0.cell.bn.num_batches_tracked', 'bigru0.directions.1.cell.weight_i', 'bigru0.directions.1.cell.weight_h', 'bigru0.directions.1.cell.bias', 'bigru0.directions.1.cell.bn.weight', 'bigru0.directions.1.cell.bn.bias', 'bigru0.directions.1.cell.bn.running_mean', 'bigru0.directions.1.cell.bn.running_var', 'bigru0.directions.1.cell.bn.

In [8]:
# Directory holding the raw PaddlePaddle parameter files of the pretrained model.
PARAMS_DIR = "../models/baidu_en8k/params/"
# Sizes used to reshape the flat GRU parameter arrays.
GRU_INPUT_SIZE = 41 * 32
GRU_HIDDEN_SIZE = 1024


def _load_paddle(name):
    """Load one parameter file from PARAMS_DIR as a flat float32 array."""
    return load_parameter(PARAMS_DIR + name)


def _load_conv_weight(index, out_channels, in_channels, kernel_size):
    """Load the weight of conv layer `index` shaped (out, in, kernel_h, kernel_w).

    The file is first reshaped with the two kernel axes in (kernel_w, kernel_h)
    order and then transposed so they match the PyTorch `kernel_size` order.
    """
    kernel_h, kernel_w = kernel_size
    flat = _load_paddle("___conv_%d__.w0" % index)
    stored = flat.reshape(out_channels, in_channels, kernel_w, kernel_h)
    return np.transpose(stored, (0, 1, 3, 2))


def _load_batch_norm(index):
    """Return (gamma, beta, running_mean, running_var) of batch-norm layer `index`.

    File suffixes: .w0 -> gamma, .wbias -> beta, .w1 -> mean, .w2 -> variance.
    """
    prefix = "___batch_norm_%d__" % index
    return (_load_paddle(prefix + ".w0"),
            _load_paddle(prefix + ".wbias"),
            _load_paddle(prefix + ".w1"),
            _load_paddle(prefix + ".w2"))


def _load_gru_direction(index):
    """Return (weight_i, weight_h, bias) for GRU direction `index`."""
    weight_i = _load_paddle("___fc_layer_%d__.w0" % index)
    weight_i = weight_i.reshape(GRU_INPUT_SIZE, GRU_HIDDEN_SIZE * 3)

    # The recurrent weight file is split in two: the first 2*H*H values form an
    # (H, 2H) block, the remaining H*H values an (H, H) block. Both blocks are
    # then concatenated along the column axis into one (H, 3H) matrix.
    flat_h = _load_paddle("___gru_%d__.w0" % index).flatten()
    split = GRU_HIDDEN_SIZE * GRU_HIDDEN_SIZE * 2
    w_u_r = flat_h[:split].reshape(GRU_HIDDEN_SIZE, GRU_HIDDEN_SIZE * 2)
    w_c = flat_h[split:].reshape(GRU_HIDDEN_SIZE, GRU_HIDDEN_SIZE)
    weight_h = np.concatenate([w_u_r, w_c], 1)

    bias = _load_paddle("___gru_%d__.wbias" % index)
    return weight_i, weight_h, bias


# conv block 0 (batch norm 0)
conv0_weights = _load_conv_weight(0, out_channels=32, in_channels=1, kernel_size=(11, 41))
conv0_bn_gamma, conv0_bn_beta, conv0_bn_mean, conv0_bn_var = _load_batch_norm(0)

# conv block 1 (batch norm 1)
conv1_weights = _load_conv_weight(1, out_channels=32, in_channels=32, kernel_size=(11, 21))
conv1_bn_gamma, conv1_bn_beta, conv1_bn_mean, conv1_bn_var = _load_batch_norm(1)

# gru0, direction 0 (fc layer 0, gru 0, batch norm 2)
(bigru0_directions_0_cell_weight_i,
 bigru0_directions_0_cell_weight_h,
 bigru0_directions_0_cell_bias) = _load_gru_direction(0)
(bigru0_directions_0_cell_bn_weight,
 bigru0_directions_0_cell_bn_bias,
 bigru0_directions_0_cell_bn_running_mean,
 bigru0_directions_0_cell_bn_running_var) = _load_batch_norm(2)

# gru0, direction 1 (fc layer 1, gru 1, batch norm 3)
(bigru0_directions_1_cell_weight_i,
 bigru0_directions_1_cell_weight_h,
 bigru0_directions_1_cell_bias) = _load_gru_direction(1)
(bigru0_directions_1_cell_bn_weight,
 bigru0_directions_1_cell_bn_bias,
 bigru0_directions_1_cell_bn_running_mean,
 bigru0_directions_1_cell_bn_running_var) = _load_batch_norm(3)



In [9]:
# Map every state-dict key of the PyTorch model to the NumPy array that was
# loaded for it, grouped layer by layer.
pretrained_weights = dict([
    # first conv block
    ("conv_bn_mask0.conv.weight", conv0_weights),
    ("conv_bn_mask0.bn.weight", conv0_bn_gamma),
    ("conv_bn_mask0.bn.bias", conv0_bn_beta),
    ("conv_bn_mask0.bn.running_mean", conv0_bn_mean),
    ("conv_bn_mask0.bn.running_var", conv0_bn_var),
    # second conv block
    ("conv_bn_mask1.conv.weight", conv1_weights),
    ("conv_bn_mask1.bn.weight", conv1_bn_gamma),
    ("conv_bn_mask1.bn.bias", conv1_bn_beta),
    ("conv_bn_mask1.bn.running_mean", conv1_bn_mean),
    ("conv_bn_mask1.bn.running_var", conv1_bn_var),
    # GRU direction 0
    ("bigru0.directions.0.cell.weight_i", bigru0_directions_0_cell_weight_i),
    ("bigru0.directions.0.cell.weight_h", bigru0_directions_0_cell_weight_h),
    ("bigru0.directions.0.cell.bias", bigru0_directions_0_cell_bias),
    ("bigru0.directions.0.cell.bn.bias", bigru0_directions_0_cell_bn_bias),
    ("bigru0.directions.0.cell.bn.weight", bigru0_directions_0_cell_bn_weight),
    ("bigru0.directions.0.cell.bn.running_mean", bigru0_directions_0_cell_bn_running_mean),
    ("bigru0.directions.0.cell.bn.running_var", bigru0_directions_0_cell_bn_running_var),
    # GRU direction 1
    ("bigru0.directions.1.cell.weight_i", bigru0_directions_1_cell_weight_i),
    ("bigru0.directions.1.cell.weight_h", bigru0_directions_1_cell_weight_h),
    ("bigru0.directions.1.cell.bias", bigru0_directions_1_cell_bias),
    ("bigru0.directions.1.cell.bn.bias", bigru0_directions_1_cell_bn_bias),
    ("bigru0.directions.1.cell.bn.weight", bigru0_directions_1_cell_bn_weight),
    ("bigru0.directions.1.cell.bn.running_mean", bigru0_directions_1_cell_bn_running_mean),
    ("bigru0.directions.1.cell.bn.running_var", bigru0_directions_1_cell_bn_running_var),
])

In [10]:
# Start from the model's own state dict so the `num_batches_tracked` buffers
# keep their current values, then overwrite every other entry with the
# converted pretrained array.
check_dict = cbmX2_bigru_test.state_dict()
converted = {
    name: torch.from_numpy(pretrained_weights[name])
    for name in check_dict
    if 'num_batches_tracked' not in name
}
check_dict.update(converted)
cbmX2_bigru_test.load_state_dict(check_dict)

<All keys matched successfully>

In [26]:
# Shape of the first reference output. NOTE: `np_output` is only assigned in
# the cell below (In [28]), so this cell fails on a fresh kernel run in order.
np_output[0].shape

(85, 2048)

In [28]:
# SECURITY NOTE: allow_pickle=True unpickles arbitrary Python objects; only
# load these .npy dumps from a trusted source.
paddle_samples = np.load("paddle/cbmX2_bigru_input.npy", allow_pickle=True, encoding="bytes")
np_output = np.load("paddle/cbmX2_bigru_output.npy", allow_pickle=True, encoding="bytes")

# Field 0 of each record is used as the input data and field 3 as its length.
np_input = np.array([sample[0] for sample in paddle_samples])
# Explicit int64 so the lengths have the same dtype on every platform.
np_length = np.array([sample[3] for sample in paddle_samples]).astype(np.int64)

torch_input = torch.from_numpy(np_input)
# torch_input = torch_input.type(torch.FloatTensor)
# Sanity check of the conversion (only meaningful if the dtype cast above is enabled).
np.testing.assert_array_almost_equal(torch_input, np_input, decimal=10)

# Add the channel dimension and swap the last two axes before feeding the model.
torch_input = torch.unsqueeze(torch_input, 1)
torch_input = torch_input.transpose(3, 2)

# Inference only: no_grad avoids building the autograd graph and lets the
# result be converted to NumPy without going through `.data`.
with torch.no_grad():
    padded_output, valid_lengths = cbmX2_bigru_test(torch_input, torch.from_numpy(np_length))

# Keep the first sample of the padded batch as a NumPy array.
torch_output = padded_output[0].numpy()

# Guard against silent broadcasting in the element-wise difference.
assert torch_output.shape == np_output[0].shape, (torch_output.shape, np_output[0].shape)

compute_difference(paddle_outputs=np_output[0], torch_outputs=torch_output)


[tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])]
<class 'torch.Tensor'>
tensor([[0., 0., 0.,  ..., 0., 0., 0.]])
after gru
[tensor([[ 0.0000,  0.0792, -0.1876,  ...,  0.1931,  0.0000,  0.0000]],
       grad_fn=<AddBackward0>)]
after gru
[tensor([[ 0.0000,  0.0792, -1.6596,  ...,  0.5133,  0.0000,  0.0000]],
       grad_fn=<AddBackward0>)]
after gru
[tensor([[ 0.0000,  3.2959, -1.6596,  ...,  0.5133,  0.0000,  0.0000]],
       grad_fn=<AddBackward0>)]
after gru
[tensor([[  0.5379,   3.2959,  -1.6596,  ..., -10.8259,   0.0000, -18.6539]],
       grad_fn=<AddBackward0>)]
after gru
[tensor([[   0.5379, -200.2467,   43.5144,  ...,  328.5081,    0.0000,
           76.0479]], grad_fn=<AddBackward0>)]
after gru
[tensor([[ 380.8412, -200.2467,   43.5144,  ...,  328.5081, -227.6659,
           76.0479]], grad_fn=<AddBackward0>)]
after gru
[tensor([[ 3.8084e+02,  2.7146e+07, -1.1783e+07,  ..., -5.7654e+07,
         -2.2767e

  import sys


nan

In [None]:
# Display the current value of `torch_output` for manual inspection.
torch_output

In [None]:
# Compare sample `n` of the reference outputs with the padded PyTorch output.
# NOTE(review): this hands `torch_output` to pad_packed_sequence, while cell
# In [28] rebinds `torch_output` to a NumPy array -- confirm which execution
# order this scratch cell relies on.
n = 0
test = nn.utils.rnn.pad_packed_sequence(torch_output, batch_first=True)[0].data.numpy()[n]
# 85 equals the first dimension of `np_output[0].shape` shown earlier;
# presumably the valid length of sample 0 -- TODO confirm.
compute_difference(np_output[n], test[:85])

In [None]:
# Inspect element 1 of `torch_output`.
# NOTE(review): presumably expects the (data, lengths) tuple returned by the model -- confirm.
torch_output[1]

In [None]:
# Convert element 0 of `torch_output` to a NumPy array.
# NOTE(review): assumes `torch_output[0]` is still a tensor, not the NumPy array left by In [28] -- confirm.
temp = torch_output[0].data.numpy()

In [None]:
# Check that the first reference output matches the first row of `temp` to 5 decimals.
np.testing.assert_array_almost_equal(np_output[0], temp[0],decimal=5)

In [None]:
# NOTE(review): `torch_tile` is not defined or imported anywhere in the visible
# notebook -- confirm where it is supposed to come from.
torch_tile(torch.tensor([[1,2,3,4]]), 0, 3)

In [None]:
# Reshape `result` to (1, -1, 2208).
# NOTE(review): `result` is not assigned anywhere in the visible notebook -- confirm its origin.
flatten = result.view(1, -1, 2208)

In [None]:
# Show the shape of the reshaped tensor.
flatten.size()

In [None]:
# Scratch data for masked_select: a 3x3 byte mask (two rows of ones, one row of
# zeros) and a random 3x3 tensor.
# NOTE(review): newer PyTorch releases expect a bool mask for masked_select --
# confirm against the installed version.
mask = torch.ByteTensor([[1]*3,[1]*3, [0]*3])
hehe = torch.rand((3,3))
hehe

In [None]:
# Select the elements of `hehe` where `mask` is set.
torch.masked_select(hehe,mask)

In [None]:
# Rebind `mask` to a float tensor of zeros with shape (6, 1, 10, 10) and set
# the window [3:7, 3:7] of the last two axes to 1 for every leading index.
mask = torch.zeros([6, 1, 10,10], dtype=torch.float)
mask[:,:,3:7,3:7] = 1

In [None]:
class mask(nn.Module):
    """Module that zeroes everything outside a box of a 3-D indexed tensor."""

    def forward(self, x, batch_info):
        """Multiply `x` by a 0/1 window.

        Args:
            x: Tensor to be masked.
            batch_info: Six bounds ``(c1, c2, w1, w2, h1, h2)``; the window is
                1 inside ``[c1:c2, w1:w2, h1:h2]`` and 0 elsewhere.

        Returns:
            ``x`` multiplied element-wise by the float window.
        """
        lo0, hi0, lo1, hi1, lo2, hi2 = batch_info
        window = torch.zeros_like(x, dtype=torch.float)
        box = (slice(lo0, hi0), slice(lo1, hi1), slice(lo2, hi2))
        window[box] = 1
        return x * window

In [None]:
# Instantiate the masking module defined in the previous cell.
mask_layer = mask()

In [None]:
def VariableRecurrent(batch_sizes, inner):
    """Build a step-by-step recurrence over packed, variable-length data.

    Args:
        batch_sizes: Number of rows to consume at each time step.
        inner: Step function called as ``inner(step_input, hidden, *weight)``
            and returning the next hidden state.

    Returns:
        A ``forward(input, hidden, weight)`` function returning
        ``(final_hidden, output)``, where ``output`` concatenates the first
        hidden component of every step.
    """
    def forward(input, hidden, weight):
        # A bare (non-tuple) hidden state is wrapped so both cases share one path.
        is_flat = not isinstance(hidden, tuple)
        state = (hidden,) if is_flat else hidden

        step_outputs = []
        finished_states = []
        cursor = 0
        previous_size = batch_sizes[0]
        for current_size in batch_sizes:
            chunk = input[cursor:cursor + current_size]
            cursor += current_size

            # When the step size shrinks, set aside the trailing state rows
            # and continue with the remaining ones only.
            shrink = previous_size - current_size
            if shrink > 0:
                finished_states.append(tuple(part[-shrink:] for part in state))
                state = tuple(part[:-shrink] for part in state)
            previous_size = current_size

            if is_flat:
                state = (inner(chunk, state[0], *weight),)
            else:
                state = inner(chunk, state, *weight)

            step_outputs.append(state[0])

        finished_states.append(state)

        # Stitch the set-aside states back together, most recent first.
        merged = tuple(torch.cat(parts, 0) for parts in zip(*reversed(finished_states)))
        assert merged[0].size(0) == batch_sizes[0]
        final_hidden = merged[0] if is_flat else merged
        stacked_output = torch.cat(step_outputs, 0)

        return final_hidden, stacked_output

    return forward

In [None]:
# Three toy sequences of lengths 3, 4 and 5 (each row repeats one value 3 times),
# packed longest-first and unpacked into the packed data and per-step batch sizes.
a = torch.tensor([[11]*3,[12]*3, [13]*3])
b = torch.tensor([[21]*3,[22]*3, [23]*3, [24]*3])
c = torch.tensor([[31]*3,[32]*3, [33]*3, [34]*3, [35]*3])
seq = nn.utils.rnn.pack_sequence([c,b,a], )
# NOTE: `input` shadows the Python builtin of the same name for the rest of the session.
input, batch_sizes, _,_ = seq

In [None]:
# NOTE(review): this assigns the bound method `seq.to` to `unsorted_indices`;
# it looks like an unfinished edit, and a PackedSequence presumably rejects
# attribute assignment -- confirm or delete this cell.
seq.unsorted_indices = seq.to

In [None]:
# Display the `unsorted_indices` field of the packed sequence.
seq.unsorted_indices

In [None]:
def gru_test(input, hidden, weight):
    """Dummy recurrent step: add one to both components of the state.

    Args:
        input: Step input (unused).
        hidden: Pair ``(h, c)`` holding the current state.
        weight: Step weight (unused).

    Returns:
        The pair ``(h + 1, c + 1)``.
    """
    prev_h, prev_c = hidden
    return prev_h + 1, prev_c + 1

In [None]:
# Build the recurrence over the toy `batch_sizes`, using `gru_test` as the step function.
forward = VariableRecurrent(batch_sizes, gru_test)

In [None]:
# Run the recurrence with a zero-initialised (h, c) pair of length 3;
# `[0]` is a placeholder weight list, which `gru_test` does not use.
forward(input, (torch.tensor([0]*3), torch.tensor([0]*3)), [0])