In [1]:
from __future__ import print_function
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np

from Joe_nn_transfer import transfer, util, p2f_trans

%matplotlib inline

Using TensorFlow backend.


## Step 1
Define your PyTorch model as usual and create an instance of it.

In [2]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

class LeNet(nn.Module):
    """LeNet-style convolutional classifier (PyTorch reference model).

    Two 5x5 convolutions (1 -> 6 -> 16 channels), each followed by ReLU and
    2x2 max pooling, feed three fully connected layers (400 -> 120 -> 84 -> 10).
    The first linear layer takes 16*5*5 features, which is what a
    single-channel 32x32 input produces after the two conv/pool stages.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Fully connected classifier head.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the raw (un-normalised) 10-way scores for a batch ``x``."""
        features = F.max_pool2d(F.relu(self.conv1(x)), 2)
        features = F.max_pool2d(F.relu(self.conv2(features)), 2)
        # Collapse channel and spatial dimensions, keeping the batch dimension.
        flat = features.view(features.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


pytorch_network = LeNet()

## Step 2
Determine the names of the layers.

For the model above this is straightforward, but if you use param groups it may be a little more involved. The following commands are useful for determining the layer names:

In [3]:
# Printing the module itself lists every sub-layer with its configuration.
print(pytorch_network)

# The state dict is keyed by "<layer>.<parameter>", so it only covers
# layers that actually own parameters.
state_dict = pytorch_network.state_dict()
print(util.state_dict_layer_names(state_dict))
for param_name in state_dict.keys():
    print(param_name)

# Show one parameter tensor together with its shape.
conv1_weight = state_dict['conv1.weight']
print(conv1_weight)
print(conv1_weight.shape)


LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
['conv1', 'conv2', 'fc1', 'fc2', 'fc3']
conv1.weight
conv1.bias
conv2.weight
conv2.bias
fc1.weight
fc1.bias
fc2.weight
fc2.bias
fc3.weight
fc3.bias
tensor([[[[ 0.1539,  0.0896,  0.0490, -0.1535,  0.0020],
          [ 0.0309, -0.1750,  0.0868, -0.1527, -0.0524],
          [-0.1320, -0.1024,  0.0271, -0.1880, -0.1861],
          [-0.1242,  0.0839, -0.1913,  0.1370,  0.1899],
          [ 0.0739, -0.1770, -0.1475,  0.0048, -0.1703]]],


        [[[ 0.1676,  0.1166,  0.1534, -0.1749,  0.0259],
          [-0.0847,  0.0412, -0.1052,  0.0255, -0.0765],
          [-0.0072, -0.0668, -0.0168, -0.0974,  0.0535],
          [ 0.0466,  0.1062,  0.0685,  0.0344,  0.0292],
          [-0.0633, -0.1

## Step 3
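Define an equivalent PaddlePaddle network. Give each layer that has parameters the same attribute name as in the PyTorch model (`conv1`, `conv2`, `fc1`, ...), so that the layer names in the two state dicts match.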

In [4]:
import paddle
import paddle.fluid as fluid
import numpy as np
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Conv2DTranspose
from paddle.fluid.dygraph.base import to_variable
# K.set_image_data_format('channels_first')
# Define the LeNet network structure
class LeNet(fluid.dygraph.Layer):
    """LeNet-style CNN written with the Paddle fluid dygraph API.

    The layer attributes (conv1, conv2, fc1, fc2, fc3) use the same names as
    the PyTorch LeNet defined earlier in this notebook, so both state dicts
    expose identical parameter names.

    Args:
        num_classes: output size of the final linear layer ``fc3``.
            NOTE: the default is 1; pass ``num_classes=10`` to match the
            10 outputs of the PyTorch model.
    """

    def __init__(self, num_classes=1):
        super(LeNet, self).__init__()

        # Convolution + pooling stages: each convolution applies ReLU and is
        # followed by a 2x2 max pooling with stride 2.
        self.conv1 = Conv2D(num_channels=1, num_filters=6, filter_size=5, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv2 = Conv2D(num_channels=6, num_filters=16, filter_size=5, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        # Fully connected head; only fc1 and fc2 apply ReLU, fc3 emits raw scores.
        self.fc1 = Linear(input_dim=16*5*5, output_dim=120, act='relu')
        self.fc2 = Linear(input_dim=120, output_dim=84, act='relu')
        self.fc3 = Linear(input_dim=84, output_dim=num_classes)

    def forward(self, x):
        """Run the forward pass and return the output of ``fc3``."""
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        # No third convolution exists in this model: flatten straight after
        # pool2 to (batch, features) for the linear layers.
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = self.fc1(x)
        x = self.fc2(x)
        # The final layer was previously skipped, which returned the 84-dim
        # fc2 activations instead of the class scores.
        x = self.fc3(x)
        return x

with fluid.dygraph.guard():
    # Build the Paddle model with 10 outputs: the class default (num_classes=1)
    # would make fc3 84 -> 1, which cannot receive the PyTorch fc3 weights (84 -> 10).
    paddle_network = LeNet(num_classes=10)
    print(paddle_network)

    # List the parameter names so they can be compared with the PyTorch ones.
    state_dict = paddle_network.state_dict()
    for param_name in state_dict.keys():
        print(param_name)
    print(state_dict['conv1.weight'])

<__main__.LeNet object at 0x13102aef0>
conv1.weight
conv1.bias
conv2.weight
conv2.bias
fc1.weight
fc1.bias
fc2.weight
fc2.bias
fc3.weight
fc3.bias
name conv2d_0.w_0, dtype: VarType.FP32 shape: [6, 1, 5, 5] 	lod: {}
	dim: 6, 1, 5, 5
	layout: NCHW
	dtype: float
	data: [0.0269396 0.570315 -0.395852 -0.159138 -0.123513 0.258803 -0.302941 -0.329502 0.00120546 0.185934 0.126938 0.0954494 -0.222355 0.211731 -0.384386 0.190065 0.0127135 0.250071 -0.304662 0.408702 0.144936 -0.201661 0.0899215 -0.0250759 0.00422118 -0.302021 -0.198132 -0.51248 0.124092 -0.163991 0.23053 -0.124536 0.0593972 -0.18371 0.405645 -0.0314066 -0.201687 0.223069 0.355114 -0.106769 0.156184 -0.444167 -0.0635065 -0.364388 -0.301678 -0.0326005 0.348064 -0.167422 0.134496 0.208305 0.711069 0.272064 -0.479625 0.087672 -0.216643 0.12084 0.347996 0.165636 -0.480425 0.300299 -0.275088 -0.199513 0.246383 -0.0783111 0.112633 -0.219666 0.156988 -0.197156 0.209432 0.382418 -0.161711 -0.174804 0.358004 -0.320728 0.526463 -0.232346 -

In [5]:

# test_qat()

## Step 4
Now simply convert!

In [6]:
# Previous call through the `transfer` module, kept for reference:
# transfer.pytorch_to_paddle(keras_network, pytorch_network)
# Presumably copies the weights of pytorch_network into paddle_network
# (inferred from the helper's name -- confirm in p2f_trans).
# NOTE(review): the recorded output of this cell ends in
# "RuntimeError: Parameter not found ... [ conv1.weight ]"; the cause lies
# inside p2f_trans and is not visible from this notebook.
p2f_trans.pytorch_to_paddle(pytorch_network, paddle_network)


Layer names in PyTorch state_dict ['conv1', 'conv2', 'fc1', 'fc2', 'fc3']
Layer names in paddle state_dict ['conv1', 'conv2', 'fc1', 'fc2', 'fc3']
niubi <_io.TextIOWrapper name='save_temp.pdparams' mode='a' encoding='UTF-8'>
niubi <_io.TextIOWrapper name='save_temp.pdparams' mode='a' encoding='UTF-8'>
niubi <_io.TextIOWrapper name='save_temp.pdparams' mode='a' encoding='UTF-8'>
niubi <_io.TextIOWrapper name='save_temp.pdparams' mode='a' encoding='UTF-8'>
niubi <_io.TextIOWrapper name='save_temp.pdparams' mode='a' encoding='UTF-8'>


RuntimeError: Parameter not found, Can't not find [ conv1.weight ] in stat_dictuse_structured_name is set to [True]

## Done!

Now let's check whether it was successful. If it was, both networks should produce the same output for the same input.

In [None]:
# Create dummy data: a batch of six single-channel 32x32 images.
data = torch.rand(6, 1, 32, 32)
data_paddle = data.numpy()
data_pytorch = Variable(data, requires_grad=False)

# Do a forward pass in both frameworks.
# Paddle dygraph layers must be called inside a dygraph guard and on a Paddle
# variable, not on a raw NumPy array; convert the result back to NumPy so it
# can be compared with the PyTorch prediction.
with fluid.dygraph.guard():
    paddle_pred = paddle_network(to_variable(data_paddle)).numpy()
pytorch_pred = pytorch_network(data_pytorch).detach().numpy()

In [None]:
# Verify the transfer: both frameworks must agree on the same input.
# paddle_pred may be a Paddle variable or already a NumPy array, so
# normalise it to NumPy before comparing.
paddle_out = paddle_pred.numpy() if hasattr(paddle_pred, 'numpy') else np.asarray(paddle_pred)

assert paddle_out.shape == pytorch_pred.shape
# float32 results from two frameworks differ by rounding only.
np.testing.assert_allclose(paddle_out, pytorch_pred, rtol=1e-4, atol=1e-5)

# Show both prediction matrices side by side for a visual check.
fig, axes = plt.subplots(1, 2)
for ax, pred, title in zip(axes, (paddle_out, pytorch_pred), ('Paddle', 'PyTorch')):
    ax.imshow(pred)
    ax.set_title(title)
    ax.axis('off')
plt.show()

If the two predictions agree (same shape, and values equal up to floating-point tolerance), the transfer worked :)