In [7]:
from __future__ import print_function
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np

from Joe_nn_transfer import transfer, util, p2f_trans

%matplotlib inline

## Step 1
Simply define your PyTorch model like usual, and create an instance of it.

In [8]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

class LeNet(nn.Module):
    """LeNet-style CNN: two 5x5 convolutions followed by three linear layers.

    For a 1x32x32 input, each convolution + 2x2 max-pool stage shrinks the
    feature maps to 6x14x14 and then 16x5x5, which is why ``fc1`` takes
    16*5*5 input features.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        # Feature extractor.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Classifier head.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the raw (un-normalised) 10-way scores for a batch ``x``."""
        features = F.max_pool2d(F.relu(self.conv1(x)), 2)
        features = F.max_pool2d(F.relu(self.conv2(features)), 2)
        # Flatten everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


pytorch_network = LeNet()

## Step 2
Determine the names of the layers.

For the above model example it is very straightforward, but if you use param groups it may be a little more involved. To determine the names of the layers the next commands are useful:

In [9]:
# Printing the module lists every sub-layer together with its constructor arguments.
print(pytorch_network)

# The state dict only contains layers that own parameters.
state_dict = pytorch_network.state_dict()
print(util.state_dict_layer_names(state_dict))

# Keys have the form "<layer>.<param>", e.g. "conv1.weight".
for k in state_dict.keys():
    print(k)

# Look at one concrete parameter tensor and its shape.
conv1_weight = state_dict['conv1.weight']
print(conv1_weight)
print(conv1_weight.shape)


LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
['conv1', 'conv2', 'fc1', 'fc2', 'fc3']
conv1.weight
conv1.bias
conv2.weight
conv2.bias
fc1.weight
fc1.bias
fc2.weight
fc2.bias
fc3.weight
fc3.bias
tensor([[[[ 0.0707, -0.0501,  0.0964, -0.1343,  0.1532],
          [-0.0278,  0.0048, -0.1483, -0.1993, -0.0422],
          [-0.1017, -0.0814,  0.1391,  0.1890, -0.1501],
          [-0.0538, -0.0041, -0.0746,  0.1505,  0.1568],
          [-0.1743,  0.0445,  0.0212, -0.0575, -0.1678]]],


        [[[-0.1156, -0.1424,  0.1084,  0.1871,  0.1440],
          [ 0.0989,  0.0634, -0.0953, -0.1825,  0.0041],
          [-0.0897,  0.1815, -0.0961, -0.0455,  0.1424],
          [-0.1721, -0.0341, -0.1097, -0.0370, -0.1893],
          [-0.0813, -0.0

## Step 3
Define an equivalent PaddlePaddle (dygraph) network. Give each layer with params the same attribute name as its PyTorch counterpart so that the state-dict keys of both networks line up.

In [10]:
import paddle
import paddle.fluid as fluid
import numpy as np
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Conv2DTranspose
from paddle.fluid.dygraph.base import to_variable
# K.set_image_data_format('channels_first')
# Define the LeNet network structure
class LeNet(fluid.dygraph.Layer):
    """Paddle dygraph counterpart of the PyTorch ``LeNet`` in this notebook.

    The parameterised sub-layers carry the same attribute names as the PyTorch
    model (conv1, conv2, fc1, fc2, fc3) so both state dicts expose matching keys.

    Args:
        num_classes: Output width of the final ``fc3`` layer. The PyTorch model
            has 10 outputs, so pass ``num_classes=10`` when the two networks
            have to be equivalent.
    """

    def __init__(self, num_classes=1):
        super(LeNet, self).__init__()

        # Convolution + pooling stages: each convolution uses a ReLU activation
        # and is followed by a 2x2 max-pool.
        self.conv1 = Conv2D(num_channels=1, num_filters=6, filter_size=5, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv2 = Conv2D(num_channels=6, num_filters=16, filter_size=5, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        # Fully connected head; fc3 has no activation and yields the raw scores.
        self.fc1 = Linear(input_dim=16*5*5, output_dim=120, act='relu')
        self.fc2 = Linear(input_dim=120, output_dim=84, act='relu')
        self.fc3 = Linear(input_dim=84, output_dim=num_classes)

    def forward(self, x):
        """Run the forward pass and return the ``fc3`` output for batch ``x``."""
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        # Flatten everything except the batch dimension before the linear layers.
        # (The previous version called a non-existent ``self.conv3`` here.)
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = self.fc1(x)
        x = self.fc2(x)
        # fc3 was constructed but never applied before; it is the output layer.
        x = self.fc3(x)
        return x

with fluid.dygraph.guard():
    # The PyTorch reference ends in Linear(84, 10); the Paddle class defaults to
    # a single output, so the class count has to be passed explicitly for the
    # two networks to have matching fc3 shapes.
    paddle_network = LeNet(num_classes=10)
    print(paddle_network)

    # List the parameter names; they should mirror the PyTorch state-dict keys.
    state_dict = paddle_network.state_dict()
    for k, v in state_dict.items():
        print(k)
    print(state_dict['conv1.weight'])

<__main__.LeNet object at 0x109b5af50>
conv1.weight
conv1.bias
conv2.weight
conv2.bias
fc1.weight
fc1.bias
fc2.weight
fc2.bias
fc3.weight
fc3.bias
name conv2d_0.w_0, dtype: VarType.FP32 shape: [6, 1, 5, 5] 	lod: {}
	dim: 6, 1, 5, 5
	layout: NCHW
	dtype: float
	data: [-0.0195192 0.166801 0.0483513 0.297938 0.00955139 0.0229684 0.533133 0.410709 -0.584196 0.279063 0.244781 0.143331 0.434516 0.115924 -0.0594141 -0.180877 0.307854 0.102176 0.118056 -0.300637 0.37904 -0.10797 -0.147727 -0.112089 0.455957 0.522227 -0.198681 -0.41545 -0.358607 -0.424032 0.517354 0.120713 0.104666 -0.133958 -0.0628879 -0.0890392 -0.162785 -0.00167088 -0.0315742 0.340461 0.165832 0.0689681 0.0988597 0.0272247 -0.352552 0.536348 0.244403 0.0930358 0.280713 0.239592 0.203817 -0.259892 0.365353 -0.228202 0.317915 -0.254233 -0.0914817 0.207284 -0.103901 -0.567613 0.215304 0.144952 0.190905 0.317872 0.0634739 -0.0791167 -0.0649471 0.382461 0.0510031 -0.0663796 0.0116494 -0.0778461 0.00144824 -0.425225 0.413482 0.212

In [11]:

# test_qat()

## Step 4
Now simply convert!

In [12]:
# Copy the PyTorch parameters into the Paddle network.
# NOTE(review): the recorded output of this cell ends in
# "AttributeError: 'LeNet' object has no attribute 'load_weights'", i.e. the
# transfer did not complete in the saved run. Presumably the helper still calls
# a Keras-style `load_weights` on the target model - confirm in p2f_trans.
p2f_trans.pytorch_to_paddle(pytorch_network, paddle_network)


Layer names in PyTorch state_dict ['conv1', 'conv2', 'fc1', 'fc2', 'fc3']
Layer names in paddle state_dict ['conv1', 'conv2', 'fc1', 'fc2', 'fc3']


AttributeError: 'LeNet' object has no attribute 'load_weights'

## Done!

Now let's check whether it was successful. If it was, both networks should produce the same output for the same input.

In [None]:
# Create dummy data: one random batch of six 1x32x32 inputs shared by both frameworks.
data = torch.rand(6, 1, 32, 32)
data_paddle = data.numpy()
data_pytorch = Variable(data, requires_grad=False)

# Do a forward pass in both frameworks.
# A dygraph layer has to be called inside a dygraph guard on a Paddle variable,
# so the NumPy batch is wrapped with to_variable and the result is converted
# back to NumPy for the comparison.
with fluid.dygraph.guard():
    paddle_pred = paddle_network(to_variable(data_paddle)).numpy()
pytorch_pred = pytorch_network(data_pytorch).data.numpy()

In [None]:
# Normalise the Paddle prediction to a NumPy array, whatever type the forward
# pass produced.
paddle_out = paddle_pred.numpy() if hasattr(paddle_pred, "numpy") else np.asarray(paddle_pred)

# The transfer only counts as successful if both frameworks agree numerically
# (up to float32 round-off).
assert paddle_out.shape == pytorch_pred.shape
np.testing.assert_allclose(paddle_out, pytorch_pred, rtol=1e-4, atol=1e-5)

# Show both prediction matrices for a visual comparison.
for title, pred in (("Paddle", paddle_out), ("PyTorch", pytorch_pred)):
    plt.axis('Off')
    plt.title(title)
    plt.imshow(pred)
    plt.show()

If the two predictions are the same, the transfer works :)