Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "add fuse_bn_add_act_ops args" #4914

Merged
Merged 6 commits on Oct 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions PaddleCV/image_classification/build_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,9 @@ def _basic_model(data, model, args, is_train):
image_in = fluid.layers.transpose(
image, [0, 2, 3, 1]) if args.data_format == 'NHWC' else image
image_in.stop_gradient = image.stop_gradient
# fuse_bn_add_act only supports amp training
fuse_bn_add_act=False
if is_train and args.fuse_bn_add_act_ops:
fuse_bn_add_act=True
net_out = model.net(input=image_in,
class_dim=args.class_dim,
data_format=args.data_format,
fuse_bn_add_act=fuse_bn_add_act)
data_format=args.data_format)
else:
net_out = model.net(input=image, class_dim=args.class_dim)
softmax_out = fluid.layers.softmax(net_out, use_cudnn=False)
Expand Down
132 changes: 31 additions & 101 deletions PaddleCV/image_classification/models/resnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class ResNet():
def __init__(self, layers=50):
self.layers = layers

def net(self, input, class_dim=1000, data_format="NCHW", fuse_bn_add_act=False):
def net(self, input, class_dim=1000, data_format="NCHW"):
layers = self.layers
supported_layers = [18, 34, 50, 101, 152]
assert layers in supported_layers, \
Expand Down Expand Up @@ -77,8 +77,7 @@ def net(self, input, class_dim=1000, data_format="NCHW", fuse_bn_add_act=False):
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
name=conv_name,
data_format=data_format,
fuse_bn_add_act=fuse_bn_add_act)
data_format=data_format)

pool = fluid.layers.pool2d(
input=conv, pool_type='avg', global_pooling=True, data_format=data_format)
Expand All @@ -98,8 +97,7 @@ def net(self, input, class_dim=1000, data_format="NCHW", fuse_bn_add_act=False):
stride=2 if i == 0 and block != 0 else 1,
is_first=block == i == 0,
name=conv_name,
data_format=data_format,
fuse_bn_add_act=fuse_bn_add_act)
data_format=data_format)

pool = fluid.layers.pool2d(
input=conv, pool_type='avg', global_pooling=True, data_format=data_format)
Expand Down Expand Up @@ -157,7 +155,7 @@ def shortcut(self, input, ch_out, stride, is_first, name, data_format):
else:
return input

def bottleneck_block(self, input, num_filters, stride, name, data_format, fuse_bn_add_act):
def bottleneck_block(self, input, num_filters, stride, name, data_format):
conv0 = self.conv_bn_layer(
input=input,
num_filters=num_filters,
Expand All @@ -173,56 +171,26 @@ def bottleneck_block(self, input, num_filters, stride, name, data_format, fuse_b
act='relu',
name=name + "_branch2b",
data_format=data_format)
if not fuse_bn_add_act:
conv2 = self.conv_bn_layer(
input=conv1,
num_filters=num_filters * 4,
filter_size=1,
act=None,
name=name + "_branch2c",
data_format=data_format)
short = self.shortcut(
input,
num_filters * 4,
stride,
is_first=False,
name=name + "_branch1",
data_format=data_format)
conv2 = self.conv_bn_layer(
input=conv1,
num_filters=num_filters * 4,
filter_size=1,
act=None,
name=name + "_branch2c",
data_format=data_format)

return fluid.layers.elementwise_add(
x=short, y=conv2, act='relu', name=name + ".add.output.5")
else:
conv2 = fluid.layers.conv2d(
input=conv1,
num_filters=num_filters * 4,
filter_size=1,
act=None,
param_attr=ParamAttr(name=name + "_branch2c" + "_weights"),
bias_attr=False,
name=name + '_branch2c' + '.conv2d.output.1',
data_format=data_format)
short = self.shortcut(
input,
num_filters * 4,
stride,
is_first=False,
name=name + "_branch1",
data_format=data_format)
name = name + "_branch2c"
bn_name = "bn" + name[3:]
short = fluid.contrib.layers.fused_bn_add_act(
conv2,
short,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance',
name=name + ".add.output.5")
short = self.shortcut(
input,
num_filters * 4,
stride,
is_first=False,
name=name + "_branch1",
data_format=data_format)

return short
return fluid.layers.elementwise_add(
x=short, y=conv2, act='relu', name=name + ".add.output.5")

def basic_block(self, input, num_filters, stride, is_first, name,
data_format, fuse_bn_add_act):
def basic_block(self, input, num_filters, stride, is_first, name, data_format):
conv0 = self.conv_bn_layer(
input=input,
num_filters=num_filters,
Expand All @@ -231,54 +199,16 @@ def basic_block(self, input, num_filters, stride, is_first, name,
stride=stride,
name=name + "_branch2a",
data_format=data_format)
if not fuse_bn_add_act:
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
act=None,
name=name + "_branch2b",
data_format=data_format)
short = self.shortcut(
input,
num_filters,
stride,
is_first,
name=name + "_branch1",
data_format=data_format)

return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
else:
conv1 = fluid.layers.conv2d(
input=conv0,
num_filters=num_filters,
filter_size=3,
stride=1,
padding=1,
groups=1,
act=None,
param_attr=ParamAttr(name=name + "_weights"),
bias_attr=False,
name=name + '_branch2b' + '.conv2d.output.1',
data_format=data_format)
short = self.shortcut(
input,
num_filters,
stride,
is_first,
name=name + "_branch1",
data_format=data_format)
name = name + "_branch2b"
bn_name = "bn" + name[3:]
short = fluid.contrib.layers.fused_bn_add_act(
conv1,
short,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')

return short
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
act=None,
name=name + "_branch2b",
data_format=data_format)
short = self.shortcut(
input, num_filters, stride, is_first, name=name + "_branch1", data_format=data_format)
return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')


def ResNet18():
Expand Down
2 changes: 1 addition & 1 deletion PaddleCV/image_classification/utils/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def parse_args():
add_arg('fuse_bn_act_ops', bool, False, "Whether to use batch_norm and act fusion.")
add_arg('fuse_bn_add_act_ops', bool, False, "Whether to use batch_norm, elementwise_add and act fusion. This is only used for AMP training.")
add_arg('enable_addto', bool, False, "Whether to enable the addto strategy for gradient accumulation or not. This is only used for AMP training.")

add_arg('use_label_smoothing', bool, False, "Whether to use label_smoothing")
add_arg('label_smoothing_epsilon', float, 0.1, "The value of label_smoothing_epsilon parameter")
#NOTE: (2019/08/08) temporary disable use_distill
Expand Down