fix incorrect parameter sharing between bidirectional rnns.
lcy-seso committed Jun 19, 2017
1 parent 132a26a commit ba0ff69
Showing 2 changed files with 39 additions and 43 deletions.
80 changes: 37 additions & 43 deletions sequence_tagging_for_ner/network_conf.py
@@ -4,31 +4,7 @@
 import paddle.v2.evaluator as evaluator
 
 
-def stacked_rnn(input_layer,
-                hidden_size,
-                hidden_para_attr,
-                rnn_para_attr,
-                stack_num=3,
-                reverse=False):
-    for i in range(stack_num):
-        hidden = paddle.layer.fc(
-            size=hidden_size,
-            act=paddle.activation.Tanh(),
-            bias_attr=paddle.attr.Param(initial_std=1.),
-            input=[input_layer] if not i else [hidden, rnn],
-            param_attr=[rnn_para_attr]
-            if not i else [hidden_para_attr, rnn_para_attr])
-
-        rnn = paddle.layer.recurrent(
-            input=hidden,
-            act=paddle.activation.Relu(),
-            bias_attr=paddle.attr.Param(initial_std=1.),
-            reverse=reverse,
-            param_attr=rnn_para_attr)
-    return hidden, rnn
-
-
-def ner_net(word_dict_len, label_dict_len, stack_num=3, is_train=True):
+def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
     mark_dict_len = 2
     word_dim = 50
     mark_dim = 5
@@ -51,37 +27,55 @@ def ner_net(word_dict_len, label_dict_len, stack_num=3, is_train=True):
         size=mark_dim,
         param_attr=paddle.attr.Param(initial_std=math.sqrt(1. / word_dim)))
 
-    emb_layers = [word_embedding, mark_embedding]
-
-    word_caps_vector = paddle.layer.concat(input=emb_layers)
+    word_caps_vector = paddle.layer.concat(
+        input=[word_embedding, mark_embedding])
 
     mix_hidden_lr = 1e-3
     rnn_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=0.1)
     hidden_para_attr = paddle.attr.Param(
         initial_std=1 / math.sqrt(hidden_dim), learning_rate=mix_hidden_lr)
 
-    forward_hidden, rnn_forward = stacked_rnn(word_caps_vector, hidden_dim,
-                                              hidden_para_attr, rnn_para_attr)
-    backward_hidden, rnn_backward = stacked_rnn(
-        word_caps_vector,
-        hidden_dim,
-        hidden_para_attr,
-        rnn_para_attr,
-        reverse=True)
-
-    fea = paddle.layer.fc(
+    # the first rnn layer shares the input-to-hidden mappings.
+    hidden = paddle.layer.fc(
+        name="__hidden00__",
+        size=hidden_dim,
+        act=paddle.activation.Tanh(),
+        bias_attr=paddle.attr.Param(initial_std=1.),
+        input=word_caps_vector,
+        param_attr=hidden_para_attr)
+
+    fea = []
+    for direction in ["fwd", "bwd"]:
+        for i in range(stack_num):
+            if i:
+                hidden = paddle.layer.fc(
+                    name="__hidden%02d_%s__" % (i, direction),
+                    size=hidden_dim,
+                    act=paddle.activation.STanh(),
+                    bias_attr=paddle.attr.Param(initial_std=1.),
+                    input=[hidden, rnn],
+                    param_attr=[hidden_para_attr, rnn_para_attr])
+
+            rnn = paddle.layer.recurrent(
+                name="__rnn%02d_%s__" % (i, direction),
+                input=hidden,
+                act=paddle.activation.Relu(),
+                bias_attr=paddle.attr.Param(initial_std=1.),
+                reverse=i % 2 if direction == "fwd" else not i % 2,
+                param_attr=rnn_para_attr)
+        fea += [hidden, rnn]
+
+    rnn_fea = paddle.layer.fc(
         size=hidden_dim,
         bias_attr=paddle.attr.Param(initial_std=1.),
         act=paddle.activation.STanh(),
-        input=[forward_hidden, rnn_forward, backward_hidden, rnn_backward],
-        param_attr=[
-            hidden_para_attr, rnn_para_attr, hidden_para_attr, rnn_para_attr
-        ])
+        input=fea,
+        param_attr=[hidden_para_attr, rnn_para_attr] * 2)
 
     emission = paddle.layer.fc(
         size=label_dict_len,
         bias_attr=False,
-        input=fea,
+        input=rnn_fea,
         param_attr=rnn_para_attr)
 
     if is_train:
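As background for the commit title: in paddle.v2, whether two layers end up with the same weights is keyed on parameter names. Below is a minimal sketch of that convention, independent of this repository and assuming only the paddle.v2 layer API; the data layer, sizes, and the tied_proj name are made up for illustration. Param attributes given the same explicit name are shared on purpose; layers whose Param attributes are unnamed get their own parameters derived from the layer name, which is presumably why the fix names every fc and recurrent layer per direction.

import paddle.v2 as paddle

paddle.init(use_gpu=False, trainer_count=1)

words = paddle.layer.data(
    name="words", type=paddle.data_type.integer_value_sequence(1000))
emb = paddle.layer.embedding(input=words, size=32)

# Same explicit parameter name on both projections -> one shared weight
# matrix; this is how sharing is requested deliberately in paddle.v2.
tied = paddle.attr.Param(name="tied_proj")
proj_a = paddle.layer.fc(input=emb, size=64, param_attr=tied)
proj_b = paddle.layer.fc(input=emb, size=64, param_attr=tied)

# Distinct layer names with unnamed Param attributes -> independent weights
# (auto-named from the layer), the pattern the per-direction names in this
# commit rely on for the fwd/bwd stacks.
fwd = paddle.layer.fc(name="__hidden01_fwd__", input=emb, size=64,
                      param_attr=paddle.attr.Param(initial_std=0.01))
bwd = paddle.layer.fc(name="__hidden01_bwd__", input=emb, size=64,
                      param_attr=paddle.attr.Param(initial_std=0.01))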
2 changes: 2 additions & 0 deletions sequence_tagging_for_ner/train.py
@@ -5,6 +5,8 @@
 from utils import *
 from network_conf import *
 
+from paddle.v2.layer import parse_network
+
 
 def main(train_data_file,
          test_data_file,
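The train.py hunk only adds the parse_network import; how it is used is not shown in this commit. A typical use is to print the parsed topology so the generated layer and parameter names can be checked by eye. The sketch below assumes ner_net(..., is_train=False) returns the network's output layer(s) -- the tail of ner_net is outside the hunk above -- and the dictionary sizes are placeholders.

import paddle.v2 as paddle
from paddle.v2.layer import parse_network

from network_conf import ner_net

paddle.init(use_gpu=False, trainer_count=1)

# Build the inference topology and dump the parsed protobuf configuration.
# The printout lists every layer and parameter, which makes it easy to
# confirm that the fwd/bwd stacks now refer to distinct parameters.
output = ner_net(word_dict_len=10000, label_dict_len=9, is_train=False)
print(parse_network(output))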
