This repository has been archived by the owner on Aug 3, 2021. It is now read-only.

Commit cca1551

Test new data b128.

VahidooX committed Aug 14, 2018
1 parent 381315a, commit cca1551
Showing 6 changed files with 16 additions and 13 deletions.
10 changes: 5 additions & 5 deletions example_configs/text2text/en-de/en-de-convs2s_dev.py
@@ -33,13 +33,13 @@
 
 iter_size = 1
 dtype = "mixed"  # or tf.float32
-shuffle_train = False
+shuffle_train = True
 use_horovod = True
 
 max_steps = int((4500000 / (num_gpus * batch_size * iter_size)) * epoch_num)
 
-conv_act = None  # tf.nn.relu, tf.nn.tanh, gated_linear_units
-normalization_type = "layer_norm"  # "weight_norm" or "batch_norm" or None
+conv_act = gated_linear_units  # tf.nn.relu, tf.nn.tanh, gated_linear_units
+normalization_type = "weight_norm"  # "weight_norm" or "batch_norm" or None
 scaling_factor = math.sqrt(0.5)  # changed here
 inti_var = None
 
@@ -88,7 +88,7 @@
 #"conv_nchannels_kwidth": [(512, 3)]*10 + [(768, 3)]*3 + [(2048, 1)]*2,
 
 # fairseq config
-"conv_nchannels_kwidth": [(512*2, 3)]*9 + [(1024, 3)]*4 + [(2048, 1)]*2,
+"conv_nchannels_kwidth": [(512, 3)]*9 + [(1024, 3)]*4 + [(2048, 1)]*2,
 
 "embedding_dropout_keep_prob": 0.8,
 "hidden_dropout_keep_prob": 0.8,

@@ -117,7 +117,7 @@
#"conv_nchannels_kwidth": [(512, 3)]*10 + [(768, 3)]*3 + [(2048, 1)]*2,

# fairseq config
"conv_nchannels_kwidth": [(512*2, 3)]*9 + [(1024, 3)]*4 + [(2048, 1)]*2,
"conv_nchannels_kwidth": [(512, 3)]*9 + [(1024, 3)]*4 + [(2048, 1)]*2,

"embedding_dropout_keep_prob": 0.8,
"hidden_dropout_keep_prob": 0.8,
Expand Down
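This config flips conv_act from None to gated_linear_units, the activation used in the original ConvS2S/fairseq setup, while also dropping the conv channel counts from 512*2 to 512. For reference, a minimal numpy sketch of the standard gated linear unit (Dauphin et al., 2016); the function body and shapes here are illustrative, not the repository's implementation:

import numpy as np

def gated_linear_units(x):
    # Standard GLU: split the channel axis in half and gate one half
    # with the sigmoid of the other: GLU([a; b]) = a * sigmoid(b).
    a, b = np.split(x, 2, axis=-1)
    return a * (1.0 / (1.0 + np.exp(-b)))

x = np.random.randn(4, 10, 1024)    # (batch, time, conv channels)
print(gated_linear_units(x).shape)  # (4, 10, 512): GLU halves the channels

Since GLU consumes half of the conv channels as gates, the conv_nchannels_kwidth values above interact directly with this choice of activation.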
6 changes: 3 additions & 3 deletions example_configs/text2text/en-de/en-de-convs2s_plus.py
@@ -33,16 +33,16 @@
 
 iter_size = 1
 dtype = "mixed"  # or tf.float32
-shuffle_train = True
+shuffle_train = False
 use_horovod = True
 
 max_steps = int((4500000 / (num_gpus * batch_size * iter_size)) * epoch_num)
 
 conv_act = None  # tf.nn.relu, tf.nn.tanh, gated_linear_units
-normalization_type = "layer_norm"  # "weight_norm" or "batch_norm" or None
+normalization_type = "batch_norm"  # "weight_norm" or "batch_norm" or None
 scaling_factor = 1.0  # math.sqrt(0.5), changed here
 
-inti_var = None  # 1e-3
+inti_var = 1e-3
 
 base_params = {
 # iter_size can be used just with horovod
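The max_steps formula above converts an epoch budget into optimizer steps: 4500000 is the approximate number of training sentence pairs, and num_gpus * batch_size * iter_size is the effective global batch. A worked example with assumed values (the commit title's "b128" suggests a batch size of 128; num_gpus and epoch_num below are guesses, not values from the commit):

num_gpus, batch_size, iter_size, epoch_num = 8, 128, 1, 30
train_size = 4500000  # sentence pairs, per the constant in the config

steps_per_epoch = train_size / (num_gpus * batch_size * iter_size)
max_steps = int(steps_per_epoch * epoch_num)
print(max_steps)  # 131835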
2 changes: 2 additions & 0 deletions open_seq2seq/decoders/convs2s_decoder.py
@@ -198,6 +198,8 @@ def _decode(self, input_dict):
     layer_id=i + 1,
     add_res=True,
     mode=self.mode,
+    normalization_type=self.normalization_type,
+    scaling_factor=self.scaling_factor,
     regularizer=self.regularizer,
     init_var=self.init_var
 )
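The decoder now forwards normalization_type and scaling_factor down to its attention layers instead of relying on the layer defaults. In ConvS2S, a scaling factor of math.sqrt(0.5) keeps the variance of a residual sum roughly constant: if the branch and the residual are independent with unit variance, their sum has variance 2, and multiplying by sqrt(0.5) brings it back to 1. A quick numpy check of that identity, illustrative only:

import numpy as np

rng = np.random.default_rng(0)
branch = rng.normal(size=1_000_000)    # layer output, unit variance
residual = rng.normal(size=1_000_000)  # skip connection, unit variance

print(np.var(branch + residual))                   # ~2.0
print(np.var((branch + residual) * np.sqrt(0.5)))  # ~1.0: variance preserved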
7 changes: 4 additions & 3 deletions open_seq2seq/decoders/convs2s_decoder2.py
@@ -183,7 +183,7 @@ def _decode(self, input_dict):
     hidden_dropout=self.params["hidden_dropout_keep_prob"],
     conv_padding="VALID",
     decode_padding=True,
-    activation=tf.nn.relu,  # changed here
+    activation=self.conv_activation,  # changed here
     normalization_type=self.normalization_type,
     regularizer=self.regularizer,
     init_var=self.init_var)
@@ -194,8 +194,8 @@ def _decode(self, input_dict):
     layer_id=i + 1,
     add_res=True,
     mode=self.mode,
-    normalization_type=self.normalization_type,
     scaling_factor=self.scaling_factor,
+    normalization_type=self.normalization_type,
     regularizer=self.regularizer,
     init_var=self.init_var)
 
@@ -321,7 +321,8 @@ def _call(self, decoder_inputs, encoder_outputs_a, encoder_outputs_b,
     outputs = (outputs + res_inputs) * self.scaling_factor
 
     # changed here
-    outputs = tf.nn.relu(outputs)  # self.conv_activation(outputs)
+    if i < len(self.layers) - 2:
+      outputs = tf.nn.relu(outputs)  # self.conv_activation(outputs)
 
 
 with tf.variable_scope("linear_layer_after_cnn_layers"):
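The hunk at line 321 stops applying the extra ReLU after the last two blocks of the stack. A minimal sketch of that control flow with stand-in layer functions (the real _call operates on TF tensors and conv/attention blocks):

import numpy as np

def relu(x):
    return np.maximum(x, 0.0)

layers = [lambda x: 0.9 * x for _ in range(6)]  # stand-ins for conv blocks
scaling_factor = np.sqrt(0.5)

x = np.random.randn(2, 7, 16)
for i, layer in enumerate(layers):
    res = x
    x = (layer(x) + res) * scaling_factor  # residual add + variance scale
    if i < len(layers) - 2:                # no extra ReLU on the last two layers
        x = relu(x)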
2 changes: 1 addition & 1 deletion open_seq2seq/parts/convs2s/attention_wn_layer.py
@@ -15,8 +15,8 @@ class AttentionLayerNormalized(tf.layers.Layer):
   """Attention layer for convs2s with weight normalization"""
 
   def __init__(self, in_dim, embed_size, layer_id, add_res, mode,
-               normalization_type="weight_norm",
                scaling_factor=math.sqrt(0.5),
+               normalization_type="weight_norm",
                regularizer=None,
                init_var=None,
                ):
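This is only a reordering of keyword parameters in the signature; call sites that pass them by name, like the decoder changes above, are unaffected, and only positional callers would see a difference. A trivial illustration with a hypothetical function, not the repo's API:

def f(in_dim, scaling_factor=0.5, normalization_type="weight_norm"):
    return scaling_factor, normalization_type

# Keyword arguments are order-independent at the call site:
assert f(1, normalization_type="layer_norm", scaling_factor=1.0) == (1.0, "layer_norm")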
2 changes: 1 addition & 1 deletion open_seq2seq/parts/convs2s/conv_wn_layer.py
@@ -26,7 +26,7 @@ def __init__(self,
                decode_padding,
                activation=gated_linear_units,
                normalization_type="weight_norm",
-               regularizer=None, #tf.contrib.layers.l2_regularizer(scale=1e-4)
+               regularizer=None, # tf.contrib.layers.l2_regularizer(scale=1e-4)
                init_var=None,
                ):
   """initializes the 1D convolution layer.
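For reference, the "wn" in conv_wn_layer refers to weight normalization (Salimans & Kingma, 2016), which reparameterizes each weight vector as w = g * v / ||v||, decoupling its norm g from its direction v. A minimal numpy sketch of that identity, not the repository's TF implementation:

import numpy as np

rng = np.random.default_rng(0)
v = rng.normal(size=(3, 512))         # direction parameters, one row per output
g = np.array([[1.0], [2.0], [0.5]])   # learned per-row scale

w = g * v / np.linalg.norm(v, axis=1, keepdims=True)
print(np.linalg.norm(w, axis=1))      # [1.  2.  0.5]: row norms equal g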
