From 75aa1e4754ad21e689693ca56a1c4d12194f595d Mon Sep 17 00:00:00 2001
From: Vahid
Date: Wed, 21 Oct 2020 18:39:13 -0700
Subject: [PATCH] fixed docs.

Signed-off-by: Vahid
---
 .../asr/modules/conformer_encoder.py | 28 +++++++------------
 .../asr/modules/conformer_modules.py |  1 -
 2 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/nemo/collections/asr/modules/conformer_encoder.py b/nemo/collections/asr/modules/conformer_encoder.py
index 41416b06ee1b..458cb413f010 100644
--- a/nemo/collections/asr/modules/conformer_encoder.py
+++ b/nemo/collections/asr/modules/conformer_encoder.py
@@ -39,7 +39,8 @@ class ConformerEncoder(NeuralModule, Exportable):
     """
     The encoder for ASR model of Conformer.
     Based on this paper:
-    https://arxiv.org/abs/2005.08100
+    'Conformer: Convolution-augmented Transformer for Speech Recognition' by Anmol Gulati et al.
+    https://arxiv.org/abs/2005.08100
     """
 
     def _prepare_for_export(self):
@@ -98,7 +99,7 @@ def __init__(
         feat_in,
         n_layers,
         d_model,
-        feat_out=0,
+        feat_out=-1,
         subsampling='vggnet',
         subsampling_factor=4,
         subsampling_conv_channels=64,
@@ -206,13 +207,7 @@ def forward(self, audio_signal, length):
 
     @staticmethod
     def make_pad_mask(seq_lens, max_time, device=None):
-        """Make masking for padding.
-        Args:
-            seq_lens (IntTensor): `[B]`
-            device_id (int):
-        Returns:
-            mask (IntTensor): `[B, T]`
-        """
+        """Make masking for padding."""
         bs = seq_lens.size(0)
         seq_range = torch.arange(0, max_time, dtype=torch.int32)
         seq_range_expand = seq_range.unsqueeze(0).expand(bs, max_time)
@@ -268,18 +263,15 @@ def __init__(self, d_model, d_ff, conv_kernel_size, self_attention_model, n_head
         self.dropout = nn.Dropout(dropout)
         self.norm_out = LayerNorm(d_model)
 
-    def forward(self, x, att_mask=None, pos_emb=None, u_bias=None, v_bias=None, pad_mask=None):
+    def forward(self, x, att_mask=None, pos_emb=None, pad_mask=None):
         """
         Args:
-            x (FloatTensor): `[B, T, d_model]`
-            att_mask (ByteTensor): `[B, T, T]`
-            pos_emb (LongTensor): `[L, 1, d_model]`
-            u (FloatTensor): global parameter for relative positional embedding
-            v (FloatTensor): global parameter for relative positional embedding
+            x (torch.Tensor): input signals (B, T, d_model)
+            att_mask (torch.Tensor): attention masks (B, T, T)
+            pos_emb (torch.Tensor): positional embeddings (L, 1, d_model)
+            pad_mask (torch.Tensor): padding mask
         Returns:
-            xs (FloatTensor): `[B, T, d_model]`
-            xx_aws (FloatTensor): `[B, H, T, T]`
-
+            x (torch.Tensor): output signals (B, T, d_model)
         """
         residual = x
         x = self.norm_feed_forward1(x)
diff --git a/nemo/collections/asr/modules/conformer_modules.py b/nemo/collections/asr/modules/conformer_modules.py
index 5662ddbfe439..8e6e0a0fefbc 100644
--- a/nemo/collections/asr/modules/conformer_modules.py
+++ b/nemo/collections/asr/modules/conformer_modules.py
@@ -70,7 +70,6 @@ class ConformerFeedForward(nn.Module):
     """
     feed-forward module of Conformer model.
     """
-
     def __init__(self, d_model, d_ff, dropout, activation=Swish()):
         super(ConformerFeedForward, self).__init__()
         self.linear1 = nn.Linear(d_model, d_ff)
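
For context on the make_pad_mask hunk above: the hunk is cut off right after
seq_range_expand is built, so the comparison against the lengths is not visible
in this patch. A minimal, self-contained sketch of the comparison-based padding
mask such a helper typically returns (the final `<` step and the helper name are
assumptions, not taken from this patch):

    import torch

    def make_pad_mask_sketch(seq_lens, max_time):
        # seq_lens: (B,) valid length of each sequence in the batch
        bs = seq_lens.size(0)
        # (T,) time indices, broadcast to one row per batch element -> (B, T)
        seq_range = torch.arange(0, max_time, dtype=torch.int32)
        seq_range_expand = seq_range.unsqueeze(0).expand(bs, max_time)
        # Assumed final step (not shown in the truncated hunk above):
        # True where the time index falls inside the valid, non-padded region.
        return seq_range_expand < seq_lens.unsqueeze(-1)

    # Lengths [2, 4] with max_time=4 ->
    # tensor([[ True,  True, False, False],
    #         [ True,  True,  True,  True]])
    print(make_pad_mask_sketch(torch.tensor([2, 4]), 4))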
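
The new forward() docstring describes both a (B, T, T) att_mask and a (B, T)
pad_mask. As a rough illustration of how the two usually relate (the helper
below is hypothetical and not part of NeMo's API), the square attention mask can
be derived from the padding mask by allowing a position pair only when both
positions are valid:

    import torch

    def expand_pad_to_att_mask(pad_mask):
        # pad_mask: (B, T) bool, True on valid (non-padded) frames.
        # Pairwise AND gives a (B, T, T) mask that is True only where both
        # the query and the key position lie inside the valid region.
        return pad_mask.unsqueeze(1) & pad_mask.unsqueeze(2)

    pad_mask = torch.tensor([[True, True, False, False]])
    print(expand_pad_to_att_mask(pad_mask).int())
    # tensor([[[1, 1, 0, 0],
    #          [1, 1, 0, 0],
    #          [0, 0, 0, 0],
    #          [0, 0, 0, 0]]], dtype=torch.int32)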
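
The conformer_modules.py hunk only removes a blank line and shows linear1 of
ConformerFeedForward. For the full picture, here is a sketch of the standard
Conformer position-wise feed-forward block such a module implements; the
linear2/dropout wiring and the use of nn.SiLU in place of NeMo's Swish are
assumptions for illustration, not taken from this patch:

    import torch
    import torch.nn as nn

    class ConformerFeedForwardSketch(nn.Module):
        # Position-wise feed-forward: Linear -> activation -> dropout -> Linear.
        def __init__(self, d_model, d_ff, dropout, activation=nn.SiLU()):
            # nn.SiLU computes the same function as the Swish activation used in NeMo.
            super().__init__()
            self.linear1 = nn.Linear(d_model, d_ff)
            self.activation = activation
            self.dropout = nn.Dropout(dropout)
            self.linear2 = nn.Linear(d_ff, d_model)

        def forward(self, x):
            # x: (B, T, d_model) -> (B, T, d_ff) -> (B, T, d_model)
            return self.linear2(self.dropout(self.activation(self.linear1(x))))

    # Shape check: a (B=2, T=50, d_model=176) feature map keeps its shape.
    ff = ConformerFeedForwardSketch(d_model=176, d_ff=704, dropout=0.1)
    print(ff(torch.randn(2, 50, 176)).shape)  # torch.Size([2, 50, 176])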