a question about code? #13

Closed
awmmmm opened this issue Mar 14, 2022 · 1 comment
Comments

awmmmm commented Mar 14, 2022

```python
# Standard imports needed by this excerpt; SinusoidalPositionalEmbedding,
# TransformerEncoderLayer, Embedding and DEFAULT_MAX_TARGET_POSITIONS come
# from the repository's own modules.
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class FFTBlocks(nn.Module):
    def __init__(self, hidden_size, num_layers, ffn_kernel_size=9, dropout=0.0,
                 num_heads=2, use_pos_embed=True, use_last_norm=True,
                 use_pos_embed_alpha=True):
        super().__init__()
        self.num_layers = num_layers
        embed_dim = self.hidden_size = hidden_size
        self.dropout = dropout
        self.use_pos_embed = use_pos_embed
        self.use_last_norm = use_last_norm
        if use_pos_embed:
            self.max_source_positions = DEFAULT_MAX_TARGET_POSITIONS
            self.padding_idx = 0
            self.pos_embed_alpha = nn.Parameter(torch.Tensor([1])) if use_pos_embed_alpha else 1
            self.embed_positions = SinusoidalPositionalEmbedding(
                embed_dim, self.padding_idx, init_size=DEFAULT_MAX_TARGET_POSITIONS,
            )

        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(self.hidden_size, self.dropout,
                                    kernel_size=ffn_kernel_size, num_heads=num_heads)
            for _ in range(self.num_layers)
        ])
        if self.use_last_norm:
            self.layer_norm = nn.LayerNorm(embed_dim)
        else:
            self.layer_norm = None

    def forward(self, x, padding_mask=None, attn_mask=None, return_hiddens=False):
        """
        :param x: [B, T, C]
        :param padding_mask: [B, T]
        :return: [B, T, C] or [L, B, T, C]
        """
        padding_mask = x.abs().sum(-1).eq(0).data if padding_mask is None else padding_mask
        nonpadding_mask_TB = 1 - padding_mask.transpose(0, 1).float()[:, :, None]  # [T, B, 1]
        if self.use_pos_embed:
            positions = self.pos_embed_alpha * self.embed_positions(x[..., 0])
            x = x + positions
            x = F.dropout(x, p=self.dropout, training=self.training)
        # B x T x C -> T x B x C
        x = x.transpose(0, 1) * nonpadding_mask_TB
        hiddens = []
        for layer in self.layers:
            x = layer(x, encoder_padding_mask=padding_mask, attn_mask=attn_mask) * nonpadding_mask_TB
            hiddens.append(x)
        if self.use_last_norm:
            x = self.layer_norm(x) * nonpadding_mask_TB
        if return_hiddens:
            x = torch.stack(hiddens, 0)  # [L, T, B, C]
            x = x.transpose(1, 2)  # [L, B, T, C]
        else:
            x = x.transpose(0, 1)  # [B, T, C]
        return x


class FastSpeechEncoder(FFTBlocks):
    def __init__(self, dict_size, hidden_size=256, num_layers=4, kernel_size=9, num_heads=2,
                 dropout=0.0):
        super().__init__(hidden_size, num_layers, kernel_size, num_heads=num_heads,
                         use_pos_embed=False, dropout=dropout)  # use_pos_embed_alpha for compatibility
        self.embed_tokens = Embedding(dict_size, hidden_size, 0)
        self.embed_scale = math.sqrt(hidden_size)
        self.padding_idx = 0
        self.embed_positions = SinusoidalPositionalEmbedding(
            hidden_size, self.padding_idx, init_size=DEFAULT_MAX_TARGET_POSITIONS,
        )

    def forward(self, txt_tokens, attn_mask=None):
        """
        :param txt_tokens: [B, T]
        :return: {
            'encoder_out': [B x T x C]
        }
        """
        encoder_padding_mask = txt_tokens.eq(self.padding_idx).data
        x = self.forward_embedding(txt_tokens)  # [B, T, H]
        if self.num_layers > 0:
            x = super(FastSpeechEncoder, self).forward(x, encoder_padding_mask, attn_mask=attn_mask)
        return x

    def forward_embedding(self, txt_tokens):
        # embed tokens and positions
        x = self.embed_scale * self.embed_tokens(txt_tokens)
        if self.use_pos_embed:
            positions = self.embed_positions(txt_tokens)
            x = x + positions
        x = F.dropout(x, p=self.dropout, training=self.training)
        return x
```

I see that the position embedding appears to be used twice in the encoder, and I don't understand the role of the second one (the part I tried to bold in the code above). Could you explain it to me?
Looking forward to your reply!


awmmmm commented Mar 14, 2022

Sorry, I missed that in super() you set use_pos_embed=False.
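
For illustration, here is a minimal sketch of that point (this is not the repository's code; the `ToyFFTBlocks`/`ToyEncoder` names and the call counter are made up for this example): because the child passes `use_pos_embed=False` to the parent constructor, the `if self.use_pos_embed:` branch in the parent's `forward` is simply skipped, so positions are not added a second time.

```python
import torch
import torch.nn as nn


class ToyFFTBlocks(nn.Module):
    """Toy stand-in for FFTBlocks: the positional branch is gated by a flag."""

    def __init__(self, hidden_size, use_pos_embed=True):
        super().__init__()
        self.use_pos_embed = use_pos_embed
        self.pos_embed_calls = 0  # counter just to make the behaviour visible

    def forward(self, x):
        if self.use_pos_embed:  # gated by the flag set in __init__
            self.pos_embed_calls += 1
            # (a real implementation would add sinusoidal positions here)
        return x


class ToyEncoder(ToyFFTBlocks):
    """Toy stand-in for FastSpeechEncoder: it disables the parent's branch."""

    def __init__(self, hidden_size=256):
        super().__init__(hidden_size, use_pos_embed=False)


enc = ToyEncoder()
enc(torch.zeros(2, 7, 256))     # [B, T, C]
print(enc.pos_embed_calls)      # 0 -> the parent's positional branch never runs
```

So `FFTBlocks.forward` only adds positions when the module was constructed with `use_pos_embed=True`.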

awmmmm closed this as completed Mar 14, 2022