# CNN_conv1d

<div>
<img src='imgs/subwords.png' width='600' height='600'/>
</div>

-----------

<div>
<img src='imgs/conv1.png' width='600' height='6500'/>
</div>

In [75]:
from torch import nn
import torch.nn.functional as F

"""
目的是将一组subword的表征变成一个表征，维度保持不变
"""

class CNN_conv1d(nn.Module):
    def __init__(self):
        super(CNN_conv1d, self).__init__()
        self.char_dim = 10
        self.filter_size = 3
        self.out_channels = 10
        self.char_cnn =nn.Conv1d(in_channels=self.char_dim, out_channels=self.out_channels, kernel_size=self.filter_size)  #输出维度不变
        # in_channels(int) – 输入信号的通道。在文本分类中，即为词向量的维度
        # out_channels(int) – 卷积产生的通道（生成向量维度）。有多少个out_channels，就需要多少个1维卷积。这里，我们为了保持维度统一，使用与char_dim相同的。
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.1)

    def forward(self, inputs):
        """
        Arguments:
            inputs: [bs,max_len, max_word_len, dim]
        """
        
        bsz, word_len, max_word_len, dim = inputs.size()

        inputs = inputs.view(-1, max_word_len, dim) 
        x = inputs.transpose(1, 2)  # 一维卷积是在最后一维上扫，需要将dim放到倒数第二维的位置
        print(x.size())
        x = self.char_cnn(x)
        print(x.size())
        x = self.relu(x)
        x = F.max_pool1d(x, kernel_size=x.size(-1))  # 最后一维计算maxpooling, 保证表征维度不变
        print(x.size())
        x = self.dropout(x.squeeze())
        out = x.view(bsz, word_len, -1)
        return out

In [76]:
import torch
# [bs, max_len, max_word_len, dim]
cnn_bert = torch.rand(4, 15, 7, 10)
max_word_len = 7

In [77]:
cnn = CNN_conv1d()

In [78]:
out = cnn(cnn_bert)

torch.Size([60, 10, 7])
torch.Size([60, 10, 5])
torch.Size([60, 10, 1])


In [79]:
out.shape

torch.Size([4, 15, 10])

# TagEmebedding


<div>
<img src='imgs/tag-emb.png' width='500' height='500'/>
</div>

In [7]:
"""
目的是对tag映射成向量，tag序列映射到数据矩阵
"""

class TagEmbeddings(nn.Module):
    """Simple tag embeddings, randomly initialized."""

    def __init__(self, config):
        super(TagEmbeddings, self).__init__()
        self.tag_embeddings = nn.Embedding(config.tag_vocab_size, config.hidden_size)
        self.LayerNorm = nn.LayerNorm(config.hidden_size)
        self.dropout = nn.Dropout(config.dropout_prob)

    def forward(self, input_tag_ids):
        tags_embeddings = self.tag_embeddings(input_tag_ids)
        embeddings = tags_embeddings
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings
    
class TagEmebedding(nn.Module):

    def __init__(self, config):
        super(TagEmebedding, self).__init__()
        # Embedding
        self.hidden_size = config.hidden_size
        self.embed = TagEmbeddings(config)
        # Linear
        self.fc = nn.Linear(config.hidden_size, config.output_dim)
        #  dropout
        self.dropout = nn.Dropout(config.dropout_prob)

    def forward(self, flat_input_ids, num_aspect):  
        # flat_input_ids.size() = (batch_size*num_aspect, seq_len)
        embed = self.embed(flat_input_ids) # (batch_size*num_aspect, seq_len， dim)
        embed = self.dropout(embed)
        input = embed.view(-1, num_aspect, flat_input_ids.size(1), self.hidden_size)
        # linear
        logit = self.fc(input) # 最后一维
        return logit

In [8]:
class Config(object):
    def __init__(self,
                 tag_vocab_size,
                 hidden_size=5,
                 layer_num=1,
                 output_dim=5,
                 dropout_prob=0.1,
                 num_aspect=4
                 ):
        self.tag_vocab_size = tag_vocab_size
        self.hidden_size = hidden_size
        self.layer_num = layer_num
        self.dropout_prob = dropout_prob
        self.output_dim = output_dim
        self.num_aspect = num_aspect
        
tag_config = Config(tag_vocab_size=50,
                       hidden_size=10,
                       layer_num=1,
                       output_dim=10,
                       dropout_prob=0.1,
                       num_aspect=3)

In [9]:
tag_model = TagEmebedding(tag_config)

In [80]:
# 随机初始化一个batch_size=8,num_aspect=3（SRL序列最大数量），tag_seq_len=15的序列
flat_input_tag_ids = torch.randint(0, 49, (24, 15))
num_aspect = 3

In [81]:
flat_input_tag_ids

tensor([[32, 41, 28, 25, 19, 40, 14, 11, 32,  4, 45, 12, 46, 36, 22],
        [26, 28, 21, 11, 38, 36, 10, 48,  2, 38, 23, 38,  7, 16, 42],
        [ 4, 14, 31, 25, 19, 43, 38, 32, 15, 23, 42, 16, 14, 41, 21],
        [12, 19, 40, 40, 28,  9, 28, 19, 37, 23, 17, 38,  7, 42, 26],
        [36, 28, 42,  9, 26, 25, 33, 16, 39, 37,  1,  8, 47, 39, 36],
        [19, 33,  5, 19, 13, 28,  4, 30,  5,  9, 25, 43, 30, 39, 48],
        [36, 44, 29,  3, 15, 28,  7, 22, 22,  7, 16, 17,  8, 17,  9],
        [19,  6, 13, 39, 30, 17,  7, 38, 45, 47, 22,  7, 15, 36, 31],
        [ 5,  3, 15,  2,  5, 14, 41, 17,  2, 13, 37,  9, 16,  7,  7],
        [ 7,  8, 38, 30,  1,  3, 38, 32, 46, 45, 30, 40, 10, 13, 14],
        [44, 23, 17, 34, 29,  8,  6, 24, 25,  0,  3,  8, 10, 24, 16],
        [ 5, 30, 19,  7,  5, 37, 47, 36, 39, 27,  8, 22, 16, 44,  0],
        [20, 45,  3, 47, 17, 42, 38, 11, 36, 10, 19, 17,  7, 48, 19],
        [ 3, 37, 48,  1, 10, 45, 11, 23,  4, 15, 13, 35,  1,  1, 47],
        [ 5, 13, 40,

In [82]:
flat_input_tag_ids.shape

torch.Size([24, 15])

In [83]:
tag_output = tag_model(flat_input_tag_ids, num_aspect)

In [84]:
tag_output.shape

torch.Size([8, 3, 15, 10])

# BiGRU

<div>
<img src='imgs/bigru.png' width='500' height='500'/>
</div>

In [67]:
"""
目的是对tag_embedding使用BiGRU编码
额外增加自递归模型，来在表示之中融入时序关系
"""
class BiGRU(nn.Module):
    def __init__(self, config):
        super(BiGRU, self).__init__()
        # Embedding
        self.hidden_size = config.hidden_size
        self.embed = TagEmbeddings(config)
        # GRU
        self.bigru = nn.GRU(config.hidden_size, config.hidden_size, dropout=config.dropout_prob,
                            num_layers=config.layer_num,
                            bidirectional=True, batch_first=True)
        # input_size：输入数据X的特征值的数目
        # hidden_size：隐藏层的神经元数量，也就是隐藏层的特征数量。
        # batch_first：如果设置为 True，则输入数据的维度中第一个维度就 是 batch 值，默认为 False。
        #              默认情况下第一个维度是序列的长度， 第二个维度才是 - - batch，第三个维度是特征数目。
        
        # Linear
        self.fc = nn.Linear(config.hidden_size * 2, config.output_dim)
        #  dropout
        self.dropout = nn.Dropout(config.dropout_prob)

    def forward(self, flat_input_ids, embed, num_aspect):  
        # flat_input_ids.size() = (batch_size*num_aspect, seq_len)
        input = embed.view(flat_input_ids.size(0), embed.size(2), -1)   # 铺平，[bs*num_aspect, seq_len, dim]
        self.bigru.flatten_parameters() # 重置参数的数据指针，提升内存的利用率和效率
        # gru
        gru_out, _ = self.bigru(input)  # 每个时刻的输出，[bs*num_aspect, max_len, 2*dim]
        gru_out = gru_out.view(-1, num_aspect, flat_input_ids.size(1), 2 * self.hidden_size)    # [bs,num_aspect, max_len, 2*dim]
        logit = self.fc(gru_out)    # 线性降维，[bs,num_aspect, max_len, dim]
        return logit

In [68]:
bigru_config = Config(tag_vocab_size=50,
                           hidden_size=10,
                           layer_num=1,
                           output_dim=10,
                           dropout_prob=0.1,
                           num_aspect=3)
bi_gru = BiGRU(bigru_config)

  "num_layers={}".format(dropout, num_layers))


In [69]:
# 输入tag序列：flat_input_tag_ids=[24, 15]
# tag序列的embedding结果：tag_output=[8, 3, 15, 10]
tag_output = bi_gru(flat_input_tag_ids, tag_output, num_aspect)

In [70]:
tag_output.shape

torch.Size([8, 3, 15, 10])