In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
import torch.nn.functional as F
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

# import spacy
import numpy as np

import random
import math
import time
from torch.utils.data import Dataset

In [5]:
class MyDataset(Dataset):
    """Dataset that serves aligned (input, label, mask) triples.

    The three containers are stored as given and indexed with the same
    ``idx``; the dataset length is the length of ``input_data``.
    """

    def __init__(self, input_data, labels, input_masks):
        """Keep references to the three parallel, indexable containers."""
        self.input_data, self.labels, self.input_masks = (
            input_data,
            labels,
            input_masks,
        )

    def __len__(self):
        """Return the number of items in ``input_data``."""
        return len(self.input_data)

    def __getitem__(self, idx):
        """Return the ``(input, label, mask)`` tuple stored at ``idx``."""
        sources = (self.input_data, self.labels, self.input_masks)
        return tuple(source[idx] for source in sources)

In [2]:
def array_to_tensor(array, device=None):
    """Convert a NumPy array into a float32 tensor.

    Args:
        array: NumPy array accepted by ``torch.from_numpy``.
        device: Target device (``str`` or ``torch.device``). When ``None``,
            CUDA is used if it is available, otherwise the CPU. The previous
            implementation cast to ``torch.cuda.FloatTensor`` unconditionally
            and therefore failed on machines without a GPU.

    Returns:
        A ``torch.float32`` tensor on the selected device. When the input is
        already float32 and the target is the CPU, no copy is made, so the
        tensor shares memory with ``array``.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tensor = torch.from_numpy(array)
    return tensor.to(device=device, dtype=torch.float32)

In [15]:
class Attention(nn.Module):
    """Masked attention that re-weights each time step of a sequence.

    A score is computed per position as ``v(tanh(attn(src)))``, masked
    positions are suppressed, and a softmax over the sequence dimension
    produces the weights. The returned "context" is the element-wise product
    of the weights and the input; it is NOT summed over the sequence.
    """

    def __init__(self, input_size, attention_size):
        """
        Args:
            input_size: Size of the last dimension of ``src``.
            attention_size: Width of the hidden projection used for scoring.
        """
        super().__init__()
        self.attn = nn.Linear(input_size, attention_size)
        self.v = nn.Linear(attention_size, 1, bias=False)

    def forward(self, src, src_mask):
        """
        Args:
            src: Tensor of shape (batch, seq_len, input_size).
            src_mask: Tensor of shape (batch, seq_len); positions equal to 0
                are treated as padding and receive (near-)zero weight.

        Returns:
            Tuple ``(context, attention_weights)`` where ``context`` has shape
            (batch, seq_len, input_size) and ``attention_weights`` has shape
            (batch, seq_len, 1).
        """
        # energy = (batch, seq_len, attention_size)
        energy = torch.tanh(self.attn(src))

        # scores = (batch, seq_len)
        # Squeeze only the trailing singleton dimension. A bare squeeze()
        # would also drop the batch or sequence dimension when either has
        # size 1, which corrupts the mask broadcast and the softmax below.
        scores = self.v(energy).squeeze(-1)

        # Push padded positions to a very negative score so softmax gives
        # them zero weight. If every position of a row is masked, all scores
        # are equal and the weights for that row become uniform.
        scores = scores.masked_fill(src_mask == 0, -1e10)

        # attention_weights = (batch, seq_len), normalised over seq_len
        attention_weights = F.softmax(scores, dim=1)

        # attention_weights = (batch, seq_len, 1) so it broadcasts over input_size
        attention_weights = attention_weights.unsqueeze(2)

        # context = (batch, seq_len, input_size)
        context = attention_weights * src

        return context, attention_weights

### 测试一下

In [23]:
# Build a (num_masks, sequence_length) float mask whose first half is 1
# (valid positions) and whose second half is 0 (masked positions).
sequence_length = 3321
half_length = sequence_length // 2
num_masks = 10
x_mask = torch.cat(
    (
        torch.ones(num_masks, half_length),
        torch.zeros(num_masks, sequence_length - half_length),
    ),
    dim=1,
)

In [24]:
# Sanity check: the mask should be (num_masks, sequence_length).
print(x_mask.size())

torch.Size([10, 3321])


In [26]:
# Smoke-test Attention on random single-feature sequences. `x` is rebound
# to the attention-weighted output of the layer.
x = torch.randn((10, 3321, 1))
attention_layer = Attention(input_size=1, attention_size=128)
x, attention_weights = attention_layer(x, x_mask)

torch.Size([10, 3321, 1])


In [29]:
# Shapes of the weighted output and of the attention weights, one per line.
print(x.shape, attention_weights.shape, sep="\n")
# Weights per sequence (masked tail should be 0), then the first weighted sequence.
print(attention_weights.squeeze(), x[0], sep="\n")

torch.Size([10, 3321, 1])
torch.Size([10, 3321, 1])
tensor([[0.0007, 0.0005, 0.0005,  ..., 0.0000, 0.0000, 0.0000],
        [0.0007, 0.0006, 0.0006,  ..., 0.0000, 0.0000, 0.0000],
        [0.0007, 0.0004, 0.0007,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0006, 0.0006, 0.0004,  ..., 0.0000, 0.0000, 0.0000],
        [0.0003, 0.0004, 0.0007,  ..., 0.0000, 0.0000, 0.0000],
        [0.0006, 0.0006, 0.0007,  ..., 0.0000, 0.0000, 0.0000]],
       grad_fn=<SqueezeBackward0>)
tensor([[-0.0002],
        [ 0.0003],
        [ 0.0003],
        ...,
        [ 0.0000],
        [-0.0000],
        [-0.0000]], grad_fn=<SelectBackward0>)


In [30]:
class PositionwiseFeedforwardLayer(nn.Module):
    """Two linear layers with ReLU and dropout in between.

    The block expands the last dimension from ``input_dim`` to ``pf_dim`` and
    projects it back to ``input_dim``, so the output shape equals the input
    shape.
    """

    def __init__(self, input_dim, pf_dim, dropout):
        """
        Args:
            input_dim: Size of the last dimension of the input and output.
            pf_dim: Size of the hidden (expanded) dimension.
            dropout: Dropout probability applied after the ReLU.
        """
        super().__init__()

        self.fc_1 = nn.Linear(in_features=input_dim, out_features=pf_dim)
        self.fc_2 = nn.Linear(in_features=pf_dim, out_features=input_dim)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Map ``x`` of shape [batch size, seq len, input_dim] to the same shape."""
        # hidden = [batch size, seq len, pf dim]
        hidden = self.fc_1(x)
        hidden = torch.relu(hidden)
        hidden = self.dropout(hidden)

        # result = [batch size, seq len, input_dim]
        return self.fc_2(hidden)

### 测试一下

In [32]:
# --- Attention over random single-feature sequences -------------------
x = torch.randn((10, 3321, 1))
attention_layer = Attention(input_size=1, attention_size=128)
x, attention_weights = attention_layer(x, x_mask)

print(x.shape, attention_weights.shape, sep="\n")

# --- Position-wise feed-forward applied to the attention output -------
pf_layer = PositionwiseFeedforwardLayer(input_dim=1, pf_dim=64, dropout=0.1)
x = pf_layer(x)

print(x.shape)

torch.Size([10, 3321, 1])


In [39]:
class EncoderLayer(nn.Module):
    """One encoder block: masked attention followed by a feed-forward net.

    Each sub-layer is wrapped as ``LayerNorm(input + Dropout(sublayer(input)))``,
    so the output keeps the shape of ``src``.
    """

    def __init__(self, input_size, attention_size, pf_dim, dropout):
        """
        Args:
            input_size: Size of the last dimension of ``src``.
            attention_size: Hidden width passed to ``Attention``.
            pf_dim: Hidden width passed to ``PositionwiseFeedforwardLayer``.
            dropout: Dropout probability, used for the residual branches and
                inside the feed-forward layer.
        """
        super().__init__()

        self.self_attn_layer_norm = nn.LayerNorm(input_size)
        self.ff_layer_norm = nn.LayerNorm(input_size)
        self.self_attention = Attention(input_size, attention_size)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(
            input_size, pf_dim, dropout
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, src, src_mask):
        """
        Args:
            src: Tensor of shape [batch size, src len, input_size].
            src_mask: Tensor of shape [batch size, src len].

        Returns:
            Tensor of shape [batch size, src len, input_size].
        """
        # Attention branch; the attention weights themselves are discarded.
        attended, _ = self.self_attention(src, src_mask)

        # Residual connection + layer norm around the attention branch.
        src = self.self_attn_layer_norm(src + self.dropout(attended))

        # Feed-forward branch, wrapped in the same residual + layer norm pattern.
        ff_out = self.positionwise_feedforward(src)
        return self.ff_layer_norm(src + self.dropout(ff_out))

### 测试一下

In [41]:
# Hyperparameters for the smoke test.
input_size, attention_size, pd_dim, dropout = 32, 128, 64, 0.1

# Mask: first half of every sequence is valid (1), second half is masked (0).
sequence_length = 3321
half_length = sequence_length // 2
num_masks = 10
x_mask = torch.cat(
    (
        torch.ones(num_masks, half_length),
        torch.zeros(num_masks, sequence_length - half_length),
    ),
    dim=1,
)
print("x_mask is in shape of {} ".format(x_mask.shape))

# Random source batch.
x = torch.randn((10, 3321, input_size))
print("x is in shape of {} ".format(x.shape))

# Run a single encoder layer; the output shape should match the input shape.
enc_layer = EncoderLayer(input_size, attention_size, pd_dim, dropout)
x = enc_layer(x, x_mask)
print(x.shape)

x_mask is in shape of torch.Size([10, 3321]) 
x is in shape of torch.Size([10, 3321, 32]) 
torch.Size([10, 3321, 32])


## Encoder

In [None]:
class Encoder(nn.Module):
    """Run an RNN encoder, attend over its outputs, and append the hidden state.

    For now this is tried with a single-layer encoder.
    """

    def __init__(self, encoderrnn, attention):
        """
        Args:
            encoderrnn: Module called as ``encoderrnn(x)`` that returns a
                3-tuple ``(output, x, hidden)``.
            attention: Module called as ``attention(output, x)`` whose result
                is concatenated with ``hidden`` along dim 1.
        """
        super().__init__()
        self.encoderrnn = encoderrnn
        self.attention = attention

    def forward(self, x):
        """Return ``cat((context, hidden), dim=1)`` for the input ``x``."""
        # Use the module stored on this instance. The previous code called
        # the bare name `encoderrnn`, which only worked if a notebook global
        # of that name happened to exist (NameError otherwise).
        output, x, hidden = self.encoderrnn(x)

        # NOTE(review): a bare squeeze() removes every size-1 dimension, so a
        # batch of size 1 would lose its batch dimension too. Presumably
        # hidden is (1, batch, hidden_dim) and squeeze(0) is intended --
        # confirm against the RNN module before changing.
        hidden = hidden.squeeze()

        # NOTE(review): the Attention class in this notebook returns a
        # (context, weights) tuple with a 3-D context, which would not
        # concatenate with a 2-D hidden here; this assumes a different
        # attention module that returns a single 2-D tensor -- confirm.
        context = self.attention(output, x)
        concatenation = torch.cat((context, hidden), dim=1)

        return concatenation