In [1]:
import import_ipynb  # must run first: installs the hook that lets .ipynb files be imported

from statistics import mean

import torch

from DataSet import *

importing Jupyter notebook from DataSet.ipynb
importing Jupyter notebook from RNN_Basic.ipynb


In [2]:
class SequenceTaggingNet(nn.Module):
    """Sequence classifier: Embedding -> LSTM -> single-logit Linear layer.

    Token IDs are converted to an embedding time series, fed through a
    (possibly stacked) LSTM, and the LSTM output at the last time step of
    each sequence is projected to one logit.

    Args:
        num_embeddings: Number of rows in the embedding table. ID 0 is the
            padding index.
        embedding_dim: Size of each embedding vector.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied between stacked LSTM layers.
            It is ignored when ``num_layers`` is 1, because nn.LSTM only
            applies dropout after all but the last layer.
    """

    def __init__(
        self, num_embeddings,
        embedding_dim=50,
        hidden_size=50,
        num_layers=1,
        dropout=0.2
    ):
        super().__init__()
        self.emb = nn.Embedding(
            num_embeddings,
            embedding_dim,
            padding_idx=0
        )
        # nn.LSTM warns when dropout > 0 is requested for a single layer,
        # since there is no inter-layer connection to apply it to.
        lstm_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=lstm_dropout
        )
        self.linear = nn.Linear(
            hidden_size, 1
        )

    def forward(self, x, h0=None, l=None):
        """Compute one logit per input sequence.

        Args:
            x: Integer token IDs of shape (batch_size, step_size).
            h0: Optional initial state forwarded unchanged to nn.LSTM.
            l: Optional original (unpadded) length of every sequence, one
                entry per batch row. When given, the LSTM output at step
                ``l - 1`` is used instead of the final padded step.

        Returns:
            Tensor of shape (batch_size,) holding the raw logits.
        """
        # (batch_size, step_size) -> (batch_size, step_size, embedding_dim)
        x = self.emb(x)

        # (batch_size, step_size, embedding_dim)
        #   -> (batch_size, step_size, hidden_size)
        x, _ = self.lstm(x, h0)

        # Keep only one time step per sequence:
        # (batch_size, step_size, hidden_size) -> (batch_size, hidden_size)
        if l is not None:
            # Pick the last real (non-padded) step of every sequence.
            last_step = torch.as_tensor(l, dtype=torch.long, device=x.device) - 1
            batch_idx = torch.arange(x.size(0), device=x.device)
            x = x[batch_idx, last_step]
        else:
            # No lengths available: fall back to the final step.
            x = x[:, -1, :]

        # (batch_size, hidden_size) -> (batch_size, 1)
        x = self.linear(x)

        # (batch_size, 1) -> (batch_size,). Only the trailing dimension is
        # squeezed so that a batch of size 1 keeps its batch dimension.
        return x.squeeze(-1)
            

In [5]:
# Evaluation helper used by the training loop below.
def eval_net(net, data_loader, device="cpu"):
    """Return the accuracy of ``net`` over every batch in ``data_loader``.

    A logit greater than 0 is counted as class 1, otherwise class 0.

    Args:
        net: Model called as ``net(x, l=l)`` and returning one logit per row.
        data_loader: Iterable yielding ``(x, y, l)`` tensor triples.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        Fraction of correctly predicted labels as a Python float, or NaN when
        ``data_loader`` yields no samples.

    Note:
        The model is switched to eval mode and left there; callers that
        continue training must call ``net.train()`` again.
    """
    net.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for x, y, l in data_loader:
            x = x.to(device)
            y = y.to(device)
            l = l.to(device)

            # Flatten both sides so a batch of size 1 (which some models
            # return as a 0-dim tensor) is handled like any other batch.
            y_pred = (net(x, l=l) > 0).long().reshape(-1)
            y = y.reshape(-1)

            correct += (y_pred == y).sum().item()
            total += y.numel()

    if total == 0:
        # Accuracy is undefined without samples.
        return float("nan")
    return correct / total

In [6]:
# The embedding table needs `train_data.vocab_size + 1` rows because ID 0
# (the padding index) is counted in addition to the vocabulary IDs.
#
# Use the first CUDA device when one is available and fall back to the CPU
# otherwise, so the same cell runs unchanged on machines without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
NUM_EPOCHS = 10

net = SequenceTaggingNet(train_data.vocab_size + 1, num_layers=2)
net.to(device)
opt = optim.Adam(net.parameters())
loss_f = nn.BCEWithLogitsLoss()

# NOTE(review): the recorded run reports 1.0 train/validation accuracy and a
# near-zero loss from the very first epoch. That usually points to degenerate
# labels or label leakage in the data pipeline -- verify the DataSet notebook.
for epoch in range(NUM_EPOCHS):
    losses = []
    # eval_net() leaves the model in eval mode, so re-enable training mode
    # at the start of every epoch.
    net.train()
    for x, y, l in tqdm.tqdm(train_loader):
        x = x.to(device)
        y = y.to(device)
        l = l.to(device)

        opt.zero_grad()
        y_pred = net(x, l=l)
        # Flatten both sides so logits and targets line up even when the
        # batch holds a single sample.
        loss = loss_f(y_pred.reshape(-1), y.float().reshape(-1))
        loss.backward()
        opt.step()
        losses.append(loss.item())

    # Report the mean training loss plus train/validation accuracy per epoch.
    train_acc = eval_net(net, train_loader, device)
    val_acc = eval_net(net, test_loader, device)
    print(epoch, mean(losses), train_acc, val_acc)

100%|██████████| 782/782 [04:23<00:00,  2.97it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

0 0.014537069152554889 1.0 1.0


100%|██████████| 782/782 [04:50<00:00,  2.69it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

1 0.00010868345810513696 1.0 1.0


100%|██████████| 782/782 [04:48<00:00,  2.71it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

2 4.295565683496348e-05 1.0 1.0


100%|██████████| 782/782 [04:29<00:00,  2.91it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

3 2.2045045751026387e-05 1.0 1.0


100%|██████████| 782/782 [02:45<00:00,  4.71it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

4 1.2681106205802927e-05 1.0 1.0


100%|██████████| 782/782 [03:02<00:00,  4.27it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

5 7.760811455865011e-06 1.0 1.0


100%|██████████| 782/782 [02:51<00:00,  4.55it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

6 4.924616569321101e-06 1.0 1.0


100%|██████████| 782/782 [02:52<00:00,  4.54it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

7 3.1971080114304e-06 1.0 1.0


100%|██████████| 782/782 [02:51<00:00,  4.56it/s]
  0%|          | 0/782 [00:00<?, ?it/s]

8 2.104920375194573e-06 1.0 1.0


100%|██████████| 782/782 [03:34<00:00,  3.64it/s]


9 1.402404398590651e-06 1.0 1.0
