In [23]:
import argparse
import math
import time
import dill as pickle
from tqdm import tqdm
import easydict

import torch
import torch.nn.functional as F
import torch.optim as optim
from torchtext.data import Field, Dataset, BucketIterator
from torchtext.datasets import TranslationDataset

import transformer.Constants as Constants

In [20]:
# Run configuration, exposed with attribute access (opt.batch_size, ...).
opt = easydict.EasyDict(dict(
    # Pickle opened by the data-loader helpers through opt.data_pkl.
    data_pkl='m30k_deen_shr.pkl',
    # Paths handed to TranslationDataset together with exts ('.src', '.trg').
    train_path='./bpe_deen/deen-train',
    val_path='./bpe_deen/deen-val',

    # Training length and the batch size given to BucketIterator.
    epoch=500,
    batch_size=2048,

    # Model sizes.
    d_model=512,
    d_inner_hid=2048,
    d_k=64,
    d_v=64,

    n_head=8,
    n_layers=6,
    warmup_step=4000,

    dropout=0.1,

    # Names for logging / checkpoint output -- TODO confirm how the training
    # code (not visible in this notebook section) consumes them.
    log='m30k_deen_shr',
    save_model='trained',
))

In [9]:
# Use the CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [16]:
def prepare_dataloaders_from_bpe_files(args, device):
    """Build train/validation iterators from BPE-encoded parallel text files.

    The pickle at ``args.data_pkl`` supplies the settings (``max_len``) and a
    single vocabulary field that is used for both the source and the target
    side. Examples whose ``src`` or ``trg`` is longer than ``max_len`` are
    filtered out.

    Args:
        args: Options object providing ``batch_size``, ``data_pkl``,
            ``train_path`` and ``val_path``. It is updated in place with
            ``max_token_seq_len``, ``src_pad_idx``, ``trg_pad_idx``,
            ``src_vocab_size`` and ``trg_vocab_size``.
        device: Device passed to both iterators.

    Returns:
        Tuple ``(train_iterator, val_iterator)`` of ``BucketIterator`` objects.
    """
    # The options are read from (and written to) the `args` parameter rather
    # than a notebook-global `opt`, so the function works for whatever object
    # the caller passes in.
    batch_size = args.batch_size

    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    # The context manager closes the file handle once loading is done.
    with open(args.data_pkl, 'rb') as pkl_file:
        data = pickle.load(pkl_file)
    max_len = data['settings'].max_len
    field = data['vocab']
    fields = (field, field)  # same field for the source and target columns

    def filter_examples_with_length(x):
        """Keep an example only if both sides fit within ``max_len`` tokens."""
        return len(vars(x)['src']) <= max_len and len(vars(x)['trg']) <= max_len

    train = TranslationDataset(
        fields=fields,
        path=args.train_path,
        exts=('.src', '.trg'),
        filter_pred=filter_examples_with_length)
    val = TranslationDataset(
        fields=fields,
        path=args.val_path,
        exts=('.src', '.trg'),
        filter_pred=filter_examples_with_length)

    # +2 presumably leaves room for begin/end-of-sentence tokens -- TODO confirm.
    args.max_token_seq_len = max_len + 2
    args.src_pad_idx = args.trg_pad_idx = field.vocab.stoi[Constants.PAD_WORD]
    args.src_vocab_size = args.trg_vocab_size = len(field.vocab)

    train_iterator = BucketIterator(train, batch_size=batch_size, device=device, train=True)
    val_iterator = BucketIterator(val, batch_size=batch_size, device=device)
    return train_iterator, val_iterator

In [17]:
# NOTE(review): the output saved with this cell is a FileNotFoundError, and the
# two names bound here are rebound by the later prepare_dataloaders(opt, device)
# call further down in this notebook.
train_iterator, val_iterator = prepare_dataloaders_from_bpe_files(opt, device)

FileNotFoundError: [Errno 2] No such file or directory: './bpe_deen/bpe_vocab.pkl'

# 데이터 로더 

In [26]:
def prepare_dataloaders(opt, device):
    """Build train/validation iterators from a preprocessed pickle.

    Args:
        opt: Options object providing ``batch_size`` and ``data_pkl``. It is
            updated in place with ``max_token_seq_len``, ``src_pad_idx``,
            ``trg_pad_idx``, ``src_vocab_size`` and ``trg_vocab_size``.
        device: Device passed to both iterators.

    Returns:
        Tuple ``(train_iterator, val_iterator)`` of ``BucketIterator`` objects.
    """
    batch_size = opt.batch_size

    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    # The context manager closes the file handle once loading is done.
    with open(opt.data_pkl, 'rb') as pkl_file:
        data = pickle.load(pkl_file)

    src_field = data['vocab']['src']
    trg_field = data['vocab']['trg']

    opt.max_token_seq_len = data['settings'].max_len
    opt.src_pad_idx = src_field.vocab.stoi[Constants.PAD_WORD]
    opt.trg_pad_idx = trg_field.vocab.stoi[Constants.PAD_WORD]

    opt.src_vocab_size = len(src_field.vocab)
    opt.trg_vocab_size = len(trg_field.vocab)

    #========= Preparing Datasets =========#
    fields = {'src': src_field, 'trg': trg_field}

    train = Dataset(examples=data['train'], fields=fields)
    val = Dataset(examples=data['valid'], fields=fields)

    train_iterator = BucketIterator(train, batch_size=batch_size, device=device, train=True)
    val_iterator = BucketIterator(val, batch_size=batch_size, device=device)

    return train_iterator, val_iterator


In [27]:
# Build the train/validation iterators from the pickle named by opt.data_pkl;
# prepare_dataloaders also records pad indices and vocabulary sizes on opt.
train_iterator, val_iterator = prepare_dataloaders(opt, device)

# Scaled Dot Product Attention

In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [30]:
class ScaledDotProductAttention(nn.Module):
    ''' Scaled Dot-Product Attention

    Computes ``softmax((q / temperature) @ k^T) @ v`` with dropout applied to
    the attention weights.

    Args:
        temperature: Divisor applied to ``q`` before the dot product.
        attn_dropout: Dropout probability applied to the attention weights.
    '''

    def __init__(self, temperature, attn_dropout=0.1):
        super().__init__()
        self.temperature = temperature
        self.dropout = nn.Dropout(attn_dropout)

    def forward(self, q, k, v, mask=None):
        ''' Apply attention.

        Args:
            q, k, v: Query, key and value tensors whose last two axes are
                ``(length, feature)``; any leading axes must be compatible
                under ``torch.matmul`` broadcasting.
            mask: Optional tensor broadcastable to the score matrix; positions
                where ``mask == 0`` are filled with ``-1e9`` before softmax.

        Returns:
            Tuple ``(output, attn)``: the attention-weighted values and the
            (post-dropout) attention weights.
        '''
        # Swap the last two axes of k instead of hard-coded axes 2 and 3, so
        # both 4-D (batch, head, len, dim) and 3-D (batch, len, dim) inputs
        # are accepted; for 4-D inputs this is the same transpose as before.
        scores = torch.matmul(q / self.temperature, k.transpose(-2, -1))

        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)

        attn = self.dropout(F.softmax(scores, dim=-1))
        output = torch.matmul(attn, v)

        return output, attn