https://github.com/bentrevett/pytorch-seq2seq/blob/master/3%20-%20Neural%20Machine%20Translation%20by%20Jointly%20Learning%20to%20Align%20and%20Translate.ipynb

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torchtext.legacy.datasets import Multi30k
from torchtext.legacy.data import Field, BucketIterator

import spacy
import numpy as np

import random
import math
import time

In [4]:
# One seed shared by every random number generator used in this notebook,
# so that repeated runs start from the same random state.
seed = 1234

# Seed, in turn: Python's `random`, NumPy's global RNG, PyTorch's default
# generator, and PyTorch's CUDA generator.
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_rng(seed)

# Ask cuDNN to restrict itself to deterministic algorithms.
torch.backends.cudnn.deterministic = True

In [5]:
# Load the small German and English spaCy pipelines (German first, then
# English); the tokenizer helpers in this notebook use their `.tokenizer`.
spacy_de, spacy_en = (
    spacy.load('de_core_news_sm'),
    spacy.load('en_core_web_sm'),
)

In [6]:
def tokenize_de(text):
    """Tokenize a German string into a list of token strings.

    Runs only the tokenizer of the `spacy_de` pipeline on `text` and returns
    the `.text` of every resulting token, in order.
    """
    doc = spacy_de.tokenizer(text)
    return [token.text for token in doc]

def tokenize_en(text):
    """Tokenize an English string into a list of token strings.

    Runs only the tokenizer of the `spacy_en` pipeline on `text` and returns
    the `.text` of every resulting token, in order.
    """
    doc = spacy_en.tokenizer(text)
    return [token.text for token in doc]

In [7]:
# Source (German) and target (English) fields. Text is lower-cased, tokenized
# with the matching spaCy tokenizer, and wrapped in explicit start-of-sequence
# and end-of-sequence markers.
# The markers must be distinct, non-empty strings: with '' used for both, the
# start and end markers would be the same token and could not be told apart.
SRC = Field(tokenize=tokenize_de, init_token='<sos>', eos_token='<eos>', lower=True)
TRG = Field(tokenize=tokenize_en, init_token='<sos>', eos_token='<eos>', lower=True)

In [9]:
# Load the Multi30k train/validation/test splits (downloaded on first use).
# The '.de' side of each pair is processed by SRC and the '.en' side by TRG.
train_data, valid_data, test_data = Multi30k.splits(
    exts=('.de', '.en'),
    fields=(SRC, TRG),
)

downloading training.tar.gz


C:\Users\2469l\private\20221112\.data\multi30k\training.tar.gz: 100%|██████████████| 1.21M/1.21M [00:05<00:00, 229kB/s]


downloading validation.tar.gz


C:\Users\2469l\private\20221112\.data\multi30k\validation.tar.gz: 100%|███████████| 46.3k/46.3k [00:00<00:00, 75.4kB/s]


downloading mmt_task1_test2016.tar.gz


C:\Users\2469l\private\20221112\.data\multi30k\mmt_task1_test2016.tar.gz: 100%|███| 66.2k/66.2k [00:00<00:00, 77.0kB/s]


In [10]:
# Build the source and target vocabularies from the training split only,
# passing min_freq=2 as the minimum token frequency for both.
for _field in (SRC, TRG):
    _field.build_vocab(train_data, min_freq=2)

In [11]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [12]:
# Number of examples per batch, shared by all three iterators.
BATCH_SIZE = 128

# One BucketIterator per split; `device` is passed through so the iterators
# know where to place the batches they produce.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=BATCH_SIZE,
    device=device,
)

In [None]:
class Encoder(nn.Module):
    """Bidirectional GRU encoder.

    Embeds the source token indices, runs them through a single-layer
    bidirectional GRU, and projects the concatenated final forward and
    backward hidden states through a linear layer followed by tanh.

    Args:
        input_dim: Number of entries in the embedding table
            (presumably the source vocabulary size).
        embdim: Size of each embedding vector.
        enc_hid_dim: Hidden size of the GRU, per direction.
        dec_hid_dim: Output size of the linear projection applied to the
            final hidden states (presumably the decoder's hidden size).
        dropout: Dropout probability applied to the embeddings.
    """

    def __init__(self, input_dim, embdim, enc_hid_dim, dec_hid_dim, dropout):
        super().__init__()

        self.embedding = nn.Embedding(input_dim, embdim)
        # bidirectional=True: the sequence is read in both directions.
        self.rnn = nn.GRU(embdim, enc_hid_dim, bidirectional=True)
        # The two directions' final states are concatenated before this layer,
        # hence the enc_hid_dim * 2 input features.
        self.fc = nn.Linear(enc_hid_dim * 2, dec_hid_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        """Encode a batch of source sequences.

        Args:
            src: Integer tensor of token indices, [src len, batch size].

        Returns:
            A tuple ``(outputs, hidden)``:
            outputs -- [src len, batch size, enc_hid_dim * 2], the GRU output
                at every position with both directions concatenated.
            hidden -- [batch size, dec_hid_dim], tanh of the linear projection
                of the concatenated final forward/backward hidden states.
        """
        # src = [src len, batch size]
        embedded = self.dropout(self.embedding(src))
        # embedded = [src len, batch size, emb dim]
        outputs, hidden = self.rnn(embedded)
        # outputs = [src len, batch size, enc_hid_dim * num_directions]
        # hidden  = [n_layers * num_directions (2), batch size, enc_hid_dim]
        # hidden is stacked as [forward_1, backward_1, forward_2, backward_2, ...]
        # hidden[-2, :, :] -> last forward state, hidden[-1, :, :] -> last backward state
        final_states = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        hidden = torch.tanh(self.fc(final_states))
        # hidden = [batch size, dec_hid_dim]

        return outputs, hidden