In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator

import spacy

import random
import math
import os

In [2]:
# Fix the random number generators up front so that a fresh
# "Restart & Run All" reproduces the same numbers.
SEED = 2222

torch.manual_seed(SEED)  # PyTorch generators
random.seed(SEED)        # Python's built-in `random` module

# Ask cuDNN to restrict itself to deterministic algorithms.
torch.backends.cudnn.deterministic = True

In [3]:
# Load the German and English spaCy pipelines; only their tokenizers are used
# by the process_* helpers.
# The short aliases 'de' / 'en' were shortcut links that spaCy v3 removed, so
# the pipelines are loaded by their full package names, which work on both
# spaCy v2 and v3. Install them once with:
#   python -m spacy download de_core_news_sm
#   python -m spacy download en_core_web_sm
spacy_de = spacy.load('de_core_news_sm')
spacy_en = spacy.load('en_core_web_sm')

In [4]:
def process_en(text):
    """Tokenize an English string with spaCy and return the token texts as a list."""
    spacy_tokens = spacy_en.tokenizer(text)
    return [token.text for token in spacy_tokens]
def process_de(text):
    """Tokenize a German string with spaCy and return the token texts as a list."""
    spacy_tokens = spacy_de.tokenizer(text)
    return [token.text for token in spacy_tokens]

In [5]:
# Source uses the German tokenizer and Target the English one. Both fields are
# otherwise configured identically: lower-cased text with '<sos>' / '<eos>'
# as the init and end-of-sequence tokens.
Source = Field(
    tokenize=process_de,
    init_token='<sos>',
    eos_token='<eos>',
    lower=True,
)
Target = Field(
    tokenize=process_en,
    init_token='<sos>',
    eos_token='<eos>',
    lower=True,
)

In [6]:
# Load the Multi30k train / validation / test splits. The '.de' side of each
# pair is processed by Source and the '.en' side by Target.
train_data, valid_data, test_data = Multi30k.splits(
    exts=('.de', '.en'),
    fields=(Source, Target),
)

In [7]:
# Number of examples in the (train, validation, test) splits.
tuple(map(len, (train_data, valid_data, test_data)))

(29000, 1014, 1000)

In [8]:
# Build each field's vocabulary from the training split only, using the same
# min_freq=2 threshold for both languages.
for vocab_field in (Source, Target):
    vocab_field.build_vocab(train_data, min_freq=2)

In [9]:
# Batch size handed to BucketIterator.splits when the data iterators are built.
BATCH_SIZE = 128

In [10]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [11]:
# Show which device type ('cuda' or 'cpu') was selected in the previous cell.
device.type

'cuda'

In [12]:
# Create one iterator per split; all three share the same batch size and
# target device.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=BATCH_SIZE,
    device=device,
)

In [13]:
class Encoder(nn.Module):
    """Single-layer GRU encoder that returns only its final hidden state.

    Args:
        input_dim: number of rows in the embedding table
            (presumably the source vocabulary size).
        emb_dim: size of each token embedding.
        hid_dim: size of the GRU hidden state.
        dropout: dropout probability applied to the embedded tokens.
    """

    def __init__(self, input_dim, emb_dim, hid_dim, dropout):
        super().__init__()
        self.input_dim, self.emb_dim, self.hid_dim = input_dim, emb_dim, hid_dim

        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.GRU(emb_dim, hid_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        """Embed ``src``, run it through the GRU and return the final hidden state.

        Args:
            src: token indices, [sent len, batch size].

        Returns:
            The GRU's final hidden state,
            [n layers * n directions, batch size, hid dim].
        """
        # token_vectors -> [sent len, batch size, emb dim]
        token_vectors = self.embedding(src)
        token_vectors = self.dropout(token_vectors)

        # The per-step outputs ([sent len, batch size, hid dim * n directions])
        # are not needed; only the final hidden state is passed on.
        _, final_hidden = self.rnn(token_vectors)
        return final_hidden

In [14]:
class Decoder(nn.Module):
    """Single-layer GRU decoder that processes one target token per call.

    The context tensor is concatenated onto the token embedding before the GRU
    and is concatenated again, together with the embedding and the new hidden
    state, before the output projection.

    Args:
        output_dim: number of rows in the embedding table and size of the
            prediction vector (presumably the target vocabulary size).
        emb_dim: size of each token embedding.
        hid_dim: size of the GRU hidden state and of the context vector.
        dropout: dropout probability applied to the embedded token.
    """

    def __init__(self, output_dim, emb_dim, hid_dim, dropout):
        super().__init__()
        self.emb_dim, self.hid_dim, self.output_dim = emb_dim, hid_dim, output_dim

        self.embedding = nn.Embedding(output_dim, emb_dim)
        # GRU input is the embedding concatenated with the context.
        self.rnn = nn.GRU(emb_dim + hid_dim, hid_dim)
        # The projection sees embedding + new hidden state + context.
        self.out = nn.Linear(emb_dim + hid_dim * 2, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden, context):
        """Run a single decoding step.

        Args:
            input: current token indices, [batch size].
            hidden: previous hidden state, [1, batch size, hid dim].
            context: context tensor, [1, batch size, hid dim].

        Returns:
            A ``(prediction, hidden)`` tuple where ``prediction`` is
            [batch size, output dim] and ``hidden`` is the updated hidden
            state, [1, batch size, hid dim].
        """
        # Add a length-1 sequence axis: [batch size] -> [1, batch size]
        step_tokens = input.unsqueeze(0)

        # embedded -> [1, batch size, emb dim]
        embedded = self.dropout(self.embedding(step_tokens))

        # rnn_input -> [1, batch size, emb dim + hid dim]
        rnn_input = torch.cat((embedded, context), dim=2)

        # With a single step, the GRU's output sequence carries nothing beyond
        # the returned hidden state, so it is discarded.
        _, hidden = self.rnn(rnn_input, hidden)

        # Drop the length-1 leading axis from each tensor and join them along
        # the feature axis: [batch size, emb dim + hid dim * 2]
        features = torch.cat(
            (embedded.squeeze(0), hidden.squeeze(0), context.squeeze(0)),
            dim=1,
        )

        return self.out(features), hidden