In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
cd '/content/drive/MyDrive/translation'

/content/drive/MyDrive/translation


In [None]:
!pip install -U torchtext==0.8.1

In [None]:
!python -m spacy download de_core_news_sm

In [None]:
import spacy
import random
from tqdm import tqdm
import torch
import torch.nn as nn
from torchtext.data import Field, BucketIterator
from torchtext.datasets import Multi30k
from torchtext.data.metrics import bleu_score
import utils
from utils import translation
from utils import score

In [6]:
import warnings
warnings.filterwarnings('ignore')

In [7]:
spacy_de = spacy.load('de_core_news_sm')
spacy_en = spacy.load('en_core_web_sm')
def process_en(text):
    return [tok.text for tok in spacy_en.tokenizer(text)]
def process_de(text):
    return [tok.text for tok in spacy_de.tokenizer(text)]

In [8]:
german = Field(tokenize=process_de, init_token='<sos>', eos_token='<eos>', lower=True)
english = Field(tokenize=process_en, init_token='<sos>', eos_token='<eos>', lower=True)

In [9]:
train_data, valid_data, test_data = Multi30k.splits(exts=('.de', '.en'), fields=(german, english))

In [10]:
german.build_vocab(train_data, min_freq=2)
english.build_vocab(train_data, min_freq=2)

In [11]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data), batch_size=32, device=device)

In [12]:
class Encoder(nn.Module):
  def __init__(self, input_dim, embedding_dim, hidden_dim, num_layers):
    super().__init__()
    self.embedding = nn.Embedding(input_dim, embedding_dim)
    self.rnn = nn.GRU(embedding_dim, hidden_dim)

  def forward(self, x):
    embedding = self.embedding(x)
    output, hidden = self.rnn(embedding)
    return hidden

In [13]:
class Decoder(nn.Module):
  def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, num_layers):
    super().__init__()
    self.embedding = nn.Embedding(input_dim, embedding_dim)
    self.rnn = nn.GRU(embedding_dim, hidden_dim)
    self.dense = nn.Linear(hidden_dim, output_dim)

  def forward(self, x, hidden):
    embedding = self.embedding(x)
    output, hidden = self.rnn(embedding)
    prediction = self.dense(output)
    return prediction, hidden

In [14]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target):
        batch_size = target.shape[1]
        target_len = target.shape[0]
        target_vocab_size = len(english.vocab)

        
        outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(device)
        hidden = self.encoder(source)

        x = target[0]
        for t in range(1, target_len):
            x = x.unsqueeze(0)
            output, hidden = self.decoder(x, hidden)
            output = output.squeeze(0)
            outputs[t] = output
            best_guess = output.argmax(1)
            x = target[t] if random.random() < 0.5 else best_guess  #teacher force ratio
        return outputs

In [15]:
encoder_input_dim = len(german.vocab)
embedding_dim = 512
decoder_input_dim = len(english.vocab)
output_dim = len(english.vocab)
hidden_dim = 1024
num_layers = 2

In [16]:
encoder = Encoder(encoder_input_dim, embedding_dim, hidden_dim, num_layers).to(device)
decoder = Decoder(input_dim=decoder_input_dim, embedding_dim=embedding_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers).to(device)

In [17]:
model = Seq2Seq(encoder, decoder)
model

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(7853, 512)
    (rnn): GRU(512, 1024)
  )
  (decoder): Decoder(
    (embedding): Embedding(5893, 512)
    (rnn): GRU(512, 1024)
    (dense): Linear(in_features=1024, out_features=5893, bias=True)
  )
)

In [18]:
#testing the forward pass with this data
X = [data.src for data in train_iterator]
y = [data.trg for data in train_iterator]

In [19]:
#here it is very much visible that the forward pass is working perfectly fine
output = model(X[900], y[700])
output.shape

torch.Size([22, 32, 5893])

Now its the time to train the model

In [20]:
num_epochs = 15
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
for epoch in tqdm(range(num_epochs)):
  model.train()
  for batch in train_iterator:
    src = batch.src
    trg = batch.trg
    
    optimizer.zero_grad()
    output = model(src, trg)
    output = output.view(-1, output.shape[2])
    loss = loss_fn(output, trg.view(-1))
    loss.backward()
    nn.utils.clip_grad_norm(model.parameters(), max_norm=1)
    optimizer.step()

  print(f'epoch: {epoch+1}, loss: {loss}')

  7%|▋         | 1/15 [01:03<14:43, 63.14s/it]

epoch: 1, loss: 3.315805673599243


 13%|█▎        | 2/15 [02:07<13:46, 63.59s/it]

epoch: 2, loss: 3.227419853210449


 20%|██        | 3/15 [03:09<12:35, 62.95s/it]

epoch: 3, loss: 2.4845540523529053


 27%|██▋       | 4/15 [04:11<11:30, 62.74s/it]

epoch: 4, loss: 3.14363956451416


 33%|███▎      | 5/15 [05:13<10:25, 62.56s/it]

epoch: 5, loss: 3.552471876144409


 40%|████      | 6/15 [06:16<09:22, 62.49s/it]

epoch: 6, loss: 3.029020309448242


 47%|████▋     | 7/15 [07:18<08:19, 62.38s/it]

epoch: 7, loss: 2.953685760498047


 53%|█████▎    | 8/15 [08:20<07:16, 62.30s/it]

epoch: 8, loss: 2.8701467514038086


 60%|██████    | 9/15 [09:22<06:13, 62.28s/it]

epoch: 9, loss: 3.6454198360443115


 67%|██████▋   | 10/15 [10:24<05:10, 62.19s/it]

epoch: 10, loss: 2.820600748062134


 73%|███████▎  | 11/15 [11:27<04:08, 62.24s/it]

epoch: 11, loss: 3.6152312755584717


 80%|████████  | 12/15 [12:28<03:06, 62.11s/it]

epoch: 12, loss: 2.886676788330078


 87%|████████▋ | 13/15 [13:31<02:04, 62.11s/it]

epoch: 13, loss: 2.5457518100738525


 93%|█████████▎| 14/15 [14:32<01:02, 62.04s/it]

epoch: 14, loss: 3.2798430919647217


100%|██████████| 15/15 [15:34<00:00, 62.32s/it]

epoch: 15, loss: 2.761242151260376





In [None]:
torch.save(model.state_dict(), 'model_3.pt')