In [None]:
# Colab-only: mount Google Drive at /content/drive so files stored there are reachable from the notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd '/content/drive/MyDrive/LSTM Translation'

/content/drive/MyDrive/LSTM Translation


In [None]:
!pip install torchtext==0.8.1
!python -m spacy download de_core_news_sm

In [None]:
import spacy
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torchtext.data import Field, BucketIterator
from torchtext.datasets import Multi30k
import warnings
warnings.filterwarnings("ignore")

In [None]:
from model import Encoder, Decoder, seq2seq
from utils import Valid_Accuracy, Train_model

The `seq2seq` model requires the following parameters: `embedding_dim`, `enc_input_dim`, `dec_input_dim`, `hidden_dim`, `output_dim`, and `num_layers`.


In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# Stored as a torch.device object rather than a bare string so the value has one
# consistent type wherever it is handed to iterators or to `model.to(...)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# spaCy pipelines that provide the tokenizer for each language.
spacy_de = spacy.load('de_core_news_sm')
spacy_en = spacy.load('en_core_web_sm')


def tokenize_de(text):
  """Return the list of token strings produced by the German spaCy tokenizer for `text`."""
  pieces = []
  for token in spacy_de.tokenizer(text):
    pieces.append(token.text)
  return pieces


def tokenize_en(text):
  """Return the list of token strings produced by the English spaCy tokenizer for `text`."""
  pieces = []
  for token in spacy_en.tokenizer(text):
    pieces.append(token.text)
  return pieces


# Both fields lowercase their text and use '<sos>' / '<eos>' as the start and end tokens.
german = Field(
    tokenize=tokenize_de,
    lower=True,
    init_token='<sos>',
    eos_token='<eos>',
)
english = Field(
    tokenize=tokenize_en,
    lower=True,
    init_token='<sos>',
    eos_token='<eos>',
)

In [None]:
# Load Multi30k with German (.de) bound to the `german` field and English (.en) to the `english` field.
train_data, val_data, test_data = Multi30k.splits(exts=('.de', '.en'), fields=(german, english))

# Vocabularies are built from the training split only; tokens that occur fewer than
# twice are dropped and max_size caps each vocabulary at 15000.
german.build_vocab(train_data, max_size=15000, min_freq=2)
english.build_vocab(train_data, max_size=15000, min_freq=2)


# BucketIterator batches examples of similar source length together, so each batch
# needs little padding. `device` comes from the device cell earlier in the notebook.
train_iterator = BucketIterator(train_data, batch_size=32,
    sort_within_batch = True,
    sort_key = lambda x: len(x.src),
    device = device
)

# train=False marks this as an evaluation iterator: the data is not reshuffled on
# every pass, so validation batches are the same from epoch to epoch.
valid_iterator = BucketIterator(val_data, batch_size=32,
    train = False,
    sort_within_batch = True,
    sort_key = lambda x: len(x.src),
    device = device
)

#training

In [None]:
# Architecture hyperparameters passed to the seq2seq constructor.
embedding_dim = 300
hidden_dim = 100
num_layers = 2

# Vocabulary-derived sizes: enc_input_dim is the German vocabulary size, while
# dec_input_dim and output_dim are both the English vocabulary size.
enc_input_dim = len(german.vocab)
dec_input_dim = output_dim = len(english.vocab)

In [None]:
# Build the seq2seq model from the hyperparameters above and move it to the selected device.
model = seq2seq(
    embedding_dim=embedding_dim,
    enc_input_dim=enc_input_dim,
    dec_input_dim=dec_input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
    num_layers=num_layers,
).to(device)

In [None]:
# Batches are bucketed by source length only, so English targets inside a batch are
# padded to a common length. ignore_index keeps those padding positions out of the
# loss (and out of its mean), so the model is not rewarded for predicting <pad>.
# NOTE(review): assumes Train_model passes the raw target indices, padding included,
# to loss_fn -- confirm in utils.py. If padding is already masked there, this is a no-op.
pad_idx = english.vocab.stoi[english.pad_token]
loss_fn = nn.CrossEntropyLoss(ignore_index=pad_idx)

# Adam over all model parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Train for 10 epochs with the helper from utils; the run reports train and
# validation loss/accuracy after each epoch.
Train_model(
    model,
    loss_fn,
    epochs=10,
    optimizer=optimizer,
    train_iterator=train_iterator,
    valid_iterator=valid_iterator,
)

100%|██████████| 907/907 [00:34<00:00, 26.35it/s]


Train Loss: 5.121735572814941, Train Accuracy: 0.3657561279601163
Valid Loss: 4.418412685394287, Valid Accuracy: 0.44009424503709643


100%|██████████| 907/907 [00:34<00:00, 26.60it/s]


Train Loss: 4.257118225097656, Train Accuracy: 0.46750447400912837
Valid Loss: 4.231466770172119, Valid Accuracy: 0.47261643014191485


100%|██████████| 907/907 [00:33<00:00, 26.85it/s]


Train Loss: 4.113120079040527, Train Accuracy: 0.49039662277476376
Valid Loss: 4.1236467361450195, Valid Accuracy: 0.49055566660003996


100%|██████████| 907/907 [00:34<00:00, 25.95it/s]


Train Loss: 4.0036115646362305, Train Accuracy: 0.5061883591571763
Valid Loss: 4.071510314941406, Valid Accuracy: 0.4978048293753742


100%|██████████| 907/907 [00:33<00:00, 26.91it/s]


Train Loss: 3.9321024417877197, Train Accuracy: 0.5169485059512249
Valid Loss: 4.015469551086426, Valid Accuracy: 0.5049155908639523


100%|██████████| 907/907 [00:33<00:00, 27.04it/s]


Train Loss: 3.8765571117401123, Train Accuracy: 0.5260962110058612
Valid Loss: 4.017780780792236, Valid Accuracy: 0.5057282327156805


100%|██████████| 907/907 [00:33<00:00, 27.17it/s]


Train Loss: 3.822195053100586, Train Accuracy: 0.5345675285561616
Valid Loss: 4.053606986999512, Valid Accuracy: 0.5036772113640943


100%|██████████| 907/907 [00:34<00:00, 26.12it/s]


Train Loss: 3.774026393890381, Train Accuracy: 0.5425920149719277
Valid Loss: 4.026890754699707, Valid Accuracy: 0.5066066066066066


100%|██████████| 907/907 [00:33<00:00, 27.30it/s]


Train Loss: 3.7317144870758057, Train Accuracy: 0.5493868454254301
Valid Loss: 4.011508941650391, Valid Accuracy: 0.5097970956833101


100%|██████████| 907/907 [00:33<00:00, 27.19it/s]


Train Loss: 3.6924564838409424, Train Accuracy: 0.5556084703730899
Valid Loss: 4.063658237457275, Valid Accuracy: 0.5034701267350634


In [None]:
# Save only the model's state_dict (not the whole module object) to 'model.pt',
# a relative path that resolves against the current working directory.
torch.save(model.state_dict(), 'model.pt')