In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

from torchtext.legacy.datasets import Multi30k
from torchtext.legacy.data import Field, BucketIterator

import spacy
import numpy as np

import random
import math
import time

In [None]:
# Shell escape: print the version of the CUDA compiler installed in this runtime.
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Wed_Jul_22_19:09:09_PDT_2020
Cuda compilation tools, release 11.0, V11.0.221
Build cuda_11.0_bu.TC445_37.28845127_0


In [None]:
def setSeed(seed=1234):
    """Seed every random number generator this notebook draws from.

    The same value is handed to Python's ``random`` module, NumPy's global
    generator, and PyTorch's CPU and CUDA generators.

    Args:
        seed: Integer seed shared by all generators (defaults to 1234).
    """
    seeders = (
        torch.manual_seed,
        random.seed,
        torch.cuda.manual_seed,
        np.random.seed,
    )
    for seed_fn in seeders:
        seed_fn(seed)

In [None]:
# Seed all generators with setSeed's default value (1234).
setSeed()

In [None]:
# Install the German spaCy pipeline, pinned to release 3.0.0 via the direct URL.
# NOTE(review): spacy is already imported in the first cell; if this install
# upgrades spaCy itself, the kernel may need a restart before spacy.load — confirm.
!pip3 install https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.0.0/de_core_news_sm-3.0.0.tar.gz

Collecting https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.0.0/de_core_news_sm-3.0.0.tar.gz
[?25l  Downloading https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.0.0/de_core_news_sm-3.0.0.tar.gz (19.3MB)
[K     |████████████████████████████████| 19.3MB 6.6MB/s 
[?25hCollecting spacy<3.1.0,>=3.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/1b/d8/0361bbaf7a1ff56b44dca04dace54c82d63dad7475b7d25ea1baefafafb2/spacy-3.0.6-cp37-cp37m-manylinux2014_x86_64.whl (12.8MB)
[K     |████████████████████████████████| 12.8MB 204kB/s 
Collecting catalogue<2.1.0,>=2.0.3
  Downloading https://files.pythonhosted.org/packages/9c/10/dbc1203a4b1367c7b02fddf08cb2981d9aa3e688d398f587cea0ab9e3bec/catalogue-2.0.4-py3-none-any.whl
Collecting spacy-legacy<3.1.0,>=3.0.4
  Downloading https://files.pythonhosted.org/packages/8d/67/d4002a18e26bf29b17ab563ddb55232b445ab6a02f97bf17d1345ff34d3f/spacy_legacy-3.0.5-py2.py3-none-any.whl
Collecting 

In [None]:
# Install the English spaCy pipeline, pinned to release 3.0.0 via the direct URL.
# NOTE(review): spacy is already imported in the first cell; if this install
# changes the spaCy version, the kernel may need a restart before spacy.load — confirm.
!pip3 install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz

Collecting https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz
[?25l  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz (13.7MB)
[K     |████████████████████████████████| 13.7MB 223kB/s 
Building wheels for collected packages: en-core-web-sm
  Building wheel for en-core-web-sm (setup.py) ... [?25l[?25hdone
  Created wheel for en-core-web-sm: filename=en_core_web_sm-3.0.0-cp37-none-any.whl size=13704313 sha256=311316dee1ddd4ce99969fe758328ce8d8fe449c452fa5a3f717e8477b560f40
  Stored in directory: /root/.cache/pip/wheels/91/2b/a1/d83336e8dfaacbbcdfc805b2c7195dd3ea10d507396fe31cac
Successfully built en-core-web-sm
Installing collected packages: en-core-web-sm
  Found existing installation: en-core-web-sm 2.2.5
    Uninstalling en-core-web-sm-2.2.5:
      Successfully uninstalled en-core-web-sm-2.2.5
Successfully installed en-core-web-sm-3.0.0


In [None]:
# Load the English and German spaCy pipelines. In the cells visible here only
# their `.tokenizer` attribute is used (by tokenize_en / tokenize_de).
en=spacy.load('en_core_web_sm')
de=spacy.load('de_core_news_sm')

In [None]:
def tokenize_en(text):
    """Split English ``text`` into a list of token strings.

    Uses the tokenizer of the module-level spaCy pipeline ``en``.
    """
    tokens = []
    for token in en.tokenizer(text):
        tokens.append(token.text)
    return tokens
def tokenize_de(text):
    """Split German ``text`` into token strings and return them in reverse order.

    Uses the tokenizer of the module-level spaCy pipeline ``de``. The last
    token of the sentence comes first in the returned list.
    """
    tokens = [token.text for token in de.tokenizer(text)]
    tokens.reverse()
    return tokens

In [None]:
# Quick sanity check of the English tokenizer on a short sentence.
tokenize_en('hello my friend')

['hello', 'my', 'friend']

In [None]:
# Field definitions for the German source and English target sentences.
# The start/end sentinels are written with angle brackets. Because the fields
# lower-case their input, a plain 'sos' / 'eos' sentinel would share a
# vocabulary entry with any real "SOS" / "EOS" word in the corpus.
SRC = Field(tokenize=tokenize_de, lower=True, init_token='<sos>', eos_token='<eos>')
TRG = Field(tokenize=tokenize_en, lower=True, init_token='<sos>', eos_token='<eos>')

In [None]:
# IPython introspection (`??`) on the SRC field: an interactive debugging aid
# that has no effect on the rest of the notebook.
SRC??

In [None]:
# Load the Multi30k train/validation/test splits. `exts` and `fields` are
# paired positionally: '.de' files are processed by SRC, '.en' files by TRG.
train_data,valid_data,test_data= Multi30k.splits(fields=(SRC,TRG),exts=('.de','.en'))

downloading training.tar.gz


training.tar.gz: 100%|██████████| 1.21M/1.21M [00:00<00:00, 1.84MB/s]


downloading validation.tar.gz


validation.tar.gz: 100%|██████████| 46.3k/46.3k [00:00<00:00, 284kB/s]


downloading mmt_task1_test2016.tar.gz


mmt_task1_test2016.tar.gz: 100%|██████████| 66.2k/66.2k [00:00<00:00, 274kB/s]


In [None]:
# Inspect the tokenised source side of the first training example; the tokens
# appear in reverse order because tokenize_de reverses its output.
vars(train_data.examples[0])['src']

['.',
 'büsche',
 'vieler',
 'nähe',
 'der',
 'in',
 'freien',
 'im',
 'sind',
 'männer',
 'weiße',
 'junge',
 'zwei']

In [None]:
# Build both vocabularies from the training split only, with a minimum token
# frequency of 2 (min_freq=2).
SRC.build_vocab(train_data,min_freq=2)
TRG.build_vocab(train_data,min_freq=2)

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device= torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device, type(device))

cuda <class 'torch.device'>


In [None]:
# Create one BucketIterator per split in a single call; every iterator yields
# batches of 128 examples placed on `device`.
train_loader,valid_loader,test_loader=BucketIterator.splits((train_data,valid_data,test_data),batch_size=128,device=device)
# (An earlier draft built the validation and test iterators one by one;
# BucketIterator.splits above covers all three.)

In [None]:
# Peek at the source tensor of the first training batch, then stop iterating.
for batch in train_loader:
 print(batch.src)
 break


tensor([[  2,   2,   2,  ...,   2,   2,   2],
        [  4,   4,   4,  ...,   4,   4,   4],
        [290, 123, 422,  ..., 588, 332,   0],
        ...,
        [  1,   1,   1,  ...,   1,   1,   1],
        [  1,   1,   1,  ...,   1,   1,   1],
        [  1,   1,   1,  ...,   1,   1,   1]], device='cuda:0')


In [None]:
class Encoder(nn.Module):
    """LSTM encoder that condenses a source batch into its final recurrent state.

    Args:
        input_dim: Number of rows in the embedding table (source vocabulary size).
        embed_dim: Width of each token embedding.
        hid_dim: Hidden size of the LSTM.
        n_layers: Number of stacked LSTM layers.
        dropout: Dropout probability, used on the embeddings and between LSTM layers.
    """

    def __init__(self, input_dim, embed_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embed_dim)
        self.rnn = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hid_dim,
            num_layers=n_layers,
            dropout=dropout,
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        """Encode ``src`` and return the LSTM's final ``(hidden, cell)`` pair.

        ``src`` holds token indices. The LSTM is not ``batch_first``, so the
        first axis is treated as the sequence axis. The per-step LSTM outputs
        are discarded.
        """
        embedded = self.embedding(src)
        _, final_state = self.rnn(self.dropout(embedded))
        hidden, cell = final_state
        return hidden, cell


In [None]:
class Decoder(nn.Module):
    """Single-step LSTM decoder that scores the next target token.

    Args:
        output_dim: Target vocabulary size; also the width of the output scores.
        embed_dim: Width of each token embedding.
        hid_dim: Hidden size of the LSTM.
        n_layers: Number of stacked LSTM layers.
        dropout: Dropout probability, used on the embeddings and between LSTM layers.
    """

    def __init__(self, output_dim, embed_dim, hid_dim, n_layers, dropout):
        super().__init__()
        # Kept as an attribute so callers can size their output buffers.
        self.output_dim = output_dim
        self.embedding = nn.Embedding(output_dim, embed_dim)
        self.rnn = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hid_dim,
            num_layers=n_layers,
            dropout=dropout,
        )
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hid_dim, output_dim)

    def forward(self, src, hidden, cell):
        """Run one decoding step.

        Args:
            src: Token indices for the current step, one per batch element.
            hidden: LSTM hidden state carried over from the previous step.
            cell: LSTM cell state carried over from the previous step.

        Returns:
            ``(prediction, hidden, cell)``: vocabulary scores for the next
            token plus the updated LSTM state.
        """
        # The LSTM expects a leading sequence axis; add one of length 1.
        step_tokens = src.unsqueeze(0)
        embedded = self.embedding(step_tokens)
        output, next_state = self.rnn(self.dropout(embedded), (hidden, cell))
        hidden, cell = next_state
        scores = self.fc(output.squeeze(0))
        return scores, hidden, cell
  

    

In [None]:
class seq2seq(nn.Module):
    """Encoder-decoder wrapper that decodes a target sequence step by step.

    Args:
        encoder: Module mapping a source batch to an initial ``(hidden, cell)`` state.
        decoder: Module with an ``output_dim`` attribute whose forward takes
            ``(tokens, hidden, cell)`` and returns ``(scores, hidden, cell)``.
        device: Device on which the output buffer is allocated.
    """

    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src, trg, teacher_force=0.5):
        """Produce vocabulary scores for every target position after the first.

        Args:
            src: Source token indices; axis 1 is the batch axis.
            trg: Target token indices; axis 0 is the time axis.
            teacher_force: Probability, per time step, of feeding the
                ground-truth token rather than the model's own argmax.

        Returns:
            Tensor of shape ``(trg_len, batch, vocab)``. Row 0 is left as
            zeros because the first target token is only ever used as input.
        """
        seqlen = trg.shape[0]
        batchsize = src.shape[1]
        vocabsize = self.decoder.output_dim
        # Allocate on the device handed to the constructor rather than on a
        # notebook-level global, so the module works on its own.
        predictions = torch.zeros((seqlen, batchsize, vocabsize), device=self.device)

        hidden, cell = self.encoder(src)

        # The first decoder input is the first target token of every sequence.
        decoder_input = trg[0]
        for i in range(1, seqlen):
            # Decode exactly once per predicted position; no extra step is
            # run after the last one.
            pred, hidden, cell = self.decoder(decoder_input, hidden, cell)
            predictions[i] = pred
            if random.random() < teacher_force:
                decoder_input = trg[i]
            else:
                decoder_input = pred.argmax(1)
        return predictions

In [None]:
# Vocabulary sizes set the size of each embedding table and of the decoder's
# output layer.
input_dim,output_dim=len(SRC.vocab),len(TRG.vocab)
# Model hyperparameters, shared by encoder and decoder.
hid_dim=512
embed_dim=256
n_layers=2
dropout=0.5
encoder=Encoder(input_dim,embed_dim,hid_dim,n_layers,dropout)
decoder=Decoder(output_dim,embed_dim,hid_dim,n_layers,dropout)
model=seq2seq(encoder,decoder,device).to(device=device)

# Adam with its default settings; padding positions in the target are
# excluded from the loss via ignore_index.
optimizor=optim.Adam(model.parameters())
pad_index=TRG.vocab.stoi[TRG.pad_token]
criterion=nn.CrossEntropyLoss(ignore_index=pad_index)

In [None]:
def init_weight(m):
    """Fill every parameter of ``m`` in place with values drawn from U(-0.08, 0.08).

    Args:
        m: An ``nn.Module``; all parameters reachable through
            ``m.named_parameters()`` are re-initialised.
    """
    for _, param in m.named_parameters():
        # The trailing underscore is the in-place initialiser; the
        # un-suffixed ``nn.init.uniform`` is deprecated and emits a warning.
        nn.init.uniform_(param.data, -0.08, 0.08)
# Re-initialise the model's parameters in place by applying init_weight
# through nn.Module.apply.
model.apply(init_weight)

  This is separate from the ipykernel package so we can avoid doing imports until


seq2seq(
  (encoder): Encoder(
    (embedding): Embedding(7853, 256)
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.5)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Decoder(
    (embedding): Embedding(5893, 256)
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.5)
    (dropout): Dropout(p=0.5, inplace=False)
    (fc): Linear(in_features=512, out_features=5893, bias=True)
  )
)

In [None]:
# Display the training iterator object's repr.
train_loader

<torchtext.legacy.data.iterator.BucketIterator at 0x7efd6e35f890>

In [None]:
def train(model,iterator,optimizor,criterion,clip=1):
    """Train ``model`` for one pass over ``iterator``.

    Args:
        model: Callable as ``model(batch.src, batch.trg)``, returning scores
            whose last axis is the vocabulary.
        iterator: Sized iterable of batches exposing ``.src`` and ``.trg``.
        optimizor: Optimizer updating ``model``'s parameters.
        criterion: Loss taking ``(scores, targets)``.
        clip: Maximum gradient norm passed to ``clip_grad_norm_``.

    Returns:
        ``(mean_loss, perplexity)`` as Python floats, where perplexity is
        ``exp(mean_loss)``.

    Raises:
        ValueError: If ``iterator`` contains no batches.
    """
    n_batches = len(iterator)
    if n_batches == 0:
        raise ValueError("train() needs an iterator with at least one batch")

    model.train()
    total_loss = 0.0
    for batch in iterator:
        optimizor.zero_grad()
        pred = model(batch.src, batch.trg)
        output_dim = pred.shape[-1]
        # Position 0 is skipped on both sides: it holds the start token in
        # the target and is never predicted.
        pred = pred[1:].reshape(-1, output_dim)
        trg = batch.trg[1:].reshape(-1)
        loss = criterion(pred, trg)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizor.step()
        # Accumulate a plain float so no tensors are kept across batches.
        total_loss += loss.item()

    mean_loss = total_loss / n_batches
    # math.exp, not torch.exp: mean_loss is a Python float, not a tensor.
    return mean_loss, math.exp(mean_loss)


In [None]:

# Run a single training epoch and keep the two values train() returns.
loss,prep = train(model,train_loader,optimizor,criterion,)

In [None]:
# Show both values returned by the training epoch above.
print(loss, prep)

3.969642135015143 52.96557292373904


In [None]:
# Exponential of the mean loss (the perplexity), computed with NumPy.
np.exp(loss)

64.63932510357404

In [None]:
def evaluate(model,iterator,optimizor,criterion):
    """Compute the mean loss and perplexity of ``model`` over ``iterator``.

    Teacher forcing is switched off (ratio 0), so every decoding step is fed
    the model's own previous prediction and the loss is not affected by the
    random ground-truth feeding used in training.

    Args:
        model: Callable as ``model(batch.src, batch.trg, teacher_force)``.
        iterator: Sized iterable of batches exposing ``.src`` and ``.trg``.
        optimizor: Unused; kept so existing positional calls keep working.
        criterion: Loss taking ``(scores, targets)``.

    Returns:
        ``(mean_loss, perplexity)`` as Python floats, where perplexity is
        ``exp(mean_loss)``.

    Raises:
        ValueError: If ``iterator`` contains no batches.
    """
    n_batches = len(iterator)
    if n_batches == 0:
        raise ValueError("evaluate() needs an iterator with at least one batch")

    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for batch in iterator:
            pred = model(batch.src, batch.trg, 0)  # 0 = no teacher forcing
            output_dim = pred.shape[-1]
            # Position 0 is skipped on both sides: it holds the start token
            # in the target and is never predicted.
            pred = pred[1:].reshape(-1, output_dim)
            trg = batch.trg[1:].reshape(-1)
            total_loss += criterion(pred, trg).item()

    mean_loss = total_loss / n_batches
    return mean_loss, math.exp(mean_loss)




In [None]:
# Train for a fixed number of epochs, checkpointing whenever the validation
# loss reaches a new minimum.
epochs = 5
best_loss = float('inf')

for epoch in range(epochs):
    loss_train, prep_train = train(model, train_loader, optimizor, criterion)
    loss_valid, prep_valid = evaluate(model, valid_loader, optimizor, criterion)

    # Strict comparison: a tie with the best loss does not overwrite the file.
    if loss_valid < best_loss:
        best_loss = loss_valid
        torch.save(model.state_dict(), 'seq2seq.pt')

    print(f'\tTrain Loss: {loss_train:.3f} | Train PPL: {math.exp(loss_train):7.3f}')
    print(f'\t Val. Loss: {loss_valid:.3f} |  Val. PPL: {math.exp(loss_valid):7.3f}')

	Train Loss: 3.117 | Train PPL:  22.586
	 Val. Loss: 3.058 |  Val. PPL:  21.278
	Train Loss: 3.006 | Train PPL:  20.206
	 Val. Loss: 3.084 |  Val. PPL:  21.843
	Train Loss: 2.912 | Train PPL:  18.397
	 Val. Loss: 3.013 |  Val. PPL:  20.346
	Train Loss: 2.844 | Train PPL:  17.193
	 Val. Loss: 2.948 |  Val. PPL:  19.067
	Train Loss: 2.747 | Train PPL:  15.603
	 Val. Loss: 2.990 |  Val. PPL:  19.881


In [None]:

# Restore the checkpoint saved at the lowest validation loss. map_location
# lets the file load on the current device even if it was saved on another.
model.load_state_dict(torch.load('seq2seq.pt', map_location=device))

# evaluate() takes (model, iterator, optimizor, criterion) and returns a
# (mean loss, perplexity) pair, so pass all four arguments and unpack both.
test_loss, test_ppl = evaluate(model, test_loader, optimizor, criterion)

print(f'| Test Loss: {test_loss:.3f} | Test PPL: {test_ppl:7.3f} |')