<a href="https://colab.research.google.com/github/SumedhGanpatye/Seq2Seq/blob/main/seq2seq.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U spacy
!python -m spacy download de_core_news_sm
!pip install torchtext==0.6.0

Collecting de-core-news-sm==3.7.0
  Downloading https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.7.0/de_core_news_sm-3.7.0-py3-none-any.whl (14.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.6/14.6 MB[0m [31m83.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: de-core-news-sm
Successfully installed de-core-news-sm-3.7.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('de_core_news_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
Collecting torchtext==0.6.0
  Downloading torchtext-0.6.0-py3-none-any.whl (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.2/64.2 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.10

In [None]:
# Sanity check: print the torchtext version that was actually imported
# (the install cell pins 0.6.0).
import torchtext
print(torchtext.__version__)

0.6.0


In [None]:
# Colab only: mount Google Drive at /content/drive. The dataset CSVs are later read
# from /content/drive/MyDrive/seq2seq_dataset.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.datasets import Multi30k
import numpy as np
import spacy
import random
from torch.utils.tensorboard import SummaryWriter
from torchtext.data import BucketIterator
from torchtext.data.metrics import bleu_score

In [None]:
import spacy
# Load the German and English spaCy pipelines used by tokenizer_ger / tokenizer_eng.
# NOTE(review): the install cell only downloads de_core_news_sm; en_core_web_sm is
# presumably preinstalled on Colab — confirm for other environments.
spacy_ger = spacy.load('de_core_news_sm')
spacy_eng = spacy.load('en_core_web_sm')

In [None]:
def tokenizer_ger(text):
  """Return the token strings produced by the German spaCy tokenizer for ``text``."""
  token_texts = []
  for token in spacy_ger.tokenizer(text):
    token_texts.append(token.text)
  return token_texts
def tokenizer_eng(text):
  """Return the token strings produced by the English spaCy tokenizer for ``text``."""
  token_texts = []
  for token in spacy_eng.tokenizer(text):
    token_texts.append(token.text)
  return token_texts

In [None]:
def translate_sentence(model, sentence, german, english, device, max_length=50):
    """Greedy-decode a translation and return it as a list of target tokens.

    Args:
        model: Encoder-decoder model exposing ``encoder(src) -> (hidden, cell)`` and
            ``decoder(prev_token, hidden, cell) -> (logits, hidden, cell)``, as the
            ``Seq2Seq`` class in this notebook does.
        sentence: Raw source string, or an already tokenized sequence of strings.
        german: Source field providing ``tokenize``, ``init_token``, ``eos_token``
            and ``vocab.stoi``.
        english: Target field providing ``vocab.stoi`` and ``vocab.itos``.
        device: Device on which the index tensors are created.
        max_length: Maximum number of target tokens to generate.

    Returns:
        The generated target tokens, without the leading ``<sos>`` and without the
        trailing ``<eos>`` (when one was generated).
    """
    model.eval()

    # Field has `tokenize`, not `tokenizer`; pre-tokenized input is used as-is.
    if isinstance(sentence, str):
        tokens = german.tokenize(sentence)
    else:
        tokens = list(sentence)
    # Lower-case only when the field itself does, so lookups match how the vocab was built.
    if getattr(german, "lower", False):
        tokens = [token.lower() for token in tokens]
    tokens = [german.init_token, *tokens, german.eos_token]

    text_to_indices = [german.vocab.stoi[token] for token in tokens]
    sentence_tensor = torch.LongTensor(text_to_indices).unsqueeze(1).to(device)

    eos_idx = english.vocab.stoi["<eos>"]
    outputs = [english.vocab.stoi["<sos>"]]

    with torch.no_grad():
        # Encode once, then feed the decoder its own previous prediction step by step.
        # Calling model(src, trg) here would apply random teacher forcing and return an
        # all-zero first step for this notebook's Seq2Seq.
        hidden, cell = model.encoder(sentence_tensor)
        for _ in range(max_length):
            previous_word = torch.LongTensor([outputs[-1]]).to(device)
            output, hidden, cell = model.decoder(previous_word, hidden, cell)
            best_guess = output.argmax(1).item()
            outputs.append(best_guess)

            if best_guess == eos_idx:
                break

    # Drop <sos>; drop the final token only if it really is <eos>, so that hitting
    # max_length does not silently discard a real word.
    generated = outputs[1:]
    if generated and generated[-1] == eos_idx:
        generated = generated[:-1]
    return [english.vocab.itos[idx] for idx in generated]

def bleu(data, model, german, english, device):
    """Compute the corpus BLEU score of the model's translations over ``data``.

    Args:
        data: Iterable of examples exposing ``src`` and ``trg`` attributes.
        model, german, english, device: Forwarded unchanged to ``translate_sentence``.

    Returns:
        The value returned by ``bleu_score`` for the collected predictions, with each
        example's ``trg`` used as its single reference.
    """
    # Special markers are not part of the references, so they must not be scored.
    special_tokens = {"<sos>", "<eos>"}
    targets = []
    outputs = []

    for example in data:
        src = vars(example)["src"]
        trg = vars(example)["trg"]

        # The translate_sentence definitions in later cells tokenize a raw string, so
        # hand them one even when the example stores a token list.
        sentence = src if isinstance(src, str) else " ".join(src)
        prediction = translate_sentence(model, sentence, german, english, device)

        # Those later definitions return one space-joined string; bleu_score needs a
        # token list (a bare string would be scored character by character).
        if isinstance(prediction, str):
            prediction = prediction.split()
        prediction = [token for token in prediction if token not in special_tokens]

        outputs.append(prediction)
        targets.append([trg])

    return bleu_score(outputs, targets)


In [None]:
# def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
#     print("=> Saving checkpoint")
#     torch.save(state, filename)

# def load_checkpoint(checkpoint, model, optimizer):
#     print("=> Loading checkpoint")
#     model.load_state_dict(checkpoint["state_dict"])
#     optimizer.load_state_dict(checkpoint["optimizer"])

#     for param_group in optimizer.param_groups:
#         param_group["lr"] = learning_rate

In [None]:
# german = torchtext.data.Field(tokenize=tokenizer_ger,lower=True,init_token='<sos>',eos_token='<eos>')
# english = torchtext.data.Field(tokenize=tokenizer_eng,lower=True,init_token='<sos>',eos_token='<eos>')

In [None]:
# train_data,validation_data,test_data = Multi30k.splits(exts=('de','en'),fields=(german,english))

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# Source (German) and target (English) fields: spaCy tokenization, with <sos>/<eos>
# markers added around every sequence. Neither field sets lower=True, so tokens and
# vocabularies keep their original casing.
SRC_FIELD = torchtext.data.Field(tokenize='spacy', tokenizer_language='de_core_news_sm', init_token='<sos>', eos_token='<eos>')
TRG_FIELD = torchtext.data.Field(tokenize='spacy', tokenizer_language='en_core_web_sm', init_token='<sos>', eos_token='<eos>')

# Read the train/validation/test CSV files from the mounted Drive folder and expose
# them as examples with `src` and `trg` attributes.
# NOTE(review): skip_header is not passed; if the CSV files start with a header row it
# is presumably loaded as a regular example — confirm against the files.
train_data, valid_data, test_data = torchtext.data.TabularDataset.splits(
    path='/content/drive/MyDrive/seq2seq_dataset', format='csv',
    train='train.csv', validation='valid.csv', test='test.csv',
    fields=[('src', SRC_FIELD), ('trg', TRG_FIELD)]
)

# Build both vocabularies from the training split only, with min_freq=2.
SRC_FIELD.build_vocab(train_data, min_freq=2)
TRG_FIELD.build_vocab(train_data, min_freq=2)



In [None]:
# Aliases for the same Field objects (not copies); the model and helper code below
# refer to the fields as `german` (source) and `english` (target).
german = SRC_FIELD
english = TRG_FIELD

In [None]:
# Build the vocabularies again, this time also passing max_size=10000.
# NOTE(review): german/english are the same objects as SRC_FIELD/TRG_FIELD, so this
# presumably replaces the vocabularies built right after loading the data — confirm
# that the earlier build_vocab calls are redundant.
german.build_vocab(train_data,max_size=10000,min_freq=2)
english.build_vocab(train_data,max_size=10000,min_freq=2)

In [None]:
class Encoder(nn.Module):
    """LSTM encoder that turns a batch of source token indices into a final state.

    Args:
        input_size: Number of rows in the embedding table (source vocabulary size).
        embedding_size: Dimension of each token embedding.
        hidden_size: LSTM hidden/cell state size.
        num_layers: Number of stacked LSTM layers.
        p: Dropout probability, applied to the embeddings and passed to the LSTM.
    """

    def __init__(self, input_size, embedding_size, hidden_size, num_layers, p):
        super().__init__()
        self.dropout = nn.Dropout(p)
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=p)

    def forward(self, x):
        """Encode ``x`` and return only the LSTM's final ``(hidden, cell)`` state.

        The LSTM is built without ``batch_first``, so ``x`` is expected time-major;
        the per-step outputs are discarded.
        """
        embedded = self.embedding(x)
        embedded = self.dropout(embedded)
        _, final_state = self.rnn(embedded)
        hidden, cell = final_state
        return hidden, cell


class Decoder(nn.Module):
    """LSTM decoder that advances one time step per call.

    Args:
        input_size: Number of rows in the embedding table (target vocabulary size).
        embedding_size: Dimension of each token embedding.
        hidden_size: LSTM hidden/cell state size.
        output_size: Number of scores produced per position by the final linear layer.
        num_layers: Number of stacked LSTM layers.
        p: Dropout probability, applied to the embeddings and passed to the LSTM.
    """

    def __init__(
        self, input_size, embedding_size, hidden_size, output_size, num_layers, p
    ):
        super().__init__()
        self.dropout = nn.Dropout(p)
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=p)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell):
        """Run one decoding step.

        ``x`` holds one token index per batch element; a length-1 leading axis is
        added so the LSTM sees a single time step, and removed again from the scores.

        Returns:
            ``(predictions, hidden, cell)``: the scores from ``self.fc`` for this step
            and the updated LSTM state.
        """
        step_input = x.unsqueeze(0)
        embedded = self.dropout(self.embedding(step_input))

        step_output, state = self.rnn(embedded, (hidden, cell))
        next_hidden, next_cell = state

        scores = self.fc(step_output).squeeze(0)
        return scores, next_hidden, next_cell


class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes step by step with optional teacher forcing.

    Args:
        encoder: Module mapping a source batch to an initial ``(hidden, cell)`` state.
        decoder: Module called as ``decoder(x, hidden, cell)`` and returning
            ``(predictions, hidden, cell)``. It must expose its output layer as ``fc``
            (as ``Decoder`` does) so the number of output scores can be read from it.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_force_ratio=0.5):
        """Return decoder scores for every target position.

        Args:
            source: Source index tensor with the batch on axis 1.
            target: Target index tensor with time on axis 0; ``target[0]`` is fed as
                the first decoder input.
            teacher_force_ratio: Probability, drawn once per step for the whole batch,
                of feeding the ground-truth token instead of the model's own argmax.

        Returns:
            Tensor of shape ``(target_len, batch_size, vocab_size)``. Row 0 is left as
            zeros because decoding starts at step 1.
        """
        batch_size = source.shape[1]
        target_len = target.shape[0]
        # Take the output width and device from the model and its input rather than
        # from notebook globals (`english`, `device`), so the module is self-contained.
        target_vocab_size = self.decoder.fc.out_features

        outputs = torch.zeros(
            target_len, batch_size, target_vocab_size, device=source.device
        )

        hidden, cell = self.encoder(source)

        x = target[0]

        for t in range(1, target_len):
            output, hidden, cell = self.decoder(x, hidden, cell)
            outputs[t] = output
            best_guess = output.argmax(1)

            x = target[t] if random.random() < teacher_force_ratio else best_guess

        return outputs


In [None]:
# Optimisation settings for this training run.
num_epochs = 10
learning_rate = 0.001
batch_size = 64

# NOTE(review): load_model is assigned but not read anywhere in the visible notebook.
load_model = False
# Prefer the GPU when available. Seq2Seq.forward also reads this module-level `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Vocabulary sizes determine the embedding tables and the decoder's output layer.
input_size_encoder = len(german.vocab)
input_size_decoder = len(english.vocab)
output_size = len(english.vocab)
# Model dimensions passed to Encoder / Decoder further down in this cell.
encoder_embedding_size = 512
decoder_embedding_size = 512
hidden_size = 1024
num_layers = 2
enc_dropout = 0.1
dec_dropout = 0.1

def translate_sentence(model, sentence, german, english, device, max_length=50):
    """Greedy-decode a translation of ``sentence`` and return it as one string.

    Args:
        model: Model exposing ``encoder(src) -> (hidden, cell)`` and
            ``decoder(prev_token, hidden, cell) -> (logits, hidden, cell)``.
        sentence: Raw source string, or an already tokenized sequence of strings.
        german: Source field providing ``tokenize``, ``init_token``, ``eos_token``
            and ``vocab.stoi``.
        english: Target field providing ``vocab.stoi`` and ``vocab.itos``.
        device: Device on which the index tensors are created.
        max_length: Maximum number of target tokens to generate.

    Returns:
        The generated tokens joined by single spaces. The string starts with
        ``<sos>`` and ends with ``<eos>`` when the model produced one.
    """
    model.eval()

    # Accept pre-tokenized input too, so examples from a dataset can be passed directly.
    if isinstance(sentence, str):
        tokens = german.tokenize(sentence)
    else:
        tokens = list(sentence)
    # Lower-case only when the field itself does, so lookups match how the vocab was built.
    if getattr(german, "lower", False):
        tokens = [token.lower() for token in tokens]
    tokens = [german.init_token, *tokens, german.eos_token]

    source_indices = [german.vocab.stoi[token] for token in tokens]
    sentence_tensor = torch.tensor(source_indices).unsqueeze(1).to(device)

    # Look the end marker up once instead of on every decoding step.
    eos_idx = english.vocab.stoi["<eos>"]
    outputs = [english.vocab.stoi["<sos>"]]

    with torch.no_grad():
        hidden, cell = model.encoder(sentence_tensor)
        for _ in range(max_length):
            # Feed the previously generated token back in as the next input.
            previous_word = torch.tensor([outputs[-1]]).to(device)
            output, hidden, cell = model.decoder(previous_word, hidden, cell)
            best_guess = output.argmax(1).item()

            outputs.append(best_guess)

            if best_guess == eos_idx:
                break

    translated_sentence = [english.vocab.itos[idx] for idx in outputs]
    return ' '.join(translated_sentence)

# TensorBoard writer for the per-batch training loss; `step` counts batches across epochs.
writer = SummaryWriter("runs/loss_plot")
step = 0

# Batches group examples of similar source length (sort_key) and are sorted within
# each batch; tensors are created directly on `device`.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=batch_size,
    sort_within_batch=True,
    sort_key=lambda x: len(x.src),
    device=device,
)

encoder_net = Encoder(
    input_size_encoder, encoder_embedding_size, hidden_size, num_layers, enc_dropout
).to(device)

decoder_net = Decoder(
    input_size_decoder,
    decoder_embedding_size,
    hidden_size,
    output_size,
    num_layers,
    dec_dropout,
).to(device)

model = Seq2Seq(encoder_net, decoder_net).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Padding positions in the target do not contribute to the loss.
pad_idx = english.vocab.stoi["<pad>"]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)


# Fixed German sentence translated before every epoch as a qualitative progress check.
sentence = "ein boot mit mehreren männern darauf wird von einem großen pferdegespann ans ufer gezogen."

losses = []
try:
    for epoch in range(num_epochs):
        print(f"[Epoch {epoch + 1} / {num_epochs}]")

        model.eval()
        translated_sentence = translate_sentence(
            model, sentence, german, english, device, max_length=50
        )
        print(f"Translated example sentence: \n {translated_sentence}")
        model.train()

        # Sum (not mean) of the per-batch losses over the epoch.
        epoch_loss = 0
        for batch_idx, batch in enumerate(train_iterator):
            inp_data = batch.src.to(device)
            target = batch.trg.to(device)
            optimizer.zero_grad()
            output = model(inp_data, target)
            # Skip position 0 (the <sos> slot, never predicted) and flatten time and
            # batch so the loss sees one row of scores per target token.
            output = output[1:].reshape(-1, output.shape[2])
            target = target[1:].reshape(-1)

            loss = criterion(output, target)
            epoch_loss += loss.item()
            loss.backward()
            # Clip the gradient norm to 1 before the optimizer step.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            optimizer.step()

            writer.add_scalar("Training loss", loss.item(), global_step=step)
            step += 1

        print(f"Epoch Loss: {epoch_loss}")
        losses.append(epoch_loss)
finally:
    # Flush and release the event file even if training is interrupted.
    writer.close()

# BLEU on test examples 1..99 (index 0 is not included in the slice).
score = bleu(test_data[1:100], model, german, english, device)
print(f"Bleu score {score*100:.2f}")


[Epoch 1 / 10]
Translated example sentence: 
 <sos> battling battling placard placard curly curly daring mountainside daring removed removed hotdogs hotdogs kilt dyed key troop placard fencing curly stacked sculpting flipped motorcycles sculpting motorcycles motorcycles lunch steaks steaks placard curly curly daring hotdogs hotdogs binoculars bulls arrange Capris basket basket curly curly daring everyone casino removed curly curly
Epoch Loss: 647.3358535766602
[Epoch 2 / 10]
Translated example sentence: 
 <sos> ' , ' . ' , ' . ' ] <eos>
Epoch Loss: 521.9679753780365
[Epoch 3 / 10]
Translated example sentence: 
 <sos> [ ' People ' , ' are ' , ' in ' , ' a ' , ' . ' , ' . ' ] <eos>
Epoch Loss: 495.5521486401558
[Epoch 4 / 10]
Translated example sentence: 
 <sos> [ ' Several ' , ' people ' , ' are ' , ' a ' , ' . ' ] <eos>
Epoch Loss: 479.67281997203827
[Epoch 5 / 10]
Translated example sentence: 
 <sos> [ ' People ' , ' are ' , ' in ' , ' a ' , ' . ' , ' . ' ] <eos>
Epoch Loss: 465.33853

KeyboardInterrupt: 

In [None]:
import torch
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchtext.data import BucketIterator
from torch.nn import CrossEntropyLoss

# Configuration settings
# Optimisation settings for this training run.
num_epochs = 10
learning_rate = 0.001
batch_size = 64

# NOTE(review): load_model is assigned but not read anywhere in the visible notebook.
load_model = False
# Prefer the GPU when available. Seq2Seq.forward also reads this module-level `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Vocabulary sizes determine the embedding tables and the decoder's output layer.
input_size_encoder = len(german.vocab)
input_size_decoder = len(english.vocab)
output_size = len(english.vocab)
# This run uses smaller embeddings (300 vs 512) and stronger dropout (0.5 vs 0.1)
# than the first training cell.
encoder_embedding_size = 300
decoder_embedding_size = 300
hidden_size = 1024
num_layers = 2
enc_dropout = 0.5
dec_dropout = 0.5

# Define the translation function
def translate_sentence(model, sentence, german, english, device, max_length=50):
    """Greedy-decode a translation of ``sentence`` and return it as one string.

    Args:
        model: Model exposing ``encoder(src) -> (hidden, cell)`` and
            ``decoder(prev_token, hidden, cell) -> (logits, hidden, cell)``.
        sentence: Raw source string, or an already tokenized sequence of strings.
        german: Source field providing ``tokenize``, ``init_token``, ``eos_token``
            and ``vocab.stoi``.
        english: Target field providing ``vocab.stoi`` and ``vocab.itos``.
        device: Device on which the index tensors are created.
        max_length: Maximum number of target tokens to generate.

    Returns:
        The generated tokens joined by single spaces. The string starts with
        ``<sos>`` and ends with ``<eos>`` when the model produced one.
    """
    model.eval()

    # Accept pre-tokenized input too, so examples from a dataset can be passed directly.
    if isinstance(sentence, str):
        tokens = german.tokenize(sentence)
    else:
        tokens = list(sentence)
    # Lower-case only when the field itself does, so lookups match how the vocab was built.
    if getattr(german, "lower", False):
        tokens = [token.lower() for token in tokens]
    tokens = [german.init_token, *tokens, german.eos_token]

    source_indices = [german.vocab.stoi[token] for token in tokens]
    sentence_tensor = torch.tensor(source_indices).unsqueeze(1).to(device)

    # Look the end marker up once instead of on every decoding step.
    eos_idx = english.vocab.stoi["<eos>"]
    outputs = [english.vocab.stoi["<sos>"]]

    with torch.no_grad():
        hidden, cell = model.encoder(sentence_tensor)
        for _ in range(max_length):
            # Feed the previously generated token back in as the next input.
            previous_word = torch.tensor([outputs[-1]]).to(device)
            output, hidden, cell = model.decoder(previous_word, hidden, cell)
            best_guess = output.argmax(1).item()

            outputs.append(best_guess)

            if best_guess == eos_idx:
                break

    translated_sentence = [english.vocab.itos[idx] for idx in outputs]
    return ' '.join(translated_sentence)  # Join tokens to form the complete string

# Training configuration
# TensorBoard writer for the per-batch training loss; `step` counts batches across epochs.
writer = SummaryWriter("runs/loss_plot")
step = 0

# Batches group examples of similar source length (sort_key) and are sorted within
# each batch; tensors are created directly on `device`.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=batch_size,
    sort_within_batch=True,
    sort_key=lambda x: len(x.src),
    device=device,
)

encoder_net = Encoder(
    input_size_encoder, encoder_embedding_size, hidden_size, num_layers, enc_dropout
).to(device)

decoder_net = Decoder(
    input_size_decoder,
    decoder_embedding_size,
    hidden_size,
    output_size,
    num_layers,
    dec_dropout,
).to(device)

model = Seq2Seq(encoder_net, decoder_net).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Padding positions in the target do not contribute to the loss.
pad_idx = english.vocab.stoi["<pad>"]
criterion = CrossEntropyLoss(ignore_index=pad_idx)

# Example sentence to translate
sentence = "ein boot mit mehreren männern darauf wird von einem großen pferdegespann ans ufer gezogen."

# Training loop
losses = []
try:
    for epoch in range(num_epochs):
        print(f"[Epoch {epoch + 1} / {num_epochs}]")

        model.eval()
        translated_sentence = translate_sentence(
            model, sentence, german, english, device, max_length=50
        )
        print(f"Translated example sentence: \n {translated_sentence}")
        model.train()

        # Sum (not mean) of the per-batch losses over the epoch.
        epoch_loss = 0
        for batch_idx, batch in enumerate(train_iterator):
            inp_data = batch.src.to(device)
            target = batch.trg.to(device)
            optimizer.zero_grad()
            output = model(inp_data, target)
            # Skip position 0 (the <sos> slot, never predicted) and flatten time and
            # batch so the loss sees one row of scores per target token.
            output = output[1:].reshape(-1, output.shape[2])
            target = target[1:].reshape(-1)

            loss = criterion(output, target)
            epoch_loss += loss.item()
            loss.backward()
            # Clip the gradient norm to 1 before the optimizer step.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            optimizer.step()

            writer.add_scalar("Training loss", loss.item(), global_step=step)
            step += 1

        print(f"Epoch Loss: {epoch_loss}")
        losses.append(epoch_loss)
finally:
    # Flush and release the event file even if training is interrupted.
    writer.close()

# BLEU on test examples 1..99 (index 0 is not included in the slice).
score = bleu(test_data[1:100], model, german, english, device)
print(f"Bleu score {score*100:.2f}")


In [None]:
# num_epochs = 10
# learning_rate = 0.001
# batch_size = 64

# load_model = False
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# input_size_encoder = len(german.vocab)
# input_size_decoder = len(english.vocab)
# output_size = len(english.vocab)
# encoder_embedding_size = 300
# decoder_embedding_size = 300
# hidden_size = 1024
# num_layers = 2
# enc_dropout = 0.5
# dec_dropout = 0.5

# def translate_sentence(model, sentence, german, english, device, max_length=50):

#     model.eval()
#     tokens = german.tokenize(sentence)
#     tokens = [token.lower() for token in tokens]
#     tokens.insert(0, german.init_token)
#     tokens.append(german.eos_token)


#     sentence_tensor = torch.tensor([german.vocab.stoi[token] for token in tokens]).unsqueeze(1).to(device)


#     with torch.no_grad():
#         hidden, cell = model.encoder(sentence_tensor)

#     outputs = [english.vocab.stoi["<sos>"]]
#     for _ in range(max_length):
#         previous_word = torch.tensor([outputs[-1]]).to(device)

#         with torch.no_grad():
#             output, hidden, cell = model.decoder(previous_word, hidden, cell)
#             best_guess = output.argmax(1).item()

#         outputs.append(best_guess)


#         if output.argmax(1).item() == english.vocab.stoi["<eos>"]:
#             break

#     translated_sentence = [english.vocab.itos[idx] for idx in outputs]
#     return translated_sentence[1:]


# writer = SummaryWriter(f"runs/loss_plot")
# step = 0

# train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
#     (train_data, valid_data, test_data),
#     batch_size=batch_size,
#     sort_within_batch=True,
#     sort_key=lambda x: len(x.src),
#     device=device,
# )

# encoder_net = Encoder(
#     input_size_encoder, encoder_embedding_size, hidden_size, num_layers, enc_dropout
# ).to(device)

# decoder_net = Decoder(
#     input_size_decoder,
#     decoder_embedding_size,
#     hidden_size,
#     output_size,
#     num_layers,
#     dec_dropout,
# ).to(device)

# model = Seq2Seq(encoder_net, decoder_net).to(device)
# optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# pad_idx = english.vocab.stoi["<pad>"]
# # criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)


# sentence = "ein boot mit mehreren männern darauf wird von einem großen pferdegespann ans ufer gezogen."

# losses = []
# for epoch in range(num_epochs):
#     print(f"[Epoch {epoch} / {num_epochs}]")



#     model.eval()
#     translated_sentence = translate_sentence(
#         model, sentence, german, english, device, max_length=50
#     )
#     print(f"Translated example sentence: \n {translated_sentence}")
#     model.train()

#     epoch_loss = 0
#     for batch_idx, batch in enumerate(train_iterator):

#         inp_data = batch.src.to(device)
#         target = batch.trg.to(device)
#         optimizer.zero_grad()
#         output = model(inp_data, target)
#         output = output[1:].reshape(-1, output.shape[2])
#         target = target[1:].reshape(-1)

#         # optimizer.zero_grad()
#         loss = criterion(output, target)
#         epoch_loss += loss.item()
#         loss.backward()
#         torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
#         optimizer.step()

#         writer.add_scalar("Training loss", loss, global_step=step)
#         step += 1

#     print(epoch_loss)
#     losses.append(epoch_loss)

# score = bleu(test_data[1:100], model, german, english, device)
# print(f"Bleu score {score*100:.2f}")

In [None]:
import torch
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchtext.data import BucketIterator
from torch.nn import CrossEntropyLoss

# Configuration settings
# Optimisation settings for this training run.
num_epochs = 10
learning_rate = 0.001
batch_size = 64

# NOTE(review): load_model is assigned but not read anywhere in the visible notebook.
load_model = False
# Prefer the GPU when available. Seq2Seq.forward also reads this module-level `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Vocabulary sizes determine the embedding tables and the decoder's output layer.
input_size_encoder = len(german.vocab)
input_size_decoder = len(english.vocab)
output_size = len(english.vocab)
# Same model dimensions as the previous 300-dimensional / dropout-0.5 training cell.
encoder_embedding_size = 300
decoder_embedding_size = 300
hidden_size = 1024
num_layers = 2
enc_dropout = 0.5
dec_dropout = 0.5

# Define the translation function
def translate_sentence(model, sentence, german, english, device, max_length=50):
    """Greedy-decode a translation of ``sentence`` and return it as one string.

    Args:
        model: Model exposing ``encoder(src) -> (hidden, cell)`` and
            ``decoder(prev_token, hidden, cell) -> (logits, hidden, cell)``.
        sentence: Raw source string, or an already tokenized sequence of strings.
        german: Source field providing ``tokenize``, ``init_token``, ``eos_token``
            and ``vocab.stoi``.
        english: Target field providing ``vocab.stoi`` and ``vocab.itos``.
        device: Device on which the index tensors are created.
        max_length: Maximum number of target tokens to generate.

    Returns:
        The generated tokens joined by single spaces. The string starts with
        ``<sos>`` and ends with ``<eos>`` when the model produced one.
    """
    model.eval()

    # Accept pre-tokenized input too, so examples from a dataset can be passed directly.
    if isinstance(sentence, str):
        tokens = german.tokenize(sentence)
    else:
        tokens = list(sentence)
    # Lower-case only when the field itself does, so lookups match how the vocab was built.
    if getattr(german, "lower", False):
        tokens = [token.lower() for token in tokens]
    tokens = [german.init_token, *tokens, german.eos_token]

    source_indices = [german.vocab.stoi[token] for token in tokens]
    sentence_tensor = torch.tensor(source_indices).unsqueeze(1).to(device)

    # Look the end marker up once instead of on every decoding step.
    eos_idx = english.vocab.stoi["<eos>"]
    outputs = [english.vocab.stoi["<sos>"]]

    with torch.no_grad():
        hidden, cell = model.encoder(sentence_tensor)
        for _ in range(max_length):
            # Feed the previously generated token back in as the next input.
            previous_word = torch.tensor([outputs[-1]]).to(device)
            output, hidden, cell = model.decoder(previous_word, hidden, cell)
            best_guess = output.argmax(1).item()

            outputs.append(best_guess)

            if best_guess == eos_idx:
                break

    translated_sentence = [english.vocab.itos[idx] for idx in outputs]
    return ' '.join(translated_sentence)  # Join tokens to form the complete string

# Training configuration
# TensorBoard writer for the per-batch training loss; `step` counts batches across epochs.
writer = SummaryWriter("runs/loss_plot")
step = 0

# Batches group examples of similar source length (sort_key) and are sorted within
# each batch; tensors are created directly on `device`.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=batch_size,
    sort_within_batch=True,
    sort_key=lambda x: len(x.src),
    device=device,
)

encoder_net = Encoder(
    input_size_encoder, encoder_embedding_size, hidden_size, num_layers, enc_dropout
).to(device)

decoder_net = Decoder(
    input_size_decoder,
    decoder_embedding_size,
    hidden_size,
    output_size,
    num_layers,
    dec_dropout,
).to(device)

model = Seq2Seq(encoder_net, decoder_net).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Padding positions in the target do not contribute to the loss.
pad_idx = english.vocab.stoi["<pad>"]
criterion = CrossEntropyLoss(ignore_index=pad_idx)

# Example sentence to translate
sentence = "ein boot mit mehreren männern darauf wird von einem großen pferdegespann ans ufer gezogen."

# Training loop
losses = []
try:
    for epoch in range(num_epochs):
        print(f"[Epoch {epoch + 1} / {num_epochs}]")

        model.eval()
        translated_sentence = translate_sentence(
            model, sentence, german, english, device, max_length=50
        )
        print(f"Translated example sentence: \n {translated_sentence}")
        model.train()

        # Sum (not mean) of the per-batch losses over the epoch.
        epoch_loss = 0
        for batch_idx, batch in enumerate(train_iterator):
            inp_data = batch.src.to(device)
            target = batch.trg.to(device)
            optimizer.zero_grad()
            output = model(inp_data, target)
            # Skip position 0 (the <sos> slot, never predicted) and flatten time and
            # batch so the loss sees one row of scores per target token.
            output = output[1:].reshape(-1, output.shape[2])
            target = target[1:].reshape(-1)

            loss = criterion(output, target)
            epoch_loss += loss.item()
            loss.backward()
            # Clip the gradient norm to 1 before the optimizer step.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            optimizer.step()

            writer.add_scalar("Training loss", loss.item(), global_step=step)
            step += 1

        print(f"Epoch Loss: {epoch_loss}")
        losses.append(epoch_loss)
finally:
    # Flush and release the event file even if training is interrupted.
    writer.close()

# BLEU on test examples 1..99 (index 0 is not included in the slice).
score = bleu(test_data[1:100], model, german, english, device)
print(f"Bleu score {score*100:.2f}")
