<a href="https://colab.research.google.com/github/Jhansipothabattula/Machine_Learning/blob/main/Day66.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sequence-to-Sequence Models and Applications

## Sequence-to-Sequence (Seq2Seq) Models and Their Architecture

* **What Are Seq2Seq Models?**
    * Map an input sequence to an output sequence whose length may differ from the input's
    * Widely used for tasks like language translation, text summarization, speech-to-text, and chatbots

* **Architecture**
    * **Encoder**
        * Processes the input sequence and encodes it into a fixed-length vector (**context vector**)
    * **Decoder**
        * Takes the context vector as input and generates the output sequence, step by step


## Encoder-Decoder Framework for Seq2Seq Tasks

* **How It Works**
    * **Encoder**
        * Sequentially processes the input sequence using RNN, LSTM, or GRU
        * Produces a context vector representing the entire input sequence
    * **Decoder**
        * Initializes its hidden state with the encoder's context vector
        * Generates the output sequence one token at a time
        * Predicts the next token using the previously generated tokens


## Attention Mechanism Overview

* **Why Attention?**
    * Standard Seq2Seq models compress the entire input sequence into a fixed-length vector, which can lead to information loss for long sequences.
    * The Attention Mechanism dynamically focuses on different parts of the input sequence when generating each output token.
* **How Attention Works**
    * Calculates a weight (or score) for each input token based on its relevance to the current decoder state.
    * Outputs a weighted sum of the encoder outputs, creating a context vector for each decoder step.


**Objective**
- Build a basic Seq2Seq model with LSTM layers for translation, and experiment with its hyperparameters

In [2]:
# Import libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import random # Added import for random module

# Toy English-to-French corpus, written as (english, french) pairs so that
# each source sentence sits next to its translation.
_sentence_pairs = [
    ("hello", "bonjour"),
    ("how are you", "comment ca va"),
    ("good morning", "bon matin"),
    ("thank you", "merci"),
    ("good night", "bonne nuit"),
]
english_sentences = [english for english, _ in _sentence_pairs]
french_sentences = [french for _, french in _sentence_pairs]

# Vocabulary and tokenization
def build_vocab(sentences):
    """Build a word-to-index vocabulary from whitespace-separated sentences.

    Indices 0-3 are reserved for the special tokens ``<PAD>``, ``<SOS>``,
    ``<EOS>`` and ``<UNK>``. Every other word receives the next free index,
    in order of first appearance.

    Args:
        sentences: Iterable of strings; each is split on whitespace.

    Returns:
        dict mapping each token to its integer index.
    """
    token_to_index = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2, "<UNK>": 3}
    for text in sentences:
        for token in text.split():
            # setdefault leaves an already-known token untouched and gives a
            # new token the current vocabulary size as its index.
            token_to_index.setdefault(token, len(token_to_index))
    return token_to_index

# One vocabulary per language, each built from that language's sentences.
english_vocab, french_vocab = map(build_vocab, (english_sentences, french_sentences))

# Tokenize and pad sentences
def tokenize(sentences, vocab, max_len):
    """Convert sentences into fixed-length rows of token indices.

    Each sentence is split on whitespace and mapped through ``vocab``
    (words that are missing become ``<UNK>``). The result is wrapped as
    ``<SOS> ... <EOS>`` and right-padded with ``<PAD>`` up to ``max_len``.

    A sentence with more than ``max_len - 2`` words is truncated to fit.
    Without truncation such a row would be longer than the others and the
    rows could not be stacked into a rectangular array.

    Args:
        sentences: Iterable of strings.
        vocab: Mapping from token to index; must contain ``<PAD>``,
            ``<SOS>``, ``<EOS>`` and ``<UNK>``.
        max_len: Row length, counting the ``<SOS>`` and ``<EOS>`` markers.
            Values below 2 still yield rows of length 2 (just the markers).

    Returns:
        ``numpy.ndarray`` of dtype ``int64`` with one row per sentence.
    """
    pad, sos, eos, unk = (vocab[tok] for tok in ("<PAD>", "<SOS>", "<EOS>", "<UNK>"))
    # Number of real words that fit next to the two markers.
    word_budget = max(max_len - 2, 0)

    rows = []
    for sentence in sentences:
        word_ids = [vocab.get(word, unk) for word in sentence.split()][:word_budget]
        row = [sos, *word_ids, eos]
        row += [pad] * (max_len - len(row))
        rows.append(row)
    # An explicit dtype keeps the indices 64-bit on every platform.
    return np.array(rows, dtype=np.int64)
# Longest sentence of each language in words, plus two slots for <SOS>/<EOS>.
max_len_eng = 2 + max(len(text.split()) for text in english_sentences)
max_len_fr = 2 + max(len(text.split()) for text in french_sentences)

# Each language is padded to its own maximum length.
english_data = tokenize(sentences=english_sentences, vocab=english_vocab, max_len=max_len_eng)
french_data = tokenize(sentences=french_sentences, vocab=french_vocab, max_len=max_len_fr)

class TranslationDataset(Dataset):
    """Paired source/target token rows exposed as a torch ``Dataset``.

    Item ``i`` is the tuple ``(src_data[i], tgt_data[i])`` with both elements
    converted to tensors.
    """

    def __init__(self, src_data, tgt_data):
        # Stored as given; conversion to tensors happens per item on access.
        self.src_data, self.tgt_data = src_data, tgt_data

    def __len__(self):
        # The source side defines the number of examples.
        return len(self.src_data)

    def __getitem__(self, idx):
        source_row = self.src_data[idx]
        target_row = self.tgt_data[idx]
        return torch.tensor(source_row), torch.tensor(target_row)

# Batches of two sentence pairs, drawn in shuffled order.
dataset = TranslationDataset(src_data=english_data, tgt_data=french_data)
dataloader = DataLoader(dataset=dataset, batch_size=2, shuffle=True)

class Encoder(nn.Module):
    """Embed source tokens and run them through an LSTM.

    Only the final hidden and cell states are returned; the per-step LSTM
    outputs are discarded.

    Args:
        input_dim: Number of rows in the embedding table.
        embed_dim: Size of each embedding vector (the LSTM input size).
        hidden_dim: LSTM hidden size.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, embed_dim, hidden_dim, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=input_dim, embedding_dim=embed_dim)
        self.lstm = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )

    def forward(self, x):
        """Return the LSTM's final ``(hidden, cell)`` states for token indices ``x``."""
        # The LSTM is batch_first, so the batch dimension of x comes first.
        _, final_state = self.lstm(self.embedding(x))
        hidden, cell = final_state
        return hidden, cell

class Decoder(nn.Module):
    """Single-step LSTM decoder that scores the target vocabulary.

    Args:
        output_dim: Target vocabulary size (embedding rows and logit width).
        embed_dim: Size of each embedding vector (the LSTM input size).
        hidden_dim: LSTM hidden size.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, output_dim, embed_dim, hidden_dim, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=output_dim, embedding_dim=embed_dim)
        self.lstm = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x, hidden, cell):
        """Advance the decoder by one token.

        Args:
            x: Token indices for the current step, one per batch element.
            hidden: LSTM hidden state carried over from the previous step.
            cell: LSTM cell state carried over from the previous step.

        Returns:
            Tuple ``(prediction, hidden, cell)``: the vocabulary scores for
            this step and the updated LSTM states.
        """
        # Insert a length-1 sequence axis so the batch_first LSTM sees one step.
        step_input = self.embedding(x.unsqueeze(1))
        step_output, next_state = self.lstm(step_input, (hidden, cell))
        hidden, cell = next_state
        # Drop the length-1 sequence axis again before projecting to the vocabulary.
        prediction = self.fc(step_output.squeeze(1))
        return prediction, hidden, cell

class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes the target one position at a time.

    Args:
        encoder: Module whose call returns the ``(hidden, cell)`` pair used
            to initialise the decoder.
        decoder: Module called as ``decoder(tokens, hidden, cell)`` and
            returning ``(scores, hidden, cell)``; it must expose
            ``fc.out_features`` as the target vocabulary size.
        device: Device on which the output buffer is allocated.
    """

    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src, tgt, teacher_forcing_ratio=0.5):
        """Score every target position after the first.

        Args:
            src: Source token indices, batch first.
            tgt: Target token indices, batch first; column 0 is fed to the
                decoder as its first input.
            teacher_forcing_ratio: Probability, drawn independently per
                step, of feeding the ground-truth token as the next decoder
                input instead of the decoder's own argmax prediction.

        Returns:
            Tensor of shape ``(batch, tgt_len, vocab)``. Position 0 along the
            time axis is never written and stays all zeros.
        """
        batch_size = src.size(0)
        tgt_len = tgt.size(1)
        tgt_vocab_size = self.decoder.fc.out_features

        # Allocate directly on the target device rather than on the CPU
        # followed by a copy.
        outputs = torch.zeros(batch_size, tgt_len, tgt_vocab_size, device=self.device)

        hidden, cell = self.encoder(src)

        # Named decoder_input because `input` would shadow the builtin.
        decoder_input = tgt[:, 0]
        for t in range(1, tgt_len):
            scores, hidden, cell = self.decoder(decoder_input, hidden, cell)
            outputs[:, t, :] = scores
            use_teacher = random.random() < teacher_forcing_ratio
            decoder_input = tgt[:, t] if use_teacher else scores.argmax(1)
        return outputs

# Use the GPU when torch reports one as available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Vocabulary sizes come from the data; the remaining sizes are hyperparameters.
input_dim, output_dim = len(english_vocab), len(french_vocab)
embed_dim, hidden_dim, num_layers = 64, 128, 2

encoder = Encoder(
    input_dim=input_dim,
    embed_dim=embed_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
)
decoder = Decoder(
    output_dim=output_dim,
    embed_dim=embed_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
)
model = Seq2Seq(encoder=encoder, decoder=decoder, device=device).to(device)

optimizer = optim.Adam(model.parameters(), lr=1e-3)
# Target positions holding <PAD> are excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=french_vocab["<PAD>"])

def train(model, dataloader, optimizer, criterion, device, num_epochs=20):
    """Train ``model`` and report the mean batch loss after every epoch.

    Args:
        model: Module called as ``model(src, tgt)`` and returning scores of
            shape ``(batch, tgt_len, vocab)``.
        dataloader: Iterable of ``(src, tgt)`` tensor batches. It only needs
            to be iterable; ``len()`` is not required.
        optimizer: Optimizer over the model's parameters.
        criterion: Loss called as ``criterion(scores_2d, targets_1d)``.
        device: Device the batches are moved to before the forward pass.
        num_epochs: Number of passes over ``dataloader``.

    Returns:
        list of float: the mean batch loss of each epoch, in order. An epoch
        that saw no batches is recorded as ``nan``.
    """
    model.train()
    history = []
    for epoch in range(num_epochs):
        total_loss = 0.0
        num_batches = 0
        for src, tgt in dataloader:
            src, tgt = src.to(device), tgt.to(device)

            optimizer.zero_grad()
            scores = model(src, tgt)

            # Skip time step 0 on both sides, then flatten batch and time so
            # the loss sees one row of scores per target token.
            vocab_size = scores.shape[-1]
            flat_scores = scores[:, 1:].reshape(-1, vocab_size)
            flat_targets = tgt[:, 1:].reshape(-1)

            loss = criterion(flat_scores, flat_targets)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        # Counting batches while iterating avoids needing len(dataloader) and
        # a division by zero on an empty loader.
        mean_loss = total_loss / num_batches if num_batches else float("nan")
        history.append(mean_loss)
        print(f"Epoch: {epoch + 1}/{num_epochs}, Loss: {mean_loss}")
    return history
# Run training with the default number of epochs.
train(
    model=model,
    dataloader=dataloader,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
)

def translate_sentence(model, sentence, english_vocab, french_vocab, max_len_fr, device):
    """Greedily translate one English sentence with a trained model.

    The sentence is split on whitespace, mapped through ``english_vocab``
    (unknown words become ``<UNK>``) and wrapped in ``<SOS>``/``<EOS>``.
    Decoding starts from ``<SOS>``, always picks the highest-scoring token,
    and stops at ``<EOS>`` or after ``max_len_fr`` steps.

    Args:
        model: Object exposing ``encoder(src)`` and
            ``decoder(tokens, hidden, cell)``; it is put into eval mode.
        sentence: Source sentence as a string.
        english_vocab: Source token-to-index mapping with the special tokens.
        french_vocab: Target token-to-index mapping with the special tokens.
        max_len_fr: Maximum number of decoding steps.
        device: Device on which the input tensors are created.

    Returns:
        str: the generated target words joined by single spaces, without
        ``<SOS>`` or ``<EOS>``.
    """
    model.eval()

    unk = english_vocab["<UNK>"]
    src_ids = [
        english_vocab["<SOS>"],
        *(english_vocab.get(word, unk) for word in sentence.split()),
        english_vocab["<EOS>"],
    ]
    src = torch.tensor(src_ids, device=device).unsqueeze(0)

    eos = french_vocab["<EOS>"]
    index_to_word = {index: word for word, index in french_vocab.items()}

    generated = []
    previous = french_vocab["<SOS>"]
    # Inference only: keep the encoder and every decoder step out of autograd.
    with torch.no_grad():
        hidden, cell = model.encoder(src)
        for _ in range(max_len_fr):
            step_input = torch.tensor([previous], device=device)
            scores, hidden, cell = model.decoder(step_input, hidden, cell)
            previous = scores.argmax(1).item()
            if previous == eos:
                break
            # Only non-<EOS> tokens are collected, so the last word survives
            # when decoding stops at the step limit instead of at <EOS>.
            generated.append(previous)

    return " ".join(index_to_word[index] for index in generated)

# Quick check: translate one of the sentences the model was trained on.
sentence = "good morning"
translation = translate_sentence(
    model=model,
    sentence=sentence,
    english_vocab=english_vocab,
    french_vocab=french_vocab,
    max_len_fr=max_len_fr,
    device=device,
)
print(f"Translation Sentence:{translation}")

Epoch: 1,/20, Loss: 2.549546400705973
Epoch: 2,/20, Loss: 2.484135548273722
Epoch: 3,/20, Loss: 2.388967752456665
Epoch: 4,/20, Loss: 2.291834831237793
Epoch: 5,/20, Loss: 2.111245075861613
Epoch: 6,/20, Loss: 1.9174563884735107
Epoch: 7,/20, Loss: 1.8483433723449707
Epoch: 8,/20, Loss: 1.6911557118097942
Epoch: 9,/20, Loss: 1.5613115628560383
Epoch: 10,/20, Loss: 1.3760685125986736
Epoch: 11,/20, Loss: 1.3686209917068481
Epoch: 12,/20, Loss: 1.1934099992116292
Epoch: 13,/20, Loss: 1.0432816346486409
Epoch: 14,/20, Loss: 0.8750132520993551
Epoch: 15,/20, Loss: 0.8090203801790873
Epoch: 16,/20, Loss: 0.7084643642107645
Epoch: 17,/20, Loss: 0.5222056160370508
Epoch: 18,/20, Loss: 0.4637317309776942
Epoch: 19,/20, Loss: 0.5049937069416046
Epoch: 20,/20, Loss: 0.3349197010199229
Translation Sentence:bon matin
