<a href="https://colab.research.google.com/github/Raghava-1845/Large-Language-Model/blob/main/Encoder_%26_decoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
import torch
import torch.nn as nn
import torch.optim as optim

# ------------------
# Data
# ------------------
src_word = "thank you"
tgt_word = "gracias"

# Marker character prepended to the target; it is the decoder's first input.
START = "^"

# One shared character vocabulary covering source, target and the marker.
chars = sorted({*src_word, *tgt_word, *START})
char2idx = dict(zip(chars, range(len(chars))))
idx2char = dict(enumerate(chars))


def encode(word):
    """Return the vocabulary indices of ``word``'s characters as a 1-D long tensor.

    Raises ``KeyError`` if ``word`` contains a character missing from
    ``char2idx``.
    """
    indices = list(map(char2idx.__getitem__, word))
    return torch.tensor(indices, dtype=torch.long)


src = encode(src_word)
tgt = encode(f"{START}{tgt_word}")

# ------------------
# Encoder
# ------------------
class Encoder(nn.Module):
    """Embed a token sequence and run it through a GRU, keeping only the final hidden state."""

    def __init__(self, vocab_size, hidden_size):
        """Build the embedding table and the GRU.

        Args:
            vocab_size: Number of distinct token indices.
            hidden_size: Width of both the embeddings and the GRU state.
        """
        super().__init__()
        # The embedding width equals the GRU input width, so no projection is needed.
        self.embed = nn.Embedding(num_embeddings=vocab_size, embedding_dim=hidden_size)
        self.rnn = nn.GRU(input_size=hidden_size, hidden_size=hidden_size)

    def forward(self, x):
        """Return the GRU's final hidden state for the index tensor ``x``.

        The file calls this with a 1-D tensor of token indices; a batch axis
        of size 1 is inserted at position 1 before the GRU runs.
        """
        embedded = self.embed(x)
        # The GRU was built without batch_first, so it expects (seq, batch, feature).
        sequence = embedded.unsqueeze(dim=1)
        final_hidden = self.rnn(sequence)[1]
        return final_hidden

# ------------------
# Decoder
# ------------------
class Decoder(nn.Module):
    """Single-step GRU decoder that maps a token and a hidden state to vocabulary logits."""

    def __init__(self, vocab_size, hidden_size):
        """Build the embedding table, the GRU and the output projection.

        Args:
            vocab_size: Number of distinct token indices (also the logit width).
            hidden_size: Width of both the embeddings and the GRU state.
        """
        super().__init__()
        self.embed = nn.Embedding(num_embeddings=vocab_size, embedding_dim=hidden_size)
        self.rnn = nn.GRU(input_size=hidden_size, hidden_size=hidden_size)
        # Projects each GRU output vector onto one score per vocabulary entry.
        self.fc = nn.Linear(in_features=hidden_size, out_features=vocab_size)

    def forward(self, x, hidden):
        """Advance the decoder by one step.

        Args:
            x: Index tensor for the current input token; the file passes
                a tensor of shape ``(1,)``.
            hidden: GRU hidden state carried over from the previous step
                (or from the encoder on the first step).

        Returns:
            A ``(logits, hidden)`` pair: the projected GRU output and the
            updated hidden state.
        """
        # A leading axis of length 1 is added so the GRU sees one time step.
        step_input = self.embed(x).unsqueeze(dim=0)
        rnn_out, next_hidden = self.rnn(step_input, hidden)
        # Drop the time axis again before projecting to vocabulary size.
        logits = self.fc(rnn_out.squeeze(dim=0))
        return logits, next_hidden

# ------------------
# Train
# ------------------
hidden_size = 32
encoder = Encoder(vocab_size=len(chars), hidden_size=hidden_size)
decoder = Decoder(vocab_size=len(chars), hidden_size=hidden_size)

loss_fn = nn.CrossEntropyLoss()
# A single optimizer updates encoder and decoder weights together.
optimizer = optim.Adam(
    [*encoder.parameters(), *decoder.parameters()],
    lr=0.01,
)

for epoch in range(1000):
    optimizer.zero_grad()

    # The encoder's final state is the decoder's initial state.
    hidden = encoder(src)

    # Teacher forcing: at every step the decoder receives the true previous
    # target character and is scored on predicting the next one.
    step_losses = []
    for prev_char, next_char in zip(tgt[:-1], tgt[1:]):
        dec_input = prev_char.unsqueeze(0)
        output, hidden = decoder(dec_input, hidden)
        step_losses.append(loss_fn(output, next_char.unsqueeze(0)))

    # Total sequence loss is the sum of the per-step cross-entropies.
    loss = sum(step_losses)
    loss.backward()
    optimizer.step()

# ------------------
# Inference
# ------------------
# Greedy decoding: start from the START marker and feed each predicted
# character back in as the next input. Gradients are not needed here.
with torch.no_grad():
    hidden = encoder(src)
    dec_input = torch.tensor([char2idx[START]])
    result = []

    # Emit exactly as many characters as the training target contains
    # (tgt also holds the leading START marker, hence starting at 1).
    for _ in range(1, len(tgt)):
        output, hidden = decoder(dec_input, hidden)
        # Pick the highest-scoring vocabulary entry for this step.
        idx = int(output.argmax())
        result.append(idx2char[idx])
        dec_input = torch.tensor([idx])

print("Input:", src_word)
print("Output:", "".join(result))


Input: thank you
Output: gracias
