<a href="https://colab.research.google.com/github/AkHiLdEvGoD/DeepLearning-Algorithms/blob/main/Seq2Seq_Encoder_Decoder(Pytorch).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn.functional as F

In [17]:
batch_size = 32
vocab_size = 10
input_timesteps = 5
output_timesteps = 6
embedding_dim = 8
hidden_size = 16

In [18]:
# Synthetic dataset of vocab size 10, input_seq = 5, output_seq = 6
X = torch.randint(0,vocab_size,(batch_size,input_timesteps))
Y = torch.randint(0,vocab_size,(batch_size,output_timesteps))

In [19]:
def gen_randn(shape):
  return (torch.randn(shape)*0.1).detach().requires_grad_()

In [20]:
#Embedding matrix
E = gen_randn((vocab_size,embedding_dim))

In [21]:
# Encoder weights
Wxh_enc = gen_randn((embedding_dim,hidden_size))
Whh_enc = gen_randn((hidden_size,hidden_size))
b_enc = gen_randn((1,hidden_size))

# Decoder weights
Wxh_dec = gen_randn((embedding_dim,hidden_size))
Whh_dec = gen_randn((hidden_size,hidden_size))
b_dec = gen_randn((1,hidden_size))

# Softmax Layer Weights(Output Layer)
W_out = gen_randn((hidden_size,vocab_size))
b_out = gen_randn((1,vocab_size))

In [23]:
params = [E,Wxh_enc,Whh_enc,b_enc,Wxh_dec,Whh_dec,b_dec,W_out,b_out]

In [24]:
def seq2seq_forward(X,Y):

  # Encoder
  h_enc = torch.zeros(batch_size,hidden_size)
  for t in range(input_timesteps):
    x_t = E[X[:,t]]
    h_enc = torch.tanh(x_t @ Wxh_enc + h_enc @ Whh_enc + b_enc)

  # Decoder
  h_dec = h_enc
  logits = []
  for t in range(output_timesteps):
    y_t = E[Y[:,t]]  # Teacher Forcing
    h_dec = torch.tanh(y_t @ Wxh_dec + h_dec @ Whh_dec + b_dec)
    logit = h_dec @ W_out + b_out
    logits.append(logit)           # Didn't use softmax here as softmax is already applied inside cross entropy function of Pytorch

  logits = torch.stack(logits,dim=1)
  return logits

In [25]:
epochs = 20
learning_rate = 0.1
for epoch in range(epochs):
  logits = seq2seq_forward(X,Y)
  loss = F.cross_entropy(logits.view(-1,vocab_size),Y.view(-1)) # Flatten the logits and the target tensor for loss calculation
  loss.backward()

  with torch.no_grad():
    for param in params:
      param -= learning_rate*param.grad

  print(f'Epoch : {epoch+1}, Loss : {loss.item()}')

Epoch : 1, Loss : 2.298929214477539
Epoch : 2, Loss : 2.2979421615600586
Epoch : 3, Loss : 2.2960033416748047
Epoch : 4, Loss : 2.293179988861084
Epoch : 5, Loss : 2.2895662784576416
Epoch : 6, Loss : 2.2852776050567627
Epoch : 7, Loss : 2.280440330505371
Epoch : 8, Loss : 2.2751858234405518
Epoch : 9, Loss : 2.2696382999420166
Epoch : 10, Loss : 2.263906478881836
Epoch : 11, Loss : 2.2580761909484863
Epoch : 12, Loss : 2.252199411392212
Epoch : 13, Loss : 2.2462828159332275
Epoch : 14, Loss : 2.240281581878662
Epoch : 15, Loss : 2.234083414077759
Epoch : 16, Loss : 2.227499008178711
Epoch : 17, Loss : 2.2202439308166504
Epoch : 18, Loss : 2.2119228839874268
Epoch : 19, Loss : 2.2020108699798584
Epoch : 20, Loss : 2.189828634262085
