## 1. Imports and Setup

In [4]:
import tensorflow as tf 
import numpy as np 
from collections import Counter
import matplotlib.pyplot as plt

## 2. Data Preparation

In [5]:
# Toy parallel corpus: the sentence at index i of one list is paired with
# the sentence at index i of the other.

# Source-side (English) sentences.
english_sentence = [
    "i like apples",
    "you love bananas",
    "he plays football",
    "she reads books",
    "we study languages",
]

# Target-side (Portuguese) sentences, in the same order as the list above.
portuguese_sentences = [
    "eu gosto de maçãs",
    "você ama bananas",
    "ele joga futebol",
    "ela lê livros",
    "nós estudamos línguas",
]

### Tokenization and Vectorization

In [6]:
# One vectorizer per language: each maps a raw string to a fixed-length
# (10) sequence of integer token ids.
english_vectorizer = tf.keras.layers.TextVectorization(output_mode="int", output_sequence_length=10)
portuguese_vectorizer = tf.keras.layers.TextVectorization(output_mode="int", output_sequence_length=10)

english_vectorizer.adapt(english_sentence)
# The start/end markers must be separated from the sentence by spaces;
# otherwise they fuse with the first/last word (e.g. "<SOS>eu") and never
# become tokens of their own in the target vocabulary.
# NOTE(review): with TextVectorization's default standardization (lowercase +
# strip punctuation) the markers are presumably stored as "sos" / "eos" --
# confirm before looking them up by name in vocab_pt.
portuguese_vectorizer.adapt(["<SOS> " + s + " <EOS>" for s in portuguese_sentences])

# Vocabulary lists learned by adapt(); position in the list is the token id.
vocab_en = english_vectorizer.get_vocabulary()
vocab_pt = portuguese_vectorizer.get_vocabulary()

## 3. Encoder-Decoder Architecture

In [None]:
class Encoder(tf.keras.layers.Layer):
  """Embeds source token ids and feeds the embeddings through one LSTM.

  Args:
    vocab_size: number of entries in the embedding table.
    units: used both as the embedding dimension and as the LSTM size.
  """

  def __init__(self, vocab_size, units):
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, units)
    # Configured to hand back the per-step outputs as well as the final states.
    self.lstm = tf.keras.layers.LSTM(
        units,
        return_state=True,
        return_sequences=True,
    )

  def call(self, x):
    """Return the LSTM's per-step outputs plus its final hidden and cell states."""
    embedded = self.embedding(x)
    sequence_output, hidden_state, cell_state = self.lstm(embedded)
    return sequence_output, hidden_state, cell_state
  
class Decoder(tf.keras.layers.Layer):
  """Embeds target token ids, runs an LSTM from a given state, and scores the vocabulary.

  Args:
    vocab_size: number of entries in the embedding table and width of the
      output projection (one logit per vocabulary entry).
    units: used both as the embedding dimension and as the LSTM size.
  """

  def __init__(self, vocab_size, units):
    # Was `super().__init()`, which raises AttributeError on construction.
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, units)
    self.lstm = tf.keras.layers.LSTM(units, return_sequences=True, return_state=True)
    # `call` projects LSTM outputs through `output_layer`, which was never
    # created; a Dense layer with no activation yields raw logits.
    self.output_layer = tf.keras.layers.Dense(vocab_size)

  def call(self, context, token, state):
    """Run the decoder over `token`, starting the LSTM from `state`.

    Args:
      context: accepted for interface compatibility; not used by this layer.
      token: target token ids to embed and decode.
      state: initial LSTM state, passed through as `initial_state`
        (presumably `[h, c]`, matching what this method returns -- confirm
        against the caller).

    Returns:
      A pair `(logits, [h, c])`: the projected LSTM outputs and the LSTM's
      final hidden and cell states.
    """
    embedded = self.embedding(token)
    # Keyword was misspelled `inital_state`, which the LSTM call does not accept.
    output, h, c = self.lstm(embedded, initial_state=state)
    logits = self.output_layer(output)
    return logits, [h, c]


## 4. Translator Model