## 1. Imports and Setup

In [4]:
import tensorflow as tf 
import numpy as np 
from collections import Counter
import matplotlib.pyplot as plt

##  2. Data Preparation

In [5]:
english_sentence  =[
  "i like apples",
  "you love bananas",
  "he plays football",
  "she reads books",
  "we study languages",
]
portuguese_sentences = [
    "eu gosto de maçãs",
    "você ama bananas",
    "ele joga futebol",
    "ela lê livros",
    "nós estudamos línguas",
]

### Tokenization and Vectorization

In [6]:
english_vectorizer = tf.keras.layers.TextVectorization(output_mode="int",output_sequence_length=10)
portuguese_vectorizer = tf.keras.layers.TextVectorization(output_mode="int",output_sequence_length=10)

english_vectorizer.adapt(english_sentence)
portuguese_vectorizer.adapt(["<SOS>" + s + "<EOS>" for s in portuguese_sentences])

vocab_en = english_vectorizer.get_vocabulary()
vocab_pt = portuguese_vectorizer.get_vocabulary()

## 3.  Encoder-Decoder Architecture

In [12]:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, units):
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, units)
        self.lstm = tf.keras.layers.LSTM(units, return_sequences=True, return_state=True)
    
    def call(self, x):
        x = self.embedding(x)
        output, h, c = self.lstm(x)
        return output, h, c


class Decoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, units):
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, units)
        self.lstm = tf.keras.layers.LSTM(units, return_sequences=True, return_state=True)
        self.output_layer = tf.keras.layers.Dense(vocab_size, activation='softmax')
    
    def call(self, context, token, state):
        token = self.embedding(token)
        output, h, c = self.lstm(token, initial_state=state)
        logits = self.output_layer(output)
        return logits, [h, c]


## 4. Translator Model

In [13]:
class Translator(tf.keras.Model):
  def __init__(self,vocab_size_en,vocab_size_pt,units):
    super().__init__()
    self.encoder = Encoder(vocab_size_en,units)
    self.decoder = Decoder(vocab_size_pt,units)
    self.units = units

  def call(self,inputs):
    context,target= inputs
    enc_out, h,c = self.encoder(context)
    logits, _ =self.decoder(enc_out,target,[h,c])
    return logits

## 5. Traning Step

In [19]:
translator = Translator(len(vocab_en),len(vocab_pt),units = 128)

input_batch = english_vectorizer(english_sentence)
target_batch = portuguese_vectorizer(["<SOS>" + s + "<EOS>" for s in portuguese_sentences])

logits = translator((input_batch,target_batch[:,:-1]))
print("Logit hape:",logits.shape)

Logit hape: (5, 9, 18)


## 6. Translation Function

In [36]:
def translate(model, text, max_length=50, temperature=0.7):
    text = tf.constant([text])
    context, h, c = model.encoder(english_vectorizer(text))

    sos_id = portuguese_vectorizer(['<SOS>']).numpy()[0][0]
    eos_id = portuguese_vectorizer(['<EOS>']).numpy()[0][0]
    next_token = tf.constant([[sos_id]], dtype=tf.int64)

    state = [h, c]
    tokens = []
    
    for _ in range(max_length):
        logits, state = model.decoder(context, next_token, state)
        logits = tf.squeeze(logits, axis=1)
        probs = tf.nn.softmax(logits / temperature)
        next_token = tf.random.categorical(tf.math.log(probs), num_samples=1)
        
        if next_token.numpy()[0, 0] == eos_id:
            break

        tokens.append(next_token)
    
    tokens = tf.concat(tokens, axis=-1)
    words = [vocab_pt[i] for i in tokens.numpy()[0]]
    return " ".join(words)


## 7. Try Translation

In [37]:
example = "i like apples"
translation = translate(translator, example, temperature=0.7)
print(f"Input: {example}")
print(f"Translation: {translation}")


Input: i like apples
Translation: sosnós maçãseos joga  sosnós línguaseos lê


## 8. Evaluation Functions (ROGUE and Overlap)

In [38]:
from collections import Counter

def rogue_similarity(candidate,reference):
  candidate_counts = Counter(candidate)
  reference_counts = Counter(reference)
  overlap = sum(min(candidate_counts[t], reference_counts[t]) for t in candidate_counts)
  precision = overlap / len(candidate) if candidate else 0
  recall = overlap/len(reference) if reference else 0
  return (2*precision*recall)/(precision + recall) if (precision + recall) else 0

def average_overlap(samples,similarity_fn):
  scores = {}
  for i, candidate in enumerate(samples):
    overlap = 0
    for j, sample in enumerate(samples):
      if i == j:
        continue
      overlap+=similarity_fn(candidate,sample)
    scores[i] = round(overlap/(len(samples)-1),3)
  return scores

## 9. Test Block

In [39]:
test_sentences = [
    "i like apples",
    "he is reading a book",
    "we are learning languages",
    "they play football every day",
    "she loves music"
]

temperature = 0.7

for sentence in test_sentences:
  translation = translate(translator,sentence,temperature=temperature)
  print("=" * 60)
  print(f"Input Sentence: {sentence}")
  print(f"Predicted Translation: {translation}")
  print("=" * 60, "\n")


Input Sentence: i like apples
Predicted Translation: sosele línguaseos

Input Sentence: he is reading a book
Predicted Translation: sosnós línguaseos livroseos sosvocê maçãseos livroseos sosnós sosela sosnós

Input Sentence: we are learning languages
Predicted Translation: maçãseos  sosnós de sosnós  lê joga joga joga gosto futeboleos estudamos sosele sosvocê livroseos ama sosela sosela ama soseu joga soseu soseu sosele estudamos sosele sosvocê

Input Sentence: they play football every day
Predicted Translation: gosto  joga lê futeboleos gosto ama sosela soseu maçãseos de lê soseu gosto soseu gosto futeboleos estudamos sosele sosnós lê estudamos futeboleos gosto maçãseos sosele de sosnós  bananaseos sosnós sosele sosele joga línguaseos lê ama sosnós sosvocê ama sosele  futeboleos livroseos de bananaseos livroseos futeboleos joga livroseos

Input Sentence: she loves music
Predicted Translation: livroseos  sosele gosto lê ama maçãseos lê sosnós gosto 



## 10. Randomized Temperature Testing

In [49]:
example = "i like apples"

for temp in [0.3, 0.7, 1.0]:
    translation = translate(translator, example, temperature=temp)
    print(f"Temperature: {temp}")
    print(f"Input: {example}")
    print(f"Translation: {translation}")
    print("-" * 50)


Temperature: 0.3
Input: i like apples
Translation: joga joga gosto lê sosele maçãseos estudamos sosnós sosnós sosele bananaseos sosnós sosele ama estudamos joga línguaseos joga gosto  línguaseos joga joga soseu lê lê ama ama sosela lê sosvocê gosto sosela maçãseos futeboleos sosnós bananaseos livroseos sosela soseu futeboleos sosele de ama sosvocê joga maçãseos lê lê gosto
--------------------------------------------------
Temperature: 0.7
Input: i like apples
Translation: sosvocê sosela joga sosela ama sosela
--------------------------------------------------
Temperature: 1.0
Input: i like apples
Translation: joga futeboleos  sosnós gosto livroseos bananaseos línguaseos lê sosvocê sosela estudamos soseu joga línguaseos ama maçãseos futeboleos maçãseos línguaseos de de línguaseos bananaseos joga ama sosnós sosele sosela  livroseos sosela línguaseos sosvocê bananaseos  sosela ama maçãseos lê soseu sosele bananaseos ama ama sosele lê ama futeboleos línguaseos
----------------------------