PYTORCH PRACTICE


ENCODER_DECODER_ARCHITECTURE


In [None]:


import torch
import transformers
from tensorflow.keras.layers import TextVectorization



# English example sentences (30 entries) for the translation exercise.
# NOTE(review): these appear to be paired by index with `spanish_sentences`
# (entry i there reads as the translation of entry i here) -- keep both lists
# the same length and in the same order when editing.
sentences = [
    "The early bird catches the worm and starts its day.",
    "A gentle rain fell all night long, nourishing the newly planted garden.",
    "She quickly ran to catch the bus before it drove away without her.",
    "He baked a large cake for his friend's birthday celebration last weekend.",
    "The old lighthouse stood tall against the crashing waves of the stormy sea.",
    "My little brother is learning to ride his new bicycle for the very first time.",
    "A small, friendly kitten played with a ball of yarn on the wooden floor.",
    "They watched the dazzling fireworks light up the night sky from their balcony.",
    "The students listened attentively to the teacher's important lecture.",
    "I like to read mystery novels on the couch on cold and rainy evenings.",
    "The cheerful dog wagged its tail with excitement at the sight of its owner.",
    "She picked a large bunch of beautiful flowers from the community park.",
    "The quiet library was the perfect place for studying and getting work done.",
    "He sipped a hot cup of coffee while watching the morning news on TV.",
    "The autumn leaves painted the entire forest in brilliant shades of yellow and orange.",
    "My grandmother told us many captivating stories about her childhood adventures.",
    "A clever fox sneaked into the farm and stole a plump, delicious chicken.",
    "The talented musician played a beautiful melody on his shiny new guitar.",
    "She wrote a long and heartfelt letter to her dear friend who lived far away.",
    "The children giggled with joy as they built a massive sandcastle on the beach.",
    "He finished his chores and then happily spent the rest of the day playing games.",
    "The fluffy white clouds drifted lazily across the bright blue sky above us.",
    "My parents cooked a special dinner to celebrate their anniversary last night.",
    "The brave firefighter quickly rescued the small cat from the burning building.",
    "She carefully wrapped the special gift in colorful paper for her best friend.",
    "The old car rattled and squeaked as it slowly drove down the long, dirt road.",
    "He happily played a lively game of basketball with his friends after school.",
    "The bright full moon shone down on the quiet, sleepy town below them.",
    "My sister and I watched our favorite cartoon show on the sofa.",
    "The birds sang a sweet morning song from the tall, green trees."
]

# Spanish example sentences (30 entries) for the translation exercise.
# NOTE(review): these appear to be the index-aligned translations of the
# English `sentences` list -- keep both lists the same length and order.
spanish_sentences = [
    "Al que madruga, Dios le ayuda, y comienza su día.",
    "Una suave lluvia cayó toda la noche, nutriendo el jardín recién plantado.",
    "Ella corrió rápidamente para alcanzar el autobús antes de que se fuera sin ella.",
    "Él horneó un gran pastel para la celebración de cumpleaños de su amigo el fin de semana pasado.",
    "El viejo faro se erguía alto contra las olas rompientes del mar tempestuoso.",
    "Mi hermano pequeño está aprendiendo a montar su nueva bicicleta por primera vez.",
    "Un pequeño y simpático gatito jugaba con una bola de lana en el suelo de madera.",
    "Ellos vieron los deslumbrantes fuegos artificiales iluminar el cielo nocturno desde su balcón.",
    "Los estudiantes escucharon atentamente la importante conferencia del profesor.",
    "Me gusta leer novelas de misterio en el sofá durante las tardes frías y lluviosas.",
    "El perro alegre meneó la cola con emoción al ver a su dueño.",
    "Ella recogió un gran ramo de hermosas flores del parque comunitario.",
    "La tranquila biblioteca era el lugar perfecto para estudiar y hacer el trabajo.",
    "Él sorbía una taza de café caliente mientras veía las noticias de la mañana en la televisión.",
    "Las hojas de otoño pintaron todo el bosque en brillantes tonos de amarillo y naranja.",
    "Mi abuela nos contó muchas historias cautivadoras sobre sus aventuras de la infancia.",
    "Un astuto zorro se coló en la granja y robó un pollo gordo y delicioso.",
    "El talentoso músico tocó una hermosa melodía en su nueva y brillante guitarra.",
    "Ella escribió una larga y sincera carta a su querida amiga que vivía muy lejos.",
    "Los niños se reían de alegría mientras construían un enorme castillo de arena en la playa.",
    "Él terminó sus tareas y luego felizmente pasó el resto del día jugando.",
    "Las mullidas nubes blancas flotaban perezosamente a través del cielo azul brillante sobre nosotros.",
    "Mis padres cocinaron una cena especial para celebrar su aniversario anoche.",
    "El valiente bombero rescató rápidamente al pequeño gato del edificio en llamas.",
    "Ella envolvió cuidadosamente el regalo especial en papel de colores para su mejor amigo.",
    "El viejo coche traqueteaba y rechinaba mientras bajaba lentamente por el largo camino de tierra.",
    "Él jugó felizmente un animado partido de baloncesto con sus amigos después de la escuela.",
    "La luna llena y brillante iluminó el pueblo tranquilo y dormido debajo de ellos.",
    "Mi hermana y yo vimos nuestro programa de dibujos animados favorito en el sofá.",
    "Los pájaros cantaron una dulce canción matutina desde los altos árboles verdes."
]

class TEXT_TO_EMBEDD:
    """Tokenize parallel sentence lists into padded id tensors.

    Wraps the ``bert-base-uncased`` tokenizer and registers two additional
    special tokens, ``[SOS]`` and ``[EOS]``, which ``convert`` places around
    every target sentence before tokenizing it.
    """

    def __init__(self) -> None:
        self.tokenizer = transformers.AutoTokenizer.from_pretrained("bert-base-uncased")
        # Register the sequence markers with the tokenizer. ``convert``
        # reports ``len(self.tokenizer)`` as the vocabulary size so that the
        # ids of these added tokens are covered by downstream embeddings.
        special_tokens = {'additional_special_tokens': ['[SOS]', '[EOS]']}
        self.tokenizer.add_special_tokens(special_tokens)
        self.sos_token_id = self.tokenizer.convert_tokens_to_ids('[SOS]')
        self.eos_token_id = self.tokenizer.convert_tokens_to_ids('[EOS]')

    def convert(self, input_sentences, output_sentences):
        """Tokenize source and target sentences into padded batches.

        Args:
            input_sentences: Source-language sentences, passed to the
                tokenizer unchanged.
            output_sentences: Target-language sentences; each one is wrapped
                as ``"[SOS] <sentence> [EOS]"`` before tokenization.

        Returns:
            A dict with:
              * ``"input_ids"``   - tokenizer ``input_ids`` for the sources,
              * ``"output_ids"``  - tokenizer ``input_ids`` for the wrapped
                targets,
              * ``"input_mask"``  - tokenizer ``attention_mask`` for the
                sources,
              * ``"output_mask"`` - tokenizer ``attention_mask`` for the
                wrapped targets,
              * ``"vocab_size"``  - ``len(self.tokenizer)``,
              * ``"tokenizer"``   - the tokenizer instance itself.

            Each side is padded (``padding=True``) independently, so the two
            batches may have different sequence lengths.
        """
        wrapped_targets = [f"[SOS] {sent} [EOS]" for sent in output_sentences]

        encoded_outputs = self.tokenizer(
            wrapped_targets, padding=True, truncation=True, return_tensors='pt'
        )
        encoded_inputs = self.tokenizer(
            input_sentences, padding=True, truncation=True, return_tensors='pt'
        )

        return {
            "input_ids": encoded_inputs["input_ids"],
            "output_ids": encoded_outputs["input_ids"],
            # The masks were previously computed and then dropped; exposing
            # them lets callers distinguish padding from real tokens.
            "input_mask": encoded_inputs["attention_mask"],
            "output_mask": encoded_outputs["attention_mask"],
            "vocab_size": len(self.tokenizer),
            "tokenizer": self.tokenizer,
        }


# --- Encoder ---

class Encoder(torch.nn.Module):
    """Batch-first LSTM that encodes an already-embedded sequence.

    Args:
        input_dim: Feature size of each input step.
        num_layers: Number of stacked LSTM layers.
        hidden_dim: Hidden-state size of the LSTM.
        dropout: Dropout passed to the LSTM; forced to ``0.0`` when
            ``num_layers`` is not greater than 1.
        *args, **kwargs: Forwarded to ``torch.nn.Module.__init__``.
    """

    def __init__(self, input_dim, num_layers, hidden_dim, dropout, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # nn.LSTM applies dropout between stacked layers only, so a
        # single-layer network gets none.
        inter_layer_dropout = 0.0
        if num_layers > 1:
            inter_layer_dropout = float(dropout)

        self.lstm = torch.nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )

    def forward(self, Input_embbed):
        """Run the LSTM over ``Input_embbed``.

        Returns:
            A 3-tuple ``(output, hidden_state, cell_state)``: the per-step
            LSTM output followed by the final hidden and cell states.
        """
        sequence_out, final_state = self.lstm(Input_embbed)
        hidden, cell = final_state
        return sequence_out, hidden, cell




class Decoder(torch.nn.Module):
    """Batch-first LSTM that decodes from a supplied initial state.

    Note the positional order ``(input_dim, num_layers, dropout, hidden_dim)``:
    ``dropout`` comes before ``hidden_dim`` here.

    Args:
        input_dim: Feature size of each input step.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout passed to the LSTM; forced to ``0.0`` when
            ``num_layers`` is not greater than 1.
        hidden_dim: Hidden-state size of the LSTM.
        *args, **kwargs: Forwarded to ``torch.nn.Module.__init__``.
    """

    def __init__(self, input_dim, num_layers, dropout, hidden_dim, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.lstm = torch.nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            # Dropout is only meaningful between stacked layers.
            dropout=0.0 if num_layers <= 1 else float(dropout),
        )

    def forward(self, output, input_hidden_state):
        """Run the LSTM over ``output`` starting from ``input_hidden_state``.

        Args:
            output: Embedded decoder inputs fed to the LSTM.
            input_hidden_state: Initial ``(hidden, cell)`` pair for the LSTM.

        Returns:
            A 3-tuple ``(lstm_output, hidden_state, cell_state)``.
        """
        decoded, (hidden, cell) = self.lstm(output, input_hidden_state)
        return decoded, hidden, cell





class Encoder_Decoder_Architecture(torch.nn.Module):
    """LSTM encoder-decoder that maps source token ids to target-token logits.

    One embedding table is shared by source and target ids. The encoder's
    final ``(hidden, cell)`` state is used as the decoder's initial state,
    and a linear layer projects decoder outputs to ``vocab_size`` logits.

    Args:
        emmbed_dim: Embedding size; also the input size of both LSTMs.
        enc_layers, enc_hidden, enc_drop: Encoder depth, hidden size, dropout.
        dec_layers, dec_hidden, dec_drop: Decoder depth, hidden size, dropout.
        vocab_size: Number of rows in the embedding and number of output
            classes.
        *args, **kwargs: Forwarded to ``torch.nn.Module.__init__``.

    Raises:
        ValueError: If the encoder and decoder differ in layer count or
            hidden size. The encoder state is handed to the decoder as-is,
            so both must match.
    """

    def __init__(self, emmbed_dim, enc_layers, enc_hidden, enc_drop,
                 dec_layers, dec_hidden, dec_drop,
                 vocab_size, *args, **kwargs):
        # Fail early with a clear message instead of an LSTM shape error
        # deep inside forward().
        if enc_layers != dec_layers or enc_hidden != dec_hidden:
            raise ValueError(
                "encoder and decoder must share num_layers and hidden size "
                f"(got enc_layers={enc_layers}, dec_layers={dec_layers}, "
                f"enc_hidden={enc_hidden}, dec_hidden={dec_hidden})"
            )

        super().__init__(*args, **kwargs)
        self.embedding = torch.nn.Embedding(vocab_size, emmbed_dim)

        self.encoder = Encoder(emmbed_dim, enc_layers, enc_hidden, enc_drop)
        # Decoder takes (input_dim, num_layers, dropout, hidden_dim).
        self.decoder = Decoder(emmbed_dim, dec_layers, dec_drop, dec_hidden)
        self.layer = torch.nn.Linear(dec_hidden, vocab_size)
        # Not applied in forward(), which returns raw logits; kept as an
        # attribute for callers that want probabilities.
        self.softmax = torch.nn.Softmax(-1)

    def forward(self, Sentence_ids, Sentences_translations_ids):
        """Compute logits for each position of the target ids.

        Args:
            Sentence_ids: Source token ids.
            Sentences_translations_ids: Target token ids fed to the decoder.

        Returns:
            Un-normalised logits whose last dimension is ``vocab_size``.
        """
        src_emb = self.embedding(Sentence_ids)
        target_emb = self.embedding(Sentences_translations_ids)

        # Call the sub-modules (not .forward) so registered hooks run.
        _, enc_hidden_state, enc_cell_state = self.encoder(src_emb)
        decoder_output, _, _ = self.decoder(
            target_emb, (enc_hidden_state, enc_cell_state)
        )

        return self.layer(decoder_output)



def train(epochs, sentences, translated_sentences, lr=0.001):
    """Train the seq2seq model on a single full batch of sentence pairs.

    All sentences are tokenized once and every epoch runs one optimisation
    step over the whole batch. The decoder is fed the target ids without
    their last position and is trained to predict the target ids shifted
    left by one.

    Args:
        epochs: Number of optimisation steps to run.
        sentences: Source sentences.
        translated_sentences: Target sentences, index-aligned with
            ``sentences``.
        lr: Adam learning rate.

    Returns:
        ``(model, tokenizer)``: the trained ``Encoder_Decoder_Architecture``
        and the tokenizer used to build its vocabulary.
    """
    emb = TEXT_TO_EMBEDD()

    data = emb.convert(sentences, translated_sentences)
    input_ids = data["input_ids"]
    output_ids = data["output_ids"]
    vocab_size = data["vocab_size"]
    tokenizer = data["tokenizer"]

    # Take the padding id from the tokenizer rather than hard-coding it;
    # fall back to the previous constant (0) if the tokenizer defines none.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = 0

    decoder_input_ids = output_ids[:, :-1]
    decoder_target_ids = output_ids[:, 1:]
    # Positions holding real (non-padding) target tokens.
    target_is_real = decoder_target_ids != pad_id

    embedd_dim = 128

    seq2seq = Encoder_Decoder_Architecture(
        embedd_dim, enc_layers=3, enc_hidden=128, enc_drop=0.3,
        dec_layers=3, dec_hidden=128, dec_drop=0.2,
        vocab_size=vocab_size,
    )
    optimizer = torch.optim.Adam(seq2seq.parameters(), lr=lr)
    loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_id)

    for i in range(epochs):
        seq2seq.train()
        optimizer.zero_grad()

        logits = seq2seq(input_ids, decoder_input_ids)
        vocab_dim = logits.shape[-1]

        # CrossEntropyLoss expects (N, C) logits and (N,) class indices.
        ls = loss_fn(logits.reshape(-1, vocab_dim), decoder_target_ids.reshape(-1))
        ls.backward()
        torch.nn.utils.clip_grad_norm_(seq2seq.parameters(), 1.0)

        optimizer.step()

        if i % 100 == 0:
            with torch.no_grad():
                preds = torch.argmax(logits, dim=-1)
                # Score only real tokens: padded positions are ignored by
                # the loss, so counting them would distort the metric.
                correct = (preds == decoder_target_ids) & target_is_real
                acc = correct.sum().float() / target_is_real.sum().clamp(min=1)
            print(f"Epoch:{i} , loss: {ls.item():.4f} , accuracy: {acc.item():.4f}")

    return seq2seq, tokenizer




