In [1]:
import pickle
import torch
import numpy as np
import json
import tokens
torch.cuda.empty_cache()

In [2]:
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
torch.cuda.reset_accumulated_memory_stats()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_max_memory_cached()
# Optionally, you can also use the following command to reset the memory allocator
# torch.cuda.memory._record_memory_history(enabled=False)
print("Cleared CUDA memory.")

Cleared CUDA memory.




In [3]:
# print(torch.cuda.memory_summary(device=None, abbreviated=False))

In [4]:


def check_gpu_memory():
    total_memory = torch.cuda.get_device_properties(0).total_memory
    allocated_memory = torch.cuda.memory_allocated(0)
    reserved_memory = torch.cuda.memory_reserved(0)

    print(f"Total GPU Memory: {total_memory / (1024 ** 3):.2f} GB")
    print(f"Allocated GPU Memory: {allocated_memory / (1024 ** 3):.2f} GB")
    print(f"Reserved GPU Memory: {reserved_memory / (1024 ** 3):.2f} GB")

# Example usage
check_gpu_memory()


Total GPU Memory: 6.00 GB
Allocated GPU Memory: 0.00 GB
Reserved GPU Memory: 0.00 GB


In [5]:
torch.cuda.is_available()

True

In [6]:
with open('english_to_index.json', 'r') as f:
    data = json.load(f)

english_to_index = {eval(key) : val for key,val in data.items()}

In [7]:
with open('index_to_english.json', 'r') as f:
    data = json.load(f)

index_to_english = {eval(key) : eval(val) for key,val in data.items()}

In [8]:
with open('hindi_to_index.json', 'r') as f:
    data = json.load(f)

hindi_to_index = {eval(key) : val for key,val in data.items()}

In [9]:
with open('index_to_hindi.json', 'r') as f:
    data = json.load(f)

index_to_hindi = {eval(key) : eval(val) for key,val in data.items()}

In [10]:
index_to_english[4567], index_to_hindi[10000].decode('utf-8', errors='replace')

(b'hearing ', '�समें ')

In [11]:
with open('english_sentences.txt', 'r', encoding='utf-8') as f:
    EngData = f.readlines()

with open('hindi_sentences.txt', 'r', encoding='utf-8') as f:
    HindiData = f.readlines()

In [12]:
EngData[0],HindiData[0]

('however paes who was partnering australias paul hanley could only go as far as the quarterfinals where they lost to bhupathi and knowles\n',
 'आस्ट्रेलिया के पाल हेनली के साथ जोड़ी बनाने वाले पेस मियामी में क्वार्टरफाइनल तक ही पहुंच सके क्योंकि इस दौर में उन्हें भूपति और नोल्स ने हराया था।\n')

In [13]:
english_sentences = [sentence.rstrip('\n').lower() for sentence in EngData]
Hindi_sentences = [sentence.rstrip('\n') for sentence in HindiData]

In [14]:
english_sentences[:3], Hindi_sentences[:3]

(['however paes who was partnering australias paul hanley could only go as far as the quarterfinals where they lost to bhupathi and knowles',
  'whosoever desires the reward of the world with allah is the reward of the world and of the everlasting life allah is the hearer the seer',
  'the value of insects in the biosphere is enormous because they outnumber all other living groups in measure of species richness'],
 ['आस्ट्रेलिया के पाल हेनली के साथ जोड़ी बनाने वाले पेस मियामी में क्वार्टरफाइनल तक ही पहुंच सके क्योंकि इस दौर में उन्हें भूपति और नोल्स ने हराया था।',
  'और जो शख्स अपने आमाल का बदला दुनिया ही में चाहता है तो ख़ुदा के पास दुनिया व आख़िरत दोनों का अज्र मौजूद है और ख़ुदा तो हर शख्स की सुनता और सबको देखता है',
  'जैवमंडल में कीड़ों का मूल्य बहुत है क्योंकि प्रजातियों की समृद्धि के मामले में उनकी संख्या अन्य जीव समूहों से ज़्यादा है।'])

In [15]:
len(english_sentences), len(Hindi_sentences)

(99737, 99737)

In [16]:
english_sentences = english_sentences[:50000]
Hindi_sentences = Hindi_sentences[:50000]

In [17]:
len(english_sentences), len(Hindi_sentences)

(50000, 50000)

In [18]:
# max_sequence_length = 100

# def is_valid_length(sentence, typee):
#     ids = tokens.encode(sentence, typee)

#     return len(ids) < max_sequence_length-2

# valid_index = []

# for i in range(len(english_sentences)):
#     if( is_valid_length(english_sentences[i], 'encoder') and is_valid_length(Hindi_sentences[i], 'decoder') ):
#         valid_index.append(i)


max_sequence_length = 100
with open('valid_index_100.pkl', 'rb') as file:
    # Load the data from the file
    valid_index = pickle.load(file)

In [19]:
type(valid_index), len(valid_index)

(list, 9394)

In [20]:
len(english_sentences), len(valid_index)

(50000, 9394)

In [21]:
# with open('valid_index_100.pkl', 'wb') as f:
#     # Dump the list into the file
#     pickle.dump(valid_index, f)

In [22]:
Hindi_sentences = [Hindi_sentences[i] for i in valid_index]
english_sentences = [english_sentences[i] for i in valid_index]

In [23]:
from transformer import Transformer
d_model = 512
max_seq_len = max_sequence_length
num_head = 8
head_dim = d_model // num_head
drop_prob = 0.1
ffn = d_model*2
encoder_type = 'encoder'
decoder_type = 'decoder' 
n_layers = 1
start_token = b'<START>'
end_token = b'<END>'
padding_token = b'<PAD>'

transformer = Transformer(d_model, 
                          max_seq_len, 
                          num_head, 
                          head_dim, 
                          drop_prob, 
                          english_to_index, 
                          ffn, hindi_to_index, 
                          encoder_type, decoder_type, 
                          n_layers, start_token, 
                          end_token, padding_token)
transformer

Transformer(
  (encoder): Encoder(
    (sentence_embedding): SentenceEmbedding(
      (embedding): Embedding(55003, 512)
      (position_encoder): PositionalEncoding()
    )
    (layers): SequentialEncoder(
      (0): EncoderLayers(
        (attention): MultiheadAttention(
          (qkv_layer): Linear(in_features=512, out_features=1536, bias=True)
          (linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (norm): LayerNormalization()
        (feedforward): PositionwiseFeedForward(
          (linear1): Linear(in_features=512, out_features=1024, bias=True)
          (linear2): Linear(in_features=1024, out_features=512, bias=True)
          (relu): ReLU()
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (decoder): Decoder(
    (sentence_embedding): SentenceEmbedding(
      (embedding): Embedding(75003, 512)
      (position_encoder): PositionalEncoding()
    )
    (layers): Seq

In [24]:
from torch.utils.data import Dataset, DataLoader

class TextData(Dataset):
    def __init__(self, english_sentences, hindi_sentences) -> None:
        super().__init__()
        self.english_sentences = english_sentences
        self.hindi_sentences = hindi_sentences

    def __len__(self):
        return len(self.english_sentences)
    
    def __getitem__(self, index):
        return self.english_sentences[index], self.hindi_sentences[index]

In [25]:
dataset = TextData(english_sentences, Hindi_sentences)

In [26]:
batch_size = 20

In [27]:
dataset[1]

('they are not seen anywhere', 'दोनों ही कहीं भी दिखाई नहीं पड़ रहे।')

In [28]:
import random

leraning_rate = random.uniform(0.001, 1)
leraning_rate

0.07577382830185518

In [29]:
from torch import nn

criterian = nn.CrossEntropyLoss(ignore_index=hindi_to_index[padding_token], reduction='none')

for params in transformer.parameters():
    if params.dim() > 1:
        nn.init.xavier_uniform(params)

optim = torch.optim.Adam(transformer.parameters(), lr=leraning_rate)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

  nn.init.xavier_uniform(params)


In [30]:
import torch
torch.cuda.empty_cache()

In [31]:
train_loader = DataLoader(dataset, batch_size)

In [32]:
NEG_INFTY = -1e9
# NEG_INFTY = float('-inf')

def create_masks(eng_batch, hn_batch):
    max_sequence_length = 100
    num_sentences = len(eng_batch)
    look_ahead_mask = torch.full([max_sequence_length, max_sequence_length], True)
    look_ahead_mask = torch.triu(look_ahead_mask, diagonal=1)

    encoder_padding_mask = torch.full([num_sentences, max_sequence_length, max_sequence_length], False)
    decoder_padding_mask_self_attention = torch.full([num_sentences, max_sequence_length, max_sequence_length], False)
    decoder_padding_mask_cross_attention = torch.full([num_sentences, max_sequence_length, max_sequence_length], False)

    for idx in range(num_sentences):
        eng_sentence_length, hn_sentence_length = len(tokens.encode(eng_batch[idx], 'encoder')), len(tokens.encode(hn_batch[idx], 'decoder'))
        eng_tokens_to_padding_mask = np.arange(eng_sentence_length+1, max_sequence_length)
        hn_tokens_to_padding_mask = np.arange(hn_sentence_length+1, max_sequence_length)

        encoder_padding_mask[idx, :, eng_tokens_to_padding_mask] = True
        encoder_padding_mask[idx , eng_tokens_to_padding_mask, :] = True

        decoder_padding_mask_self_attention[idx, :, hn_tokens_to_padding_mask] = True
        decoder_padding_mask_self_attention[idx, hn_tokens_to_padding_mask, :] = True

        decoder_padding_mask_cross_attention[idx, :, eng_tokens_to_padding_mask] = True
        decoder_padding_mask_cross_attention[idx, hn_tokens_to_padding_mask, :] = True

    encoder_self_attention_mask = torch.where(encoder_padding_mask, NEG_INFTY, 0)
    decoder_self_attention_mask =  torch.where(look_ahead_mask + decoder_padding_mask_self_attention, NEG_INFTY, 0)
    decoder_cross_attention_mask = torch.where(decoder_padding_mask_cross_attention, NEG_INFTY, 0)
    return encoder_self_attention_mask, decoder_self_attention_mask, decoder_cross_attention_mask
        

In [33]:
# PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

In [47]:
transformer.train()
transformer.to(device)
total_loss = 0
# num_epoch = 10
num_epoch = 5

for epoch in range(num_epoch):
    print(f"EPOCH :- {epoch}")
    iterator = iter(train_loader)

    for batch_num, batch in enumerate(iterator):
        transformer.train()
        eng_batch, hn_batch = batch
        encoder_self_attention_mask, decoder_self_attention_mask, decoder_cross_attention_mask = create_masks(eng_batch, hn_batch)
        optim.zero_grad()

        hn_prediction = transformer(eng_batch,
                                     hn_batch,
                                     encoder_self_attention_mask.to(device), 
                                     decoder_self_attention_mask.to(device), 
                                     decoder_cross_attention_mask.to(device))
        
        labels = transformer.decoder.sentence_embedding.batchTokenize(hn_batch, start=True, end=True)
        loss = criterian(
            hn_prediction.view(-1, len(hindi_to_index)).to(device),
            labels.view(-1).to(device)
        ).to(device)

        valid_indices = torch.where(labels.view(-1) == hindi_to_index[padding_token], False, True)

        loss = loss.sum()/valid_indices.sum()
        loss.backward()
        optim.step()


        if(batch_num%100 == 0):
            print(f"Iteration {batch_num} : {loss.item()}")
            print(f"English : {eng_batch[0]}")
            print(f"Translation : {hn_batch[0]}")

            hn_sentence_prediction = torch.argmax(hn_prediction[0], axis=1)
            hindi_prediction = []
            for i in hn_sentence_prediction:
                if(i == hindi_to_index[end_token]):
                    break
                hindi_prediction.append(i.item())
            print("Predicted length :- ",len(hindi_prediction))
            print(f"Hindi Prediction :- {tokens.decode(hindi_prediction, 'decoder')}")

            transformer.eval()
            hn_sentence = ("",)
            eng_sentence = ("himandri and bindu rani both friends were from karnals salwan village",)

            checking = []
            for word_counter in range(max_sequence_length):
                encoder_self_attention_mask, decoder_self_attention_mask, decoder_cross_attention_mask = create_masks(eng_sentence, hn_sentence)
                predictions = transformer(eng_batch,
                                            hn_batch,
                                            encoder_self_attention_mask.to(device), 
                                            decoder_self_attention_mask.to(device), 
                                            decoder_cross_attention_mask.to(device))
                next_token_index = torch.argmax(predictions[0][word_counter]).item()
                next_token = tokens.decode([next_token_index], 'decoder')
                if(next_token == "<END>"):
                    break
                checking.append(next_token_index)
                hn_sentence = (hn_sentence[0]+next_token,)
            print("Generated Tokens :- ",checking)
            print(f"Evaluation Of {eng_sentence[0]} :-- ")
            print("Generated :- ",hn_sentence[0])
            # print(f"Checking :- ", tokens.decode(checking, 'decoder'))

EPOCH :- 0
Iteration 0 : 2.5266993045806885
English : share videos
Translation : वीडियो क्लिप शेयर किए
Predicted length :-  54
Hindi Prediction :- �त���� ��������� �����������थ � ोत�� त�
Generated Tokens :-  [164, 224, 164, 164, 164, 165, 165, 224, 164, 32, 164, 164, 32, 164, 164, 164, 164, 165, 165, 32, 164, 164, 164, 164, 165, 164, 224, 224, 224, 224, 164]
Evaluation Of himandri and bindu rani both friends were from karnals salwan village :-- 
Generated :-  ��������� �� ������ �����������
Iteration 100 : 2.6060941219329834
English : bajaj auto in search of a winner
Translation : बजाज ऑटो  खेल में फिर से वापसी के आसार
Predicted length :-  100
Hindi Prediction :- ����������������  त���� ������ ��त�� थ�� त��थथ॥ त�� ��थ���त�
Generated Tokens :-  [164, 224, 164, 139, 224, 164, 224, 224, 164, 224, 224, 164, 224, 165, 224, 224, 224, 224, 164, 224, 224, 165, 224, 32, 32, 224, 164, 164, 224, 165, 224, 224, 164, 224, 32, 224, 164, 224, 224, 165, 224, 224, 164, 224, 32, 224, 164, 139, 224, 164,

In [None]:
model_path = 'transformer_model_2.pth'
torch.save(transformer.state_dict(), model_path)

In [None]:
transformer = Transformer(d_model, 
                          max_seq_len, 
                          num_head, 
                          head_dim, 
                          drop_prob, 
                          english_to_index, 
                          ffn, hindi_to_index, 
                          encoder_type, decoder_type, 
                          n_layers, start_token, 
                          end_token, padding_token)


In [None]:
transformer.load_state_dict(torch.load(model_path))
transformer.to(device)
transformer.eval()


Transformer(
  (encoder): Encoder(
    (sentence_embedding): SentenceEmbedding(
      (embedding): Embedding(55003, 512)
      (position_encoder): PositionalEncoding()
    )
    (layers): SequentialEncoder(
      (0): EncoderLayers(
        (attention): MultiheadAttention(
          (qkv_layer): Linear(in_features=512, out_features=1536, bias=True)
          (linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (norm): LayerNormalization()
        (feedforward): PositionwiseFeedForward(
          (linear1): Linear(in_features=512, out_features=1024, bias=True)
          (linear2): Linear(in_features=1024, out_features=512, bias=True)
          (relu): ReLU()
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (decoder): Decoder(
    (sentence_embedding): SentenceEmbedding(
      (embedding): Embedding(75003, 512)
      (position_encoder): PositionalEncoding()
    )
    (layers): Seq

In [None]:
def predict(eng_sentence, transformer, max_sequence_length, device):
    transformer.eval()
    hn_sentence = ("",)

    for word_counter in range(max_sequence_length):
        encoder_self_attention_mask, decoder_self_attention_mask, decoder_cross_attention_mask = create_masks(eng_sentence, hn_sentence)
        predictions = transformer(eng_sentence,
                                  hn_sentence,
                                  encoder_self_attention_mask.to(device), 
                                  decoder_self_attention_mask.to(device), 
                                  decoder_cross_attention_mask.to(device))
        # print(len(predictions.shape),"gjgfgh")
        next_token_index = torch.argmax(predictions[0][word_counter]).item()
        next_token = tokens.decode([next_token_index], 'decoder')
        if next_token == "<END>":
            print("break")
            break
        hn_sentence = (hn_sentence[0] + next_token,)
    
    return hn_sentence

# Example usage:
eng_sentence = ("hello how are you??",)
predicted_sentence = predict(eng_sentence, transformer, max_sequence_length, device)
print("Generated :- ", predicted_sentence)


3 gjgfgh
3 gjgfgh
3 gjgfgh
break
Generated :-  ('��',)
