# Muhammad Tahir Zia - 2021465

# Akhtar Ali - 2021758

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Downloaded Compatible Versions for Torch and TorchText

In [None]:
pip install torch==2.0.0 torchtext==0.15.1

Collecting torch==2.0.0
  Downloading torch-2.0.0-cp311-cp311-manylinux1_x86_64.whl.metadata (24 kB)
Collecting torchtext==0.15.1
  Downloading torchtext-0.15.1-cp311-cp311-manylinux1_x86_64.whl.metadata (7.4 kB)
Collecting nvidia-cuda-nvrtc-cu11==11.7.99 (from torch==2.0.0)
  Downloading nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu11==11.7.99 (from torch==2.0.0)
  Downloading nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cuda-cupti-cu11==11.7.101 (from torch==2.0.0)
  Downloading nvidia_cuda_cupti_cu11-11.7.101-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu11==8.5.0.96 (from torch==2.0.0)
  Downloading nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu11==11.10.3.66 (from torch==2.0.0)
  Downloading nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Co

### Created an environment to Train the Model otherwise it was giving errors

In [None]:
import os
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
import torch
torch.use_deterministic_algorithms(True)
# Set up an environment for training the model

### Importing Libraries

In [None]:
import copy
from typing import Optional, Any, Union, Callable

import torch
import math
import time
import torch.nn as nn
from torch import Tensor
import torch.nn.functional as F
from torch.nn import Module
from torch.nn import MultiheadAttention
from torch.nn import ModuleList
from torch.nn.init import xavier_uniform_
from torch.nn import Dropout
from torch.nn import Linear
from torch.nn import LayerNorm

import spacy

from collections import Counter
import io
from torchtext.vocab import vocab
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

from nltk.translate.bleu_score import sentence_bleu
from torchtext.data.metrics import bleu_score

import sys

## Tokens

### Urdu Tokens

In [None]:
# load urdu object via spacy
nlp = spacy.blank('ur')
# generate token for input urdu text
def urdu_sen_tokens(inp):
    doc=nlp(inp)
    doc=str(doc)
    doc=doc.replace("\n","")
    return [doc]

### English Tokens

In [None]:
#loading english object from spacy
nlp = spacy.blank('en')
# generate token for input english text
def eng_sen_tokens(inp):
    doc=nlp(inp)
    doc=str(doc)
    doc=doc.replace("\n","")
    return [doc]

## Transformer

### Layers

#### Encoder Layer

In [None]:
class TransformerEncoderLayer(Module):

    __constants__ = ['batch_first', 'norm_first']

    def __init__(self, d_model: int, nhead: int, dim_feedforward: int = 2048, dropout: float = 0.1,
                 activation: Union[str, Callable[[Tensor], Tensor]] = F.relu,
                 layer_norm_eps: float = 1e-5, batch_first: bool = False, norm_first: bool = False,
                 device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first,
                                            **factory_kwargs)
        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward, **factory_kwargs)#input features,output features
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model, **factory_kwargs)

        self.norm_first = norm_first
        self.norm1 = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.norm2 = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)

        # Legacy string support for activation function.
        if isinstance(activation, str):
            activation = _get_activation_fn(activation)

        if activation is F.relu:
            self.activation_relu_or_gelu = 1
        elif activation is F.gelu:
            self.activation_relu_or_gelu = 2
        else:
            self.activation_relu_or_gelu = 0
        self.activation = activation

    def __setstate__(self, state):
        if 'activation' not in state:
            state['activation'] = F.relu
        super(TransformerEncoderLayer, self).__setstate__(state)

    def forward(self, src: Tensor, src_mask: Optional[Tensor] = None,
                src_key_padding_mask: Optional[Tensor] = None) -> Tensor:

        if (src.dim() == 3 and not self.norm_first and not self.training and
            self.self_attn.batch_first and
            self.self_attn._qkv_same_embed_dim and self.activation_relu_or_gelu and
            self.norm1.eps == self.norm2.eps and
            ((src_mask is None and src_key_padding_mask is None)
             if src.is_nested
             else (src_mask is None or src_key_padding_mask is None))):
            tensor_args = (
                src,
                self.self_attn.in_proj_weight,
                self.self_attn.in_proj_bias,
                self.self_attn.out_proj.weight,
                self.self_attn.out_proj.bias,
                self.norm1.weight,
                self.norm1.bias,
                self.norm2.weight,
                self.norm2.bias,
                self.linear1.weight,
                self.linear1.bias,
                self.linear2.weight,
                self.linear2.bias,
            )#biases and weights
            if (not torch.overrides.has_torch_function(tensor_args) and
                    # We have to use a list comprehension here because TorchScript
                    # doesn't support generator expressions.
                    all([(x.is_cuda or 'cpu' in str(x.device)) for x in tensor_args]) and
                    (not torch.is_grad_enabled() or all([not x.requires_grad for x in tensor_args]))):
                return torch._transformer_encoder_layer_fwd(
                    src,
                    self.self_attn.embed_dim,
                    self.self_attn.num_heads,
                    self.self_attn.in_proj_weight,
                    self.self_attn.in_proj_bias,
                    self.self_attn.out_proj.weight,
                    self.self_attn.out_proj.bias,
                    self.activation_relu_or_gelu == 2,
                    False,
                    self.norm1.eps,
                    self.norm1.weight,
                    self.norm1.bias,
                    self.norm2.weight,
                    self.norm2.bias,
                    self.linear1.weight,
                    self.linear1.bias,
                    self.linear2.weight,
                    self.linear2.bias,
                    src_mask if src_mask is not None else src_key_padding_mask,
                )
        x = src
        if self.norm_first:
            x = x + self._sa_block(self.norm1(x), src_mask, src_key_padding_mask)
            x = x + self._ff_block(self.norm2(x))
        else:
            x = self.norm1(x + self._sa_block(x, src_mask, src_key_padding_mask))
            x = self.norm2(x + self._ff_block(x))

        return x

    # self-attention block
    def _sa_block(self, x: Tensor,
                  attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor]) -> Tensor:
        x = self.self_attn(x, x, x,
                           attn_mask=attn_mask,
                           key_padding_mask=key_padding_mask,
                           need_weights=False)[0]
        return self.dropout1(x)

    # feed forward block
    def _ff_block(self, x: Tensor) -> Tensor:
        x = self.linear2(self.dropout(self.activation(self.linear1(x))))
        return self.dropout2(x)

#### Decoder Layer

In [None]:
class TransformerDecoderLayer(Module):
    __constants__ = ['batch_first', 'norm_first']

    def __init__(self, d_model: int, nhead: int, dim_feedforward: int = 2048, dropout: float = 0.1,
                 activation: Union[str, Callable[[Tensor], Tensor]] = F.relu,
                 layer_norm_eps: float = 1e-5, batch_first: bool = False, norm_first: bool = False,
                 device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super(TransformerDecoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first,
                                            **factory_kwargs)
        self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first,
                                                 **factory_kwargs)
        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward, **factory_kwargs)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model, **factory_kwargs)

        self.norm_first = norm_first
        self.norm1 = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.norm2 = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.norm3 = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
        self.dropout3 = Dropout(dropout)

        # Legacy string support for activation function.
        if isinstance(activation, str):
            self.activation = _get_activation_fn(activation)
        else:
            self.activation = activation

    def __setstate__(self, state):
        if 'activation' not in state:
            state['activation'] = F.relu
        super(TransformerDecoderLayer, self).__setstate__(state)

    def forward(self, tgt: Tensor, memory: Tensor, tgt_mask: Optional[Tensor] = None, memory_mask: Optional[Tensor] = None,
                tgt_key_padding_mask: Optional[Tensor] = None, memory_key_padding_mask: Optional[Tensor] = None) -> Tensor:

        x = tgt
        if self.norm_first:
            x = x + self._sa_block(self.norm1(x), tgt_mask, tgt_key_padding_mask)
            x = x + self._mha_block(self.norm2(x), memory, memory_mask, memory_key_padding_mask)
            x = x + self._ff_block(self.norm3(x))
        else:
            x = self.norm1(x + self._sa_block(x, tgt_mask, tgt_key_padding_mask))
            x = self.norm2(x + self._mha_block(x, memory, memory_mask, memory_key_padding_mask))
            x = self.norm3(x + self._ff_block(x))

        return x

    # self-attention block
    def _sa_block(self, x: Tensor,
                  attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor]) -> Tensor:
        x = self.self_attn(x, x, x,
                           attn_mask=attn_mask,
                           key_padding_mask=key_padding_mask,
                           need_weights=False)[0]
        return self.dropout1(x)

    # multihead attention block
    def _mha_block(self, x: Tensor, mem: Tensor,
                   attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor]) -> Tensor:
        x = self.multihead_attn(x, mem, mem,
                                attn_mask=attn_mask,
                                key_padding_mask=key_padding_mask,
                                need_weights=False)[0]
        return self.dropout2(x)

    # feed forward block
    def _ff_block(self, x: Tensor) -> Tensor:
        x = self.linear2(self.dropout(self.activation(self.linear1(x))))
        return self.dropout3(x)


### Modules

#### Encoder Module

In [None]:
# TransformerEncoder is a stack of N encoder layers
class TransformerEncoder(Module):

    __constants__ = ['norm']

    def __init__(self, encoder_layer, num_layers, norm=None, enable_nested_tensor=True):
        super(TransformerEncoder, self).__init__()
        self.layers = _get_clones(encoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm
        self.enable_nested_tensor = enable_nested_tensor

    def forward(self, src: Tensor, mask: Optional[Tensor] = None, src_key_padding_mask: Optional[Tensor] = None) -> Tensor:
        output = src
        convert_to_nested = False
        first_layer = self.layers[0]
        if isinstance(first_layer, torch.nn.TransformerEncoderLayer):
            if (not first_layer.norm_first and not first_layer.training and
                    first_layer.self_attn.batch_first and
                    first_layer.self_attn._qkv_same_embed_dim and first_layer.activation_relu_or_gelu and
                    first_layer.norm1.eps == first_layer.norm2.eps and
                    src.dim() == 3 and self.enable_nested_tensor) :
                if src_key_padding_mask is not None and not output.is_nested and mask is None:
                    tensor_args = (
                        src,
                        first_layer.self_attn.in_proj_weight,
                        first_layer.self_attn.in_proj_bias,
                        first_layer.self_attn.out_proj.weight,
                        first_layer.self_attn.out_proj.bias,
                        first_layer.norm1.weight,
                        first_layer.norm1.bias,
                        first_layer.norm2.weight,
                        first_layer.norm2.bias,
                        first_layer.linear1.weight,
                        first_layer.linear1.bias,
                        first_layer.linear2.weight,
                        first_layer.linear2.bias,
                    )
                    if not torch.overrides.has_torch_function(tensor_args):
                        if output.is_cuda or 'cpu' in str(output.device):
                            convert_to_nested = True
                            output = torch._nested_tensor_from_mask(output, src_key_padding_mask.logical_not())

        for mod in self.layers:
            if convert_to_nested:
                output = mod(output, src_mask=mask)
            else:
                output = mod(output, src_mask=mask, src_key_padding_mask=src_key_padding_mask)

        if convert_to_nested:
            output = output.to_padded_tensor(0.)

        if self.norm is not None:
            output = self.norm(output)

        return output


#### Decoder Module

In [None]:
# TransformerDecoder is a stack of N Decoder layers
class TransformerDecoder(Module):
    __constants__ = ['norm']

    def __init__(self, decoder_layer, num_layers, norm=None):
        super(TransformerDecoder, self).__init__()
        self.layers = _get_clones(decoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm

    def forward(self, tgt: Tensor, memory: Tensor, tgt_mask: Optional[Tensor] = None,
                memory_mask: Optional[Tensor] = None, tgt_key_padding_mask: Optional[Tensor] = None,
                memory_key_padding_mask: Optional[Tensor] = None) -> Tensor:

        output = tgt

        for mod in self.layers:
            output = mod(output, memory, tgt_mask=tgt_mask,
                         memory_mask=memory_mask,
                         tgt_key_padding_mask=tgt_key_padding_mask,
                         memory_key_padding_mask=memory_key_padding_mask)

        if self.norm is not None:
            output = self.norm(output)

        return output


#### Transformer Module

In [None]:
class Transformer(Module):
   #d_model=size of input embeddings is just a vector represenation of the particular word

    def __init__(self, d_model: int = 512, nhead: int = 8, num_encoder_layers: int = 6,
                 num_decoder_layers: int = 6, dim_feedforward: int = 2048, dropout: float = 0.1,
                 activation: Union[str, Callable[[Tensor], Tensor]] = F.relu,
                 custom_encoder: Optional[Any] = None, custom_decoder: Optional[Any] = None,
                 layer_norm_eps: float = 1e-5, batch_first: bool = False, norm_first: bool = False,
                 device=None, dtype=None) -> None:

        factory_kwargs = {'device': device, 'dtype': dtype}
        super(Transformer, self).__init__()

        if custom_encoder is not None:
            self.encoder = custom_encoder
        else:
            encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout,
                                                    activation, layer_norm_eps, batch_first, norm_first,
                                                    **factory_kwargs)
            encoder_norm = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
            self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)

        if custom_decoder is not None:
            self.decoder = custom_decoder
        else:
            decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout,
                                                    activation, layer_norm_eps, batch_first, norm_first,
                                                    **factory_kwargs)
            decoder_norm = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
            self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm)

        self._reset_parameters()

        self.d_model = d_model
        self.nhead = nhead

        self.batch_first = batch_first


# src_padding_mask, tgt_padding_mask, src_padding_mask)
    def forward(self, src: Tensor, tgt: Tensor, src_mask: Optional[Tensor] = None, tgt_mask: Optional[Tensor] = None,
                memory_mask: Optional[Tensor] = None, src_key_padding_mask: Optional[Tensor] = None,
                tgt_key_padding_mask: Optional[Tensor] = None, memory_key_padding_mask: Optional[Tensor] = None) -> Tensor:

        is_batched = src.dim() == 3
        if not self.batch_first and src.size(1) != tgt.size(1) and is_batched:
            raise RuntimeError("the batch number of src and tgt must be equal")
        elif self.batch_first and src.size(0) != tgt.size(0) and is_batched:
            raise RuntimeError("the batch number of src and tgt must be equal")

        if src.size(-1) != self.d_model or tgt.size(-1) != self.d_model:
            raise RuntimeError("the feature number of src and tgt must be equal to d_model")

        memory = self.encoder(src, mask=src_mask, src_key_padding_mask=src_key_padding_mask)#encoding forward pass
        output = self.decoder(tgt, memory, tgt_mask=tgt_mask, memory_mask=memory_mask,
                              tgt_key_padding_mask=tgt_key_padding_mask,
                              memory_key_padding_mask=memory_key_padding_mask)#decoding forward pass
        return output
    #the forward pass of decoder is our output

    @staticmethod
    def generate_square_subsequent_mask(sz: int) -> Tensor:
        return torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)


    def _reset_parameters(self):
        """Initiate parameters in the transformer model."""

        for p in self.parameters():
            if p.dim() > 1:
                xavier_uniform_(p)


In [None]:
def _get_clones(module, N):
    return ModuleList([copy.deepcopy(module) for i in range(N)])

#### Calling Activation Function

In [None]:
def _get_activation_fn(activation: str) -> Callable[[Tensor], Tensor]:
    if activation == "relu":
        return F.relu
    elif activation == "gelu":
        return F.gelu

    raise RuntimeError("activation should be relu/gelu, not {}".format(activation))

### Reading English Data

In [None]:
torch.manual_seed(0)
torch.use_deterministic_algorithms(True)

fileEnglish = open('/content/drive/MyDrive/myfolderUTE/English.txt', mode='rt', encoding='utf-8')
englishDataset = fileEnglish.read()
print(englishDataset[0:500])
file = open("/content/drive/MyDrive/myfolderUTE/English.txt", "r",encoding='utf-8')
x = 0
for line in file:

    if line != "\n":
        x += 1
    #print(line)
file.close()


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 712, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.11/dist-package

Los Angeles has lost night straight and 13 of its first 14 games to start the season.
Opposite qualities of meaning of person's name
To show anger after getting embarrassed
Money earned the wrong way will be taken away
To talk big without having a big position
More mouths will have more talks
To use the available opportunity
Getting involved without having
The grass is always greener on the other side
A person of no principles
Division is main reason for the damage
Evidence does not need proof
A


### Shape of Urdu Data

In [None]:
fileUrdu = open('/content/drive/MyDrive/myfolderUTE/Urdu.txt', mode='rt', encoding='utf-8')
urduDataset = fileUrdu.read()

file = open("/content/drive/MyDrive/myfolderUTE/Urdu.txt", "r",encoding='utf-8')
x = 0
for line in file:

    if line != "\n":
        x += 1
print((x))
file.close()

100000


### Data Splitting (70, 15, 15) & Creating Dataset

In [None]:
train_size_en = int(0.70 * x)
test_size_en = int(0.15 * x)
val_size_en = int(0.15 * x)
#division on the basis of lines

train_dataset = englishDataset[0:train_size_en]
test_dataset = englishDataset[train_size_en+1:train_size_en+test_size_en]
val_dataset = englishDataset[train_size_en+test_size_en+1:x]

train_size_urdu= int(0.70 * x)
test_size_urdu = int(0.15 *x)
val_size_urdu = int(0.15 * x)

train_dataset2 = urduDataset[0:train_size_urdu]
test_dataset2 = urduDataset[train_size_urdu+1:train_size_urdu+test_size_urdu]
val_dataset2 = urduDataset[train_size_urdu+test_size_urdu+1:x]

### Generate Separate Files for Each Split (English)

In [None]:
'''
f=open("english_train.txt","w")

f.write(train_dataset)

f=open("english_test.txt","w")
f.write(test_dataset)

f=open("english_val.txt","w")
f.write(val_dataset)
'''

'\nf=open("english_train.txt","w")\n\nf.write(train_dataset)\n\nf=open("english_test.txt","w")\nf.write(test_dataset)\n\nf=open("english_val.txt","w")\nf.write(val_dataset)\n'

### Generate Separate Files for Each Split (Urdu)

In [None]:
'''
f=open("urdu_train.txt","w",encoding="utf-8")

f.write(train_dataset2)

f=open("urdu_test.txt","w",encoding="utf-8")
f.write(test_dataset2)

f=open("urdu_val.txt","w",encoding="utf-8")
f.write(val_dataset2)
'''

'\nf=open("urdu_train.txt","w",encoding="utf-8")\n\nf.write(train_dataset2)\n\nf=open("urdu_test.txt","w",encoding="utf-8")\nf.write(test_dataset2)\n\nf=open("urdu_val.txt","w",encoding="utf-8")\nf.write(val_dataset2)\n'

### Build Vocab

In [None]:
ur_tokenizer = urdu_sen_tokens
en_tokenizer = eng_sen_tokens

'''
get_tokenizer('spacy', language='en_core_web_sm')
so the tokenizer here points to the function of tokenizer
which then gets passed to build vocab function
and in the tokenizer we pass the line
'''

def build_vocab(filepath, tokenizer):
  counter = Counter() # counter builds dictionary of word with its frequencies
  with io.open(filepath,encoding="utf8",errors='ignore' ) as f:
    for string_ in f:
      #print(tokenizer(string_))
      counter.update(tokenizer(string_))
  return vocab(counter,specials = ['<unk>', '<pad>', '<bos>', '<eos>'])

main_filepaths = ["/content/drive/MyDrive/myfolderUTE/English.txt","/content/drive/MyDrive/myfolderUTE/Urdu.txt"]
train_filepaths = ["/content/drive/MyDrive/myfolderUTE/english_train.txt","/content/drive/MyDrive/myfolderUTE/urdu_train.txt"]
val_filepaths = ["/content/drive/MyDrive/myfolderUTE/english_val.txt","/content/drive/MyDrive/myfolderUTE/urdu_val.txt"]
test_filepaths = ["/content/drive/MyDrive/myfolderUTE/english_test.txt","/content/drive/MyDrive/myfolderUTE/urdu_test.txt"]
en_vocab = build_vocab(train_filepaths[0], en_tokenizer)
de_vocab = build_vocab(train_filepaths[1], ur_tokenizer)

print(de_vocab)

Vocab()


### Data Preprocessing

In [None]:
def data_process(filepaths):
    raw_de_iter = iter(io.open(filepaths[1], encoding="utf8",errors='ignore'))#created iterator for urdu file
    raw_en_iter = iter(io.open(filepaths[0], encoding="utf8",errors='ignore'))#created english iterator for english file
    data = []
    for (raw_de, raw_en) in zip(raw_de_iter, raw_en_iter):#use zip to make alignment between files
        #such as Los angeles with los angeles
        #raw_de ,raw_en is for accessing that tuple(las angeles in urdu,los angeles in englus)
        de_tensor_ = torch.tensor([de_vocab[token] for token in ur_tokenizer(raw_de.rstrip("\n"))],
                                dtype=torch.long)
        #full urdu sentence with \n where the eol character is remover using de_strip is tokenized and each token is search in the vocabulary
        #whereas the voabulary is containing the embedding on the basis of frequency
        en_tensor_ = torch.tensor([en_vocab[token] for token in en_tokenizer(raw_en.rstrip("\n"))],
                                dtype=torch.long)
        data.append((de_tensor_, en_tensor_))
    return data

### Map Train, Test & Validation on Data Process

In [None]:
de_vocab.set_default_index(0)
en_vocab.set_default_index(0) #if no index is found then 0 is returned
train_data = data_process(train_filepaths)
val_data = data_process(val_filepaths)
test_data = data_process(test_filepaths)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Setting Batch Size & Special Tokens

In [None]:
BATCH_SIZE = 128

PAD_IDX = de_vocab['<pad>'] #padding in sentence
BOS_IDX = de_vocab['<bos>'] #beggining of sentence
EOS_IDX = de_vocab['<eos>'] #representing end of sentence

### Creating Dataloaders for Each Dataset

In [None]:
# basically concatenatnation start , end, pad and the tensor to make a element and append it in the batch
def generate_batch(data_batch):
  de_batch, en_batch = [], []
  for (de_item, en_item) in data_batch:
    de_batch.append(torch.cat([torch.tensor([BOS_IDX]), de_item, torch.tensor([EOS_IDX])], dim=0))
    # torch.cat concatenates the tensors
    en_batch.append(torch.cat([torch.tensor([BOS_IDX]), en_item, torch.tensor([EOS_IDX])], dim=0))
  de_batch = pad_sequence(de_batch, padding_value=PAD_IDX)
  en_batch = pad_sequence(en_batch, padding_value=PAD_IDX)
  return de_batch, en_batch

train_iter = DataLoader(train_data, batch_size=BATCH_SIZE,
                        shuffle=True, collate_fn=generate_batch) # dividing the data in batches with the batch size specified
valid_iter = DataLoader(val_data, batch_size=BATCH_SIZE,
                        shuffle=True, collate_fn=generate_batch)
test_iter = DataLoader(test_data, batch_size=BATCH_SIZE,
                       shuffle=True, collate_fn=generate_batch)

### TokenEmbedding

In [None]:
class TokenEmbedding(nn.Module):
    def __init__(self, vocab_size: int, emb_size):
        super(TokenEmbedding, self).__init__()
        self.embedding = nn.Embedding(vocab_size, emb_size)
        self.emb_size = emb_size
    def forward(self, tokens: Tensor):
        return self.embedding(tokens.long()) * math.sqrt(self.emb_size)

### PositionalEncoding

In [None]:
'''
Values for Positional Encoding PE(pos,i)=sin(pos/10000**2i/d)
i is the index of the word
and pos is the position
where d=size of embeddings
pos 0 means the first positional embedding
pos 1 means the 2nd and so on
and i is the index in position embedding we are filling
'''
class PositionalEncoding(nn.Module):
    def __init__(self, emb_size: int, dropout, maxlen: int = 5000):
        super(PositionalEncoding, self).__init__()
        den = torch.exp(- torch.arange(0, emb_size, 2) * math.log(10000) / emb_size)
        pos = torch.arange(0, maxlen).reshape(maxlen, 1)
        pos_embedding = torch.zeros((maxlen, emb_size))
        pos_embedding[:, 0::2] = torch.sin(pos * den)
        pos_embedding[:, 1::2] = torch.cos(pos * den)
        pos_embedding = pos_embedding.unsqueeze(-2)

        self.dropout = nn.Dropout(dropout)
        self.register_buffer('pos_embedding', pos_embedding)

    def forward(self, token_embedding: Tensor):
        return self.dropout(token_embedding +
                            self.pos_embedding[:token_embedding.size(0),:])

### Seq2Seq Transformer Module

In [None]:
class Seq2SeqTransformer(nn.Module):
    def __init__(self, num_encoder_layers: int, num_decoder_layers: int,
                 emb_size: int, src_vocab_size: int, tgt_vocab_size: int,
                 dim_feedforward:int = 512, dropout:float = 0.1):
        super(Seq2SeqTransformer, self).__init__()
        encoder_layer = TransformerEncoderLayer(d_model=emb_size, nhead=NHEAD,
                                                dim_feedforward=dim_feedforward)
        self.transformer_encoder = TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
        decoder_layer = TransformerDecoderLayer(d_model=emb_size, nhead=NHEAD,
                                                dim_feedforward=dim_feedforward)
        self.transformer_decoder = TransformerDecoder(decoder_layer, num_layers=num_decoder_layers)

        self.generator = nn.Linear(emb_size, tgt_vocab_size)
        self.src_tok_emb = TokenEmbedding(src_vocab_size, emb_size)
        self.tgt_tok_emb = TokenEmbedding(tgt_vocab_size, emb_size)
        self.positional_encoding = PositionalEncoding(emb_size, dropout=dropout)

    def forward(self, src: Tensor, trg: Tensor, src_mask: Tensor,
                tgt_mask: Tensor, src_padding_mask: Tensor,
                tgt_padding_mask: Tensor, memory_key_padding_mask: Tensor):
        src_emb = self.positional_encoding(self.src_tok_emb(src))
        #print(src_emb)
        tgt_emb = self.positional_encoding(self.tgt_tok_emb(trg))
        #print(tgt-_emb)
        memory = self.transformer_encoder(src_emb, src_mask, src_padding_mask)
        #print(memory)
        outs = self.transformer_decoder(tgt_emb, memory, tgt_mask, None,
                                        tgt_padding_mask, memory_key_padding_mask)
        #print(outs)
        return self.generator(outs)

    def encode(self, src: Tensor, src_mask: Tensor):
        return self.transformer_encoder(self.positional_encoding(
                            self.src_tok_emb(src)), src_mask)

    def decode(self, tgt: Tensor, memory: Tensor, tgt_mask: Tensor):
        return self.transformer_decoder(self.positional_encoding(
                          self.tgt_tok_emb(tgt)), memory,
                          tgt_mask)

In [None]:
def generate_square_subsequent_mask(sz):
    mask = (torch.triu(torch.ones((sz, sz), device=DEVICE)) == 1).transpose(0, 1)
    mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
    return mask

def create_mask(src, tgt):
    src_seq_len = src.shape[0]
    tgt_seq_len = tgt.shape[0]

    tgt_mask = generate_square_subsequent_mask(tgt_seq_len)
    src_mask = torch.zeros((src_seq_len, src_seq_len), device=DEVICE).type(torch.bool)

    src_padding_mask = (src == PAD_IDX).transpose(0, 1)
    tgt_padding_mask = (tgt == PAD_IDX).transpose(0, 1)
    return src_mask, tgt_mask, src_padding_mask, tgt_padding_mask

### Defining Model Parameters and Instantiate

In [None]:
SRC_VOCAB_SIZE = len(de_vocab)
TGT_VOCAB_SIZE = len(en_vocab)

EMB_SIZE = 512

NHEAD = 8

FFN_HID_DIM = 512

BATCH_SIZE = 128

NUM_ENCODER_LAYERS = 3

NUM_DECODER_LAYERS = 3

NUM_EPOCHS = 16

DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

transformer = Seq2SeqTransformer(NUM_ENCODER_LAYERS, NUM_DECODER_LAYERS,
                                 EMB_SIZE, SRC_VOCAB_SIZE, TGT_VOCAB_SIZE,
                                 FFN_HID_DIM)

for p in transformer.parameters():
    if p.dim() > 1:
        nn.init.xavier_uniform_(p)

transformer = transformer.to(device)

loss_fn = torch.nn.CrossEntropyLoss(ignore_index=PAD_IDX)

optimizer = torch.optim.Adam(
    transformer.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9
)

### Training Phase

In [None]:
def train_epoch(model, train_iter, optimizer):
  model.train()
  losses = 0
  for idx, (src, tgt) in enumerate(train_iter):

      src = src.to(device)
      tgt = tgt.to(device)

      tgt_input = tgt[:-1, :]

      src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_input)

      logits = model(src, tgt_input, src_mask, tgt_mask,src_padding_mask, tgt_padding_mask, src_padding_mask)

      optimizer.zero_grad()

      tgt_out = tgt[1:,:]
      loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
      loss.backward()

      optimizer.step()
      losses += loss.item()
  return losses / len(train_iter)

### Evaluate

In [None]:
def evaluate(model, val_iter):
    model.eval()
    losses = 0
    for idx, (src, tgt) in (enumerate(valid_iter)):
        src = src.to(device)
        tgt = tgt.to(device)

        tgt_input = tgt[:-1, :]

        src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_input)

        logits = model(src, tgt_input, src_mask, tgt_mask,
                                src_padding_mask, tgt_padding_mask, src_padding_mask)
        tgt_out = tgt[1:,:]
        loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
        losses += loss.item()
    return losses / len(val_iter)

### Training

In [None]:
NUM_EPOCHS = 80

for epoch in range(1, NUM_EPOCHS+1):
    start_time = time.time()
    train_loss = train_epoch(transformer, train_iter, optimizer)
    end_time = time.time()
    val_loss = evaluate(transformer, valid_iter)
    print((f"Epoch: {epoch}, Train loss: {train_loss:.3f}, Val loss: {val_loss:.3f}, "
          f"Epoch time = {(end_time - start_time):.3f}s"))

Epoch: 1, Train loss: 0.037, Val loss: 4.791, Epoch time = 0.902s
Epoch: 2, Train loss: 0.036, Val loss: 4.791, Epoch time = 0.903s
Epoch: 3, Train loss: 0.032, Val loss: 4.816, Epoch time = 0.913s
Epoch: 4, Train loss: 0.033, Val loss: 4.821, Epoch time = 0.961s
Epoch: 5, Train loss: 0.031, Val loss: 4.789, Epoch time = 0.911s
Epoch: 6, Train loss: 0.032, Val loss: 4.827, Epoch time = 0.904s
Epoch: 7, Train loss: 0.033, Val loss: 4.756, Epoch time = 1.159s
Epoch: 8, Train loss: 0.029, Val loss: 4.819, Epoch time = 1.647s
Epoch: 9, Train loss: 0.033, Val loss: 4.702, Epoch time = 1.815s
Epoch: 10, Train loss: 0.027, Val loss: 4.800, Epoch time = 1.403s
Epoch: 11, Train loss: 0.031, Val loss: 4.817, Epoch time = 0.917s
Epoch: 12, Train loss: 0.028, Val loss: 4.918, Epoch time = 0.980s
Epoch: 13, Train loss: 0.028, Val loss: 4.880, Epoch time = 0.906s
Epoch: 14, Train loss: 0.028, Val loss: 4.790, Epoch time = 0.908s
Epoch: 15, Train loss: 0.031, Val loss: 4.833, Epoch time = 0.890s
Epoc

### Decoding

In [None]:
def calc_bleu_score(translations, references):
    translations_formatted = [translation.split() for translation in translations]
    references_formatted = [[translation.split()] for translation in references]
    return bleu_score(translations_formatted, references_formatted)

In [None]:
def calc_bleu_score_from_file(filename):
  with open(f'/content/drive/MyDrive/myfolderUTE/english_train.txt') as file:
      translations = file.readlines()

  return calc_bleu_score(translations=translations, references=en_vocab)

In [None]:

def greedy_decode(model, src, src_mask, max_len, start_symbol):
    src = src.to(device)
    src_mask = src_mask.to(device)

    memory = model.encode(src, src_mask)
    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).to(device)
    for i in range(max_len-1):
        memory = memory.to(device)
        memory_mask = torch.zeros(ys.shape[0], memory.shape[0]).to(device).type(torch.bool)
        tgt_mask = (generate_square_subsequent_mask(ys.size(0))
                                    .type(torch.bool)).to(device)
        out = model.decode(ys, memory, tgt_mask)
        out = out.transpose(0, 1)
        prob = model.generator(out[:, -1])

        _, next_word = torch.max(prob, dim = 1)
        next_word = next_word.item()

        ys = torch.cat([ys, torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
        if next_word == EOS_IDX:
            break
    return ys

In [None]:
'''def greedy_decode(model, src, src_mask, max_len, start_symbol, beam_width=5):
    src = src.to(device)
    src_mask = src_mask.to(device)

    memory = model.encode(src, src_mask)
    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).to(device)

    # Beam search implementation
    candidates = [(ys, 0)]  # (sequence, score)

    for _ in range(max_len-1):
        new_candidates = []
        for seq, score in candidates:
            if seq[-1] == EOS_IDX:
                new_candidates.append((seq, score))
                continue

            memory_mask = torch.zeros(seq.shape[0], memory.shape[0]).to(device).type(torch.bool)
            tgt_mask = generate_square_subsequent_mask(seq.size(0)).type(torch.bool).to(device)

            out = model.decode(seq, memory, tgt_mask)
            out = out.transpose(0, 1)
            logits = model.generator(out[:, -1])
            probs = F.softmax(logits, dim=-1)

            top_probs, top_idx = probs.topk(beam_width)
            for i in range(beam_width):
                new_seq = torch.cat([seq, top_idx[0][i].unsqueeze(0).unsqueeze(0)], dim=0)
                new_score = score + torch.log(top_probs[0][i]).item()
                new_candidates.append((new_seq, new_score))

        # Keep top beam_width candidates
        candidates = sorted(new_candidates, key=lambda x: x[1], reverse=True)[:beam_width]

    return candidates[0][0]'''

### Translation

In [None]:
def translate(model, src, src_vocab, tgt_vocab, src_tokenizer):
    model.eval()
    tokens = [BOS_IDX] + [src_vocab.get_stoi()[tok] for tok in src_tokenizer(src)]+ [EOS_IDX]
    num_tokens = len(tokens)
    src = (torch.LongTensor(tokens).reshape(num_tokens, 1) )
    src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool)
    tgt_tokens = greedy_decode(model,  src, src_mask, max_len=num_tokens + 5, start_symbol=BOS_IDX).flatten()
    return " ".join([tgt_vocab.get_itos()[tok] for tok in tgt_tokens]).replace("<bos>", "").replace("<eos>", "")

In [None]:
translate(transformer, "جتنے منہ اتنی باتیں", de_vocab, en_vocab, ur_tokenizer)

' More mouths will have more talks '

In [None]:
translate(transformer, "چور چور مسیرے بھائی", de_vocab, en_vocab, ur_tokenizer)

' Birds of same feather flock together '

In [None]:
translate(transformer, "اندھوں میں کانا راجہ", de_vocab, en_vocab, ur_tokenizer)

' A figure among cyphers '

In [None]:
translate(transformer, "نہ کامیابی حتمی ہوتی ہے اور نہ ہی ناکامی: بلکہ اصل چیز کوشش جاری رکھنے کا حوصلہ ہوتا ہے", de_vocab, en_vocab, ur_tokenizer)

' Success is not final, failure is not fatal: it is the courage to continue that counts '

In [None]:
# Using readlines()
file1 = open('/content/drive/MyDrive/myfolderUTE/urdu_train.txt', 'r',encoding='utf8')
Lines = file1.readlines()

file2=open('trans.txt','w',encoding='utf8')
count = 0
# Strips the newline character
for line in Lines:
    count += 1
    print(line.strip())
    var=translate(transformer, line.strip(), de_vocab, en_vocab, ur_tokenizer)
    print(var)
    file2.writelines(var)
    if count==25:
        break

لاس اینجلس نے سیزن شروع کرنے کے لئے سیدھے رات اور اپنے پہلے 14 میں سے 13 کھیل کھوئے ہیں۔
 Los Angeles has lost night straight and 13 of its first 14 games to start the season. 
آنکھ کا اندھا نام نین سکھ
 Opposite qualities of meaning of person's name 
کھسیانی بلی کھمبا نوچے
 To show anger after getting embarrassed 
چوری کا مال موری میں
 Money earned the wrong way will be taken away 
چھوٹا منہ بڑی بات
 To talk big without having a big position 
جتنے منہ اتنی باتیں
 More mouths will have more talks 
بہتی گنگا میں ہاتھ دھونا
 To use the available opportunity 
مان نہ مان میں تیرا مہمان
 An uninvited guest is never welcomed 
دور کے ڈھول سُہانے
 The grass is always greener on the other side 
گنگا گائے گنگا داس جمنا گائے جمنا داس
 A person of no principles 
گھر کا بھیدی لنکا ڈھائے
 Division is main reason for the damage 
ہاتھ کنگن کو آرسی کیا
 Evidence does not need proof 
دھوبی کا کتا نہ گھر کا نہ گھاٹ کا
 A person try to be on two sides goes nowhere 
انگور کھٹے ہیں
 Sour grapes 
دال میں ک

In [None]:
def bleu(data, model, urdu, english, device):
    targets = []
    outputs = []

    for example in data:
        src = vars(example)["src"]
        trg = vars(example)["trg"]

        prediction = translate(model, src, urdu, english, device)
        prediction = prediction[:-1]  # remove <eos> token

        targets.append([trg])
        outputs.append(prediction)

    return bleu_score(outputs, targets)

In [None]:
#score = bleu(test_dataset, transformer, ur_tokenizer, en_tokenizer, DEVICE)
#print(f"Bleu score {score:.2f}")

## Verifying Dataset

In [None]:
# Verify dataset alignment
with open('/content/drive/MyDrive/myfolderUTE/urdu_train.txt', 'r', encoding='utf-8') as f1, \
     open('/content/drive/MyDrive/myfolderUTE/english_train.txt', 'r', encoding='utf-8') as f2:
    for i, (urdu, eng) in enumerate(zip(f1, f2)):
        print(f"Pair {i+1}:")
        print(f"Urdu: {urdu.strip()}")
        print(f"English: {eng.strip()}")
        print("-----")
        if i > 10: break

Pair 1:
Urdu: لاس اینجلس نے سیزن شروع کرنے کے لئے سیدھے رات اور اپنے پہلے 14 میں سے 13 کھیل کھوئے ہیں۔
English: Los Angeles has lost night straight and 13 of its first 14 games to start the season.
-----
Pair 2:
Urdu: آنکھ کا اندھا نام نین سکھ
English: Opposite qualities of meaning of person's name
-----
Pair 3:
Urdu: کھسیانی بلی کھمبا نوچے
English: To show anger after getting embarrassed
-----
Pair 4:
Urdu: چوری کا مال موری میں
English: Money earned the wrong way will be taken away
-----
Pair 5:
Urdu: چھوٹا منہ بڑی بات
English: To talk big without having a big position
-----
Pair 6:
Urdu: جتنے منہ اتنی باتیں
English: More mouths will have more talks
-----
Pair 7:
Urdu: بہتی گنگا میں ہاتھ دھونا
English: To use the available opportunity
-----
Pair 8:
Urdu: مان نہ مان میں تیرا مہمان
English: Getting involved without having
-----
Pair 9:
Urdu: دور کے ڈھول سُہانے
English: The grass is always greener on the other side
-----
Pair 10:
Urdu: گنگا گائے گنگا داس جمنا گائے جمنا داس
English: A pe

## NLTK for METEOR Metric and BLEU Metric

In [None]:
!pip install nltk



In [None]:
# First, let's install required packages for METEOR
import nltk
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt_tab')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [None]:
from nltk.translate.meteor_score import meteor_score
from nltk.tokenize import word_tokenize
import numpy as np
from torchtext.data.metrics import bleu_score as torchtext_bleu  # Renamed import

def translate_with_fallback(model, src, src_vocab, tgt_vocab, src_tokenizer):
    model.eval()
    # Handle unknown words by using <unk> token
    tokens = [BOS_IDX] + [src_vocab.get_stoi().get(tok, src_vocab['<unk>']) for tok in src_tokenizer(src)] + [EOS_IDX]
    num_tokens = len(tokens)
    src = (torch.LongTensor(tokens).reshape(num_tokens, 1))
    src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool)
    tgt_tokens = greedy_decode(model, src, src_mask, max_len=num_tokens + 5, start_symbol=BOS_IDX).flatten()
    return " ".join([tgt_vocab.get_itos()[tok] for tok in tgt_tokens]).replace("<bos>", "").replace("<eos>", "")

def calculate_bleu_and_meteor(num_examples=25):
    references = []
    hypotheses = []
    meteor_scores = []

    with open('/content/drive/MyDrive/myfolderUTE/urdu_test.txt', 'r', encoding='utf-8') as urdu_file, \
         open('/content/drive/MyDrive/myfolderUTE/english_test.txt', 'r', encoding='utf-8') as english_file:

        urdu_lines = urdu_file.readlines()[:num_examples]
        english_lines = english_file.readlines()[:num_examples]

        for urdu_line, english_line in zip(urdu_lines, english_lines):
            try:
                # Clean lines
                urdu_text = urdu_line.strip()
                english_text = english_line.strip()

                if not urdu_text or not english_text:
                    continue

                # Get translation with fallback for unknown words
                translated_text = translate_with_fallback(
                    transformer,
                    urdu_text,
                    de_vocab,
                    en_vocab,
                    ur_tokenizer
                )

                # Tokenize for metrics
                ref_tokens = word_tokenize(english_text.lower())
                hyp_tokens = word_tokenize(translated_text.lower())

                references.append([ref_tokens])
                hypotheses.append(hyp_tokens)

                # Calculate METEOR
                meteor_scores.append(meteor_score(
                    [ref_tokens],
                    hyp_tokens
                ))

                # Print some examples
                print(f"Urdu: {urdu_text}")
                print(f"Reference: {english_text}")
                print(f"Translation: {translated_text}")
                print("-----")

            except Exception as e:
                print(f"Error processing: {urdu_text}")
                print(f"Error: {str(e)}")
                continue

    # Calculate metrics
    bleu = torchtext_bleu(hypotheses, references)  # Use the renamed import
    avg_meteor = np.mean(meteor_scores) if meteor_scores else 0

    return bleu, avg_meteor

In [None]:
# Calculate metrics
final_bleu, final_meteor = calculate_bleu_and_meteor(num_examples=25)  # Changed variable names

print("\nFinal Scores:")
print(f"BLEU Score: {final_bleu:.4f}")
print(f"METEOR Score: {final_meteor:.4f}")

Urdu: جیتے ہیں ۔
Reference: wing your seed in the sea.
Translation:  Uneasy lies the head that wears a crown 
-----
Urdu: نُکتہ چینی آسان ہے ۔
Reference: Do not do today what you will repent of tomorrow.
Translation:  Uneasy lies the head that wears a crown 
-----
Urdu: ہر نسل میں نقائص ہوتے ہیں ۔
Reference: Do not effusively offer your right hand to everyone.
Translation:  Uneasy lies the head that wears a crown 
-----
Urdu: ہر مفکر و دانشور اچھا اُستاد نہیں ہو سکتا ۔
Reference: Do not entrust your all to one vessel.
Translation:  Uneasy lies the head that wears a crown 
-----
Urdu: کونسا گلزار ہے جِس میں خزاں آئی نہ ہو ۔
Reference: Do not expect friends to do for you what you can do for yourslef.
Translation:  Uneasy lies the head that wears a crown 
-----
Urdu: ہر طرف ۔ بکھرا ہوا ۔
Reference: Do not fan the dying embers.
Translation:  Uneasy lies the head that wears a crown 
-----
Urdu: ہر کمالے را زوال ! ۔
Reference: Do not fight against two adversaries.
Translation:  Uneasy lies t