## KHEEM DHARMANI 22I-0081
## VISHAL KUMAR 22I-2966

## Tokens (Languages)

In [2]:
#URDU NLP SPACY TOKENIZER
import spacy

nlp = spacy.blank('ur')

def urdu_tokens(inp):
   # print("The inp to urdu tokens is :",inp)
    doc=nlp(inp)
    lst=[]
    lst.clear()
    for word in doc:
        word=str(word)
        lst.append(word)

    return lst
    

In [3]:
# loading urdu object from spcay
nlp = spacy.blank('ur')
# generating token for input urdu text
def urdu_sen_tokens(inp):
   # print("The inp to urdu tokens is :",inp)
    doc=nlp(inp)
    doc=str(doc)
    doc=doc.replace("\n","")
    return [doc]

In [4]:
#loading english object from spacy
nlp = spacy.blank('en')
# generating token for input english text
def eng_sen_tokens(inp):
   # print("The inp to eng tokens is :",inp)
    doc=nlp(inp)
    doc=str(doc)
    #print("doc type is :",type(doc))
    
    #print("ret type is ",type([doc]))
    doc=doc.replace("\n","")
    return [doc]

In [5]:
import copy
from typing import Optional, Any, Union, Callable

import torch
import math
import time
import torch.nn as nn
from torch import Tensor
import torch.nn.functional as F
from torch.nn import Module
from torch.nn import MultiheadAttention
from torch.nn import ModuleList
from torch.nn.init import xavier_uniform_
from torch.nn import Dropout
from torch.nn import Linear
from torch.nn import LayerNorm


## Transformers Module 

In [6]:
#n_head =the number of heads in the multiheadattention models

#number of encoder decoder layers stacked up with each other

#dim_feedforward=Matrix of feedforward size

#dropout is applied on hidden and input layers to basically stop some neurons from working to handle overfitting

# Dropout is a technique where randomly selected neurons are ignored during training. They are “dropped-out” randomly. 
# This means that their contribution to the activation of downstream neurons is temporally removed on the forward pass and 
# any weight updates are not applied to the neuron on the backward pass.

# layer norm is applied in transformer and ep value represents the value that is added in denominator of normalization 
# for stability 

# if batch first is true then  input and output tensors are provided  as (batch, seq, feature).  else (seq,batch,feature)
# if ``True``, encoder and decoder layers will perform LayerNorms before other attention and feedforward operations, 
# otherwise after. Default: ``False`

class Transformer(Module):
   #d_model=size of input embeddings is just a vector represenation of the particular word

    def __init__(self, d_model: int = 512, nhead: int = 8, num_encoder_layers: int = 6,
                 num_decoder_layers: int = 6, dim_feedforward: int = 2048, dropout: float = 0.1,
                 activation: Union[str, Callable[[Tensor], Tensor]] = F.relu,
                 custom_encoder: Optional[Any] = None, custom_decoder: Optional[Any] = None,
                 layer_norm_eps: float = 1e-5, batch_first: bool = False, norm_first: bool = False,
                 device=None, dtype=None) -> None:
        
        factory_kwargs = {'device': device, 'dtype': dtype}
        super(Transformer, self).__init__()

        if custom_encoder is not None:
            self.encoder = custom_encoder
        else:
            encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout,
                                                    activation, layer_norm_eps, batch_first, norm_first,
                                                    **factory_kwargs)
            encoder_norm = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
            self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)

        if custom_decoder is not None:
            self.decoder = custom_decoder
        else:
            decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout,
                                                    activation, layer_norm_eps, batch_first, norm_first,
                                                    **factory_kwargs)
            decoder_norm = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
            self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm)

        self._reset_parameters()

        self.d_model = d_model
        self.nhead = nhead

        self.batch_first = batch_first

# just init , make encoder ,decoder,norm,layers
 
# feed forward = bunch of linear layers where relu is in between or any other activatin function(convolution layer)       

# logits = model(src, tgt_input, src_mask, tgt_mask,

# src_padding_mask, tgt_padding_mask, src_padding_mask)        
    def forward(self, src: Tensor, tgt: Tensor, src_mask: Optional[Tensor] = None, tgt_mask: Optional[Tensor] = None,
                memory_mask: Optional[Tensor] = None, src_key_padding_mask: Optional[Tensor] = None,
                tgt_key_padding_mask: Optional[Tensor] = None, memory_key_padding_mask: Optional[Tensor] = None) -> Tensor:
        
        is_batched = src.dim() == 3
        if not self.batch_first and src.size(1) != tgt.size(1) and is_batched:
            raise RuntimeError("the batch number of src and tgt must be equal")
        elif self.batch_first and src.size(0) != tgt.size(0) and is_batched:
            raise RuntimeError("the batch number of src and tgt must be equal")

        if src.size(-1) != self.d_model or tgt.size(-1) != self.d_model:
            raise RuntimeError("the feature number of src and tgt must be equal to d_model")

        memory = self.encoder(src, mask=src_mask, src_key_padding_mask=src_key_padding_mask)#encoding forward pass
        output = self.decoder(tgt, memory, tgt_mask=tgt_mask, memory_mask=memory_mask,
                              tgt_key_padding_mask=tgt_key_padding_mask,
                              memory_key_padding_mask=memory_key_padding_mask)#decoding forward pass
        return output
    #the forward pass of decoder is our output

    @staticmethod
    def generate_square_subsequent_mask(sz: int) -> Tensor:
        return torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)

# Generate a square mask for the sequence. The masked positions are filled with float('-inf').
# Unmasked positions are filled with float(0.0).
# return that masked array

    def _reset_parameters(self):
        """Initiate parameters in the transformer model."""

        for p in self.parameters():
            if p.dim() > 1:
                xavier_uniform_(p)

# function of torch for xavier initiliaztion

## Transformer Encoder Module

In [7]:
# TransformerEncoder is a stack of N encoder layers
class TransformerEncoder(Module):

    __constants__ = ['norm']

# encoder_layer: an instance of the TransformerEncoderLayer() class (required).
# num_layers: the number of sub-encoder-layers in the encoder (required).
# norm: the layer normalization component (optional).
    def __init__(self, encoder_layer, num_layers, norm=None, enable_nested_tensor=True):
        super(TransformerEncoder, self).__init__()
        self.layers = _get_clones(encoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm
        self.enable_nested_tensor = enable_nested_tensor

# Pass the input through the encoder layers in turn.
# src: the sequence to the encoder (required).
# mask: the mask for the src sequence (optional).
# src mask=is to do -inf
# tgt mask=0
# memory mask= -inf to some mask
# src_key_padd_mask the ByteTensor mask for src keys per batch (optional). Since your src usually has different lengths sequences it's common to remove the padding vectors you appended at the end. For this you specify the length of each sequence per example in your batch. 
# src_key_padding_mask: the mask for the src keys per batch (optional).
# in this we just have to run the forward passs of encoder layer
    def forward(self, src: Tensor, mask: Optional[Tensor] = None, src_key_padding_mask: Optional[Tensor] = None) -> Tensor:
        output = src
        convert_to_nested = False
        first_layer = self.layers[0]
        if isinstance(first_layer, torch.nn.TransformerEncoderLayer):
            if (not first_layer.norm_first and not first_layer.training and
                    first_layer.self_attn.batch_first and
                    first_layer.self_attn._qkv_same_embed_dim and first_layer.activation_relu_or_gelu and
                    first_layer.norm1.eps == first_layer.norm2.eps and
                    src.dim() == 3 and self.enable_nested_tensor) :
                if src_key_padding_mask is not None and not output.is_nested and mask is None:
                    tensor_args = (
                        src,
                        first_layer.self_attn.in_proj_weight,
                        first_layer.self_attn.in_proj_bias,
                        first_layer.self_attn.out_proj.weight,
                        first_layer.self_attn.out_proj.bias,
                        first_layer.norm1.weight,
                        first_layer.norm1.bias,
                        first_layer.norm2.weight,
                        first_layer.norm2.bias,
                        first_layer.linear1.weight,
                        first_layer.linear1.bias,
                        first_layer.linear2.weight,
                        first_layer.linear2.bias,
                    )
                    if not torch.overrides.has_torch_function(tensor_args):
                        if output.is_cuda or 'cpu' in str(output.device):
                            convert_to_nested = True
                            output = torch._nested_tensor_from_mask(output, src_key_padding_mask.logical_not())

        for mod in self.layers:
            if convert_to_nested:
                output = mod(output, src_mask=mask)
            else:
                output = mod(output, src_mask=mask, src_key_padding_mask=src_key_padding_mask)

        if convert_to_nested:
            output = output.to_padded_tensor(0.)

        if self.norm is not None:
            output = self.norm(output)

        return output


## Transformer Decoder Module

In [8]:
#TransformerDecoder is a stack of N Decoder layers
class TransformerDecoder(Module):
    __constants__ = ['norm']

    def __init__(self, decoder_layer, num_layers, norm=None):
        super(TransformerDecoder, self).__init__()
        self.layers = _get_clones(decoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm

    def forward(self, tgt: Tensor, memory: Tensor, tgt_mask: Optional[Tensor] = None,
                memory_mask: Optional[Tensor] = None, tgt_key_padding_mask: Optional[Tensor] = None,
                memory_key_padding_mask: Optional[Tensor] = None) -> Tensor:

        output = tgt

        for mod in self.layers:
            output = mod(output, memory, tgt_mask=tgt_mask,
                         memory_mask=memory_mask,
                         tgt_key_padding_mask=tgt_key_padding_mask,
                         memory_key_padding_mask=memory_key_padding_mask)

        if self.norm is not None:
            output = self.norm(output)

        return output


## Transformer Encoder Layer

In [9]:
class TransformerEncoderLayer(Module):
  
    __constants__ = ['batch_first', 'norm_first']

    def __init__(self, d_model: int, nhead: int, dim_feedforward: int = 2048, dropout: float = 0.1,
                 activation: Union[str, Callable[[Tensor], Tensor]] = F.relu,
                 layer_norm_eps: float = 1e-5, batch_first: bool = False, norm_first: bool = False,
                 device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first,
                                            **factory_kwargs)
        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward, **factory_kwargs)#input features,output features
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model, **factory_kwargs)

        self.norm_first = norm_first
        self.norm1 = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.norm2 = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)

        # Legacy string support for activation function.
        if isinstance(activation, str):
            activation = _get_activation_fn(activation)

        if activation is F.relu:
            self.activation_relu_or_gelu = 1
        elif activation is F.gelu:
            self.activation_relu_or_gelu = 2
        else:
            self.activation_relu_or_gelu = 0
        self.activation = activation

    def __setstate__(self, state):
        if 'activation' not in state:
            state['activation'] = F.relu
        super(TransformerEncoderLayer, self).__setstate__(state)

    def forward(self, src: Tensor, src_mask: Optional[Tensor] = None,
                src_key_padding_mask: Optional[Tensor] = None) -> Tensor:

        if (src.dim() == 3 and not self.norm_first and not self.training and
            self.self_attn.batch_first and
            self.self_attn._qkv_same_embed_dim and self.activation_relu_or_gelu and
            self.norm1.eps == self.norm2.eps and
            ((src_mask is None and src_key_padding_mask is None)
             if src.is_nested
             else (src_mask is None or src_key_padding_mask is None))):
            tensor_args = (
                src,
                self.self_attn.in_proj_weight,
                self.self_attn.in_proj_bias,
                self.self_attn.out_proj.weight,
                self.self_attn.out_proj.bias,
                self.norm1.weight,
                self.norm1.bias,
                self.norm2.weight,
                self.norm2.bias,
                self.linear1.weight,
                self.linear1.bias,
                self.linear2.weight,
                self.linear2.bias,
            )##biases and weights
            if (not torch.overrides.has_torch_function(tensor_args) and
                    # We have to use a list comprehension here because TorchScript
                    # doesn't support generator expressions.
                    all([(x.is_cuda or 'cpu' in str(x.device)) for x in tensor_args]) and
                    (not torch.is_grad_enabled() or all([not x.requires_grad for x in tensor_args]))):
                return torch._transformer_encoder_layer_fwd(
                    src,
                    self.self_attn.embed_dim,
                    self.self_attn.num_heads,
                    self.self_attn.in_proj_weight,
                    self.self_attn.in_proj_bias,
                    self.self_attn.out_proj.weight,
                    self.self_attn.out_proj.bias,
                    self.activation_relu_or_gelu == 2,
                    False,  # norm_first, currently not supported
                    self.norm1.eps,
                    self.norm1.weight,
                    self.norm1.bias,
                    self.norm2.weight,
                    self.norm2.bias,
                    self.linear1.weight,
                    self.linear1.bias,
                    self.linear2.weight,
                    self.linear2.bias,
                    src_mask if src_mask is not None else src_key_padding_mask,
                )
        x = src
        if self.norm_first:
            x = x + self._sa_block(self.norm1(x), src_mask, src_key_padding_mask)
            x = x + self._ff_block(self.norm2(x))
        else:
            x = self.norm1(x + self._sa_block(x, src_mask, src_key_padding_mask))
            x = self.norm2(x + self._ff_block(x))

        return x

    # self-attention block
    def _sa_block(self, x: Tensor,
                  attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor]) -> Tensor:
        x = self.self_attn(x, x, x,
                           attn_mask=attn_mask,
                           key_padding_mask=key_padding_mask,
                           need_weights=False)[0]
        return self.dropout1(x)

    # feed forward block
    def _ff_block(self, x: Tensor) -> Tensor:
        x = self.linear2(self.dropout(self.activation(self.linear1(x))))
        return self.dropout2(x)

## Transformer Deccoder Layer

In [10]:

class TransformerDecoderLayer(Module):
    __constants__ = ['batch_first', 'norm_first']

    def __init__(self, d_model: int, nhead: int, dim_feedforward: int = 2048, dropout: float = 0.1,
                 activation: Union[str, Callable[[Tensor], Tensor]] = F.relu,
                 layer_norm_eps: float = 1e-5, batch_first: bool = False, norm_first: bool = False,
                 device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super(TransformerDecoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first,
                                            **factory_kwargs)
        self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first,
                                                 **factory_kwargs)
        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward, **factory_kwargs)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model, **factory_kwargs)

        self.norm_first = norm_first
        self.norm1 = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.norm2 = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.norm3 = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
        self.dropout3 = Dropout(dropout)

        # Legacy string support for activation function.
        if isinstance(activation, str):
            self.activation = _get_activation_fn(activation)
        else:
            self.activation = activation

    def __setstate__(self, state):
        if 'activation' not in state:
            state['activation'] = F.relu
        super(TransformerDecoderLayer, self).__setstate__(state)

    def forward(self, tgt: Tensor, memory: Tensor, tgt_mask: Optional[Tensor] = None, memory_mask: Optional[Tensor] = None,
                tgt_key_padding_mask: Optional[Tensor] = None, memory_key_padding_mask: Optional[Tensor] = None) -> Tensor:

        x = tgt
        if self.norm_first:
            x = x + self._sa_block(self.norm1(x), tgt_mask, tgt_key_padding_mask)
            x = x + self._mha_block(self.norm2(x), memory, memory_mask, memory_key_padding_mask)
            x = x + self._ff_block(self.norm3(x))
        else:
            x = self.norm1(x + self._sa_block(x, tgt_mask, tgt_key_padding_mask))
            x = self.norm2(x + self._mha_block(x, memory, memory_mask, memory_key_padding_mask))
            x = self.norm3(x + self._ff_block(x))

        return x

    # self-attention block
    def _sa_block(self, x: Tensor,
                  attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor]) -> Tensor:
        x = self.self_attn(x, x, x,
                           attn_mask=attn_mask,
                           key_padding_mask=key_padding_mask,
                           need_weights=False)[0]
        return self.dropout1(x)

    # multihead attention block
    def _mha_block(self, x: Tensor, mem: Tensor,
                   attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor]) -> Tensor:
        x = self.multihead_attn(x, mem, mem,
                                attn_mask=attn_mask,
                                key_padding_mask=key_padding_mask,
                                need_weights=False)[0]
        return self.dropout2(x)

    # feed forward block
    def _ff_block(self, x: Tensor) -> Tensor:
        x = self.linear2(self.dropout(self.activation(self.linear1(x))))
        return self.dropout3(x)


In [11]:
def _get_clones(module, N):
    return ModuleList([copy.deepcopy(module) for i in range(N)])

## Call Activation Functions

In [12]:
def _get_activation_fn(activation: str) -> Callable[[Tensor], Tensor]:
    if activation == "relu":
        return F.relu
    elif activation == "gelu":
        return F.gelu

    raise RuntimeError("activation should be relu/gelu, not {}".format(activation))

## Reading English Data

In [27]:
torch.manual_seed(0)
torch.use_deterministic_algorithms(True)

fileEnglish = open('English.txt', mode='rt', encoding='utf-8')
englishDataset = fileEnglish.read()
print(englishDataset[0:500])
file = open("English.txt", "r",encoding='utf-8')
x = 0
for line in file:

    if line != "\n":
        x += 1
    #print(line)
file.close()


Los Angeles has lost night straight and 13 of its first 14 games to start the season.
Opposite qualities of meaning of person's name
To show anger after getting embarrassed
Money earned the wrong way will be taken away
To talk big without having a big position
More mouths will have more talks
To use the available opportunity
Getting involved without having
The grass is always greener on the other side
A person of no principles
Division is main reason for the damage
Evidence does not need proof
A


In [24]:
'--NotebookApp.iopub_data_rate_limit=1000000000000000000'

'--NotebookApp.iopub_data_rate_limit=1000000000000000000'

## Reading Urdu Data

In [20]:
fileUrdu = open('Urdu.txt', mode='rt', encoding='utf-8')
urduDataset = fileUrdu.read()

file = open("Urdu.txt", "r",encoding='utf-8')
x = 0
for line in file:

    if line != "\n":
        x += 1
print((x))    
file.close()


100000


## Split Data (70,15,15) & Create Dataset For Each Split

In [28]:
train_size_en = int(0.70 * x)
test_size_en = int(0.15 * x)
val_size_en = int(0.15 * x)
#division on the basis of lines


train_dataset = englishDataset[0:train_size_en]
test_dataset = englishDataset[train_size_en+1:train_size_en+test_size_en]
val_dataset = englishDataset[train_size_en+test_size_en+1:x]


train_size_urdu= int(0.70 * x)
test_size_urdu = int(0.15 *x)
val_size_urdu = int(0.15 * x)

train_dataset2 = urduDataset[0:train_size_urdu]
test_dataset2 = urduDataset[train_size_urdu+1:train_size_urdu+test_size_urdu]
val_dataset2 = urduDataset[train_size_urdu+test_size_urdu+1:x]


## Generates Seperate Files For Each Split (English)

In [29]:
f=open("english_train.txt","w")

f.write(train_dataset)

f=open("english_test.txt","w")
f.write(test_dataset)

f=open("english_val.txt","w")
f.write(val_dataset)

14999

## Generates Seperate Files For Each Split (Urdu)

In [16]:
f=open("urdu_train.txt","w",encoding="utf-8")

f.write(train_dataset2)

f=open("urdu_test.txt","w",encoding="utf-8")
f.write(test_dataset2)

f=open("urdu_val.txt","w",encoding="utf-8")
f.write(val_dataset2)


14999

## Building Vocubalary

In [30]:
from collections import Counter
import io
from torchtext.vocab import vocab

de_tokenizer = urdu_sen_tokens
en_tokenizer = eng_sen_tokens

# get_tokenizer('spacy', language='en_core_web_sm')
# so the tokenizer here points to the function of tokenizer
# which then gets passed to build vocab function 
# and in the tokenizer we pass the line


# Building voabulary on the basis of its frequency

def build_vocab(filepath, tokenizer):
  counter = Counter() # counter builds dictionary of word with its frequencies
  with io.open(filepath,encoding="utf8",errors='ignore' ) as f:
    for string_ in f:
      #print(tokenizer(string_))
      counter.update(tokenizer(string_))
  return vocab(counter,specials = ['<unk>', '<pad>', '<bos>', '<eos>'])

main_filepaths = ["English.txt","Urdu.txt"]
train_filepaths = ["english_train.txt","urdu_train.txt"]
val_filepaths = ["english_val.txt","urdu_val.txt"]
test_filepaths = ["english_test.txt","urdu_test.txt"]
en_vocab = build_vocab(train_filepaths[0], en_tokenizer)
de_vocab = build_vocab(train_filepaths[1], de_tokenizer)

In [51]:
print(de_vocab)

Vocab()


## Data Processing

In [71]:
def data_process(filepaths):
  raw_de_iter = iter(io.open(filepaths[1], encoding="utf8",errors='ignore'))#created iterator for urdu file
  raw_en_iter = iter(io.open(filepaths[0], encoding="utf8",errors='ignore'))#created english iterator for english file
  data = []
  for (raw_de, raw_en) in zip(raw_de_iter, raw_en_iter):#use zip to make alignment between files
        #such as Los angeles with los angeles
        #raw_de ,raw_en is for accessing that tuple(las angeles in urdu,los angeles in englus)
    de_tensor_ = torch.tensor([de_vocab[token] for token in de_tokenizer(raw_de.rstrip("\n"))],
                            dtype=torch.long)
    #full urdu sentence with \n where the eol character is remover using de_strip is tokenized and each token is search in the vocabulary
    #whereas the voabulary is containing the embedding on the basis of frequency
    en_tensor_ = torch.tensor([en_vocab[token] for token in en_tokenizer(raw_en.rstrip("\n"))],
                            dtype=torch.long)
    data.append((de_tensor_, en_tensor_))
    #print(data[0:10])
  return data

## Mapping Train, Test & Validation Data on Data Process

In [46]:
de_vocab.set_default_index(0)
en_vocab.set_default_index(0) #if no index is found then 0 is returned
train_data = data_process(train_filepaths)
val_data = data_process(val_filepaths)
test_data = data_process(test_filepaths)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


[(tensor([4]), tensor([4]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor(

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor([8])), (tensor([9]), tensor([9])), (tensor([10]), tensor([10])), (tensor([11]), tensor([11])), (tensor([12]), tensor([12])), (tensor([13]), tensor([13]))]
[(tensor([4]), tensor([4])), (tensor([5]), tensor([5])), (tensor([6]), tensor([6])), (tensor([7]), tensor([7])), (tensor([8]), tensor

[(tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0])

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), te

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), te

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), te

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), te

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), te

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), te

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), te

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), te

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), te

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), te

[(tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0])

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor

[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([2013]), tensor([0])), (tensor([0]), tensor([0]))]
[(tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor([0]), tensor([0])), (tensor

## Setting Bacth Size & Special Tokens

In [52]:
BATCH_SIZE = 128

PAD_IDX = de_vocab['<pad>'] #padding in sentence
BOS_IDX = de_vocab['<bos>'] #beggining of sentence
EOS_IDX = de_vocab['<eos>'] #representing end of sentence

## Creating Dataloaders For Each Dataset

In [60]:
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

# basically concatenatnation start , end, pad and the tensor to make a element and append it in the batch
def generate_batch(data_batch):
  de_batch, en_batch = [], []
  for (de_item, en_item) in data_batch:
    de_batch.append(torch.cat([torch.tensor([BOS_IDX]), de_item, torch.tensor([EOS_IDX])], dim=0))
    # torch.cat concatenates the tensors
    en_batch.append(torch.cat([torch.tensor([BOS_IDX]), en_item, torch.tensor([EOS_IDX])], dim=0))
  de_batch = pad_sequence(de_batch, padding_value=PAD_IDX)
  en_batch = pad_sequence(en_batch, padding_value=PAD_IDX)
  return de_batch, en_batch

train_iter = DataLoader(train_data, batch_size=BATCH_SIZE,
                        shuffle=True, collate_fn=generate_batch) # dividing the data in batches with the batch size specified
valid_iter = DataLoader(val_data, batch_size=BATCH_SIZE,
                        shuffle=True, collate_fn=generate_batch)
test_iter = DataLoader(test_data, batch_size=BATCH_SIZE,
                       shuffle=True, collate_fn=generate_batch)
#print(train_iter)

## Seq2Seq Transformer Module

In [69]:
class Seq2SeqTransformer(nn.Module):
    def __init__(self, num_encoder_layers: int, num_decoder_layers: int,
                 emb_size: int, src_vocab_size: int, tgt_vocab_size: int,
                 dim_feedforward:int = 512, dropout:float = 0.1):
        super(Seq2SeqTransformer, self).__init__()
        encoder_layer = TransformerEncoderLayer(d_model=emb_size, nhead=NHEAD,
                                                dim_feedforward=dim_feedforward)
        self.transformer_encoder = TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
        decoder_layer = TransformerDecoderLayer(d_model=emb_size, nhead=NHEAD,
                                                dim_feedforward=dim_feedforward)
        self.transformer_decoder = TransformerDecoder(decoder_layer, num_layers=num_decoder_layers)

        self.generator = nn.Linear(emb_size, tgt_vocab_size)
        self.src_tok_emb = TokenEmbedding(src_vocab_size, emb_size)
        self.tgt_tok_emb = TokenEmbedding(tgt_vocab_size, emb_size)
        self.positional_encoding = PositionalEncoding(emb_size, dropout=dropout)

    def forward(self, src: Tensor, trg: Tensor, src_mask: Tensor,
                tgt_mask: Tensor, src_padding_mask: Tensor,
                tgt_padding_mask: Tensor, memory_key_padding_mask: Tensor):
        src_emb = self.positional_encoding(self.src_tok_emb(src))
        #print(src_emb)
        tgt_emb = self.positional_encoding(self.tgt_tok_emb(trg))
        #print(tgt-_emb)
        memory = self.transformer_encoder(src_emb, src_mask, src_padding_mask)
        #print(memory)
        outs = self.transformer_decoder(tgt_emb, memory, tgt_mask, None,
                                        tgt_padding_mask, memory_key_padding_mask)
        #print(outs)
        return self.generator(outs)

    def encode(self, src: Tensor, src_mask: Tensor):
        return self.transformer_encoder(self.positional_encoding(
                            self.src_tok_emb(src)), src_mask)

    def decode(self, tgt: Tensor, memory: Tensor, tgt_mask: Tensor):
        return self.transformer_decoder(self.positional_encoding(
                          self.tgt_tok_emb(tgt)), memory,
                          tgt_mask)

## Positional Encoding

### Text tokens are represented by using token embeddings. Positional encoding is added to the token embedding to introduce a notion of word order.




In [62]:
# Values for Positional Encoding PE(pos,i)=sin(pos/10000**2i/d)
# i is the index of the word 
# and pos is the position
# where d=size of embeddings
# pos 0 means the first positional embedding
# pos 1 means the 2nd and so on
# and i is the index in position embedding we are filling
class PositionalEncoding(nn.Module):
    def __init__(self, emb_size: int, dropout, maxlen: int = 5000):
        super(PositionalEncoding, self).__init__()
        den = torch.exp(- torch.arange(0, emb_size, 2) * math.log(10000) / emb_size)
        pos = torch.arange(0, maxlen).reshape(maxlen, 1)
        pos_embedding = torch.zeros((maxlen, emb_size))
        pos_embedding[:, 0::2] = torch.sin(pos * den)
        pos_embedding[:, 1::2] = torch.cos(pos * den)
        pos_embedding = pos_embedding.unsqueeze(-2)

        self.dropout = nn.Dropout(dropout)
        self.register_buffer('pos_embedding', pos_embedding)

    def forward(self, token_embedding: Tensor):
        return self.dropout(token_embedding + 
                            self.pos_embedding[:token_embedding.size(0),:])

## Token Embeddings

In [63]:
class TokenEmbedding(nn.Module):
    def __init__(self, vocab_size: int, emb_size):
        super(TokenEmbedding, self).__init__()
        self.embedding = nn.Embedding(vocab_size, emb_size)
        self.emb_size = emb_size
    def forward(self, tokens: Tensor):
        return self.embedding(tokens.long()) * math.sqrt(self.emb_size)

### We create a ``subsequent word`` mask to stop a target word from attending to its subsequent words. We also create masks, for masking source and target padding tokens

In [64]:
def generate_square_subsequent_mask(sz):
    mask = (torch.triu(torch.ones((sz, sz), device=DEVICE)) == 1).transpose(0, 1)
    mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
    return mask

def create_mask(src, tgt):
  src_seq_len = src.shape[0]
  tgt_seq_len = tgt.shape[0]

  tgt_mask = generate_square_subsequent_mask(tgt_seq_len)
  src_mask = torch.zeros((src_seq_len, src_seq_len), device=DEVICE).type(torch.bool)

  src_padding_mask = (src == PAD_IDX).transpose(0, 1)
  tgt_padding_mask = (tgt == PAD_IDX).transpose(0, 1)
  return src_mask, tgt_mask, src_padding_mask, tgt_padding_mask

### Define model parameters and instantiate model 

In [65]:
SRC_VOCAB_SIZE = len(de_vocab)
TGT_VOCAB_SIZE = len(en_vocab)

EMB_SIZE = 512

NHEAD = 8

FFN_HID_DIM = 512

BATCH_SIZE = 128

NUM_ENCODER_LAYERS = 3

NUM_DECODER_LAYERS = 3

NUM_EPOCHS = 16

DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

transformer = Seq2SeqTransformer(NUM_ENCODER_LAYERS, NUM_DECODER_LAYERS,
                                 EMB_SIZE, SRC_VOCAB_SIZE, TGT_VOCAB_SIZE,
                                 FFN_HID_DIM)

for p in transformer.parameters():
    if p.dim() > 1:
        nn.init.xavier_uniform_(p)

transformer = transformer.to(device)

loss_fn = torch.nn.CrossEntropyLoss(ignore_index=PAD_IDX)

optimizer = torch.optim.Adam(
    transformer.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9
)

## Training Phase

In [66]:
def train_epoch(model, train_iter, optimizer):
  model.train()
  losses = 0
  for idx, (src, tgt) in enumerate(train_iter):

      src = src.to(device)
      tgt = tgt.to(device)
            
      tgt_input = tgt[:-1, :]

      src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_input)

      logits = model(src, tgt_input, src_mask, tgt_mask,src_padding_mask, tgt_padding_mask, src_padding_mask)
      
      optimizer.zero_grad()
      
      tgt_out = tgt[1:,:]
      loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
      loss.backward()

      optimizer.step()
      losses += loss.item()
  return losses / len(train_iter)

# cross entropy loss with masking and unmasking?
# for mask filter we make a matrix of target_size^target_size
# and pass 0 and negative inf
# add it with attention filter to make mask-attention filter


## Evaluation Phase

In [67]:
def evaluate(model, val_iter):
  model.eval()
  losses = 0
  for idx, (src, tgt) in (enumerate(valid_iter)):
    src = src.to(device)
    tgt = tgt.to(device)

    tgt_input = tgt[:-1, :]

    src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_input)

    logits = model(src, tgt_input, src_mask, tgt_mask,
                              src_padding_mask, tgt_padding_mask, src_padding_mask)
    tgt_out = tgt[1:,:]
    loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
    losses += loss.item()
  return losses / len(val_iter)

## Train model 

In [98]:
from nltk.translate.bleu_score import sentence_bleu
NUM_EPOCHS = 3

for epoch in range(1, NUM_EPOCHS+1): 
    start_time = time.time()
    train_loss = train_epoch(transformer, train_iter, optimizer)
    end_time = time.time()
    val_loss = evaluate(transformer, valid_iter)
    print((f"Epoch: {epoch}, Train loss: {train_loss:.3f}, Val loss: {val_loss:.3f}, "
          f"Epoch time = {(end_time - start_time):.3f}s"))

    #in each iteration calculate bleu score

tensor([[[ 1.1505e+00,  7.8692e-01,  5.3196e-01,  ...,  6.5408e-02,
           1.1184e+00,  3.2086e-01],
         [ 1.1505e+00,  7.8692e-01,  5.3196e-01,  ...,  6.5408e-02,
           1.1184e+00,  3.2086e-01],
         [ 1.1505e+00,  7.8692e-01,  5.3196e-01,  ...,  6.5408e-02,
           1.1184e+00,  3.2086e-01],
         ...,
         [ 1.1505e+00,  0.0000e+00,  5.3196e-01,  ...,  6.5408e-02,
           0.0000e+00,  3.2086e-01],
         [ 0.0000e+00,  7.8692e-01,  5.3196e-01,  ...,  6.5408e-02,
           1.1184e+00,  3.2086e-01],
         [ 1.1505e+00,  7.8692e-01,  5.3196e-01,  ...,  6.5408e-02,
           1.1184e+00,  3.2086e-01]],

        [[ 1.8849e+00, -3.3139e-01,  1.6145e+00,  ...,  1.6773e+00,
           1.0465e+00,  5.8182e-01],
         [ 2.0028e+00,  3.6180e-01,  6.3695e-01,  ...,  2.0972e-01,
          -4.5555e-01,  1.5619e+00],
         [ 2.7647e-01,  6.0356e-01,  1.0200e+00,  ...,  1.8586e+00,
           4.7895e-01,  2.0002e+00],
         ...,
         [ 1.8413e+00,  5

tensor([[[ 1.1541,  0.7908,  0.5322,  ...,  0.0575,  1.1139,  0.3246],
         [ 1.1541,  0.7908,  0.5322,  ...,  0.0000,  1.1139,  0.3246],
         [ 1.1541,  0.7908,  0.5322,  ...,  0.0575,  1.1139,  0.0000],
         ...,
         [ 1.1541,  0.7908,  0.5322,  ...,  0.0000,  1.1139,  0.3246],
         [ 1.1541,  0.7908,  0.5322,  ...,  0.0575,  1.1139,  0.3246],
         [ 1.1541,  0.7908,  0.5322,  ...,  0.0575,  1.1139,  0.3246]],

        [[ 0.4694,  0.4180,  1.0968,  ...,  1.5636, -0.0000,  1.8314],
         [ 1.5635,  1.7385,  0.1745,  ...,  2.1352, -0.5067,  0.0000],
         [-0.0138,  1.1985,  0.1021,  ...,  1.8794, -0.0000,  0.0000],
         ...,
         [ 0.0000,  0.8899,  0.0000,  ...,  1.6680, -0.0000,  1.8082],
         [ 0.2168, -0.2286,  0.8936,  ...,  0.1532,  1.0451,  2.1459],
         [ 1.3168,  0.1534,  0.2264,  ...,  0.2265,  0.3500,  0.0977]],

        [[ 1.6359, -0.0082,  0.1288,  ...,  0.4351, -0.0000,  1.8833],
         [ 1.6359, -0.0082,  0.0000,  ...,  0

tensor([[[ 1.1477,  0.7866,  0.5275,  ...,  0.0505,  0.0000,  0.3284],
         [ 1.1477,  0.7866,  0.5275,  ...,  0.0505,  1.1016,  0.0000],
         [ 1.1477,  0.7866,  0.5275,  ...,  0.0505,  1.1016,  0.3284],
         ...,
         [ 1.1477,  0.7866,  0.5275,  ...,  0.0505,  1.1016,  0.3284],
         [ 1.1477,  0.7866,  0.5275,  ...,  0.0505,  1.1016,  0.3284],
         [ 1.1477,  0.0000,  0.5275,  ...,  0.0505,  1.1016,  0.3284]],

        [[ 0.2612,  1.1026, -0.0000,  ...,  2.2597, -0.2933,  2.0872],
         [ 1.2030,  0.6257,  1.2251,  ...,  1.3530,  0.9398,  1.5023],
         [ 0.5480,  0.2391,  0.6008,  ...,  1.5285, -0.0036,  0.0941],
         ...,
         [ 0.5961,  1.0201, -0.2368,  ...,  1.0740, -0.7199,  0.8854],
         [-0.0288,  0.6859,  1.5427,  ...,  1.9566,  0.0000,  1.6933],
         [ 0.1008, -0.1947,  1.9490,  ...,  2.0458, -0.8743,  0.0881]],

        [[ 0.0000, -0.0034,  0.1267,  ...,  0.4385, -0.0447,  1.8775],
         [ 1.6371, -0.0034,  0.1267,  ...,  0

tensor([[[ 1.0283e+00,  7.0390e-01,  4.7075e-01,  ...,  4.0353e-02,
           9.9058e-01,  2.9537e-01],
         [ 1.0283e+00,  7.0390e-01,  4.7075e-01,  ...,  4.0353e-02,
           9.9058e-01,  2.9537e-01],
         [ 1.0283e+00,  7.0390e-01,  4.7075e-01,  ...,  4.0353e-02,
           9.9058e-01,  2.9537e-01],
         ...,
         [ 1.0283e+00,  7.0390e-01,  4.7075e-01,  ...,  4.0353e-02,
           9.9058e-01,  2.9537e-01],
         [ 1.0283e+00,  7.0390e-01,  4.7075e-01,  ...,  4.0353e-02,
           9.9058e-01,  2.9537e-01],
         [ 1.0283e+00,  7.0390e-01,  4.7075e-01,  ...,  4.0353e-02,
           9.9058e-01,  2.9537e-01]],

        [[ 1.1354e-01,  3.0956e-01,  7.0548e-02,  ...,  1.5504e+00,
           8.1091e-01,  6.8720e-01],
         [ 1.1354e-01,  3.0956e-01,  7.0548e-02,  ...,  1.5504e+00,
           8.1091e-01,  6.8720e-01],
         [ 1.1354e-01,  3.0956e-01,  7.0548e-02,  ...,  1.5504e+00,
           8.1091e-01,  6.8720e-01],
         ...,
         [ 1.1354e-01,  3

tensor([[[ 1.1418e+00,  7.8106e-01,  5.2262e-01,  ...,  4.4623e-02,
           1.1007e+00,  3.2811e-01],
         [ 1.1418e+00,  7.8106e-01,  5.2262e-01,  ...,  4.4623e-02,
           1.1007e+00,  3.2811e-01],
         [ 1.1418e+00,  7.8106e-01,  5.2262e-01,  ...,  4.4623e-02,
           1.1007e+00,  3.2811e-01],
         ...,
         [ 1.1418e+00,  7.8106e-01,  0.0000e+00,  ...,  4.4623e-02,
           1.1007e+00,  3.2811e-01],
         [ 1.1418e+00,  7.8106e-01,  5.2262e-01,  ...,  4.4623e-02,
           1.1007e+00,  3.2811e-01],
         [ 1.1418e+00,  7.8106e-01,  5.2262e-01,  ...,  4.4623e-02,
           1.1007e+00,  3.2811e-01]],

        [[ 1.4146e+00,  7.0555e-01,  6.8856e-01,  ...,  3.5392e-02,
           5.8163e-01,  2.2559e-01],
         [ 0.0000e+00,  1.4998e+00,  0.0000e+00,  ...,  1.1539e+00,
          -0.0000e+00,  1.5381e+00],
         [ 4.5175e-01,  5.2455e-01,  9.9984e-01,  ...,  6.5531e-01,
           4.6410e-01,  2.1437e+00],
         ...,
         [ 5.5650e-01,  1

tensor([[[ 1.1391,  0.7750,  0.0000,  ...,  0.0427,  1.0990,  0.3268],
         [ 1.1391,  0.7750,  0.5194,  ...,  0.0427,  1.0990,  0.3268],
         [ 1.1391,  0.7750,  0.5194,  ...,  0.0427,  1.0990,  0.3268],
         ...,
         [ 1.1391,  0.7750,  0.5194,  ...,  0.0427,  1.0990,  0.3268],
         [ 1.1391,  0.7750,  0.5194,  ...,  0.0427,  1.0990,  0.3268],
         [ 1.1391,  0.7750,  0.5194,  ...,  0.0427,  1.0990,  0.3268]],

        [[ 2.0435,  1.4902,  0.1253,  ...,  1.3622, -1.1070,  0.6905],
         [ 1.8121,  1.1668,  0.9309,  ...,  0.0000, -0.5007,  2.1063],
         [ 0.4368,  0.0584, -0.0261,  ...,  0.6188,  0.6552,  1.8332],
         ...,
         [ 0.4674,  1.1166,  0.3135,  ...,  0.1365,  0.5892,  1.1030],
         [ 0.4847, -0.0694,  0.8901,  ...,  1.7835, -0.2134,  0.0926],
         [ 0.1835,  0.3541, -0.2529,  ...,  1.5540, -0.4787,  2.0271]],

        [[ 1.6359,  0.0070,  0.1182,  ...,  0.4356, -0.0411,  0.0000],
         [ 1.6359,  0.0070,  0.0000,  ...,  0

tensor([[[ 1.1386,  0.0000,  0.5155,  ...,  0.0434,  1.0971,  0.3266],
         [ 1.1386,  0.7777,  0.5155,  ...,  0.0434,  1.0971,  0.0000],
         [ 1.1386,  0.7777,  0.5155,  ...,  0.0434,  1.0971,  0.3266],
         ...,
         [ 1.1386,  0.7777,  0.5155,  ...,  0.0434,  1.0971,  0.3266],
         [ 0.0000,  0.7777,  0.5155,  ...,  0.0434,  1.0971,  0.3266],
         [ 1.1386,  0.7777,  0.5155,  ...,  0.0434,  1.0971,  0.3266]],

        [[ 1.3856, -0.6220,  1.6378,  ...,  2.0346, -0.5818,  0.6940],
         [-0.1655,  0.9486,  1.9178,  ...,  1.1204, -0.4924,  0.0798],
         [ 0.0000,  0.2346,  1.5475,  ...,  0.6557,  0.8280,  0.8032],
         ...,
         [ 0.0000,  0.9360,  0.3207,  ...,  0.0251, -1.0856,  1.8419],
         [ 1.9755,  1.0362,  1.3937,  ...,  0.5616,  0.0000,  0.1287],
         [ 0.0000,  1.1470,  0.0000,  ...,  1.9578,  0.3874,  0.1351]],

        [[ 1.6388,  0.0107,  0.1170,  ...,  0.4319, -0.0000,  1.8596],
         [ 1.6388,  0.0107,  0.1170,  ...,  0

tensor([[[ 1.0237,  0.6972,  0.4601,  ...,  0.0381,  0.9853,  0.3000],
         [ 1.0237,  0.6972,  0.4601,  ...,  0.0381,  0.9853,  0.3000],
         [ 1.0237,  0.6972,  0.4601,  ...,  0.0381,  0.9853,  0.3000],
         ...,
         [ 1.0237,  0.6972,  0.4601,  ...,  0.0381,  0.9853,  0.3000],
         [ 1.0237,  0.6972,  0.4601,  ...,  0.0381,  0.9853,  0.3000],
         [ 1.0237,  0.6972,  0.4601,  ...,  0.0381,  0.9853,  0.3000]],

        [[ 0.1135,  0.3096,  0.0705,  ...,  1.5504,  0.8109,  0.6872],
         [ 0.1135,  0.3096,  0.0705,  ...,  1.5504,  0.8109,  0.6872],
         [ 0.1135,  0.3096,  0.0705,  ...,  1.5504,  0.8109,  0.6872],
         ...,
         [ 0.1135,  0.3096,  0.0705,  ...,  1.5504,  0.8109,  0.6872],
         [ 0.1135,  0.3096,  0.0705,  ...,  1.5504,  0.8109,  0.6872],
         [ 0.1135,  0.3096,  0.0705,  ...,  1.5504,  0.8109,  0.6872]],

        [[ 1.4759,  0.0046,  0.1041,  ...,  0.3855, -0.0378,  1.6703],
         [ 1.4759,  0.0046,  0.1041,  ...,  0

tensor([[[ 0.0000,  0.7713,  0.5077,  ...,  0.0424,  1.0930,  0.0000],
         [ 1.1363,  0.7713,  0.5077,  ...,  0.0424,  1.0930,  0.3416],
         [ 1.1363,  0.7713,  0.5077,  ...,  0.0424,  1.0930,  0.3416],
         ...,
         [ 1.1363,  0.7713,  0.5077,  ...,  0.0424,  1.0930,  0.3416],
         [ 1.1363,  0.7713,  0.5077,  ...,  0.0000,  1.0930,  0.3416],
         [ 1.1363,  0.0000,  0.5077,  ...,  0.0424,  1.0930,  0.3416]],

        [[ 0.8806,  1.0816,  2.0173,  ...,  1.2557, -0.9873,  0.0000],
         [-0.1640,  0.8317,  0.0000,  ...,  1.1307,  0.8633,  2.0212],
         [ 0.0000, -0.2368,  1.9078,  ...,  2.0919, -1.0438,  2.1147],
         ...,
         [ 1.8956, -0.4437,  1.0126,  ...,  0.2499, -1.0775,  0.4142],
         [ 1.3305,  1.4410,  1.0863,  ...,  1.5172, -0.2186,  1.3974],
         [ 1.5829,  0.1776,  0.4443,  ...,  0.2501,  0.6078,  2.1165]],

        [[ 1.6403,  0.0032,  0.0000,  ...,  0.4277, -0.0406,  1.8557],
         [ 1.6403,  0.0000,  0.1167,  ...,  0

tensor([[[ 1.1262,  0.7707,  0.0000,  ...,  0.0455,  1.0919,  0.3451],
         [ 0.0000,  0.7707,  0.5025,  ...,  0.0455,  1.0919,  0.3451],
         [ 1.1262,  0.7707,  0.5025,  ...,  0.0455,  0.0000,  0.3451],
         ...,
         [ 1.1262,  0.7707,  0.5025,  ...,  0.0455,  1.0919,  0.3451],
         [ 1.1262,  0.7707,  0.5025,  ...,  0.0455,  1.0919,  0.3451],
         [ 1.1262,  0.7707,  0.5025,  ...,  0.0455,  1.0919,  0.3451]],

        [[ 0.7744,  1.4387,  0.5464,  ...,  1.9230,  0.4114,  2.1556],
         [-0.0821,  1.8112,  1.4100,  ...,  1.1840, -0.1602,  2.0674],
         [ 0.3310,  0.0000,  1.0043,  ...,  1.1368, -0.0000,  0.0000],
         ...,
         [ 0.4131,  0.0241,  0.0000,  ...,  0.5755, -0.8642,  1.1892],
         [ 0.1263,  1.3657,  0.2572,  ...,  0.3436,  0.1284,  0.0000],
         [ 0.5576,  0.1701,  1.6836,  ...,  0.7563,  0.6253,  1.9125]],

        [[ 1.6355,  0.0063,  0.1159,  ...,  0.4264, -0.0408,  0.0000],
         [ 1.6355,  0.0063,  0.1159,  ...,  0

tensor([[[ 1.0106,  0.6939,  0.4449,  ...,  0.0386,  0.9790,  0.3104],
         [ 1.0106,  0.6939,  0.4449,  ...,  0.0386,  0.9790,  0.3104],
         [ 1.0106,  0.6939,  0.4449,  ...,  0.0386,  0.9790,  0.3104],
         ...,
         [ 1.0106,  0.6939,  0.4449,  ...,  0.0386,  0.9790,  0.3104],
         [ 1.0106,  0.6939,  0.4449,  ...,  0.0386,  0.9790,  0.3104],
         [ 1.0106,  0.6939,  0.4449,  ...,  0.0386,  0.9790,  0.3104]],

        [[ 0.1135,  0.3096,  0.0705,  ...,  1.5504,  0.8109,  0.6872],
         [ 0.1135,  0.3096,  0.0705,  ...,  1.5504,  0.8109,  0.6872],
         [ 0.1135,  0.3096,  0.0705,  ...,  1.5504,  0.8109,  0.6872],
         ...,
         [ 0.1135,  0.3096,  0.0705,  ...,  1.5504,  0.8109,  0.6872],
         [ 0.1135,  0.3096,  0.0705,  ...,  1.5504,  0.8109,  0.6872],
         [ 0.1135,  0.3096,  0.0705,  ...,  1.5504,  0.8109,  0.6872]],

        [[ 1.4653,  0.0057,  0.0994,  ...,  0.3806, -0.0358,  1.6539],
         [ 1.4653,  0.0057,  0.0994,  ...,  0

## Decoding

In [101]:
from torchtext.data.metrics import bleu_score


In [102]:
def calc_bleu_score(translations, references):
    translations_formatted = [translation.split() for translation in translations]
    references_formatted = [[translation.split()] for translation in references]
    return bleu_score(translations_formatted, references_formatted)


In [107]:
def calc_bleu_score_from_file(filename):
  with open(f'english_train.txt') as file:
      translations = file.readlines()

  return calc_bleu_score(translations=translations, references=en_vocab)


In [108]:
print(calc_bleu_score_from_file(filename='beam_beam5_translations.txt'))

TypeError: __getitem__(): incompatible function arguments. The following argument types are supported:
    1. (self: torchtext._torchtext.Vocab, arg0: str) -> int

Invoked with: <torchtext._torchtext.Vocab object at 0x0000024ED2721FB0>, 0

In [81]:
def greedy_decode(model, src, src_mask, max_len, start_symbol):
    src = src.to(device)
    src_mask = src_mask.to(device)

    memory = model.encode(src, src_mask)
    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).to(device)
    for i in range(max_len-1):
        memory = memory.to(device)
        memory_mask = torch.zeros(ys.shape[0], memory.shape[0]).to(device).type(torch.bool)
        tgt_mask = (generate_square_subsequent_mask(ys.size(0))
                                    .type(torch.bool)).to(device)
        out = model.decode(ys, memory, tgt_mask)
        out = out.transpose(0, 1)
        prob = model.generator(out[:, -1])
        #print(prob)
        _, next_word = torch.max(prob, dim = 1)
        next_word = next_word.item()

        ys = torch.cat([ys,
                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
        if next_word == EOS_IDX:
          break
    return ys

## Translation 

In [82]:
def translate(model, src, src_vocab, tgt_vocab, src_tokenizer):
  model.eval()
  tokens = [BOS_IDX] + [src_vocab.get_stoi()[tok] for tok in src_tokenizer(src)]+ [EOS_IDX]
  num_tokens = len(tokens)
  src = (torch.LongTensor(tokens).reshape(num_tokens, 1) )
  src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool)
  tgt_tokens = greedy_decode(model,  src, src_mask, max_len=num_tokens + 5, start_symbol=BOS_IDX).flatten()
  return " ".join([tgt_vocab.get_itos()[tok] for tok in tgt_tokens]).replace("<bos>", "").replace("<eos>", "")

In [83]:
translate(transformer, "جتنے منہ اتنی باتیں", de_vocab, en_vocab, de_tokenizer)

tensor([[-1.5569, -1.9675, -1.8646,  ..., -0.2607,  0.0160,  0.3012]],
       grad_fn=<AddmmBackward0>)


' '

In [79]:
translate(transformer, "چور چور مسیرے بھائی", de_vocab, en_vocab, de_tokenizer)

tensor([[-1.5648, -2.0226, -1.9407,  ..., -0.2604, -0.0215,  0.3111]],
       grad_fn=<AddmmBackward0>)


' '

In [84]:
translate(transformer, "اندھوں میں کانا راجہ", de_vocab, en_vocab, de_tokenizer)

tensor([[-1.6195, -1.9894, -1.9117,  ..., -0.2289, -0.0105,  0.3811]],
       grad_fn=<AddmmBackward0>)


' '

In [85]:
translate(transformer, "نہ کامیابی حتمی ہوتی ہے اور نہ ہی ناکامی: بلکہ اصل چیز کوشش جاری رکھنے کا حوصلہ ہوتا ہے", de_vocab, en_vocab, de_tokenizer)

tensor([[-1.5671, -1.9501, -1.9125,  ..., -0.2018, -0.0152,  0.2737]],
       grad_fn=<AddmmBackward0>)


' '

In [86]:
# Using readlines()
file1 = open('urdu_train.txt', 'r',encoding='utf8')
Lines = file1.readlines()
  
file2=open('trans.txt','w',encoding='utf8')
count = 0
# Strips the newline character
for line in Lines:
    count += 1
    print(line.strip())
    var=translate(transformer, line.strip(), de_vocab, en_vocab, de_tokenizer)
    print(var)
    file2.writelines(var)
    if count==25:
        break

لاس اینجلس نے سیزن شروع کرنے کے لئے سیدھے رات اور اپنے پہلے 14 میں سے 13 کھیل کھوئے ہیں۔
tensor([[-1.5782, -1.9660, -1.8787,  ..., -0.2001, -0.0545,  0.3672]],
       grad_fn=<AddmmBackward0>)
 
آنکھ کا اندھا نام نین سکھ
tensor([[-1.6177, -1.9464, -1.8673,  ..., -0.2233,  0.0033,  0.2748]],
       grad_fn=<AddmmBackward0>)
 
کھسیانی بلی کھمبا نوچے
tensor([[-1.5923, -1.9341, -1.8842,  ..., -0.2371, -0.0300,  0.2807]],
       grad_fn=<AddmmBackward0>)
 
چوری کا مال موری میں
tensor([[-1.5357, -2.0384, -1.9141,  ..., -0.2708, -0.0568,  0.2822]],
       grad_fn=<AddmmBackward0>)
 
چھوٹا منہ بڑی بات
tensor([[-1.5942, -1.9807, -1.8881,  ..., -0.2031, -0.0570,  0.2857]],
       grad_fn=<AddmmBackward0>)
 
جتنے منہ اتنی باتیں
tensor([[-1.5569, -1.9675, -1.8646,  ..., -0.2607,  0.0160,  0.3012]],
       grad_fn=<AddmmBackward0>)
 
بہتی گنگا میں ہاتھ دھونا
tensor([[-1.5524, -1.9942, -1.9099,  ..., -0.2171,  0.0282,  0.2975]],
       grad_fn=<AddmmBackward0>)
 
مان نہ مان میں تیرا مہمان
tensor([[

In [89]:
def bleu(data, model, german, english, device):
    targets = []
    outputs = []

    for example in data:
        src = vars(example)["src"]
        trg = vars(example)["trg"]

        prediction = translate_sentence(model, src, urdu, english, device)
        prediction = prediction[:-1]  # remove <eos> token

        targets.append([trg])
        outputs.append(prediction)

    return bleu_score(outputs, targets)

In [91]:
transformer

Seq2SeqTransformer(
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (linear1): Linear(in_features=512, out_features=512, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=512, out_features=512, bias=True)
        (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (linear1): Linear(in_features=512, out_features=512, bias=True)
        (dropout): D

In [93]:
import torch
import spacy
from torchtext.data.metrics import bleu_score
import sys
score = bleu(test_dataset, transformer, de_tokenizer, en_tokenizer, DEVICE)
print(f"Bleu score {score:.2f}")

TypeError: vars() argument must have __dict__ attribute