In [1]:
# Torch Imports
import torch as t
import torch.nn as nn
from torch.nn.functional import pad
from torch.utils.data import DataLoader

# Distribution Imports
# from torch.utils.data.distributed import DistributedSampler
# import torch.distributed as dist
# import torch.multiprocessing as mp
# from torch.nn.parallel import DistributedDataParallel as DDP

# NLP Imports
from torchtext.vocab import build_vocab_from_iterator
import torchtext.datasets as datasets
import spacy
from torchtext.data.functional import to_map_style_dataset

# Miscellaneous Imports
from os.path import exists
from math import sqrt
import time
from tqdm.notebook import tqdm
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Setting device
device = t.device('cpu')

In [3]:
# Loading Data
train, val, test = datasets.Multi30k(language_pair=("de", "en"))

In [4]:
# Converting Generator Style Dataset to Map Style Dataset
train = to_map_style_dataset(train)
test = to_map_style_dataset(test)

In [5]:
# Uncomment to see 3rd German-English sentence pair in dataset
#train[2]

In [5]:
# Loading Dependencies
spacy_de = spacy.load("de_core_news_sm")
spacy_en = spacy.load("en_core_web_sm")

In [6]:
# Tokenize in source and target language
def tokenize(text, tokenizer):
    return [tok.text for tok in tokenizer.tokenizer(text)]

def tokenize_de(text):
    return tokenize(text, spacy_de)

def tokenize_en(text):
    return tokenize(text, spacy_en)

In [7]:
# Tokenizes from each sentence in dataset 
def yield_tokens(data_iter, tokenizer, index):
    for from_to_tuple in data_iter:
        yield tokenizer(from_to_tuple[index])

In [9]:
# Uncomment for seeing sentence pairs in vocabulary
# for label, line in train:
#     print(label)
#     print(line)
#     break

In [8]:
# Builds Vocabulary of source and target language
def build_vocabulary(spacy_de, spacy_en):

    print("Building German Vocabulary ...")
    train, val, test = datasets.Multi30k(language_pair=("de", "en"))
    vocab_src = build_vocab_from_iterator(
        yield_tokens(train + val + test, tokenize_de, index=0),
        min_freq=2,
        specials=["<s>", "</s>", "<blank>", "<unk>"],
    )

    print("Building English Vocabulary ...")
    train, val, test = datasets.Multi30k(language_pair=("de", "en"))
    vocab_tgt = build_vocab_from_iterator(
        yield_tokens(train + val + test, tokenize_en, index=1),
        min_freq=2,
        specials=["<s>", "</s>", "<blank>", "<unk>"],
    )

    vocab_src.set_default_index(vocab_src["<unk>"])
    vocab_tgt.set_default_index(vocab_tgt["<unk>"])

    return vocab_src, vocab_tgt

In [9]:
# Loads Vocabulary
def load_vocab(spacy_de, spacy_en):
    if not exists("vocab.pt"):
        vocab_src, vocab_tgt = build_vocabulary(spacy_de, spacy_en)
        t.save((vocab_src, vocab_tgt), "vocab.pt")
    else:
        vocab_src, vocab_tgt = t.load("vocab.pt")
    print("Finished.\nVocabulary sizes:")
    print(len(vocab_src))
    print(len(vocab_tgt))
    return vocab_src, vocab_tgt

In [10]:
source_vocab,target_vocab = load_vocab(spacy_de, spacy_en)

Finished.
Vocabulary sizes:
8315
6384


In [13]:
#Uncomment for length of target vocabulary
#len(target_vocab)

In [14]:
# Uncomment for indices of each word
#target_vocab(tokenize_en("Two young, White males are outside near many bushes."))

In [15]:
# Uncomment this for vocabulary in target language
#target_vocab.lookup_tokens(range(target_vocab.__len__()))

In [11]:
# Sentence pre-processing Helper Function
def collate_batch(
    batch,
    device,
    max_padding=128,
    pad_id=2,
):
    bs_id = t.tensor([0], device=device)  # <s> token id
    eos_id = t.tensor([1], device=device)  # </s> token id
    src_list, tgt_list = [], []
    for (_src, _tgt) in batch:
        processed_src = t.cat(
            [
                bs_id,
                t.tensor(
                    source_vocab(tokenize_de(_src)),
                    dtype=t.int64,
                    device=device,
                ),
                eos_id,
            ]
        )
        processed_tgt = t.cat(
            [
                bs_id,
                t.tensor(
                    target_vocab(tokenize_en(_tgt)),
                    dtype=t.int64,
                    device=device,
                ),
                eos_id,
            ]
        )
        src_list.append(
            # warning - overwrites values for negative values of padding - len
            pad(
                processed_src,
                (
                    0,
                    max_padding - len(processed_src),
                ),
                value=pad_id,
            )
        )
        tgt_list.append(
            pad(
                processed_tgt,
                (0, max_padding - len(processed_tgt)),
                value=pad_id,
            )
        )

    src = t.stack(src_list)
    tgt = t.stack(tgt_list)
    return (src, tgt)

In [12]:
# Collate Function for DataLoader
def collate_fn(batch):
    return collate_batch(
    batch,
    device
    )

In [18]:
# train_sampler = DistributedSampler(train_iter_map)

In [13]:
train_dataloader = DataLoader(
        train,
        batch_size=100,
        shuffle=True,
        collate_fn=collate_fn,
#         sampler = train_sampler,
    )

In [14]:
for data in train_dataloader:
    print(data)
    break

(tensor([[   0,    5,   12,  ...,    2,    2,    2],
        [   0,    5,   12,  ...,    2,    2,    2],
        [   0,   60,  120,  ...,    2,    2,    2],
        ...,
        [   0,    5, 2480,  ...,    2,    2,    2],
        [   0,    5,  100,  ...,    2,    2,    2],
        [   0,    5,   75,  ...,    2,    2,    2]]), tensor([[   0,  111,   21,  ...,    2,    2,    2],
        [   0,    6,   12,  ...,    2,    2,    2],
        [   0,  223,   17,  ...,    2,    2,    2],
        ...,
        [   0, 5038,    7,  ...,    2,    2,    2],
        [   0,    6,   25,  ...,    2,    2,    2],
        [   0,    6,   77,  ...,    2,    2,    2]]))


In [15]:
data = t.stack(list(data))

In [16]:
data.shape

torch.Size([2, 100, 128])

In [269]:
# Positional Encoding
class PositionalEncoding(nn.Module):
    def __init__(self,d_model):
        super(PositionalEncoding, self).__init__()
        self.d_model = d_model
        
    def forward(self, X):
        y = t.arange(self.d_model/2)        
        y = t.repeat_interleave(y,2)
        Z = t.empty((128,512))
        for _ in range(Z.shape[0]):
            Z[_] = y
        Z = Z/self.d_model
        Z = 1/(1e4)**Z
        Z = t.arange(X.shape[1])*Z
        Z[:, 0::2] = torch.sin(Z[:, 0::2])
        Z[:, 1::2] = torch.cos(Z[:, 1::2])
        Z = nn.LayerNorm(Z.shape)(Z+X)
        return Z

In [17]:
# nHEADS is number of heads in multi-attention
# d_model refers to representation size of each word
# HIDDEN_SIZE refers to size of query, key and value vectors.
nHEADS = 8
d_model = 512
HIDDEN_SIZE = 64

In [18]:
# Word Embeddings
class Embeddings(nn.Module):
    def __init__(self, d_model, vocab):
        super(Embeddings, self).__init__()
        self.lut = nn.Embedding(vocab, d_model,device=device)
        self.d_model = d_model

    def forward(self, X):
        return self.lut(X) * sqrt(self.d_model)

In [204]:
# Self-Attention in Encoder
class EncAttention(nn.Module):
    def __init__(self):
        super(EncAttention, self).__init__()
        self.Wq = nn.Parameter(t.rand(nHEADS, d_model, HIDDEN_SIZE)).to(device)
        self.Wk = nn.Parameter(t.rand(nHEADS, d_model, HIDDEN_SIZE)).to(device)
        self.Wv = nn.Parameter(t.rand(nHEADS, d_model, HIDDEN_SIZE)).to(device)
        self.Wo = nn.Parameter(t.rand(nHEADS*HIDDEN_SIZE, d_model)).to(device)
        
    def forward(self,data):
        ans = t.empty_like(data)
        for i,X in enumerate(data):
            X = X.to(device)
            Q = X@self.Wq
            K = X@self.Wk
            V = X@self.Wv
            Z = t.bmm(Q,K.transpose(1,2))/sqrt(HIDDEN_SIZE)
            Z = nn.Softmax(dim=2)(Z)
            Z = t.einsum('ijj->ij',[Z])
            Z = t.einsum('ij,ijk->ijk',Z,V)
            Z = t.reshape(Z,(Z.shape[1],-1))
            Z = Z@self.Wo
            Z = nn.Dropout(p=0.1)(Z)
            Z = nn.LayerNorm(Z.shape,device=device)(Z+X)
            ans[i] = Z
        return ans

In [271]:
model1 = Encoder()

In [272]:
deu_embed = Embeddings(512,len(source_vocab))

In [273]:
deuinput = deu_embed(data[0])

In [274]:
deuinput.shape

torch.Size([100, 128, 512])

In [270]:
Zf

tensor([[[ 3.2945e+00,  2.5852e+00,  2.6700e+00,  ..., -6.6141e-02,
           2.7845e+00,  1.4183e+00],
         [-7.2415e-02, -5.8308e-02, -6.5369e-02,  ..., -6.2374e-02,
          -5.8457e-02, -6.5039e-02],
         [-2.0307e+00, -2.1051e+00, -2.1845e+00,  ..., -2.0963e+00,
          -2.1695e+00, -2.3917e+00],
         ...,
         [-6.8764e-02, -6.6751e-02, -6.9084e-02,  ..., -6.6754e-02,
          -6.0466e-02, -6.5416e-02],
         [-6.8764e-02, -6.6751e-02, -6.9084e-02,  ..., -6.6754e-02,
          -6.0466e-02, -6.5416e-02],
         [-6.8764e-02, -5.5299e-02, -6.9084e-02,  ..., -6.6754e-02,
          -6.0466e-02, -6.5416e-02]],

        [[ 3.2058e+00,  2.8280e+00,  3.1228e+00,  ...,  3.2207e+00,
           3.0587e+00,  3.8805e+00],
         [-3.9858e+00, -3.1613e+00, -3.6970e+00,  ...,  2.5524e-01,
          -3.4223e+00, -3.9652e+00],
         [ 8.7728e-03,  1.7313e-02,  7.3248e-03,  ...,  2.1989e-02,
           1.1406e-02,  1.7662e-02],
         ...,
         [ 1.0039e-02,  1

In [None]:
Zf = model1(deuinput)

In [149]:
Zf.shape

torch.Size([100, 128, 512])

In [205]:
# Feed-Forward Network
class FeedForward(nn.Module):
    def __init__(self):
        super(FeedForward, self).__init__()
        self.linear1 = nn.Linear(512,2048,device=device)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(2048,512,device=device)
        self.relu2 = nn.ReLU()
            
    def forward(self,data):
        ans = t.empty_like(data)
        for i,X in enumerate(data):
            Z = self.linear1(X)
            Z = self.relu1(Z)
            Z = self.linear2(Z)
            Z = self.relu2(Z)
            Z = nn.Dropout(p=0.1)(Z)
            Z = nn.LayerNorm(Z.shape,device=device)(Z+X)
            ans[i] = Z
        return ans

In [29]:
# Encoder
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        self.attention = EncAttention()
        self.feedforward = FeedForward()
        
    def forward(self,X):
        Z = self.attention(X)
        Z = self.feedforward(Z)
        return Z

In [206]:
# Masked Attention in Decoder
class MaskedAttention(nn.Module):
    def __init__(self):
        super(MaskedAttention, self).__init__()
        self.Wq = nn.Parameter(t.rand(nHEADS, d_model, HIDDEN_SIZE)).to(device)
        self.Wk = nn.Parameter(t.rand(nHEADS, d_model, HIDDEN_SIZE)).to(device)
        self.Wv = nn.Parameter(t.rand(nHEADS, d_model, HIDDEN_SIZE)).to(device)
        self.Wo = nn.Parameter(t.rand(nHEADS*HIDDEN_SIZE, d_model)).to(device)
        
        
    def forward(self,data):
        ans = t.empty_like(data)
        rQ = t.empty(data.shape[0],nHEADS,128,HIDDEN_SIZE)
        for i,X in enumerate(data):
            Q = X@self.Wq
            K = X@self.Wk
            V = X@self.Wv
            Z = t.bmm(Q,K.transpose(1,2))/sqrt(HIDDEN_SIZE)
            r,c = t.triu_indices(Z.shape[1],Z.shape[1],1)
            Z[:,r,c] = float('-inf')
            Z = nn.Softmax(dim=2)(Z)
            Z = t.einsum('ijj->ij',[Z])
            Z = t.einsum('ij,ijk->ijk',Z,V)
            Z = t.reshape(Z,(Z.shape[1],-1))
            Z = Z@self.Wo
            Z = nn.Dropout(p=0.1)(Z)
            Z = nn.LayerNorm(Z.shape,device=device)(Z+X)
            ans[i] = Z
            rQ[i] = Q
        return ans.reshape(ans.shape[0],nHEADS,ans.shape[1],-1), rQ

In [178]:
model2 = MaskedAttention()

In [190]:
eng_embed = Embeddings(512,len(target_vocab))
enginput = eng_embed(data[1])
enginput.shape

torch.Size([100, 128, 512])

In [179]:
Zf2 = model2(enginput)

0it [00:00, ?it/s]

In [146]:
Zf2[0].shape,Zf2[1].shape

(torch.Size([100, 8, 128, 64]), torch.Size([100, 8, 128, 64]))

In [145]:
Zf.shape

torch.Size([100, 128, 512])

In [169]:
model3 = EncDecAttention()

In [170]:
Zf3 = model3(Zf2[0],Zf2[1],Zf)

0it [00:00, ?it/s]

In [171]:
Zf3.shape

torch.Size([100, 128, 512])

In [172]:
model4 = FeedForward()

In [173]:
Zf4 = model4(Zf3)

0it [00:00, ?it/s]

In [174]:
Zf4.shape

torch.Size([100, 128, 512])

In [246]:
model10 = Encoder()

In [248]:
Zf10 = model10(deuinput)

In [249]:
Zf10

tensor([[[-1.3430,  0.1841,  0.7273,  ...,  0.3474, -0.6324,  0.3642],
         [-0.6915, -0.7025,  0.4740,  ...,  1.2667, -0.3313,  0.8768],
         [-0.5146,  0.6635,  0.0222,  ...,  0.0369, -0.5946,  1.2212],
         ...,
         [ 0.2136,  0.5731, -0.3884,  ...,  0.6604, -0.3898,  0.5286],
         [ 0.2136,  0.5731, -0.3884,  ...,  0.6604, -0.3898,  0.5286],
         [ 0.2136,  0.5731, -0.3884,  ...,  0.6604, -0.3898,  0.5286]],

        [[-1.2767,  0.2291,  0.6541,  ...,  0.4024, -0.6779,  0.3755],
         [-0.6438, -0.6876,  0.5122,  ...,  1.2951, -0.3327,  0.7990],
         [-0.4932,  0.7473, -0.0041,  ...,  0.0363, -0.6141,  1.2131],
         ...,
         [ 0.2151,  0.5731, -0.3877,  ...,  0.6585, -0.3884,  0.5291],
         [ 0.2151,  0.5731, -0.3877,  ...,  0.6585, -0.6249,  0.5291],
         [ 0.2151,  0.5731, -0.3877,  ...,  0.6585, -0.3884,  0.5291]],

        [[-1.3429,  0.0608,  0.6564,  ...,  0.3815, -0.7313,  0.3517],
         [ 0.5192,  1.0687,  0.9969,  ..., -0

In [247]:
for p in model10.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)

In [199]:
model5 = Decoder()

In [200]:
Zf5 = model5(enginput,Zf)

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

In [210]:
model6 = EncoderStack()

In [211]:
Zf6 = model6(deuinput)

Encoder 1 Completed
Encoder 2 Completed
Encoder 3 Completed
Encoder 4 Completed
Encoder 5 Completed
Encoder 6 Completed


In [212]:
Zf6.shape

torch.Size([100, 128, 512])

In [241]:
Zf6

tensor([[[-0.1429, -0.1112, -0.1361,  ..., -0.1492, -0.1491, -0.1322],
         [-0.1429, -0.1112, -0.1361,  ..., -0.1492, -0.1492, -0.1322],
         [-0.1429, -0.1112, -0.1361,  ..., -0.1492, -0.1491, -0.1322],
         ...,
         [-0.1426, -0.1109, -0.1356,  ..., -0.1487, -0.1488, -0.1320],
         [-0.1492, -0.1112, -0.1492,  ..., -0.1492, -0.1491, -0.1322],
         [ 4.2928,  3.3573,  3.7632,  ...,  4.9766,  3.5077, -0.1492]],

        [[-0.2145, -0.1938, -0.2507,  ..., -0.2507, -0.2507, -0.2189],
         [-0.2145, -0.1938, -0.2287,  ..., -0.2507, -0.2507, -0.2189],
         [-0.2145, -0.2507, -0.2287,  ..., -0.2507, -0.2507, -0.2189],
         ...,
         [-0.2145, -0.1938, -0.2287,  ..., -0.2507, -0.2507, -0.2189],
         [-0.2502, -0.1939, -0.2282,  ..., -0.2507, -0.2500, -0.2188],
         [-0.2145, -0.1938, -0.2287,  ..., -0.2507, -0.2507, -0.2189]],

        [[-0.1250, -0.0914, -0.1152,  ..., -0.1250, -0.1230, -0.1119],
         [-0.1250, -0.0914, -0.1152,  ..., -0

In [216]:
model7 = DecoderStack()

In [217]:
Zf7 = model7(enginput,Zf6)

Decoder 1 Completed
Decoder 2 Completed
Decoder 3 Completed
Decoder 4 Completed
Decoder 5 Completed
Decoder 6 Completed


In [218]:
Zf7.shape

torch.Size([100, 128, 512])

In [261]:
model8 = Transformer()

In [262]:
Zf8 = model8(data[0],data[1])

Encoder 1 Completed
Encoder 2 Completed
Encoder 3 Completed
Encoder 4 Completed
Encoder 5 Completed
Encoder 6 Completed
Decoder 1 Completed
Decoder 2 Completed
Decoder 3 Completed
Decoder 4 Completed
Decoder 5 Completed
Decoder 6 Completed


In [263]:
Zf8.shape

torch.Size([100, 128])

In [239]:
Zf7.shape

torch.Size([100, 128, 512])

In [257]:
model9 = finalComponent()

In [258]:
Zf9 = model9(Zf7)

In [259]:
Zf9

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [207]:
# Encoder-Decoder Attention in Decoder
class EncDecAttention(nn.Module):
    def __init__(self):
        super(EncDecAttention,self).__init__()
        self.Wo = nn.Parameter(t.rand(nHEADS*HIDDEN_SIZE, d_model)).to(device)
        
        
    def forward(self, data, maskquery, enc_output):
        ans = t.empty_like(enc_output)
        enc_output = enc_output.reshape(enc_output.shape[0],nHEADS,enc_output.shape[1],-1)
        for i,X in enumerate(data):
            Q = maskquery[i]
            K = enc_output[i]
            V = enc_output[i]
            Z = t.bmm(Q,K.transpose(1,2))/sqrt(HIDDEN_SIZE)
            r,c = t.triu_indices(Z.shape[0],Z.shape[1],1)
            Z[r,c] = float('-inf')            
            Z = nn.Softmax(dim=2)(Z)
            Z = t.einsum('ijj->ij',[Z])           
            Z = t.einsum('ij,ijk->ijk',Z,V)
            Z = t.reshape(Z,(Z.shape[1],-1))
            Z = Z@self.Wo
            Z = nn.Dropout(p=0.1)(Z)            
            Z = nn.LayerNorm(Z.shape,device=device)(Z+X.reshape(X.shape[1],-1))
            ans[i] = Z
        return ans

In [198]:
# Decoder
class Decoder(nn.Module):
    def __init__(self):
        super(Decoder,self).__init__()
        self.masked = MaskedAttention()
        self.encdec = EncDecAttention()
        self.feedforward = FeedForward()
        
    def forward(self, X, enc_output):
        Z, Q = self.masked(X)
        Z = self.encdec(Z, Q, enc_output)
        Z = self.feedforward(Z)
        return Z

In [209]:
# Encoder Stack
class EncoderStack(nn.Module):
    def __init__(self):
        super(EncoderStack,self).__init__()
        self.enc1 = Encoder()
        self.enc2 = Encoder()
        self.enc3 = Encoder()
        self.enc4 = Encoder()
        self.enc5 = Encoder()
        self.enc6 = Encoder()
        
    def forward(self,X):
        Z = self.enc1(X)
        print("Encoder 1 Completed")
        Z = self.enc2(Z)
        print("Encoder 2 Completed")
        Z = self.enc3(Z)
        print("Encoder 3 Completed")
        Z = self.enc4(Z)
        print("Encoder 4 Completed")
        Z = self.enc5(Z)
        print("Encoder 5 Completed")
        Z = self.enc6(Z)
        print("Encoder 6 Completed")
        return Z

In [215]:
# Decoder Stack
class DecoderStack(nn.Module):
    def __init__(self):
        super(DecoderStack,self).__init__()
        self.dec1 = Decoder()
        self.dec2 = Decoder()
        self.dec3 = Decoder()
        self.dec4 = Decoder()
        self.dec5 = Decoder()
        self.dec6 = Decoder()
        
    def forward(self,X, enc_output):
        Z = self.dec1(X, enc_output)
        print("Decoder 1 Completed")
        Z = self.dec2(Z, enc_output)
        print("Decoder 2 Completed")
        Z = self.dec3(Z, enc_output)
        print("Decoder 3 Completed")
        Z = self.dec4(Z, enc_output)
        print("Decoder 4 Completed")
        Z = self.dec5(Z, enc_output)
        print("Decoder 5 Completed")
        Z = self.dec6(Z, enc_output)
        print("Decoder 6 Completed")
        return Z

In [256]:
# Final Component of Fully-Connected and Softmax Layer
class finalComponent(nn.Module):
    def __init__(self):
        super(finalComponent,self).__init__()
        self.fc = nn.Linear(d_model,len(target_vocab))
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self,data):
        ans = t.empty(data.shape[0],128)
        for i,X in enumerate(data):
            Z = self.fc(X)
            Z = self.softmax(Z)
            ans[i] = t.argmax(Z,dim=1)
        return ans

In [260]:
# Merging all components into 1 class
class Transformer(nn.Module):
    def __init__(self):
        super(Transformer,self).__init__()
        self.deu_embed = Embeddings(512,len(source_vocab))
        self.eng_embed = Embeddings(512,len(target_vocab))
        self.pe = Posi
        self.EncStack = EncoderStack()
        self.DecStack = DecoderStack()
        self.finalComponent = finalComponent()
    
    def forward(self, Xdeu, Xeng):
        Xdeu = self.deu_embed(Xdeu)
        Z = self.EncStack(Xdeu)
        Xeng = self.eng_embed(Xeng)        
        Z = self.DecStack(Xeng,Z)
        Z = self.finalComponent(Z)
        return Z

In [264]:
truemodel = Transformer()

In [265]:
for p in truemodel.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)

In [266]:
begin = time.time()
Zfinal = truemodel(data[0],data[1])
end = time.time()

Encoder 1 Completed
Encoder 2 Completed
Encoder 3 Completed
Encoder 4 Completed
Encoder 5 Completed
Encoder 6 Completed
Decoder 1 Completed
Decoder 2 Completed
Decoder 3 Completed
Decoder 4 Completed
Decoder 5 Completed
Decoder 6 Completed


In [267]:
end-begin

20.76242685317993

In [268]:
Zfinal

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])