<a href="https://colab.research.google.com/github/Pranav-2509/NIGHT_VISION_SELF/blob/main/Vision_Transformer_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

-------------------------
#Imports
-------------------------

In [None]:
import torch
import torch.nn as nn
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset

--------------------------
#Device Agnostic Code
-------------------------

In [None]:
# Select the compute device: CUDA when PyTorch reports a usable GPU, CPU otherwise.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

In [None]:
# Show which device string was selected above.
print (device)

cuda


------------------------
#Loading the Data
-----------------------

In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Preprocessing pipeline: PIL image -> tensor, then normalise the single
# channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST train / test splits, stored under ./data (download requested if absent).
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Samples per mini-batch.
batch_size = 64

print (type(train_dataset))

# Batch iterators: the training data is reshuffled, the test data keeps its order.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Pull the first training batch to confirm tensor shapes; `images` and
# `labels` stay bound and are read again by later cells.
images, labels = next(iter(train_loader))
print(images.shape, labels.shape)


<class 'torchvision.datasets.mnist.MNIST'>
torch.Size([64, 1, 28, 28]) torch.Size([64])


In [None]:
# Number of target classes (MNIST digits 0-9); read as a global by the
# Encoding block when it builds its classification head.
num_classes = 10

In [None]:
# import torch
# import torchvision
# import torchvision.transforms as transforms

# # Define transformations
# transform = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.Normalize((0.5, 0.5, 0.5), (0.5 ,0.5, 0.5))
# ])
# batch_size = 128

# # Download and load the CIFAR-10 training set
# trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
# trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

# # Download and load the CIFAR-10 test set
# testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
# testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

# # Classes in CIFAR-10
# classes = trainset.classes
# num_classes = len (classes)

# # Example to iterate through the training set
# dataiter = iter(trainloader)
# images, labels = next(dataiter)

# print('Loaded CIFAR-10 dataset')
# print('Batch of images shape:', images.shape)
# print('Batch of labels shape:', labels.shape)


-------------------------
#Splitting image into tokens
-------------------------

Patch Tokenizer

In [None]:
# Side length, in pixels, of each square patch cut from an image.
patch_size = 4
# images[0] is a single sample of the batch fetched above; entry 1 of its
# shape is used as the image side length (28 for the 1x28x28 batch printed above).
image_dim = images[0].shape[1]
# Length of the embedding vector every patch is projected to.
token_length = 50

In [None]:
class Patch_Tokenization (nn.Module):
  """Cut an image batch into non-overlapping square patches and embed each patch.

  Args:
    img_size: ``(channels, height, width)`` of a single image.
    patch_size: side length of each square patch, in pixels.
    token_length: size of the embedding produced for every patch.

  Attributes:
    no_tokens (int): number of patches (tokens) produced per image.
  """

  def __init__ (self, img_size, patch_size : int = 50, token_length : int = 768):

    super().__init__()

    self.img_size = img_size
    c, h, w = self.img_size
    self.patch_size = patch_size
    self.token_length = token_length

    # With stride == kernel_size and no padding, nn.Unfold yields
    # floor(h / p) * floor(w / p) patches. Integer arithmetic keeps the count
    # an int and in agreement with Unfold even when h or w is not a
    # multiple of the patch size.
    self.no_tokens = (h // patch_size) * (w // patch_size)

    # Layers
    # Unfold flattens every patch into a column of c * patch_size**2 values.
    self.split = nn.Unfold (kernel_size = self.patch_size, stride = self.patch_size, padding = 0)
    # Linear projection of each flattened patch to the token length.
    self.linear = nn.Linear (in_features = c * (self.patch_size)**2, out_features = self.token_length)


  def forward (self, img):
    """Map ``(batch, c, h, w)`` images to ``(batch, no_tokens, token_length)`` tokens."""
    # Unfold returns (batch, c * p * p, no_tokens); put the token axis second.
    patches = self.split (img).transpose (1, 2)
    return self.linear (patches)

In [None]:
# Build the patch tokeniser from the shape of one sample image, then move
# its parameters to the selected device.
tokeniser = Patch_Tokenization (images[0].shape, patch_size = patch_size, token_length = token_length)
tokeniser = tokeniser.to(device)

In [None]:
# Patches per image: (image_dim // patch_size) patches along each side, squared.
# Floor division keeps the result an exact int and matches the number of
# patches nn.Unfold emits when image_dim is not a multiple of patch_size.
num_tokens = (image_dim // patch_size) ** 2

--------------------
#Position Encoding
-------------------

In [None]:
def position_embedding (num_tokens : int, token_length : int, base : float = 1000):
  """Build a sinusoidal position table.

  Entry ``[i, j]`` is ``sin(i / base ** (2 * (j // 2) / token_length))`` for
  even ``j`` and the matching ``cos`` for odd ``j``.

  Args:
    num_tokens: number of positions (rows of the table).
    token_length: embedding size (columns of the table).
    base: frequency base. The default of 1000 is what this notebook has
      always used; "Attention Is All You Need" uses 10000.

  Returns:
    A CPU tensor of shape ``(num_tokens, token_length)`` in torch's default
    floating dtype.
  """
  # Angles are computed in float64 with broadcasting instead of a Python
  # loop over every element, then cast once at the end.
  rows = np.arange (num_tokens, dtype = np.float64)[:, None]
  cols = np.arange (token_length)
  exponents = (2 * (cols // 2)) / token_length
  angles = rows / np.power (float (base), exponents)

  # Even columns carry the sine, odd columns the cosine of the same angle.
  table = np.where (cols % 2 == 0, np.sin (angles), np.cos (angles))
  return torch.tensor (table, dtype = torch.get_default_dtype ())

In [None]:
# One row per patch token plus one extra row for the additional token that
# the training loop prepends to every sequence.
position_encoding = position_embedding (num_tokens+1, token_length)

In [None]:
# Add a leading axis of size 1 so the table broadcasts over the batch axis
# when it is added to a batch of token sequences.
position_encoding = position_encoding.unsqueeze (dim = 0)
print (position_encoding.shape)

torch.Size([1, 50, 50])


In [None]:
# Inspect the table, then move it to the compute device and confirm where it lives.
print (position_encoding)
position_encoding = position_encoding.to(device)
print (position_encoding.device)

tensor([[[ 0.0000,  1.0000,  0.0000,  ...,  1.0000,  0.0000,  1.0000],
         [ 0.8415,  0.5403,  0.6879,  ...,  1.0000,  0.0013,  1.0000],
         [ 0.9093, -0.4161,  0.9986,  ...,  1.0000,  0.0026,  1.0000],
         ...,
         [ 0.1236, -0.9923, -0.8892,  ...,  0.9967,  0.0619,  0.9981],
         [-0.7683, -0.6401, -0.9601,  ...,  0.9965,  0.0632,  0.9980],
         [-0.9538,  0.3006, -0.5045,  ...,  0.9964,  0.0645,  0.9979]]])
cuda:0


------------------
#Attention Block
------------------

In [None]:
class Attention(nn.Module):
    def __init__(self,
                dim: int,
                chan: int,
                num_heads: int=1,
                qkv_bias: bool=False,
                qk_scale = None):

        """ Multi-head self-attention with a value skip connection

            Args:
                dim (int): input size of a single token
                chan (int): resulting size of a single token after concatenating the heads
                num_heads(int): number of attention heads in MSA
                qkv_bias (bool): determines if the q, k and v layers learn an additive bias
                qk_scale (NoneFloat): value to scale the queries by;
                                    if None (or 0), queries are scaled by ``head_dim ** -0.5``
        """

        super().__init__()

        # Validate before deriving head_dim so a bad configuration fails with a clear message.
        assert chan % num_heads == 0, '"Chan" must be evenly divisible by "num_heads".'

        ## Define Constants
        self.num_heads = num_heads
        self.chan = chan
        self.head_dim = self.chan // self.num_heads
        self.scale = qk_scale or self.head_dim ** -0.5

        ## Define Layers
        #### Each token is projected from its starting length (dim) to the
        #### channel length (chan) three times, once each for Q, K and V
        self.q = nn.Linear(dim, chan, bias=qkv_bias)
        self.k = nn.Linear(dim, chan, bias=qkv_bias)
        self.v = nn.Linear(dim, chan, bias=qkv_bias)

        #### Output projection applied after the heads are concatenated
        self.proj = nn.Linear(chan, chan)

    def _split_heads(self, t):
        """Reshape (batch, num_tokens, chan) into (batch, heads, num_tokens, head_dim)."""
        B, N, _ = t.shape
        # Split the channel axis first, then move the head axis forward. A
        # direct reshape to (B, heads, N, head_dim) would cut across token
        # boundaries and mix features belonging to different tokens.
        return t.reshape(B, N, self.num_heads, self.head_dim).transpose(1, 2)

    def forward(self, x):
        """Apply self-attention.

            Args:
                x: tensor of shape (batch, num_tokens, dim)

            Returns:
                tensor of shape (batch, num_tokens, chan)
        """
        B, N, _ = x.shape

        ## Calculate Q, K, V
        q_matrix = self._split_heads(self.q(x))
        k_matrix = self._split_heads(self.k(x))
        v_flat = self.v(x)
        #### Dimensions: (batch, num_tokens, chan) - kept for the skip connection
        v_matrix = self._split_heads(v_flat)
        #### Dimensions: (batch, heads, num_tokens, head_dim)

        ## Calculate Attention
        attn = (q_matrix * self.scale) @ k_matrix.transpose(-2, -1)
        attn = attn.softmax(dim=-1)
        #### Dimensions: (batch, heads, num_tokens, num_tokens)

        ## Attention Layer
        #### Bring the head axis back next to head_dim before merging, so each
        #### output token is the concatenation of its own per-head results
        x = (attn @ v_matrix).transpose(1, 2).reshape(B, N, self.chan)
        #### Dimensions: (batch, num_tokens, chan)

        ## Projection Layer
        x = self.proj(x)

        ## Skip Connection Layer
        #### The input has token size ``dim`` while the output has size
        #### ``chan``, so the projected values are used for the skip connection
        return v_flat + x

In [None]:
# sample_input = torch.rand ((5, 17, 50))
# e = Attention (50, 100, 2)
# output = e (sample_input)
# print (output.shape)

----------------------------------
#Neural Net at the end of the Encoder
-----------------------------------

In [None]:
class NeuralNet(nn.Module):
    def __init__(self,
       in_chan: int,
       hidden_chan = None,
       out_chan = None,
       act_layer = nn.GELU):

        """ Two-layer feed-forward network: Linear -> activation -> Linear

            Args:
                in_chan (int): size of each input token
                hidden_chan (NoneInt): width of the hidden layer; if None, ``in_chan`` is used
                out_chan (NoneInt): size of each output token; if None, ``in_chan`` is used
                act_layer: activation to use, given either as a layer class / factory
                           (called with no arguments) or as an already built ``nn.Module``
        """

        super().__init__()

        ## Define Number of Channels
        hidden_chan = hidden_chan or in_chan
        out_chan = out_chan or in_chan

        ## Define Layers
        self.fc1 = nn.Linear(in_chan, hidden_chan)
        #### Only call ``act_layer`` when it is a class/factory: calling a built
        #### module with no arguments would run its forward pass without input
        self.act = act_layer if isinstance(act_layer, nn.Module) else act_layer()
        self.fc2 = nn.Linear(hidden_chan, out_chan)


    def forward(self, x):
        """Apply fc1, the activation and fc2 along the last axis of ``x``."""
        x = self.fc1(x)
        x = self.act(x)
        x = self.fc2(x)
        return x

-----------------------------------
#Probabilities extraction
------------------------------------


In [None]:
class probabilities (nn.Module ):
  """Classification head: Linear(in_size, 70) -> ReLU -> Linear(70, num_classes).

  The output is the raw result of the last linear layer; no softmax is applied here.
  """

  def __init__ (self,in_size : int, num_classes : int):
    super().__init__()

    hidden_width = 70
    self.layer1 = nn.Linear (in_features = in_size, out_features = hidden_width)
    self.activation = nn.ReLU()
    self.layer2 = nn.Linear (in_features = hidden_width, out_features = num_classes)

  def forward (self, x):
    """Return one score per class for every row of ``x``."""
    hidden = self.activation (self.layer1 (x))
    return self.layer2 (hidden)

--------------------------------
#Encoding Block
--------------------------------

In [None]:
class Encoding(nn.Module):

    def __init__(self,
       dim: int,
       num_heads: int=1,
       hidden_chan_mul = 4.,
       qkv_bias = False,
       qk_scale = None,
       act_layer=nn.ReLU,
       norm_layer=nn.LayerNorm,
       drop_rate: float = 0.5,
       n_classes = None):

        """ Encoding Block followed by a classification head

            Args:
                dim (int): size of a single token
                num_heads(int): number of attention heads in MSA
                hidden_chan_mul (float): multiplier to determine the number of hidden channels (features) in the NeuralNet component
                qkv_bias (bool): determines if the q, k and v layers learn an additive bias
                qk_scale (NoneFloat): value to scale the queries by;
                                    if None, queries are scaled by ``head_dim ** -0.5``
                act_layer(nn.modules.activation): torch neural network layer class to use as activation
                norm_layer(nn.modules.normalization): torch neural network layer class to use as normalization
                drop_rate (float): dropout probability applied to the attention and
                                    NeuralNet outputs before each residual addition
                n_classes (NoneInt): number of output classes; if None, the notebook-level
                                    ``num_classes`` variable is used
        """

        super().__init__()

        #### Fall back to the global so existing calls that omit n_classes behave as before
        if n_classes is None:
            n_classes = num_classes

        ## Define Layers
        self.norm1 = norm_layer(dim)
        self.attn = Attention(dim=dim,
                            chan=dim,
                            num_heads=num_heads,
                            qkv_bias=qkv_bias,
                            qk_scale=qk_scale)
        self.dropout = nn.Dropout (p = drop_rate)
        self.norm2 = norm_layer(dim)
        self.neuralnet = NeuralNet(in_chan=dim,
                                hidden_chan=int(dim*hidden_chan_mul),
                                out_chan=dim,
                                act_layer=act_layer)

        self.probabs = probabilities (dim, n_classes)


    def forward(self, x):
        """Encode ``x`` of shape (batch, num_tokens, dim) and return class scores
        of shape (batch, n_classes) computed from the first token only."""
        ## Pre-norm attention with residual connection
        x = x + self.dropout (self.attn(self.norm1(x)))
        ## Pre-norm feed-forward network with residual connection
        x = x + self.dropout (self.neuralnet(self.norm2(x)))
        ## Keep only the token at index 0 and classify it
        x = x[:, 0]
        x = self.probabs (x)
        return x

In [None]:
# Instantiate the encoder + classifier: tokens of length `token_length`,
# 2 attention heads, hidden-width multiplier 1 for the feed-forward part and
# biases enabled on the q/k/v projections; then move it to the device.
encoder  = Encoding (dim = token_length, num_heads = 2, hidden_chan_mul = 1, qkv_bias = True)
encoder = encoder.to (device)

----------------------------
#Training Loop
---------------------------

In [None]:
# Cross-entropy between the raw class scores returned by the encoder and the integer labels.
loss_fn = nn.CrossEntropyLoss ()

In [None]:
import torch.optim as optim
# Optimise the patch tokeniser together with the encoder. If only the
# encoder's parameters are registered, the tokeniser's linear projection
# keeps its random initial weights for the whole run.
optimizer = optim.SGD(list(tokeniser.parameters()) + list(encoder.parameters()), lr=0.005, momentum = 0.9)

In [None]:
epochs = 25
num_batches = len (train_loader)

for i in range (epochs):
  print (f'EPOCH : {i+1}')

  # ---------------- training pass ----------------
  # Switch to training mode once per epoch (enables dropout in the encoder).
  tokeniser.train()
  encoder.train()
  train_loss = 0.0

  for X, y in train_loader:
    X = X.to (device)
    y = y.to (device)

    X = tokeniser (X)
    # All-zero placeholder token prepended to every sequence. It is sized
    # from the actual batch, so the final (possibly smaller) batch is
    # processed too instead of being skipped.
    zero_tensor = torch.zeros ((X.shape[0], 1, token_length), device = device)
    X_appended = torch.concat ((zero_tensor, X), dim = 1)
    X_positional_encoded = X_appended + position_encoding
    X_post_encoder = encoder (X_positional_encoded)

    optimizer.zero_grad()
    loss = loss_fn (X_post_encoder, y)
    loss.backward()
    optimizer.step()
    # .item() stores a plain float, so the autograd graph of each batch can be freed.
    train_loss += loss.item()

  print (f'Train Loss for batch : {train_loss/num_batches}')

  # ---------------- evaluation pass ----------------
  tokeniser.eval()
  encoder.eval()
  tot = 0
  correct = 0
  test_loss = 0.0

  # No gradients are needed for evaluation; skipping them saves memory and time.
  with torch.no_grad():
    for X, y in test_loader:
      X = X.to (device)
      y = y.to (device)

      X = tokeniser (X)
      zero_tensor = torch.zeros ((X.shape[0], 1, token_length), device = device)
      X_appended = torch.concat ((zero_tensor, X), dim = 1)
      X_positional_encoded = X_appended + position_encoding
      X_post_encoder = encoder (X_positional_encoded)

      test_loss += loss_fn (X_post_encoder, y).item()
      predictions = torch.argmax (X_post_encoder, dim = 1)

      # Count only samples that were actually evaluated, so the accuracy
      # denominator matches the numerator.
      correct += (predictions == y).sum().item()
      tot += len (X)

  print (f'test_loss per batch = {test_loss/len (test_loader)}')
  print (f'test accuracy : {correct/tot}')











EPOCH : 1
Train Loss for batch : 2.170410394668579
test_loss per batch = 1.7404710054397583
test accuracy : 0.3108
EPOCH : 2
Train Loss for batch : 1.5191140174865723
test_loss per batch = 1.3463551998138428
test accuracy : 0.5143
EPOCH : 3
Train Loss for batch : 1.313015103340149
test_loss per batch = 1.1157431602478027
test accuracy : 0.5837
EPOCH : 4
Train Loss for batch : 1.1352977752685547
test_loss per batch = 0.9671016931533813
test accuracy : 0.6466
EPOCH : 5
Train Loss for batch : 1.0309875011444092
test_loss per batch = 0.903078019618988
test accuracy : 0.6699
EPOCH : 6
Train Loss for batch : 0.9691396355628967
test_loss per batch = 0.8638983368873596
test accuracy : 0.6864
EPOCH : 7
Train Loss for batch : 0.9266437292098999
test_loss per batch = 0.784250020980835
test accuracy : 0.7234
EPOCH : 8
Train Loss for batch : 0.8966166973114014
test_loss per batch = 0.8030461668968201
test accuracy : 0.7204
EPOCH : 9
Train Loss for batch : 0.8419533371925354
test_loss per batch = 0.

------------------
#Experimentation
------------------

In [None]:
# import torch
# import torch.nn as nn

# patch_size = 50
# unfold = nn.Unfold(kernel_size=patch_size, stride=patch_size, padding=0)

In [None]:
# x = torch.randn(1, 1, 100, 100)  # Batch size of 1, 1 channel, 100x100 image

In [None]:
# patches = unfold(x)

In [None]:
# print (x)

In [None]:
# print (patches)

In [None]:
# layer = nn.Linear (in_features = 2500, out_features = 768)

In [None]:
# input = torch.randn((100, 2500))
# print (input)

In [None]:
# output = layer(input)
# print (output.shape)

In [None]:
# batch, sentence_length, embedding_dim = 20, 5, 10
# embedding = torch.randn(batch, sentence_length, embedding_dim)
# layer_norm = nn.LayerNorm(embedding_dim)
# embedding_2 = layer_norm(embedding)

In [None]:
# embedding_1 = embedding[0][0]
# mean = embedding_1.mean()

In [None]:
# var = torch.var (embedding_1)

In [None]:
# embedding_1 = (embedding_1 - mean)/(var)**1/2
# print (embedding_1)
# print (embedding_2[0][0])

experimentation for attention

In [None]:
# a = torch.rand (3, 4, 12)
# b = a.reshape (3, 3, 2 , 4, 2)

In [None]:
# print (a)

In [None]:
# print (b)

checking how attention weights are calculated

In [None]:
# Toy dimensions for a by-hand walk-through of the attention arithmetic.
# NOTE: this rebinds `batch_size` and `num_tokens`, which earlier cells also
# define for data loading and training.
batch_size, num_heads, num_tokens, head_dim = 2, 4, 5, 6

# Random integer-valued queries and keys in [1, 9], stored as float tensors.
q = torch.randint (low = 1, high = 10, size = (batch_size, num_heads, num_tokens, head_dim)).float()
k = torch.randint (low = 1, high = 10, size = (batch_size, num_heads, num_tokens, head_dim)).float()

In [None]:
# Show the query tensor and the key tensor, separated by a newline.
print (q, '\n', k)

tensor([[[[3., 5., 5., 2., 5., 1.],
          [4., 8., 5., 6., 6., 9.],
          [6., 2., 1., 7., 4., 4.],
          [5., 7., 1., 7., 7., 2.],
          [8., 9., 7., 5., 9., 1.]],

         [[7., 2., 9., 2., 5., 6.],
          [1., 3., 8., 7., 8., 3.],
          [9., 3., 1., 3., 8., 8.],
          [6., 1., 2., 6., 5., 1.],
          [7., 2., 3., 8., 6., 2.]],

         [[3., 2., 9., 7., 8., 7.],
          [1., 1., 2., 1., 3., 2.],
          [5., 6., 7., 6., 3., 5.],
          [8., 9., 4., 4., 3., 2.],
          [6., 1., 8., 6., 2., 7.]],

         [[2., 3., 6., 5., 9., 1.],
          [4., 1., 8., 6., 2., 1.],
          [4., 9., 2., 5., 1., 7.],
          [4., 9., 6., 3., 2., 8.],
          [2., 8., 4., 7., 1., 2.]]],


        [[[5., 8., 6., 8., 9., 9.],
          [4., 9., 7., 6., 6., 2.],
          [7., 9., 4., 9., 9., 1.],
          [2., 5., 8., 4., 5., 1.],
          [8., 1., 3., 6., 8., 8.]],

         [[4., 7., 2., 3., 2., 8.],
          [1., 2., 2., 8., 1., 3.],
          [1., 9

In [None]:
# Keys with their last two axes swapped, i.e. shaped (batch, heads, head_dim, num_tokens).
print (k.transpose (-2, -1))

tensor([[[[8., 6., 7., 6., 3.],
          [2., 3., 6., 7., 8.],
          [7., 6., 7., 2., 9.],
          [5., 6., 1., 5., 2.],
          [1., 2., 3., 8., 5.],
          [8., 2., 6., 8., 7.]],

         [[8., 8., 9., 8., 3.],
          [1., 9., 7., 2., 3.],
          [5., 2., 3., 5., 7.],
          [8., 1., 8., 6., 6.],
          [8., 5., 8., 7., 6.],
          [7., 5., 4., 8., 1.]],

         [[7., 3., 9., 7., 3.],
          [2., 4., 4., 1., 7.],
          [4., 2., 7., 1., 8.],
          [2., 5., 7., 3., 3.],
          [3., 2., 2., 5., 2.],
          [4., 9., 4., 1., 7.]],

         [[6., 5., 6., 9., 6.],
          [4., 6., 7., 5., 7.],
          [7., 3., 8., 6., 1.],
          [9., 2., 3., 5., 3.],
          [5., 4., 8., 4., 1.],
          [8., 3., 1., 5., 3.]]],


        [[[4., 2., 6., 3., 9.],
          [2., 3., 2., 7., 6.],
          [3., 5., 5., 3., 5.],
          [3., 5., 3., 2., 9.],
          [5., 7., 2., 3., 8.],
          [8., 8., 2., 2., 8.]],

         [[6., 6., 4., 3., 5

In [None]:
print (q.shape)

# Raw attention scores: within each head, every query row is dotted with every key row.
k_transposed = k.transpose (-2, -1)
weights = torch.matmul (q, k_transposed)

print (weights.shape)

torch.Size([2, 4, 5, 6])
torch.Size([2, 4, 5, 5])


In [None]:
# Normalise the scores along the last axis so each query's weights sum to 1.
weights = torch.softmax (weights, dim = -1)
print (weights.shape)

torch.Size([2, 4, 5, 5])


In [None]:
# Queries in their original (batch, heads, num_tokens, head_dim) layout, for comparison with the cells below.
print (q)

tensor([[[[3., 5., 5., 2., 5., 1.],
          [4., 8., 5., 6., 6., 9.],
          [6., 2., 1., 7., 4., 4.],
          [5., 7., 1., 7., 7., 2.],
          [8., 9., 7., 5., 9., 1.]],

         [[7., 2., 9., 2., 5., 6.],
          [1., 3., 8., 7., 8., 3.],
          [9., 3., 1., 3., 8., 8.],
          [6., 1., 2., 6., 5., 1.],
          [7., 2., 3., 8., 6., 2.]],

         [[3., 2., 9., 7., 8., 7.],
          [1., 1., 2., 1., 3., 2.],
          [5., 6., 7., 6., 3., 5.],
          [8., 9., 4., 4., 3., 2.],
          [6., 1., 8., 6., 2., 7.]],

         [[2., 3., 6., 5., 9., 1.],
          [4., 1., 8., 6., 2., 1.],
          [4., 9., 2., 5., 1., 7.],
          [4., 9., 6., 3., 2., 8.],
          [2., 8., 4., 7., 1., 2.]]],


        [[[5., 8., 6., 8., 9., 9.],
          [4., 9., 7., 6., 6., 2.],
          [7., 9., 4., 9., 9., 1.],
          [2., 5., 8., 4., 5., 1.],
          [8., 1., 3., 6., 8., 8.]],

         [[4., 7., 2., 3., 2., 8.],
          [1., 2., 2., 8., 1., 3.],
          [1., 9

In [None]:
# Swap the head and token axes: (batch, heads, tokens, head_dim) -> (batch, tokens, heads, head_dim).
q_modified = q.permute (0, 2, 1, 3)
print (q_modified)

tensor([[[[3., 5., 5., 2., 5., 1.],
          [7., 2., 9., 2., 5., 6.],
          [3., 2., 9., 7., 8., 7.],
          [2., 3., 6., 5., 9., 1.]],

         [[4., 8., 5., 6., 6., 9.],
          [1., 3., 8., 7., 8., 3.],
          [1., 1., 2., 1., 3., 2.],
          [4., 1., 8., 6., 2., 1.]],

         [[6., 2., 1., 7., 4., 4.],
          [9., 3., 1., 3., 8., 8.],
          [5., 6., 7., 6., 3., 5.],
          [4., 9., 2., 5., 1., 7.]],

         [[5., 7., 1., 7., 7., 2.],
          [6., 1., 2., 6., 5., 1.],
          [8., 9., 4., 4., 3., 2.],
          [4., 9., 6., 3., 2., 8.]],

         [[8., 9., 7., 5., 9., 1.],
          [7., 2., 3., 8., 6., 2.],
          [6., 1., 8., 6., 2., 7.],
          [2., 8., 4., 7., 1., 2.]]],


        [[[5., 8., 6., 8., 9., 9.],
          [4., 7., 2., 3., 2., 8.],
          [3., 1., 5., 4., 3., 4.],
          [1., 7., 2., 2., 2., 4.]],

         [[4., 9., 7., 6., 6., 2.],
          [1., 2., 2., 8., 1., 3.],
          [7., 3., 5., 7., 7., 6.],
          [8.,

In [None]:
# Merge the head and head_dim axes so every token holds its heads side by side.
merged_shape = (batch_size, num_tokens, num_heads*head_dim)
q_ultra_modified = torch.reshape (q_modified, merged_shape)
print (q_ultra_modified)

tensor([[[3., 5., 5., 2., 5., 1., 7., 2., 9., 2., 5., 6., 3., 2., 9., 7., 8.,
          7., 2., 3., 6., 5., 9., 1.],
         [4., 8., 5., 6., 6., 9., 1., 3., 8., 7., 8., 3., 1., 1., 2., 1., 3.,
          2., 4., 1., 8., 6., 2., 1.],
         [6., 2., 1., 7., 4., 4., 9., 3., 1., 3., 8., 8., 5., 6., 7., 6., 3.,
          5., 4., 9., 2., 5., 1., 7.],
         [5., 7., 1., 7., 7., 2., 6., 1., 2., 6., 5., 1., 8., 9., 4., 4., 3.,
          2., 4., 9., 6., 3., 2., 8.],
         [8., 9., 7., 5., 9., 1., 7., 2., 3., 8., 6., 2., 6., 1., 8., 6., 2.,
          7., 2., 8., 4., 7., 1., 2.]],

        [[5., 8., 6., 8., 9., 9., 4., 7., 2., 3., 2., 8., 3., 1., 5., 4., 3.,
          4., 1., 7., 2., 2., 2., 4.],
         [4., 9., 7., 6., 6., 2., 1., 2., 2., 8., 1., 3., 7., 3., 5., 7., 7.,
          6., 8., 4., 4., 6., 9., 7.],
         [7., 9., 4., 9., 9., 1., 1., 9., 4., 3., 8., 1., 6., 2., 1., 5., 9.,
          9., 5., 9., 2., 7., 9., 5.],
         [2., 5., 8., 4., 5., 1., 3., 5., 3., 2., 6., 5., 4., 