In [None]:
# run this cell to download the right packages (only needed once)
!python --version

!pip install cifar10
!pip install imageio numpy scipy    
!pip install git+https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks

Python 3.9.16
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting cifar10
  Downloading cifar10-1.0.0-py3-none-any.whl (7.9 kB)
Installing collected packages: cifar10
Successfully installed cifar10-1.0.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks
  Cloning https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks to /tmp/pip-req-build-we8ykjyv
  Running command git clone --filter=blob:none --quiet https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks /tmp/pip-req-build-we8ykjyv
  Resolved https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks to commit 28caa7cde240e354fd7b87280450fd233cd494c3
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building w

In [None]:
import time
import torch

import numpy as np
import torch.nn as nn
import torch.nn.functional as F

from pathlib import Path
from torch.utils.data import DataLoader
from torchsummary import summary
from torchvision import datasets, transforms

from core_qnn.quaternion_layers import QuaternionConv, QuaternionLinear
from core_qnn.quaternion_ops import check_input, q_normalize

# Use the GPU only when one is really present. `is_available` has to be *called*:
# the bare function object is always truthy and would select 'cuda' unconditionally.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
%%time

# Fetch CIFAR-10 into ./data and expose its train and test splits.
# Both splits get the same preprocessing: convert to a tensor, then normalise
# every channel with mean 0.5 and standard deviation 0.5.
transform_train = transforms.Compose([
  transforms.ToTensor(),
  transforms.Normalize(mean=(.5, .5, .5), std=(.5, .5, .5)),
])
transform_test = transforms.Compose([
  transforms.ToTensor(),
  transforms.Normalize(mean=(.5, .5, .5), std=(.5, .5, .5)),
])

train_set = datasets.CIFAR10(
  root='./data',
  train=True,
  transform=transform_train,
  download=True,
)
test_set = datasets.CIFAR10(
  root='./data',
  train=False,
  transform=transform_test,
  download=True,
)

# Human-readable label names, indexed by class id.
classes = (
  'plane', 'car', 'bird', 'cat', 'deer',
  'dog', 'frog', 'horse', 'ship', 'truck',
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 69836548.66it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
CPU times: user 3.55 s, sys: 1 s, total: 4.55 s
Wall time: 9.69 s


In [None]:
class QuaternionLin(nn.Module):
  """Reproduction class of the quaternion linear layer.

  The layer keeps four real weight matrices (r, i, j, k), each of shape
  ``(in_channels // 4, out_channels // 4)``, and assembles them on every
  forward pass into one real ``(in_channels, out_channels)`` matrix laid out
  according to the Hamilton product.

  Args:
    in_channels: Number of real input features (four per quaternion).
    out_channels: Number of real output features (four per quaternion).
    dimension: Unused; kept so existing call sites keep working.
    bias: When true, a zero-initialised bias of size ``out_channels`` is learnt.
  """

  def __init__(self, in_channels, out_channels, dimension=2, bias=True):
    """Create the quaternion linear layer."""
    super().__init__()

    # Plain integer division keeps the sizes as Python ints instead of NumPy scalars.
    self.in_channels = in_channels // 4
    self.out_channels = out_channels // 4

    self.weight_shape = self.get_weight_shape(self.in_channels, self.out_channels)
    self._weights = self.weight_tensors(self.weight_shape)

    # weight_tensors returns the components in (r, i, j, k) order; unpack them
    # in that same order so every attribute holds the component it is named after.
    self.r_weight, self.i_weight, self.j_weight, self.k_weight = self._weights

    if bias:
      self.bias = nn.Parameter(torch.zeros(out_channels))
    else:
      # Register the attribute explicitly so forward() can test it for None
      # instead of failing with AttributeError when bias=False.
      self.register_parameter('bias', None)

  def forward(self, input_x):
    """Apply forward pass of input through quaternion linear layer.

    Args:
      input_x: Tensor whose last dimension has ``4 * self.in_channels`` entries.

    Returns:
      Tensor with the same leading dimensions and ``4 * self.out_channels``
      entries in the last dimension.
    """
    r, i, j, k = self.r_weight, self.i_weight, self.j_weight, self.k_weight

    # One column block per output component; stacking along dim 0 lines the
    # rows up with the (r, i, j, k) layout of the input features.
    cat_kernels_4_r = torch.cat([r, -i, -j, -k], dim=0)
    cat_kernels_4_i = torch.cat([i, r, -k, j], dim=0)
    cat_kernels_4_j = torch.cat([j, k, r, -i], dim=0)
    cat_kernels_4_k = torch.cat([k, -j, i, r], dim=0)

    cat_kernels_4_quaternion = torch.cat(
        [cat_kernels_4_r, cat_kernels_4_i, cat_kernels_4_j, cat_kernels_4_k], dim=1)

    if self.bias is None:
      return torch.matmul(input_x, cat_kernels_4_quaternion)

    if input_x.dim() == 2:
      # Fused bias + matmul for the usual (batch, features) input.
      return torch.addmm(self.bias, input_x, cat_kernels_4_quaternion)

    # addmm only accepts 2-D operands; broadcast the bias for any other rank.
    return torch.matmul(input_x, cat_kernels_4_quaternion) + self.bias

  @staticmethod
  def weight_tensors(weight_shape):
    """Create and initialise the weight tensors according to quaternion rules.

    Each weight is built in polar form: a Xavier-uniform modulus, a random
    imaginary direction (the three imaginary parts are scaled so their absolute
    values sum to one) and a phase drawn uniformly from [-pi, pi).

    Args:
      weight_shape: ``(rows, columns)`` of every component matrix.

    Returns:
      Tuple ``(r_weight, i_weight, j_weight, k_weight)`` of ``nn.Parameter``.
    """
    modulus = nn.init.xavier_uniform_(torch.empty(*weight_shape), gain=1.0)

    i_weight = 2.0 * torch.rand(*weight_shape) - 1.0
    j_weight = 2.0 * torch.rand(*weight_shape) - 1.0
    k_weight = 2.0 * torch.rand(*weight_shape) - 1.0

    # Clamp so a (vanishingly unlikely) all-zero draw cannot produce NaN/inf weights.
    sum_imaginary_parts = (i_weight.abs() + j_weight.abs() + k_weight.abs()).clamp_min(1e-12)

    i_weight = torch.div(i_weight, sum_imaginary_parts)
    j_weight = torch.div(j_weight, sum_imaginary_parts)
    k_weight = torch.div(k_weight, sum_imaginary_parts)

    phase = torch.rand(*weight_shape) * (2.0 * np.pi) - np.pi

    # Stay inside torch for the trigonometry rather than routing tensors through NumPy ufuncs.
    cos_phase = torch.cos(phase)
    sin_phase = torch.sin(phase)

    r_weight = modulus * cos_phase
    i_weight = modulus * i_weight * sin_phase
    j_weight = modulus * j_weight * sin_phase
    k_weight = modulus * k_weight * sin_phase

    return nn.Parameter(r_weight), nn.Parameter(i_weight), nn.Parameter(j_weight), nn.Parameter(k_weight)

  @staticmethod
  def get_weight_shape(in_channels, out_channels):
    """Construct weight shape based on the input/output channels."""
    return (in_channels, out_channels)

  def __repr__(self):
    """Return the class name with the per-component (quaternion) channel counts."""
    return self.__class__.__name__ + '(' \
        + 'in_channels='      + str(self.in_channels) \
        + ', out_channels='   + str(self.out_channels) + ')'


In [None]:
%%time

class CustomQCNN(nn.Module):
  """Reproduction QCNN to validate quaternion convolution layer.

  Layout: two quaternion convolutions, max-pool and dropout, repeated twice,
  followed by a quaternion dense layer, dropout, a real-valued dense layer and
  a softmax over dim 1.

  Args:
    in_channels: Channel count of the input images.
    hidden_channels: Indexable with at least four entries; the output channel
      counts of the four convolutions, in order.
    out_features: Size of the final output vector.
    kernel_size: Kernel size handed to every convolution.
  """

  def __init__(self, in_channels, hidden_channels, out_features, kernel_size):
    """Build the layers in the order they are applied in forward()."""
    super().__init__()

    stride = 1

    # First convolutional stage.
    self.conv_1 = QuaternionConv(in_channels, hidden_channels[0], kernel_size, stride)
    self.conv_2 = QuaternionConv(hidden_channels[0], hidden_channels[1], kernel_size, stride)

    self.pool_1 = nn.MaxPool2d(2, 2)
    self.dropout_1 = nn.Dropout(0.25)

    # Second convolutional stage.
    self.conv_3 = QuaternionConv(hidden_channels[1], hidden_channels[2], kernel_size, stride)
    self.conv_4 = QuaternionConv(hidden_channels[2], hidden_channels[3], kernel_size, stride)

    self.pool_2 = nn.MaxPool2d(2, 2)
    self.dropout_2 = nn.Dropout(0.25)

    # Classifier head. 12800 is hard-coded: it is the flattened size of the
    # second stage's output for the configuration this notebook uses.
    self.fc_1 = QuaternionLin(12800, 512)
    self.fc_2 = nn.Linear(512, out_features)

    self.dropout_3 = nn.Dropout(0.5)
    self.sm = nn.Softmax(dim=1)

  def forward(self, x):
    """Run a batch through the network and return softmax-normalised scores.

    The result has already passed through ``nn.Softmax(dim=1)``, i.e. it holds
    probabilities rather than raw logits.
    """
    # Stage 1: conv -> relu -> conv -> relu -> pool -> dropout.
    features = F.relu(self.conv_1(x))
    features = F.relu(self.conv_2(features))
    features = self.dropout_1(self.pool_1(features))

    # Stage 2: same pattern with the deeper convolutions.
    features = F.relu(self.conv_3(features))
    features = F.relu(self.conv_4(features))
    features = self.dropout_2(self.pool_2(features))

    # Collapse everything but the batch dimension for the dense layers.
    flat = torch.flatten(features, start_dim=1)

    hidden = self.dropout_3(F.relu(self.fc_1(flat)))
    scores = self.fc_2(hidden)

    return self.sm(scores)

# Model parameters
in_channels = 4
hidden_channels = [64, 128, 256, 512]
out_features = 10
kernel_size = (3, 3)

batch_size = 32

custom_qcnn = CustomQCNN(in_channels, hidden_channels, out_features, kernel_size)
# Place the model on the same device that is handed to `summary` below; an
# unconditional `.cuda()` would disagree with `device` whenever it is the CPU.
custom_qcnn = custom_qcnn.to(device)

print("Number of trainable parameters: ", sum(p.numel() for p in custom_qcnn.parameters() if p.requires_grad))
# NOTE(review): the per-layer "Param #" column that torchsummary prints for the
# quaternion layers appears to under-count (its total differs from the number
# printed above) - treat the count printed above as the authoritative one.
summary(custom_qcnn, input_size=(in_channels, 32, 32), batch_size=batch_size, device=device.type)

Number of trainable parameters:  2032650
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
    QuaternionConv-1           [32, 64, 30, 30]              64
    QuaternionConv-2          [32, 128, 28, 28]             128
         MaxPool2d-3          [32, 128, 14, 14]               0
           Dropout-4          [32, 128, 14, 14]               0
    QuaternionConv-5          [32, 256, 12, 12]             256
    QuaternionConv-6          [32, 512, 10, 10]             512
         MaxPool2d-7            [32, 512, 5, 5]               0
           Dropout-8            [32, 512, 5, 5]               0
     QuaternionLin-9                  [32, 512]             512
          Dropout-10                  [32, 512]               0
           Linear-11                   [32, 10]           5,130
          Softmax-12                   [32, 10]               0
Total params: 6,602
Trainable params: 5,130
Non-trainable para

In [None]:
%%time
# Reference implementation from core_qnn, sized like the network's quaternion dense layer.
paper_qcnn_lin_layer = QuaternionLinear(12800, 512)

# Count only the parameters that take part in gradient updates.
print(
  "Number of trainable parameters: ",
  sum(map(torch.numel, filter(lambda param: param.requires_grad, paper_qcnn_lin_layer.parameters()))),
)

# Show the first 50 values of row 0 of the i-component weights.
paper_qcnn_lin_layer.i_weight[0][:50]

Number of trainable parameters:  1638912
CPU times: user 2.46 s, sys: 4.87 ms, total: 2.46 s
Wall time: 2.53 s


tensor([-9.8619e-03,  1.7768e-02, -3.9290e-03, -8.4551e-03, -9.6563e-03,
         6.3405e-03, -9.6673e-03, -8.2083e-03,  1.0748e-02,  1.6635e-03,
         7.7834e-03, -1.8080e-02,  2.0989e-04, -1.2731e-02, -6.1434e-03,
        -8.1282e-03,  1.1271e-02, -1.2184e-02,  1.2324e-03,  1.0997e-02,
        -4.6240e-03,  1.9024e-02,  3.5174e-03,  7.7954e-03,  6.4949e-03,
         2.2042e-02, -1.5830e-03, -1.0838e-02, -5.8510e-03,  3.0616e-03,
        -6.7673e-03, -1.7402e-02, -9.4927e-03, -1.1655e-02, -4.9278e-04,
         7.3662e-04, -1.3883e-02, -9.8667e-06,  9.0085e-03,  2.5610e-03,
        -9.9933e-03, -1.7828e-03,  6.2730e-03, -4.0444e-03, -7.5603e-04,
        -6.5449e-03, -2.2364e-03,  8.6866e-04, -3.7308e-04,  1.2813e-02],
       grad_fn=<SliceBackward0>)

In [None]:
%%time
# Reproduced layer, built with the same sizes so it can be compared with the reference one.
custom_qcnn_lin_layer = QuaternionLin(12800, 512)

# Count only the parameters that take part in gradient updates.
print(
  "Number of trainable parameters: ",
  sum(map(torch.numel, filter(lambda param: param.requires_grad, custom_qcnn_lin_layer.parameters()))),
)

# Show the first 50 values of row 0 of the i-component weights.
custom_qcnn_lin_layer.i_weight[0][:50]

Number of trainable parameters:  1638912
CPU times: user 31.7 ms, sys: 0 ns, total: 31.7 ms
Wall time: 31.7 ms


tensor([ 4.2403e-05, -3.0373e-03,  2.4217e-04,  1.2262e-02,  4.8097e-04,
         3.4563e-04,  1.0125e-02,  4.6110e-05, -4.4688e-03,  2.1976e-03,
        -1.1161e-02,  1.7865e-03,  2.6552e-04, -2.0037e-03,  7.9006e-03,
         5.3779e-03, -5.1694e-03, -3.0157e-03,  9.0666e-03,  5.5432e-04,
         1.8890e-04, -2.3435e-03,  1.7509e-02,  3.2966e-03, -3.0224e-03,
        -7.9977e-04, -4.0333e-03,  1.6774e-03, -1.7125e-03,  4.8665e-04,
        -1.2629e-04,  3.5356e-03, -1.4928e-02,  1.4676e-02, -1.2352e-04,
        -3.9960e-03,  6.0097e-03, -2.1924e-04, -7.7298e-03,  9.5267e-04,
        -5.3146e-03, -3.2860e-03,  7.6674e-03,  4.6479e-03, -2.3704e-03,
         1.2056e-04,  8.2204e-03,  3.5477e-04, -6.8082e-03, -3.2168e-03],
       grad_fn=<SliceBackward0>)

In [None]:
%%time
num_epochs = 80
amount_of_trainings = 3

learning_rate = 0.0001
# NOTE(review): despite its name this value is passed to RMSprop as `weight_decay`
# (an L2 penalty on the weights), not as a learning-rate schedule - confirm intent.
learning_rate_decay = 1e-6

batch_size = 32

LOG_EVERY_N_EPOCHS = 10
# Lower bound applied to probabilities before taking their log (avoids log(0) = -inf).
PROB_EPS = 1e-12

custom_qcnn_accs = []
trainings_seed_excution_time = []


def _to_quaternion_batch(rgb_batch):
  """Append an all-zero fourth channel to an image batch and validate the result."""
  zeros_channel = torch.zeros((rgb_batch.shape[0], 1, rgb_batch.shape[2], rgb_batch.shape[3]))
  quaternion_batch = torch.cat([rgb_batch, zeros_channel], dim=1)

  # Check if the input size is correct
  check_input(quaternion_batch)

  return quaternion_batch


for training_seed in range(amount_of_trainings):
  print(f'Start training seed {training_seed + 1}')
  start_time_training_seed = time.time()

  # Actually seed the RNGs so each "training seed" is a reproducible, distinct run.
  # NumPy is seeded as well in case the imported quaternion layers draw from it.
  torch.manual_seed(training_seed)
  np.random.seed(training_seed)

  train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2)
  test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=2)

  # A fresh model per run, placed on the configured device instead of a hard-coded GPU.
  custom_qcnn = CustomQCNN(in_channels, hidden_channels, out_features, kernel_size)
  custom_qcnn = custom_qcnn.to(device)

  optimizer = torch.optim.RMSprop(custom_qcnn.parameters(), lr=learning_rate, weight_decay=learning_rate_decay)
  # The network already ends in a softmax, so it outputs probabilities.
  # CrossEntropyLoss would apply log-softmax a second time; NLLLoss on the log
  # of the probabilities is the correct cross-entropy for this output.
  criterion = nn.NLLLoss()

  # 1-based and inclusive so that exactly `num_epochs` epochs are run.
  for epoch in range(1, num_epochs + 1):

    custom_qcnn.train()

    for x_batch, y_batch in train_loader:
      x_batch = _to_quaternion_batch(x_batch).to(device)
      y_batch = y_batch.to(device)

      # Perform forward pass
      y_pred = custom_qcnn(x_batch)

      # Compute the loss
      loss = criterion(torch.log(y_pred.clamp_min(PROB_EPS)), y_batch)

      # Backpropagation
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

    if epoch % LOG_EVERY_N_EPOCHS == 0:
      # Report the epoch that has just finished (previously printed as epoch + 1).
      print(f'Epoch [{epoch}/{num_epochs}], Last loss: {loss.item():.4f}')

  # Evaluate on the held-out split without tracking gradients.
  custom_qcnn.eval()

  n_correct = 0
  n_samples = 0

  with torch.no_grad():
    for x_batch, y_batch in test_loader:
      x_batch = _to_quaternion_batch(x_batch).to(device)
      y_batch = y_batch.to(device)

      # Perform forward pass
      y_pred = custom_qcnn(x_batch)

      _, predicted = torch.max(y_pred, 1)
      n_samples += y_batch.size(0)
      n_correct += (predicted == y_batch).sum().item()

  acc = 100 * n_correct / n_samples
  custom_qcnn_accs.append(acc)

  elapsed_training_time = int(time.time() - start_time_training_seed)
  # Store the duration of the run (the start timestamp was stored here before).
  trainings_seed_excution_time.append(elapsed_training_time)

  print(f'Finished training seed {training_seed + 1}, accuracy of the network: {acc}%, elapsed time: {elapsed_training_time} sec')

print(f'Average accuracy over {amount_of_trainings}, {num_epochs} epochs each results in: {sum(custom_qcnn_accs) / amount_of_trainings}')

Start training seed 1
Epoch [11/80], Last loss: 1.9361
Epoch [21/80], Last loss: 1.6766
Epoch [31/80], Last loss: 1.5703
Epoch [41/80], Last loss: 1.8227
Epoch [51/80], Last loss: 1.5918
Epoch [61/80], Last loss: 1.5288
Epoch [71/80], Last loss: 1.6498
Finished training seed 1, accuracy of the network: 76.81%, elapsed time: 1778 sec
Start training seed 2
Epoch [11/80], Last loss: 1.7154
Epoch [21/80], Last loss: 1.7754
Epoch [31/80], Last loss: 1.5692
Epoch [41/80], Last loss: 1.5782
Epoch [51/80], Last loss: 1.6479
Epoch [61/80], Last loss: 1.4617
Epoch [71/80], Last loss: 1.5288
Finished training seed 2, accuracy of the network: 78.19%, elapsed time: 1747 sec
Start training seed 3
Epoch [11/80], Last loss: 1.6538
Epoch [21/80], Last loss: 1.7058
Epoch [31/80], Last loss: 1.5524
Epoch [41/80], Last loss: 1.6071
Epoch [51/80], Last loss: 1.5236
Epoch [61/80], Last loss: 1.5637
Epoch [71/80], Last loss: 1.5399
Finished training seed 3, accuracy of the network: 78.02%, elapsed time: 1747