# Group and Shuffle Convolutions
https://pytorch.org/hub/pytorch_vision_shufflenet_v2/

https://towardsdatascience.com/a-comprehensive-introduction-to-different-types-of-convolutions-in-deep-learning-669281e58215

In [1]:
from fastai.basics import *

In [2]:
def conv1x1(in_channels, out_channels, groups=1, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=1, groups=groups, stride=stride, bias=False)

In [107]:
in_channels = 6
img_size = 5
groups = 3
out_channels = 6
vals = [x for x in range(1 * in_channels * img_size * img_size)] #torch.rand((1, in_channels, img_size, img_size))
data = torch.tensor(vals, dtype=torch.float32).view((1, in_channels, img_size, img_size)) 
data.shape, data

(torch.Size([1, 6, 5, 5]),
 tensor([[[[  0.,   1.,   2.,   3.,   4.],
           [  5.,   6.,   7.,   8.,   9.],
           [ 10.,  11.,  12.,  13.,  14.],
           [ 15.,  16.,  17.,  18.,  19.],
           [ 20.,  21.,  22.,  23.,  24.]],
 
          [[ 25.,  26.,  27.,  28.,  29.],
           [ 30.,  31.,  32.,  33.,  34.],
           [ 35.,  36.,  37.,  38.,  39.],
           [ 40.,  41.,  42.,  43.,  44.],
           [ 45.,  46.,  47.,  48.,  49.]],
 
          [[ 50.,  51.,  52.,  53.,  54.],
           [ 55.,  56.,  57.,  58.,  59.],
           [ 60.,  61.,  62.,  63.,  64.],
           [ 65.,  66.,  67.,  68.,  69.],
           [ 70.,  71.,  72.,  73.,  74.]],
 
          [[ 75.,  76.,  77.,  78.,  79.],
           [ 80.,  81.,  82.,  83.,  84.],
           [ 85.,  86.,  87.,  88.,  89.],
           [ 90.,  91.,  92.,  93.,  94.],
           [ 95.,  96.,  97.,  98.,  99.]],
 
          [[100., 101., 102., 103., 104.],
           [105., 106., 107., 108., 109.],
           [110

In [86]:
model = conv1x1(in_channels, out_channels, groups=groups)

In [91]:
res = model(data)
res.shape

torch.Size([1, 6, 5, 5])

In [5]:
def channel_shuffle(x, groups):
    # type: (torch.Tensor, int) -> torch.Tensor
    batchsize, num_channels, height, width = x.data.size()
    channels_per_group = num_channels // groups

    # reshape
    x = x.view(batchsize, groups,
               channels_per_group, height, width)

    x = torch.transpose(x, 1, 2).contiguous()

    # flatten
    x = x.view(batchsize, -1, height, width)

    return x

In [10]:
??torch.Tensor.contiguous

[0;31mDocstring:[0m
contiguous() -> Tensor

Returns a contiguous tensor containing the same data as :attr:`self` tensor. If
:attr:`self` tensor is contiguous, this function returns the :attr:`self`
tensor.
[0;31mType:[0m      method_descriptor


In [6]:
time_data = torch.rand((1000, 600, 50, 50)).to(device="cuda")

In [7]:
%%time
shuffled_data = channel_shuffle(time_data, 10)
shuffled_data.shape

CPU times: user 3.4 ms, sys: 5.16 ms, total: 8.57 ms
Wall time: 8.63 ms


torch.Size([1000, 600, 50, 50])

In [47]:
grouped_res = data.view(1, groups, img_size, img_size)
grouped_res.shape

NameError: name 'data' is not defined

## Speed of Groups

In [4]:
import timeit

In [5]:
in_size = 10000
out_size = 2000
bs = 1024

In [6]:
convs = [conv1x1(in_size, out_size, g) for g in [1, 2, 5, 10]]

In [7]:
for c in convs:
    print(f"Groups: {c.groups}, Weights: {c.weight.shape}")

Groups: 1, Weights: torch.Size([2000, 10000, 1, 1])
Groups: 2, Weights: torch.Size([2000, 5000, 1, 1])
Groups: 5, Weights: torch.Size([2000, 2000, 1, 1])
Groups: 10, Weights: torch.Size([2000, 1000, 1, 1])


In [8]:
data = torch.randn((out_size, in_size, 1, 1))

In [19]:
for c in convs:
    print(c.groups, "::", timeit.timeit(lambda: c(data), number=1))

1 :: 6.357881048694253
2 :: 3.564614233095199
5 :: 0.9097015741281211
10 :: 0.8136862958781421


In [30]:
in_size = 2
out_size = 4

In [63]:
a = conv1x1(in_size, out_size)

In [64]:
b = conv1x1(in_size, out_size, groups=2)

In [65]:
a.weight.shape, a.weight

(torch.Size([4, 2, 1, 1]),
 Parameter containing:
 tensor([[[[ 0.1615]],
 
          [[ 0.2314]]],
 
 
         [[[-0.2153]],
 
          [[-0.0568]]],
 
 
         [[[-0.0012]],
 
          [[ 0.2334]]],
 
 
         [[[-0.1612]],
 
          [[ 0.1873]]]], requires_grad=True))

In [55]:
b.weight.shape, b.weight

(torch.Size([4, 1, 1, 1]),
 Parameter containing:
 tensor([[[[-0.9219]]],
 
 
         [[[-0.0821]]],
 
 
         [[[ 0.7273]]],
 
 
         [[[-0.3001]]]], requires_grad=True))

In [60]:
def easy_filter_vals_(c):
    w_val = 1
    groups = c.groups
    for i in range(out_size):
        for j in range(in_size // groups):
            c.weight[i,j,0,0] = w_val
            w_val += 1

In [66]:
easy_filter_vals_(a)
easy_filter_vals_(b)

In [79]:
a.weight

Parameter containing:
tensor([[[[1.]],

         [[2.]]],


        [[[3.]],

         [[4.]]],


        [[[5.]],

         [[6.]]],


        [[[7.]],

         [[8.]]]], grad_fn=<CopySlices>)

In [69]:
b.weight

Parameter containing:
tensor([[[[1.]]],


        [[[2.]]],


        [[[3.]]],


        [[[4.]]]], grad_fn=<CopySlices>)

In [90]:
#data = torch.ones((1,in_size,1,1)); data
data = torch.tensor([1., 2.])
data = data[None, :, None, None]

In [91]:
a(data)

tensor([[[[ 5.]],

         [[11.]],

         [[17.]],

         [[23.]]]], grad_fn=<MkldnnConvolutionBackward>)

In [92]:
b(data)

tensor([[[[1.]],

         [[2.]],

         [[6.]],

         [[8.]]]], grad_fn=<MkldnnConvolutionBackward>)