In [None]:
!pip3 install git+https://github.com/arogozhnikov/einops

Collecting git+https://github.com/arogozhnikov/einops
  Cloning https://github.com/arogozhnikov/einops to /tmp/pip-req-build-ct7p7ar7
  Running command git clone --filter=blob:none --quiet https://github.com/arogozhnikov/einops /tmp/pip-req-build-ct7p7ar7
  Resolved https://github.com/arogozhnikov/einops to commit 5906eb80cebc2e60b4355d4d68f7b5dc26783e45
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


# Separable convolutions

![](https://miro.medium.com/v2/resize:fit:4800/format:webp/1*o3mKhG3nHS-1dWa_plCeFw.png)

In [None]:
import torch
from torch import nn

In [None]:
class SeparableConv2d(nn.Module):
  def __init__(self, in_channels, out_channels, kernel_size, bias=False):
      """
      Used data from https://iq.opengenus.org/separable-convolution/
      """
      super(SeparableConv2d, self).__init__()
      self.depthwise = nn.Conv2d(
          in_channels,
          in_channels,
          kernel_size,
          groups=in_channels
      )
      self.pointwise = nn.Conv2d(
          in_channels,
          out_channels,
          1
      )

  def forward(self, x):
      out = self.depthwise(x)
      out = self.pointwise(out)
      return out

In [None]:
c1 = SeparableConv2d(3, 128, 3)
t = torch.rand(1, 3, 7, 7)
print(t.shape)
print(c1(t).shape)

torch.Size([1, 3, 7, 7])
torch.Size([1, 128, 5, 5])


# R(2+1) conv

https://www.tensorflow.org/tutorials/video/video_classification

https://paperswithcode.com/method/2-1-d-convolution

![alt text](https://drive.google.com/uc?export=view&id=1DDI_5xclb7wb1V2vtDzgoAKm2psjd1qb)

In [1]:
import torch
from torch import nn as nn

In [19]:
class R2_and1_conv(torch.nn.Module):
  def __init__(self, in_channels, out_channels, kernel_size):
    super().__init__()
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size

    # first step: [c, t, h, w] -> [c, t, h1, w1]
    self.conv2d = nn.Conv3d(in_channels, in_channels, (1, kernel_size[1], kernel_size[2]))
    print(self.conv2d)
    # second step: [c, t, h1, w1] -> [c, t2, h1, w1]
    self.conv1d = nn.Conv3d(in_channels, out_channels, (kernel_size[0], 1, 1))
    print(self.conv1d)

  def forward(self, x):
    # [c, t, h, w]
    x = self.conv2d(x)
    print(x.shape)
    x = self.conv1d(x)
    print(x.shape)
    return x

In [7]:
# [c, t, h, w]
x = torch.rand((1, 10, 5, 5))
conv3d = nn.Conv3d(in_channels=1, out_channels=1, kernel_size=3)

In [4]:
out_x = conv3d(x)
out_x.shape

torch.Size([1, 8, 3, 3])

In [20]:
out_x2 = R2_and1_conv(1, 1, [3, 3, 3])(x)
out_x2.shape

Conv3d(1, 1, kernel_size=(1, 3, 3), stride=(1, 1, 1))
Conv3d(1, 1, kernel_size=(3, 1, 1), stride=(1, 1, 1))
torch.Size([1, 10, 3, 3])
torch.Size([1, 8, 3, 3])


torch.Size([1, 8, 3, 3])

#Temporal attention

[GLTR](https://openaccess.thecvf.com/content_ICCV_2019/papers/Li_Global-Local_Temporal_Representations_for_Video_Person_Re-Identification_ICCV_2019_paper.pdf)

In [21]:
import torch
from torch import nn as nn

In [22]:
T = 100 #10frames
d = 20 #inner dim size
input_features = torch.rand((d, T))

In [65]:
class GLRT(nn.Module):
  def __init__(self, d):
    super().__init__()
    self.DTP = nn.ModuleList([
        nn.Sequential(
            nn.Conv1d(
                d,
                d,
                3,
                dilation=2**k,
                padding=(2**(k + 1) + 1) // 2
            )
        )
        for k in range(3)
    ])
    self.conv1 = nn.Conv1d(d, d, 3, padding=1)
    self.conv3 = nn.Conv1d(3*d, 3*d, 3, padding=1)
    print(self.DTP)
    self.softmax = nn.Softmax(dim=1)
    self.pool = nn.AdaptiveAvgPool1d((1,))


  def forward(self, f):
    F = torch.zeros((3, f.shape[0], f.shape[1]))
    for i in range(3):
      F[i] = self.DTP[i].forward(f)
    print(F.shape)
    F = F.reshape((3*f.shape[0],f.shape[1]))
    print(F.shape)
    B = self.conv3(F)
    C = self.conv3(F)
    _F = self.conv3(F)
    BC = B.t() @ C
    print(BC.shape)
    BC = self.softmax(BC)
    M = _F @ BC
    M = self.conv3(M)
    print(M.shape)
    F += M
    print(F.shape)
    F = self.pool(F)
    return F

In [66]:
print(input_features.shape)
GLRT(d)(input_features).shape

torch.Size([20, 100])
ModuleList(
  (0): Sequential(
    (0): Conv1d(20, 20, kernel_size=(3,), stride=(1,), padding=(1,))
  )
  (1): Sequential(
    (0): Conv1d(20, 20, kernel_size=(3,), stride=(1,), padding=(2,), dilation=(2,))
  )
  (2): Sequential(
    (0): Conv1d(20, 20, kernel_size=(3,), stride=(1,), padding=(4,), dilation=(4,))
  )
)
torch.Size([3, 20, 100])
torch.Size([60, 100])
torch.Size([100, 100])
torch.Size([60, 100])
torch.Size([60, 100])


torch.Size([60, 1])