In [1]:
import torch 
import torch.nn as nn
import os 

# Expose only the CUDA device with index 1 to this process.
# NOTE(review): this only has an effect if it runs before CUDA is first
# initialised in the kernel — keep it in the very first cell.
os.environ.update(CUDA_VISIBLE_DEVICES="1")

from torchinfo import summary

# The PyTorchVideo Model Zoo

In [2]:
from pytorchvideo.models import x3d

# Build an X3D network through pytorchvideo's factory and bind it to the
# notebook-level name `model` (each model cell in this section rebinds that
# same name, so only the most recently executed cell's network is kept).
# Only the arguments listed here are overridden; everything else is left at
# the factory's defaults.
model = x3d.create_x3d(
    input_channel=3,
    input_clip_length=4,
    input_crop_size=160,
    model_num_class=2,
)

In [3]:
from pytorchvideo.models import slowfast

# TODO: the original author marked this cell as unfinished without recording
# what is still left to do.
#
# Rebinds `model` to a depth-50 SlowFast network with a 2-class head.
# `input_channels` is a 2-tuple of channel counts — presumably one entry per
# SlowFast pathway; confirm against the pytorchvideo documentation.
# `norm` and `activation` are passed as classes, not instances.
model = slowfast.create_slowfast(
    model_depth=50,
    model_num_class=2,
    input_channels=(3, 3),
    norm=nn.BatchNorm3d,
    activation=nn.ReLU,
)

In [4]:
from pytorchvideo.models import r2plus1d

# Rebinds `model` to a depth-50 R(2+1)D network with a 2-class head, taking
# 3-channel input. `norm` and `activation` are passed as classes, not
# instances; all remaining options stay at the factory's defaults.
model = r2plus1d.create_r2plus1d(
    model_depth=50,
    model_num_class=2,
    input_channel=3,
    norm=nn.BatchNorm3d,
    activation=nn.ReLU,
)

In [12]:
from pytorchvideo.models import csn

# Rebinds `model` to a depth-50 CSN network with a 2-class head, taking
# 3-channel input. `norm` and `activation` are passed as classes, not
# instances; all remaining options stay at the factory's defaults.
# NOTE(review): the saved execution count for this cell (In [12]) is out of
# sequence with its neighbours — re-run the notebook top to bottom before
# trusting any saved output that depends on `model`.
model = csn.create_csn(
    model_depth=50,
    model_num_class=2,
    input_channel=3,
    norm=nn.BatchNorm3d,
    activation=nn.ReLU,
)

In [6]:
from pytorchvideo.models import resnet

# Rebinds `model` to a depth-50 3D ResNet taking 3-channel input. `norm` and
# `activation` are passed as classes, not instances; all remaining options
# stay at the factory's defaults.
# NOTE(review): this is the only factory cell that asks for a single output
# class (the others use model_num_class=2) — confirm that 1 is intended.
model = resnet.create_resnet(
    model_depth=50,
    model_num_class=1,
    input_channel=3,
    norm=nn.BatchNorm3d,
    activation=nn.ReLU,
)

In [13]:
from torchinfo import summary

# Print a layer-by-layer torchinfo report for whatever network is currently
# bound to `model`.
#
# NOTE(review): `model` is rebound by every factory cell above, so this report
# depends on which of them ran last. The saved output below ends in an output
# shape of [4, 2], which does not match the ResNet cell directly above
# (model_num_class=1); judging by the execution counts it apparently came from
# the CSN cell (In [12]). Re-run top to bottom to regenerate it.

batch_size = 4

# Size of the dummy input torchinfo pushes through the model.
# Presumably (batch, channels, frames, height, width) — confirm against the
# model's expected layout.
input_size = (batch_size, 3, 16, 224, 224)

# Columns to include in the report.
summary_columns = ["input_size", "output_size", "num_params", "kernel_size"]

model_summary = summary(
    model,
    input_size=input_size,
    col_names=summary_columns,
    depth=50,  # large nesting depth so nested submodules are listed too
)

print(model_summary)


Layer (type:depth-idx)                             Input Shape               Output Shape              Param #                   Kernel Shape
Net                                                [4, 3, 16, 224, 224]      [4, 2]                    --                        --
├─ModuleList: 1-1                                  --                        --                        --                        --
│    └─ResNetBasicStem: 2-1                        [4, 3, 16, 224, 224]      [4, 64, 16, 112, 112]     --                        --
│    │    └─Conv3d: 3-1                            [4, 3, 16, 224, 224]      [4, 64, 16, 112, 112]     28,224                    [3, 7, 7]
│    │    └─BatchNorm3d: 3-2                       [4, 64, 16, 112, 112]     [4, 64, 16, 112, 112]     128                       --
│    │    └─ReLU: 3-3                              [4, 64, 16, 112, 112]     [4, 64, 16, 112, 112]     --                        --
│    └─ResStage: 2-2                               [4, 64, 

# The Torch Hub Model Zoo

In [8]:
# Fetch the pretrained "slow_r50" entry point from the pytorchvideo repository
# via torch.hub and keep it under the name `slow`.
# NOTE(review): no branch or tag is pinned; the saved output shows the cached
# checkout is `main`, so the loaded code can change between runs.
model_name = "slow_r50"
slow = torch.hub.load(
    "facebookresearch/pytorchvideo",
    model_name,
    pretrained=True,
)

# Last expression of the cell: display the module tree.
slow

Using cache found in /root/.cache/torch/hub/facebookresearch_pytorchvideo_main


Net(
  (blocks): ModuleList(
    (0): ResNetBasicStem(
      (conv): Conv3d(3, 64, kernel_size=(1, 7, 7), stride=(1, 2, 2), padding=(0, 3, 3), bias=False)
      (norm): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation): ReLU()
      (pool): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=[0, 1, 1], dilation=1, ceil_mode=False)
    )
    (1): ResStage(
      (res_blocks): ModuleList(
        (0): ResBlock(
          (branch1_conv): Conv3d(64, 256, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
          (branch1_norm): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (branch2): BottleneckBlock(
            (conv_a): Conv3d(64, 64, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
            (norm_a): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (act_a): ReLU()
            (conv_b): Conv3d(64, 64, kernel_size=(1, 3, 3), stride=(1, 

In [9]:
from torchvision.models.video import r3d_18

# Rebinds `model` to torchvision's R3D-18, requesting pretrained weights.
# NOTE(review): recent torchvision releases prefer a `weights=` argument over
# `pretrained=` — check against the installed version before changing it.
model = r3d_18(pretrained=True)

# Last expression of the cell: display the final fully connected layer.
model.fc



Linear(in_features=512, out_features=400, bias=True)

In [10]:
# Display the `slow` network loaded from torch.hub again (same module tree as
# shown when it was first loaded).
slow

Net(
  (blocks): ModuleList(
    (0): ResNetBasicStem(
      (conv): Conv3d(3, 64, kernel_size=(1, 7, 7), stride=(1, 2, 2), padding=(0, 3, 3), bias=False)
      (norm): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation): ReLU()
      (pool): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=[0, 1, 1], dilation=1, ceil_mode=False)
    )
    (1): ResStage(
      (res_blocks): ModuleList(
        (0): ResBlock(
          (branch1_conv): Conv3d(64, 256, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
          (branch1_norm): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (branch2): BottleneckBlock(
            (conv_a): Conv3d(64, 64, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
            (norm_a): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (act_a): ReLU()
            (conv_b): Conv3d(64, 64, kernel_size=(1, 3, 3), stride=(1, 

In [11]:
import pytorchvideo.layers as layers
# Import the submodule explicitly. Accessing `layers.swish` as an attribute
# only works when something else has already imported
# `pytorchvideo.layers.swish`, which would make this cell depend on earlier
# cells having run.
from pytorchvideo.layers.swish import Swish

# Apply the Swish activation to an all-zeros 5-D tensor and inspect the
# resulting shape.
swish = Swish()
input_tensor = torch.zeros(2, 256, 4, 14, 14)
output = swish(input_tensor)

# Last expression of the cell: display the output shape.
output.shape

torch.Size([2, 256, 4, 14, 14])