# 装载谷歌云端硬盘

In [1]:
# Run this cell in Google Colab: it mounts Google Drive at /content/gdrive
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


# PyTorchVideo

## Install PyTorchVideo

In [2]:
# %pip installs into the environment of the running kernel; the version is
# pinned to the release this notebook was originally executed with.
%pip install pytorchvideo==0.1.3

Collecting pytorchvideo
  Downloading pytorchvideo-0.1.3.tar.gz (128 kB)
[K     |████████████████████████████████| 128 kB 5.4 MB/s 
[?25hCollecting fvcore
  Downloading fvcore-0.1.5.post20210924.tar.gz (49 kB)
[K     |████████████████████████████████| 49 kB 5.7 MB/s 
[?25hCollecting av
  Downloading av-8.0.3-cp37-cp37m-manylinux2010_x86_64.whl (37.2 MB)
[K     |████████████████████████████████| 37.2 MB 32 kB/s 
[?25hCollecting parameterized
  Downloading parameterized-0.8.1-py2.py3-none-any.whl (26 kB)
Collecting iopath
  Downloading iopath-0.1.9-py3-none-any.whl (27 kB)
Collecting yacs>=0.1.6
  Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Collecting pyyaml>=5.1
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 37.9 MB/s 
Collecting portalocker
  Downloading portalocker-2.3.2-py2.py3-none-any.whl (15 kB)
Building wheels for collected packages: pytorchvideo, fvcore
  Building wheel for pytorchvideo (setup.py) 

## Imports

In [None]:
import torch
import json
from torchvision.transforms import Compose, Lambda
from torchvision.transforms._transforms_video import (
    CenterCropVideo,
    NormalizeVideo,
)
from pytorchvideo.data.encoded_video import EncodedVideo
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    ShortSideScale,
    UniformTemporalSubsample,
    UniformCropVideo
)
from typing import Dict

import torchvision

  "The _functional_video module is deprecated. Please use the functional module instead."
  "The _transforms_video module is deprecated. Please use the transforms module instead."


## Load model

In [None]:
# Device on which to run the model: use the GPU when one is available and
# fall back to the CPU otherwise, so the notebook also runs on CPU-only runtimes.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Pick a pretrained model and load the pretrained weights
model_name = "slowfast_r50"
# Workaround for a bug introduced in PyTorch 1.9 (it seems to show up only on
# Colab): loading from torch.hub can fail with "HTTP Error 403: rate limit
# exceeded", so the fork-validation hook is replaced by a no-op.
# https://stackoverflow.com/questions/68901236/urllib-error-httperror-http-error-403-rate-limit-exceeded-when-loading-resnet1
# The stub accepts any arguments so it does not depend on the private hook's signature.
torch.hub._validate_not_a_forked_repo = lambda *args, **kwargs: True
# torch.hub assumes a default branch named "master"; GitHub repositories have
# been moving their default branch to "main", so the branch is given explicitly.
model = torch.hub.load("facebookresearch/pytorchvideo:main", model=model_name, pretrained=True)

# Move to the desired device and switch to eval mode
model = model.to(device).eval()

Downloading: "https://github.com/facebookresearch/pytorchvideo/archive/main.zip" to /root/.cache/torch/hub/main.zip
Downloading: "https://dl.fbaipublicfiles.com/pytorchvideo/model_zoo/kinetics/SLOWFAST_8x8_R50.pyth" to /root/.cache/torch/hub/checkpoints/SLOWFAST_8x8_R50.pyth


  0%|          | 0.00/264M [00:00<?, ?B/s]

## Setup labels

In [None]:
# -nc (no-clobber): skip the download when the file is already present, so
# re-running the cell does not leave a kinetics_classnames.json.1 copy behind.
!wget -nc https://dl.fbaipublicfiles.com/pyslowfast/dataset/class_names/kinetics_classnames.json

--2021-10-09 07:45:41--  https://dl.fbaipublicfiles.com/pyslowfast/dataset/class_names/kinetics_classnames.json
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 104.22.75.142, 104.22.74.142, 172.67.9.4, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|104.22.75.142|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10326 (10K) [text/plain]
Saving to: ‘kinetics_classnames.json’


2021-10-09 07:45:42 (80.4 MB/s) - ‘kinetics_classnames.json’ saved [10326/10326]



In [None]:
# Read the class-name file downloaded in the previous cell
with open("kinetics_classnames.json", "r") as classnames_file:
    kinetics_classnames = json.load(classnames_file)

# Invert the mapping (name -> id becomes id -> name), dropping any double
# quotes embedded in the names
kinetics_id_to_classname = {
    class_id: str(class_name).replace('"', "")
    for class_name, class_id in kinetics_classnames.items()
}

## Input transform

In [None]:
####################
# SlowFast transform
####################

# Settings consumed by the preprocessing pipeline and the clip-length
# computation defined further down in this cell.
side_size = 256  # passed to ShortSideScale(size=...)
mean = [0.45, 0.45, 0.45]  # passed to NormalizeVideo together with std
std = [0.225, 0.225, 0.225]
crop_size = 256  # passed to CenterCropVideo
num_frames = 32  # passed to UniformTemporalSubsample; also enters clip_duration
sampling_rate = 2  # only enters clip_duration
frames_per_second = 30  # only enters clip_duration; presumably the source frame rate - TODO confirm
alpha = 4  # read by PackPathway: the slow pathway keeps frames.shape[1] // alpha frames

class PackPathway(torch.nn.Module):
    """
    Split a clip tensor into the two inputs of a SlowFast model.

    The fast pathway is the clip unchanged. The slow pathway keeps
    ``T // alpha`` frames, picked at evenly spaced positions along
    dimension 1 (``T = frames.shape[1]``).

    Args:
        alpha: ratio between the number of fast and slow frames. When left as
            ``None`` (the default), the notebook-level variable ``alpha`` is
            looked up at call time, which is what the original class did.

    Returns (from ``forward``):
        ``[slow_pathway, fast_pathway]``
    """
    def __init__(self, alpha=None):
        super().__init__()
        self.alpha = alpha

    def forward(self, frames: torch.Tensor):
        # Inside this method `alpha` is the notebook-level global.
        ratio = alpha if self.alpha is None else self.alpha
        fast_pathway = frames
        num_fast_frames = frames.shape[1]
        # Evenly spaced frame positions, truncated to integer indices. They
        # are moved to the device of `frames` because index_select requires
        # the index tensor to live on the same device as its input.
        slow_indices = torch.linspace(
            0, num_fast_frames - 1, num_fast_frames // ratio
        ).long().to(frames.device)
        slow_pathway = torch.index_select(frames, 1, slow_indices)
        return [slow_pathway, fast_pathway]

# Length in seconds of the clip to load; it follows from the sampling
# settings and is therefore specific to the model.
clip_duration = num_frames * sampling_rate / frames_per_second

# Preprocessing applied, in order, to the "video" entry of a clip dict.
transform = ApplyTransformToKey(
    key="video",
    transform=Compose([
        UniformTemporalSubsample(num_frames),
        Lambda(lambda clip: clip / 255.0),  # divide the values by 255
        NormalizeVideo(mean, std),
        ShortSideScale(size=side_size),
        CenterCropVideo(crop_size),
        PackPathway(),
    ]),
)

## Load an example video

In [None]:
# Download the example video file.
# -nc (no-clobber): skip the download when archery.mp4 already exists, so
# re-running the cell does not create an archery.mp4.1 duplicate.
!wget -nc https://dl.fbaipublicfiles.com/pytorchvideo/projects/archery.mp4

--2021-10-09 07:46:45--  https://dl.fbaipublicfiles.com/pytorchvideo/projects/archery.mp4
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 104.22.75.142, 172.67.9.4, 104.22.74.142, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|104.22.75.142|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 549197 (536K) [video/mp4]
Saving to: ‘archery.mp4’


2021-10-09 07:46:47 (722 KB/s) - ‘archery.mp4’ saved [549197/549197]



In [None]:
# Example video downloaded in the previous cell
video_path = "archery.mp4"

# Time window to load, in seconds. start_sec should point at the part of
# the video where the action occurs; the window is clip_duration long.
start_sec = 0
end_sec = start_sec + clip_duration

# EncodedVideo is the helper used to load the clip
video = EncodedVideo.from_path(video_path)

# Load the clip and run it through the preprocessing transform in one step
video_data = transform(video.get_clip(start_sec=start_sec, end_sec=end_sec))

# Move every tensor of the transformed "video" entry to the target device
# and prepend a dimension of size 1 (presumably the batch dimension)
inputs = [pathway.to(device)[None, ...] for pathway in video_data["video"]]

## Get model prediction

In [None]:
# Pass the input clip through the model.
# This is inference only, so gradient tracking is switched off: no autograd
# graph is recorded for the forward pass, which saves memory.
with torch.no_grad():
    preds = model(inputs)

In [None]:
# Get the predicted classes.
# The softmax output gets its own name instead of overwriting `preds`, so the
# cell can be re-run without applying softmax to already-normalised values.
post_act = torch.nn.Softmax(dim=1)
probs = post_act(preds)
pred_classes = probs.topk(k=5).indices

# Map the predicted classes of the first (only) clip to the label names
pred_class_names = [kinetics_id_to_classname[int(class_id)] for class_id in pred_classes[0]]
print("Predicted labels: %s" % ", ".join(pred_class_names))

Predicted labels: archery, throwing axe, playing paintball, disc golfing, riding or walking with horse


# MMAction2

## Install

In [8]:
# %pip installs into the environment of the running kernel; the version is
# pinned to the release this notebook was originally executed with.
%pip install mmcv==1.3.14

Collecting mmcv
  Downloading mmcv-1.3.14.tar.gz (324 kB)
[?25l[K     |█                               | 10 kB 25.2 MB/s eta 0:00:01[K     |██                              | 20 kB 19.2 MB/s eta 0:00:01[K     |███                             | 30 kB 10.3 MB/s eta 0:00:01[K     |████                            | 40 kB 8.3 MB/s eta 0:00:01[K     |█████                           | 51 kB 4.8 MB/s eta 0:00:01[K     |██████                          | 61 kB 5.4 MB/s eta 0:00:01[K     |███████                         | 71 kB 5.9 MB/s eta 0:00:01[K     |████████                        | 81 kB 6.6 MB/s eta 0:00:01[K     |█████████                       | 92 kB 6.2 MB/s eta 0:00:01[K     |██████████▏                     | 102 kB 5.4 MB/s eta 0:00:01[K     |███████████▏                    | 112 kB 5.4 MB/s eta 0:00:01[K     |████████████▏                   | 122 kB 5.4 MB/s eta 0:00:01[K     |█████████████▏                  | 133 kB 5.4 MB/s eta 0:00:01[K     |█████████

In [10]:
# Install the `mim` package manager (via %pip, so it lands in the kernel's
# environment) and use it to install mmaction2.
%pip install git+https://github.com/open-mmlab/mim.git
!mim install mmaction2

Collecting git+https://github.com/open-mmlab/mim.git
  Cloning https://github.com/open-mmlab/mim.git to /tmp/pip-req-build-ckqg0_12
  Running command git clone -q https://github.com/open-mmlab/mim.git /tmp/pip-req-build-ckqg0_12
Collecting colorama
  Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)
Collecting model-index
  Downloading model_index-0.1.11-py3-none-any.whl (34 kB)
Collecting ordered-set
  Downloading ordered-set-4.0.2.tar.gz (10 kB)
Building wheels for collected packages: openmim, ordered-set
  Building wheel for openmim (setup.py) ... [?25l[?25hdone
  Created wheel for openmim: filename=openmim-0.1.5-py2.py3-none-any.whl size=46666 sha256=5b08d538ac5056306c4b932824c1d422bc2a65dd63bed24b0b812164079e3ea5
  Stored in directory: /tmp/pip-ephem-wheel-cache-9lg7rpy6/wheels/c3/19/91/68ae39ecb699cd4626f6984662f71231a46bfa60cf5bb94631
  Building wheel for ordered-set (setup.py) ... [?25l[?25hdone
  Created wheel for ordered-set: filename=ordered_set-4.0.2-py2.py3-none-

In [6]:
# Clone mmaction2 into Google Drive (change the target path to your own location).
# The clone is skipped when the target directory already exists: `git clone`
# refuses an existing non-empty destination, so the cell would otherwise fail on re-run.
!test -d /content/gdrive/MyDrive/ColabNotebooks/benchmarking_pytorchvideo_and_mmaction2/mmaction2 || git clone https://github.com/open-mmlab/mmaction2.git /content/gdrive/MyDrive/ColabNotebooks/benchmarking_pytorchvideo_and_mmaction2/mmaction2

Cloning into '/content/gdrive/MyDrive/ColabNotebooks/benchmarking_pytorchvideo_and_mmaction2/mmaction2'...
remote: Enumerating objects: 13952, done.[K
remote: Counting objects: 100% (207/207), done.[K
remote: Compressing objects: 100% (162/162), done.[K
remote: Total 13952 (delta 57), reused 160 (delta 45), pack-reused 13745[K
Receiving objects: 100% (13952/13952), 44.02 MiB | 10.78 MiB/s, done.
Resolving deltas: 100% (10111/10111), done.
Checking out files: 100% (909/909), done.


In [13]:
# Verify the installation
import torch
from mmaction.apis import init_recognizer, inference_recognizer

# Location of the mmaction2 clone on Google Drive (adjust to your own path);
# the config file and the demo video are both resolved relative to it.
mmaction2_dir = '/content/gdrive/MyDrive/ColabNotebooks/benchmarking_pytorchvideo_and_mmaction2/mmaction2'
config_file = f'{mmaction2_dir}/configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py'
# Use the first GPU when available, otherwise fall back to the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# No checkpoint is passed here: this cell only checks that the package works
model = init_recognizer(config_file, device=device)
# Run inference on the demo video; the result is displayed as the cell output
inference_recognizer(model, f'{mmaction2_dir}/demo/demo.mp4')

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


[(128, 67.84383),
 (398, 57.854355),
 (6, 57.40088),
 (276, 49.40184),
 (363, 49.26752)]

## Preparation

In [34]:
# Download the checkpoint from the model zoo into the checkpoints/ folder
# (the -P argument is the target directory; adjust it to your own path).
# -nc (no-clobber): skip the download when the checkpoint is already there,
# so re-running the cell does not fetch the file again into a ".1" copy.
!wget -nc https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth -P /content/gdrive/MyDrive/ColabNotebooks/benchmarking_pytorchvideo_and_mmaction2/mmaction2/checkpoints/

--2021-10-09 09:12:47--  https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth
Resolving download.openmmlab.com (download.openmmlab.com)... 47.252.96.35
Connecting to download.openmmlab.com (download.openmmlab.com)|47.252.96.35|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 97579339 (93M) [application/octet-stream]
Saving to: ‘/content/gdrive/MyDrive/ColabNotebooks/benchmarking_pytorchvideo_and_mmaction2/mmaction2/checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth’


2021-10-09 09:13:01 (7.63 MB/s) - ‘/content/gdrive/MyDrive/ColabNotebooks/benchmarking_pytorchvideo_and_mmaction2/mmaction2/checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth’ saved [97579339/97579339]



## Imports

In [37]:
import os

# Record the mmaction2 directory in the MMACTION2 environment variable
# (change the path to wherever mmaction2 lives at runtime) ...
os.environ['MMACTION2'] = '/content/gdrive/MyDrive/ColabNotebooks/benchmarking_pytorchvideo_and_mmaction2/mmaction2'
# ... and make that same directory the working directory.
os.chdir(os.environ['MMACTION2'])

In [38]:
import torch

from mmaction.apis import init_recognizer, inference_recognizer

## Load model

In [39]:
# Both paths are relative, so they are resolved against the current working directory.
config_file = 'configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py'
# Checkpoint downloaded from the model zoo and placed in the `checkpoints/` folder
checkpoint_file = 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'

# Select the device: the first GPU when available, otherwise the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Build the model from the config file and the checkpoint
model = init_recognizer(config_file, checkpoint_file, device=device)

Use load_from_local loader


## Get model prediction

In [44]:
# Run inference on a single video; the results are shown by the next cell
video = 'demo/demo.mp4'
# labels = 'tools/data/kinetics/label_map_k400.txt'
results = inference_recognizer(model, video)

In [45]:
# Show the results.
# Read the label list, one label per line; the `with` block closes the file.
with open('tools/data/kinetics/label_map_k400.txt') as label_file:
    labels = [line.strip() for line in label_file]

# Pair every score with its label. The outcome gets its own name so that
# `results` keeps the raw (class index, score) pairs and the cell can be re-run.
labeled_results = [(labels[class_id], score) for class_id, score in results]

print('The top-5 labels with corresponding scores are:')
for label, score in labeled_results:
    print(f'{label}: ', score)

The top-5 labels with corresponding scores are:
arm wrestling:  29.616438
rock scissors paper:  10.754841
shaking hands:  9.908401
clapping:  9.189911
massaging feet:  8.305306
