In [1]:
# Video encoder

import sys
from pathlib import Path

import numpy as np
import torch

from torch.utils.data import DataLoader
from torchvision.transforms import Compose
from einops.layers.torch import Rearrange

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Downlaod a sample video
! wget https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp
! chmod +x yt-dlp

! ./yt-dlp https://www.youtube.com/watch?v=rEq1Z0bjdwc -f "best[height<=720]" -o video.mp4 --downloader-args "ffmpeg:-filter:v fps=25 -vcodec h264"

--2023-05-23 15:51:04--  https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp
Resolving github.com (github.com)... 140.82.114.3
Connecting to github.com (github.com)|140.82.114.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github.com/yt-dlp/yt-dlp/releases/download/2023.03.04/yt-dlp [following]
--2023-05-23 15:51:04--  https://github.com/yt-dlp/yt-dlp/releases/download/2023.03.04/yt-dlp
Reusing existing connection to github.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/307260205/80ae136d-529c-4c5f-bac4-be687c782b28?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230523%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230523T225104Z&X-Amz-Expires=300&X-Amz-Signature=da57702cdbfc8b802f127c8e9d089a86a608b87269cf054dfd2ed9635c51f6b6&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=307260205&response-content-dis

In [4]:
# Clone relevant repositories
# - ig65m-pytorch is cloned into ./ig65m (used below for the R(2+1)D-IG65M CLI and the video dataset/transforms)
# - S3D_HowTo100M is cloned into ./s3d (provides the S3D model class imported below)
! git clone https://github.com/moabitcoin/ig65m-pytorch.git ig65m
! git clone https://github.com/antoine77340/S3D_HowTo100M.git s3d

Cloning into 'ig65m'...
remote: Enumerating objects: 369, done.[K
remote: Counting objects: 100% (72/72), done.[K
remote: Compressing objects: 100% (39/39), done.[K
remote: Total 369 (delta 32), reused 53 (delta 31), pack-reused 297[K
Receiving objects: 100% (369/369), 20.56 MiB | 17.71 MiB/s, done.
Resolving deltas: 100% (220/220), done.
Cloning into 's3d'...
remote: Enumerating objects: 38, done.[K
remote: Counting objects: 100% (38/38), done.[K
remote: Compressing objects: 100% (31/31), done.[K
remote: Total 38 (delta 13), reused 21 (delta 7), pack-reused 0[K
Unpacking objects: 100% (38/38), 14.43 KiB | 307.00 KiB/s, done.


In [5]:
# Download model weights
# Both files are saved into the working directory: the S3D checkpoint
# (s3d_howto100m.pth) and the dictionary file passed to the S3D constructor (s3d_dict.npy)
! wget https://www.rocq.inria.fr/cluster-willow/amiech/howto100m/s3d_howto100m.pth
! wget https://www.rocq.inria.fr/cluster-willow/amiech/howto100m/s3d_dict.npy

--2023-05-23 15:52:31--  https://www.rocq.inria.fr/cluster-willow/amiech/howto100m/s3d_howto100m.pth
Resolving www.rocq.inria.fr (www.rocq.inria.fr)... 128.93.96.7
Connecting to www.rocq.inria.fr (www.rocq.inria.fr)|128.93.96.7|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 125031128 (119M)
Saving to: ‘s3d_howto100m.pth’


2023-05-23 15:55:14 (764 KB/s) - ‘s3d_howto100m.pth’ saved [125031128/125031128]

--2023-05-23 15:55:14--  https://www.rocq.inria.fr/cluster-willow/amiech/howto100m/s3d_dict.npy
Resolving www.rocq.inria.fr (www.rocq.inria.fr)... 128.93.96.7
Connecting to www.rocq.inria.fr (www.rocq.inria.fr)|128.93.96.7|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5830040 (5.6M)
Saving to: ‘s3d_dict.npy’


2023-05-23 15:55:18 (2.11 MB/s) - ‘s3d_dict.npy’ saved [5830040/5830040]



In [6]:
# Extract R(2+1)D-IG65M features and load them into python
# The CLI is run from inside the cloned repo, hence the ../ prefixes: it reads
# ../video.mp4 and writes the extracted features to ../video.npy
! cd ig65m/ && python -m ig65m.cli extract ../video.mp4 ../video.npy --frame-size 112

# NOTE(review): `if65m_features` looks like a typo for `ig65m_features`; the name is kept
# because the shape-check cell further down refers to it.
if65m_features = np.load("video.npy")

🐎 Running on GPU(s)
Downloading: "https://github.com/moabitcoin/ig65m-pytorch/releases/download/v1.0.0/r2plus1d_34_clip32_ig65m_from_scratch-449a7af9.pth" to /home/william/.cache/torch/hub/checkpoints/r2plus1d_34_clip32_ig65m_from_scratch-449a7af9.pth
100%|████████████████████████████████████████| 243M/243M [00:03<00:00, 79.2MB/s]
    There is an imbalance between your GPUs. You may want to exclude GPU 2 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
    environment variable.
100%|███████████████████████████████████████████| 13/13 [00:10<00:00,  1.29it/s]
🍪 Done


In [7]:
# Extract S3D_HowTo100M features
from s3d.s3dg import S3D
# Instantiate the model from the downloaded dictionary file
# (512 is presumably the embedding size; it matches the (13, 512) shape check below)
s3d_net = S3D('s3d_dict.npy', 512)
# Load the model weights. `map_location` remaps the stored tensors onto the selected
# device, so the checkpoint also loads on a CPU-only machine in case it was saved
# from a GPU.
# NOTE(review): torch.load unpickles the file; this checkpoint is downloaded from the
# network, so only load it from a source you trust.
s3d_net.load_state_dict(torch.load('s3d_howto100m.pth', map_location=device))
# Switch to inference mode before moving the model to the target device
s3d_net.eval()
s3d_net = s3d_net.to(device)

In [8]:
# Define video processing pipeline
from ig65m.ig65m.datasets import VideoDataset
from ig65m.ig65m.transforms import ToTensor, Resize, Normalize

# Per-clip preprocessing: convert to a tensor, move channels first
# ("t h w c" -> "c t h w"), then resize frames to 224x224
clip_steps = [ToTensor(), Rearrange("t h w c -> c t h w"), Resize([224, 224])]
transform_how100 = Compose(clip_steps)

# Dataset over video.mp4 with clip=32, wrapped in a loader that yields
# one clip per batch in the original order
video_dataset = VideoDataset(Path("video.mp4"), clip=32, transform=transform_how100)
vd = DataLoader(video_dataset, batch_size=1, num_workers=1, shuffle=False)

In [9]:
# Embed every clip with S3D and collect one flat vector per clip
features = []
# Inference only: disable autograd so no computation graph is built and retained per clip
with torch.no_grad():
    for _input in vd:
        clip_embedding = s3d_net(_input.to(device))["video_embedding"]
        features.append(clip_embedding.cpu().numpy().flatten())
# Stack the per-clip vectors into a single (num_clips, embedding_dim) array
s3d_features = np.stack(features)

In [12]:
# Inspect the shape of the last batch yielded by the feature-extraction loop
# (`_input` is the loop variable left over from that cell)
_input.shape

torch.Size([1, 3, 32, 224, 224])

In [10]:
# Sanity check: both extractors must produce a (13, 512) feature matrix for this video
expected_shape = (13, 512)
assert if65m_features.shape == expected_shape
assert s3d_features.shape == expected_shape

In [None]:
# Remove the downloaded files and cloned repos
# (sample video, extracted features, yt-dlp binary, S3D weights/dictionary, and both checkouts)
! rm -rf video.mp4 video.npy yt-dlp s3d_howto100m.pth s3d_dict.npy ig65m s3d