# Use pretrained backbone to get video embeddings

In [None]:
import torch
import torchvision
import pytorchvideo

In [None]:
# Release cached CUDA memory left over from earlier runs of this notebook.
torch.cuda.empty_cache()

# Pick the compute device. The second GPU ("cuda:1") is preferred, but that
# ordinal only exists when at least two GPUs are visible; on a single-GPU
# machine fall back to "cuda:0", and to the CPU when CUDA is unavailable.
# To force CPU execution, overwrite `device` with "cpu" after this cell.
if torch.cuda.is_available():
    device = "cuda:1" if torch.cuda.device_count() > 1 else "cuda:0"
else:
    device = "cpu"

print("Device: " + device)
print(f"Devices count: {torch.cuda.device_count()}")

In [None]:
from pathlib import Path

In [None]:
import json
import pandas
import numpy

In [None]:
from tqdm import tqdm

In [None]:
from misc.utils_mvit import *

In [None]:
# Dataset layout: everything lives under ./data.
data_path = Path("./data")
# Directory holding the training videos.
videos_path = data_path.joinpath("train_dataset")
# CSV file with the training metadata.
metadata_path = data_path.joinpath("train.csv")

In [None]:
# Collect the video file names under videos_path and put them in sorted order
# so that the embeddings and the uuid table written later share one ordering.
filenames = sorted(walk_directory(videos_path))

## Model

In [None]:
# Build the video transform that is handed to encode_videos below.
# NOTE(review): VideoTransform is presumably provided by the star import from
# misc.utils_mvit - confirm, and consider importing it by name.
video_transform = VideoTransform()

In [None]:
# Name of the torchvision video backbone to load. Alternative: "mvit_v1_b".
model_name = "mvit_v2_s"
# Resolve the weights enum from model_name instead of hard-coding one class,
# so changing model_name above always loads the checkpoint that matches the
# chosen architecture.
model = getattr(torchvision.models.video, model_name)(
    weights=torchvision.models.get_model_weights(model_name).KINETICS400_V1
)

In [None]:
# Move the backbone to the selected device and switch it to evaluation mode.
model = model.to(device).eval()

In [None]:
# Run every video through the backbone. The call returns a pair that is
# unpacked into the embeddings and the durations.
# The preprocessing preset is taken from the weights registered for
# model_name, so it always corresponds to the backbone that was loaded rather
# than to a different MViT variant.
embeddings, durations = encode_videos(
    videos_path,
    tqdm(filenames),
    model,
    video_transform,
    torchvision.models.get_model_weights(model_name).KINETICS400_V1.transforms(),
    device,
    batch_size=32
)

## Save embeddings

In [None]:
import os

In [None]:
# Embeddings are stored in a per-model sub-directory of the data folder;
# create it (and any missing parents) if it does not exist yet.
embeddings_path = data_path.joinpath(model_name)
embeddings_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Output locations inside the per-model directory.
# PyTorch serialization of the embeddings.
embeddings_path_torch = embeddings_path.joinpath("embeddings.pt")
# NumPy archive of the embeddings (numpy.savez appends ".npz" to this name).
embeddings_path_numpy = embeddings_path.joinpath("embeddings")
# CSV listing the uuid that belongs to each embedding.
embeddings_uuid_path = embeddings_path.joinpath("embeddings_uuid.csv")

In [None]:
# Detach and move the embeddings to host memory once, then write both formats
# from that copy. Tensor.numpy() raises for CUDA tensors, and a checkpoint
# saved from a specific GPU cannot be loaded on a machine without that device
# unless map_location is given; for a tensor that is already on the CPU this
# is a cheap no-op.
embeddings_cpu = embeddings.detach().cpu()
torch.save(embeddings_cpu, embeddings_path_torch)
# The array is passed positionally, so it is stored under the key "arr_0".
numpy.savez(embeddings_path_numpy, embeddings_cpu.numpy())

In [None]:
# The uuid of a video is the part of its file name before the first ".".
# Rows follow the order of `filenames`.
uuids = [name.partition('.')[0] for name in filenames]
embeddings_uuid = pandas.DataFrame(uuids, columns=["uuid"])

In [None]:
# Write the uuid table to CSV; index=False keeps the DataFrame index out of
# the file so it contains only the "uuid" column.
embeddings_uuid.to_csv(embeddings_uuid_path, index=False)