In [45]:
import os

import av
import cv2
import numpy as np
import pandas as pd

from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import albumentations as A

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from transformers import AutoProcessor, AutoModel

In [46]:
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

In [47]:
batch_size = 4
root_dir = 'UCF-101/'
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

# Load pretrained transformer model

In [48]:
processor = AutoProcessor.from_pretrained("microsoft/xclip-base-patch32")
model = AutoModel.from_pretrained("microsoft/xclip-base-patch32")
model.to(device)

XCLIPModel(
  (text_model): XCLIPTextTransformer(
    (embeddings): XCLIPTextEmbeddings(
      (token_embedding): Embedding(49408, 512)
      (position_embedding): Embedding(77, 512)
    )
    (encoder): XCLIPEncoder(
      (layers): ModuleList(
        (0-11): 12 x XCLIPEncoderLayer(
          (self_attn): XCLIPAttention(
            (k_proj): Linear(in_features=512, out_features=512, bias=True)
            (v_proj): Linear(in_features=512, out_features=512, bias=True)
            (q_proj): Linear(in_features=512, out_features=512, bias=True)
            (out_proj): Linear(in_features=512, out_features=512, bias=True)
          )
          (layer_norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (mlp): XCLIPMLP(
            (activation_fn): QuickGELUActivation()
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
          )
          (layer_norm2): LayerNorm((512,), eps

# Dataset preparation

In [49]:
df = pd.read_csv("data\kinetics_700\dancing.csv")
df["video_path"] = "data/kinetics_700/clips/" + df["youtube_id"] + ".mp4"
df.head()

Unnamed: 0.1,Unnamed: 0,label,youtube_id,time_start,time_end,split,video_path
0,23948,belly dancing,uhX8rmHFLaY,116,126,train,data/kinetics_700/clips/uhX8rmHFLaY.mp4
1,23843,belly dancing,-q0Wpf0WThk,317,327,train,data/kinetics_700/clips/-q0Wpf0WThk.mp4
2,24142,belly dancing,P9mOEmlCEXY,62,72,train,data/kinetics_700/clips/P9mOEmlCEXY.mp4
3,23467,belly dancing,mm5S2ftbb-k,32,42,train,data/kinetics_700/clips/mm5S2ftbb-k.mp4
4,23982,belly dancing,TCbfIBb87hQ,134,144,train,data/kinetics_700/clips/TCbfIBb87hQ.mp4


In [50]:
print("Before:", df.shape[0])
for i, row in df.iterrows():
    if not os.path.exists(row['video_path']):
        print(row['video_path'])
        df.drop(i, inplace=True)
df.reset_index(drop=True, inplace=True)
print("After:", df.shape[0])

Before: 3000
data/kinetics_700/clips/uhX8rmHFLaY.mp4
data/kinetics_700/clips/P9mOEmlCEXY.mp4
data/kinetics_700/clips/mm5S2ftbb-k.mp4
data/kinetics_700/clips/TCbfIBb87hQ.mp4
data/kinetics_700/clips/1envheMuhqk.mp4
data/kinetics_700/clips/EsywHXAlAvA.mp4
data/kinetics_700/clips/WLRc2qJKj3I.mp4
data/kinetics_700/clips/PL3ex5IAQNw.mp4
data/kinetics_700/clips/3b0-rDnm7lc.mp4
data/kinetics_700/clips/n8OjlDQ8bAg.mp4
data/kinetics_700/clips/7g-zIlZy4JM.mp4
data/kinetics_700/clips/mK05v3IMAf8.mp4
data/kinetics_700/clips/ID7DG2Spc10.mp4
data/kinetics_700/clips/TwSgem18NZw.mp4
data/kinetics_700/clips/LL0D5nPYrE0.mp4
data/kinetics_700/clips/WLamjieXcvo.mp4
data/kinetics_700/clips/v9w9mQx7MN4.mp4
data/kinetics_700/clips/NJAIp24I9oQ.mp4
data/kinetics_700/clips/tNvKHre8hYI.mp4
data/kinetics_700/clips/_ubj_hjLdVc.mp4
data/kinetics_700/clips/dMKCYXzgHrE.mp4
data/kinetics_700/clips/JU1sgKMVIlg.mp4
data/kinetics_700/clips/iQuTmRkOuIo.mp4
data/kinetics_700/clips/5v86EutTWhI.mp4
data/kinetics_700/clips/hLm

In [52]:
X_train, X_val, _, _ = train_test_split(df, df['label'])
X_train.reset_index(drop=True, inplace=True)
X_val.reset_index(drop=True, inplace=True)

In [53]:
labels = X_train["label"].unique()
labels2id = {label: i for i, label in enumerate(labels)}

In [54]:
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.5, contrast_limit=0.5, p=0.5)
], additional_targets={
    f'image{i}': 'image' for i in range(1, 8)
})

In [57]:
from utils.video_processing import sample_frame_indices, read_video_pyav, apply_video_augmentations


class ActionDataset(Dataset):

    def __init__(self, meta, transform=None):
        self.meta = meta
        self.transform = transform

    def __len__(self):
        return len(self.meta)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        while True:
            try:
                file_path = self.meta['video_path'].iloc[idx]
                container = av.open(file_path)

                indices = sample_frame_indices(clip_len=8, frame_sample_rate=5,
                                               seg_len=container.streams.video[0].frames)
                video = read_video_pyav(container, indices)
                while video.shape[0] < 8:
                    video = np.vstack([video, video[-1:]])

            except Exception as e:
                print("loop Error: ", e)
                continue
            break

        if self.transform:
            transformed = apply_video_augmentations(video, self.transform)
            video = transformed

        inputs = processor(
            text=[self.meta['label'].iloc[idx]],
            videos=list(video),
            return_tensors="pt",
            padding='max_length',
            max_length=8
        )
        for i in inputs:
            inputs[i] = inputs[i][0]

        return inputs

    def validate_videos(self):
        for i, row in self.meta.iterrows():
            if not os.path.exists(row['video_path']):
                print(row['video_path'])
                self.meta.drop(i, inplace=True)
                continue

            self.__getitem__(i)
        self.meta.reset_index(drop=True, inplace=True)
        return self.meta

In [27]:
class VideoDataset(Dataset):
    def __init__(self, df, tokenizer, max_length=128, transform=None):
        self.df = df
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        video = self.load_video(row['video'])
        video = self.transform(video)
        text = row['text']
        text = self.tokenizer(text, max_length=self.max_length, padding='max_length', truncation=True,
                              return_tensors='pt')
        return video, text['input_ids'].squeeze(), text['attention_mask'].squeeze()

    def load_video(self, path):
        container = av.open(path)
        video = []
        for packet in container.demux():
            for frame in packet.decode():
                image = frame.to_image()
                image = np.array(image)
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                video.append(image)
        video = np.stack(video)
        return video

# Training

In [58]:
train_dataset = ActionDataset(meta=X_train)
train_dataloader = DataLoader(train_dataset, batch_size=31, shuffle=False)

In [59]:
epochs = 1
lr = 1e-5

optimizer = optim.AdamW(model.parameters(), lr)  # 289

In [60]:
for epoch in range(1):

    model.train()

    train_loss = []
    for i, batch in enumerate(tqdm(train_dataloader, desc=f"Epoch: {epoch}")):
        print(i)
        optimizer.zero_grad()

        batch = batch.to(device)

        outputs = model(**batch, return_loss=True)

        loss = outputs.loss
        loss.backward()
        optimizer.step()
        print(i)
        train_loss.append(loss.item())

    print('Training loss:', np.mean(train_loss))

    model.eval()

    val_targets = []
    val_preds = []
    for line in tqdm(X_val.itertuples()):

        while True:
            try:
                file_path = line.video_path
                container = av.open(file_path)
                indices = sample_frame_indices(clip_len=8, frame_sample_rate=5,
                                               seg_len=container.streams.video[0].frames)
                video = read_video_pyav(container, indices)
                while video.shape[0] < 8:
                    video = np.vstack([video, video[-1:]])
            except Exception as e:
                continue

            break

        inputs = processor(
            text=labels,
            videos=list(video),
            return_tensors="pt",
            padding=True,
        )

        inputs = inputs.to(device)

        with torch.no_grad():
            outputs = model(**inputs)

        logits_per_video = outputs.logits_per_video
        probs = logits_per_video.softmax(dim=1)

        val_targets.append(line.label_id)
        val_preds.append(probs.argmax(axis=1).cpu().numpy()[0])

    print('F1:', f1_score(val_targets, val_preds, average='macro'))

Epoch: 0:   0%|          | 0/28 [00:00<?, ?it/s]

Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
mmco: unref short failure
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_len

0


Unused or unrecognized kwargs: max_length, padding.


0


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

1


Unused or unrecognized kwargs: max_length, padding.


1


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

2


Unused or unrecognized kwargs: max_length, padding.


2


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

3


Unused or unrecognized kwargs: max_length, padding.


3


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

4


Unused or unrecognized kwargs: max_length, padding.


4


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

5


Unused or unrecognized kwargs: max_length, padding.


5


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

6


Unused or unrecognized kwargs: max_length, padding.


6


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

7


Unused or unrecognized kwargs: max_length, padding.


7


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

8


Unused or unrecognized kwargs: max_length, padding.


8


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

9


Unused or unrecognized kwargs: max_length, padding.


9


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

10


Unused or unrecognized kwargs: max_length, padding.


10


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

11


Unused or unrecognized kwargs: max_length, padding.


11


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

12
12


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

13


Unused or unrecognized kwargs: max_length, padding.


13


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

14


Unused or unrecognized kwargs: max_length, padding.


14


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

15


Unused or unrecognized kwargs: max_length, padding.


15


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

16


Unused or unrecognized kwargs: max_length, padding.


16


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

17


Unused or unrecognized kwargs: max_length, padding.


17


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

18


Unused or unrecognized kwargs: max_length, padding.


18


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

19


Unused or unrecognized kwargs: max_length, padding.


19


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

20


Unused or unrecognized kwargs: max_length, padding.


20


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

21


Unused or unrecognized kwargs: max_length, padding.


21


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

22


Unused or unrecognized kwargs: max_length, padding.


22


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

23


Unused or unrecognized kwargs: max_length, padding.


23


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

24


Unused or unrecognized kwargs: max_length, padding.


24


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

25


Unused or unrecognized kwargs: max_length, padding.


25


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

26


Unused or unrecognized kwargs: max_length, padding.


26


Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

27
27
Training loss: 2.374143532344273


0it [00:00, ?it/s]

ValueError: text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).

In [70]:
labels2id[val_dataset.meta.iloc[i]['label']]

7

In [71]:
val_targets = []
val_preds = []
val_dataset = ActionDataset(meta=X_val)
val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False)

for i, batch in enumerate(tqdm(val_dataloader)):
    batch = batch.to(device)

    with torch.no_grad():
        outputs = model(**batch)

    logits_per_video = outputs.logits_per_video
    probs = logits_per_video.softmax(dim=1)

    val_targets.append(labels2id[val_dataset.meta.iloc[i]['label']])
    val_preds.append(probs.argmax(axis=1).cpu().numpy()[0])

print('F1:', f1_score(val_targets, val_preds, average='macro'))

  0%|          | 0/287 [00:00<?, ?it/s]

Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or unrecognized kwargs: max_length, padding.
Unused or un

F1: 0.011464968152866241


In [None]:
# empty gpu memory
import gc

gc.collect()
torch.cuda.empty_cache()
