In [1]:
from torch.optim import AdamW
from video_transformers import VideoModel
from video_transformers.backbones.transformers import TransformersBackbone
from video_transformers.data import VideoDataModule
from video_transformers.heads import LinearHead
from video_transformers.trainer import trainer_factory
from video_transformers.utils.file import download_ucf6

# Backbone: TimeSformer checkpoint referenced by name, built with num_unfrozen_stages=1.
backbone = TransformersBackbone(
    "facebook/timesformer-base-finetuned-k400",
    num_unfrozen_stages=1,
)

# The UCF6 archive is fetched into the working directory before the data module is built.
download_ucf6("./")

# Data-module settings gathered in one mapping; the normalization statistics
# are read from the backbone.
datamodule_kwargs = {
    "train_root": "ucf6/train",
    "val_root": "ucf6/val",
    "batch_size": 4,
    "num_workers": 4,
    "num_timesteps": 8,
    "preprocess_input_size": 224,
    "preprocess_clip_duration": 1,
    "preprocess_means": backbone.mean,
    "preprocess_stds": backbone.std,
    "preprocess_min_short_side": 256,
    "preprocess_max_short_side": 320,
    "preprocess_horizontal_flip_p": 0.5,
}
datamodule = VideoDataModule(**datamodule_kwargs)

# Linear classification head sized from the backbone features and the dataset classes.
head = LinearHead(
    hidden_size=backbone.num_features,
    num_classes=datamodule.num_classes,
)
model = VideoModel(backbone, head)

optimizer = AdamW(model.parameters(), lr=1e-4)

# Look up the trainer class for the single-label task, then train for 8 epochs.
Trainer = trainer_factory("single_label_classification")
trainer = Trainer(
    datamodule,
    model,
    optimizer=optimizer,
    max_epochs=8,
)
trainer.fit()


  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at facebook/timesformer-base-finetuned-k400 were not used when initializing TimesformerModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing TimesformerModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TimesformerModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


ucf6.zip already exists. Skipping download.
Trainable parameteres: 10048518
Total parameteres: 121263366


Epoch 0 (Done) : 100%|██████████| 180/180 [01:48<00:00,  1.67 batch/s, loss=1.6964, val/f1=0.255, train/f1=0.200]
Epoch 1 (Done) : 100%|██████████| 180/180 [01:51<00:00,  1.61 batch/s, loss=0.2707, val/f1=0.925, train/f1=0.900]
Epoch 2 (Done) : 100%|██████████| 180/180 [01:55<00:00,  1.56 batch/s, loss=0.2008, val/f1=0.946, train/f1=0.996]
Epoch 3 (Done) : 100%|██████████| 180/180 [01:52<00:00,  1.60 batch/s, loss=0.1911, val/f1=0.950, train/f1=0.996]
Epoch 4 (Done) : 100%|██████████| 180/180 [01:47<00:00,  1.68 batch/s, loss=0.1768, val/f1=0.954, train/f1=0.998]
Epoch 5 (Done) : 100%|██████████| 180/180 [01:50<00:00,  1.63 batch/s, loss=0.1663, val/f1=0.945, train/f1=0.998]
Epoch 6 (Done) : 100%|██████████| 180/180 [01:52<00:00,  1.60 batch/s, loss=0.1906, val/f1=0.944, train/f1=0.998]
Epoch 7 (Done) : 100%|██████████| 180/180 [01:52<00:00,  1.60 batch/s, loss=0.1912, val/f1=0.950, train/f1=1.000]


In [None]:
from torch.optim import AdamW
from torch.nn import BCEWithLogitsLoss
from video_transformers import VideoModel
from video_transformers.backbones.transformers import TransformersBackbone
from video_transformers.data import VideoDataModule
from video_transformers.heads import MultiLabelLinearHead
from video_transformers.trainer import trainer_factory
from video_transformers.utils.file import download_ucf6

# Backbone: TimeSformer checkpoint referenced by name, built with num_unfrozen_stages=1.
backbone = TransformersBackbone(
    "facebook/timesformer-base-finetuned-k400",
    num_unfrozen_stages=1,
)

# The UCF6 archive is fetched into the working directory before the data module is built.
download_ucf6("./")

# Data-module settings gathered in one mapping; the normalization statistics
# are read from the backbone.
# NOTE(review): these are the same ucf6 roots used by the single-label cell —
# confirm the data module yields targets in the form BCEWithLogitsLoss expects.
datamodule_kwargs = {
    "train_root": "ucf6/train",
    "val_root": "ucf6/val",
    "batch_size": 4,
    "num_workers": 4,
    "num_timesteps": 8,
    "preprocess_input_size": 224,
    "preprocess_clip_duration": 1,
    "preprocess_means": backbone.mean,
    "preprocess_stds": backbone.std,
    "preprocess_min_short_side": 256,
    "preprocess_max_short_side": 320,
    "preprocess_horizontal_flip_p": 0.5,
}
datamodule = VideoDataModule(**datamodule_kwargs)

# NOTE(review): MultiLabelLinearHead is assumed to be provided by
# video_transformers.heads and to suit multi-label tasks — this cell has no
# recorded execution, so verify the import before relying on it.
head = MultiLabelLinearHead(
    hidden_size=backbone.num_features,
    num_classes=datamodule.num_classes,
)
model = VideoModel(backbone, head)

optimizer = AdamW(model.parameters(), lr=1e-4)

# Loss used for the multi-label objective.
loss_function = BCEWithLogitsLoss()

# Look up the trainer class for the multi-label task, then train for 8 epochs.
Trainer = trainer_factory("multi_label_classification")
trainer = Trainer(
    datamodule,
    model,
    optimizer=optimizer,
    loss_function=loss_function,
    max_epochs=8,
)
trainer.fit()

In [4]:
from typing import Any, List

import evaluate

class Combine:
    """Fan metric calls out to several ``evaluate`` metrics at once.

    Placeholder for ``evaluate.combine`` until
    https://github.com/huggingface/evaluate/issues/234 is fixed.

    Args:
        metrics: Entries that are strings are loaded with ``evaluate.load``;
            any other entry is used as given (e.g. an already loaded metric).
    """

    def __init__(self, metrics: List[str]):
        self.metrics = [evaluate.load(metric) if isinstance(metric, str) else metric for metric in metrics]
        # Echo the resolved metric objects to stdout on construction.
        print(self.metrics)

    def add_batch(self, predictions: Any, references: Any):
        """Forward one batch of predictions and references to every metric."""
        for metric in self.metrics:
            metric.add_batch(predictions=predictions, references=references)

    def compute(self, **kwargs):
        """Compute every metric and merge the results into a single dict.

        Args:
            **kwargs: Forwarded to each metric's ``compute``. ``zero_division``
                (default ``"warn"``) is removed from the shared keyword
                arguments and passed only to the metric whose ``name`` is
                ``"precision"``.

        Returns:
            dict: Union of the per-metric result dicts; on a key collision the
            metric that comes later in ``self.metrics`` wins.
        """
        # Single pop with a default: a separate ``kwargs.pop("zero_division")``
        # without a default raised KeyError whenever the caller omitted it.
        zero_division = kwargs.pop("zero_division", "warn")
        results = {}
        for metric in self.metrics:
            if metric.name == "precision":
                results.update(metric.compute(zero_division=zero_division, **kwargs))
            else:
                results.update(metric.compute(**kwargs))
        return results

In [7]:
# Load all three metrics with the "multilabel" config. Combine.add_batch sends
# the same predictions/references to every metric, so they must share one
# input schema; previously only f1 used "multilabel" while precision and
# recall were loaded with their default config.
loaded_metrics = Combine([evaluate.load(name, "multilabel") for name in ("f1", "precision", "recall")])
print(loaded_metrics)

[EvaluationModule(name: "f1", module_type: "metric", features: {'predictions': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None), 'references': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None)}, usage: """
Args:
    predictions (`list` of `int`): Predicted labels.
    references (`list` of `int`): Ground truth labels.
    labels (`list` of `int`): The set of labels to include when `average` is not set to `'binary'`, and the order of the labels if `average` is `None`. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class. Labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in `predictions` and `references` are used in sorted order. Defaults to None.
    pos_label (`int`): The class to be considered the positive class, in the case where `average` is set to `binary`. Defaults to 1.
 

In [9]:
# Debugging aid: IPython help lookup for evaluate.load (IPython syntax, not plain Python).
?evaluate.load

Object `evaluate.load()` not found.
