In [1]:
# read datasets/files.csv with column "file" and "label"
import pandas as pd

df = pd.read_csv('datasets/files.csv' , sep=',')

In [2]:
df.head( )

Unnamed: 0,file,label
0,000F5AD1-B29F-4AC9-873E-CB9FC12D457D.mp4,0
1,005CA393-E3DA-4845-A921-765A2D1091DD.mp4,0
2,009B7423-CE0C-4D81-9F7A-DB5B283C3946.mp4,0
3,00E2D549-34A4-488F-8402-F7CA255E8197.mp4,0
4,00F7BED6-DD5D-48B9-A38B-6B8A9112A4BD.mp4,0


In [3]:
df[ "file" ]

0      000F5AD1-B29F-4AC9-873E-CB9FC12D457D.mp4
1      005CA393-E3DA-4845-A921-765A2D1091DD.mp4
2      009B7423-CE0C-4D81-9F7A-DB5B283C3946.mp4
3      00E2D549-34A4-488F-8402-F7CA255E8197.mp4
4      00F7BED6-DD5D-48B9-A38B-6B8A9112A4BD.mp4
                         ...                   
397    FA56F667-F0E7-4E34-B5DA-8EF74137DBDD.mp4
398    FAD22055-50B3-4D46-8055-AD212B33CEB2.mp4
399    FC503F8E-5C19-4062-90F6-AD30F4E10B2E.mp4
400    FCCBFBCD-C00B-4145-BB3E-679957CCEB21.mp4
401    FF17EB94-B9A6-48E0-98E3-755CD3F0DD66.mp4
Name: file, Length: 402, dtype: object

In [4]:
df[ "file" ] = df[ "file" ].apply(lambda x : f'datasets/data/{x}')
df[ "label" ] = df[ "label" ].apply(lambda x : "real" if x == 1 else 'attack')

In [5]:
df.head( )

Unnamed: 0,file,label
0,datasets/data/000F5AD1-B29F-4AC9-873E-CB9FC12D...,attack
1,datasets/data/005CA393-E3DA-4845-A921-765A2D10...,attack
2,datasets/data/009B7423-CE0C-4D81-9F7A-DB5B283C...,attack
3,datasets/data/00E2D549-34A4-488F-8402-F7CA255E...,attack
4,datasets/data/00F7BED6-DD5D-48B9-A38B-6B8A9112...,attack


In [6]:
# split the dataset into train, test and validation
from sklearn.model_selection import train_test_split

train , test = train_test_split(df , test_size=0.2 , random_state=42)
train , val = train_test_split(train , test_size=0.2 , random_state=42)


In [7]:
val.head(100)

Unnamed: 0,file,label
291,datasets/data/379CF6ED-B164-4E5C-ACDB-67F5E23D...,real
179,datasets/data/517DC0E1-9BC6-499F-8AC8-2965DBDA...,attack
206,datasets/data/5D2206AE-A1F5-470C-8352-2E7302A1...,attack
234,datasets/data/6F3D521D-00AD-4F4B-AA8D-088494F9...,attack
67,datasets/data/1AF5C278-9CA6-4D71-8A99-39CA08D4...,attack
...,...,...
300,datasets/data/42A2E59D-0A31-4E55-A71B-7AD9898D...,real
139,datasets/data/3FFAB8BA-91A5-41D2-B433-D36F124B...,attack
58,datasets/data/15063135-BA45-404A-A90B-4B18E563...,attack
205,datasets/data/5C837C74-F651-47FA-A3AD-9E46A108...,attack


In [8]:
print("Training set:")
print(train[ 'label' ].value_counts( ))

print("\nTesting set:")
print(test[ 'label' ].value_counts( ))

print("\nValidation set:")
print(val[ 'label' ].value_counts( ))

Training set:
label
attack    170
real       86
Name: count, dtype: int64

Testing set:
label
attack    57
real      24
Name: count, dtype: int64

Validation set:
label
attack    41
real      24
Name: count, dtype: int64


In [9]:
class_labels = df[ 'label' ].unique( )

In [10]:
class_labels

array(['attack', 'real'], dtype=object)

In [11]:
label2id = { label : i for i , label in enumerate(class_labels) }
id2label = { i : label for label , i in label2id.items( ) }

In [12]:
from transformers import VideoMAEImageProcessor , VideoMAEForVideoClassification

model_ckpt = "MCG-NJU/videomae-base"
image_processor = VideoMAEImageProcessor.from_pretrained(model_ckpt)
model = VideoMAEForVideoClassification.from_pretrained(
        model_ckpt ,
        label2id=label2id ,
        id2label=id2label ,
        ignore_mismatched_sizes=True ,  # provide this in case you're planning to fine-tune an already fine-tuned checkpoint
)

Some weights of VideoMAEForVideoClassification were not initialized from the model checkpoint at MCG-NJU/videomae-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:

import pytorchvideo.data



In [14]:
from pytorchvideo.transforms import (
    ApplyTransformToKey ,
    Normalize ,
    RandomShortSideScale ,
    RemoveKey ,
    ShortSideScale ,
    UniformTemporalSubsample ,
)



In [15]:
from torchvision.transforms import (
    Compose ,
    Lambda ,
    RandomCrop ,
    RandomHorizontalFlip ,
    Resize ,
)

In [16]:
mean = image_processor.image_mean
std = image_processor.image_std
if "shortest_edge" in image_processor.size :
    height = width = image_processor.size[ "shortest_edge" ]
else :
    height = image_processor.size[ "height" ]
    width = image_processor.size[ "width" ]
resize_to = (height , width)

num_frames_to_sample = model.config.num_frames
sample_rate = 4
fps = 30
clip_duration = num_frames_to_sample * sample_rate / fps

In [17]:
train_transform = Compose(
        [
                ApplyTransformToKey(
                        key="video" ,
                        transform=Compose(
                                [
                                        UniformTemporalSubsample(num_frames_to_sample) ,
                                        Lambda(lambda x : x / 255.0) ,
                                        Normalize(mean , std) ,
                                        RandomShortSideScale(min_size=256 , max_size=320) ,
                                        RandomCrop(resize_to) ,
                                        RandomHorizontalFlip(p=0.5) ,
                                ]
                        ) ,
                ) ,
        ]
)

In [19]:
dataset_root_path = "datasets/data"

In [22]:
import os

train_dataset = pytorchvideo.data.Ucf101(
        data_path="datasets/data" ,
        clip_sampler=pytorchvideo.data.make_clip_sampler("random" , clip_duration) ,
        decode_audio=False ,
        transform=train_transform ,
)

ValueError: 'class_to_index' must have at least one entry to collect any samples.

AttributeError: module 'collections' has no attribute 'Iterable'