In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import torch 
import pytorch_lightning as pl

from pytorchvideo.data import LabeledVideoDataset
from pytorchvideo.data.clip_sampling import UniformClipSampler
from pytorchvideo.transforms import ApplyTransformToKey, UniformTemporalSubsample
from torchvision.transforms import Compose, Resize

from utils.config import TRAIN_LABELS_FILE, TEST_FOLDER, TRAIN_FOLDER

In [13]:
# Build the (video_path, label_dict) pairs that are later handed to
# LabeledVideoDataset. Each row of TRAIN_LABELS_FILE is expected to look like
# "<file name>,<integer label>".
fname_label_pairs = []
# (line_number, raw_text) for every row that could not be parsed, so that
# dropped rows are visible instead of disappearing silently.
skipped_lines = []

with open(TRAIN_LABELS_FILE, 'r') as f:
    for line_no, line in enumerate(f, start=1):
        line = line.strip()
        if not line:
            # Blank lines carry no data.
            continue
        try:
            fname, label = line.split(',')
            label_value = int(label)
        except ValueError:
            # Raised for a wrong field count or a non-integer label, e.g. the
            # CSV header row (the same file is read with pandas further down
            # using a `label` column). Only this error is tolerated; anything
            # else (KeyboardInterrupt, NameError, ...) now propagates.
            skipped_lines.append((line_no, line))
            continue

        fname_label_pairs.append((
            os.path.join(TRAIN_FOLDER, fname),
            {"label": torch.tensor([label_value], dtype=torch.int32)},
        ))

if skipped_lines:
    print(
        f"Skipped {len(skipped_lines)} unparseable line(s); "
        f"first: {skipped_lines[0]}"
    )

In [14]:
# Preprocessing for the exploratory dataset: the operations below are applied
# only to the "video" entry of each sample dict, in the listed order.
clip_ops = [
    UniformTemporalSubsample(16),
    Resize(128),
]
video_only = ApplyTransformToKey(key="video", transform=Compose(clip_ops))

transform = Compose([video_only])

In [15]:
# Dataset over the parsed (path, label) pairs. Clips are drawn with a uniform
# sampler configured with 5.0 (clip duration), audio decoding is turned off,
# and every sample goes through `transform`.
clip_sampler = UniformClipSampler(5.)

train_dataset = LabeledVideoDataset(
    labeled_video_paths=fname_label_pairs,
    clip_sampler=clip_sampler,
    transform=transform,
    decode_audio=False,
)

In [22]:
from torch.utils.data import DataLoader

# Number of clips per batch when previewing the data. The previous value,
# len(fname_label_pairs), requested one batch as large as the whole label
# file, which forces every clip to be decoded and collated into memory at
# once. A small fixed size keeps `next(iter(train_loader))` cheap.
PREVIEW_BATCH_SIZE = 4

train_loader = DataLoader(train_dataset, batch_size=PREVIEW_BATCH_SIZE)

In [23]:
# Note: `loader` is an iterator over `train_loader`, not the DataLoader itself.
loader = iter(train_loader)

In [24]:
# Pull one batch from the iterator to check that loading works end to end.
batch = next(loader)

In [3]:
import pandas as pd

# Load the training labels file into a DataFrame for class-balance inspection.
df = pd.read_csv(TRAIN_LABELS_FILE)

In [13]:
# Number of rows per value of the `label` column; displayed as the cell output.
count = df['label'].value_counts()
count

0    27113
1    14048
Name: label, dtype: int64

In [14]:
# Ratio of label-0 rows to label-1 rows (about 1.93 in the recorded output);
# the same figure appears as `pos_weight` in the training section.
count[0]/count[1]

1.9300256264236901

In [8]:
# Number of directory entries in TEST_FOLDER.
len(os.listdir(TEST_FOLDER))

17794

In [186]:
from sklearn.model_selection import train_test_split

In [189]:
# Total number of parsed (path, label) pairs.
# NOTE(review): `length` is not used by any cell visible in this notebook.
length = len(fname_label_pairs)

In [196]:
# 90/10 train/validation split of the (path, label) pairs.
# - `stratify` keeps the label proportions the same in both subsets; the
#   labels are imbalanced (about 1.93 : 1 in the counts shown above).
# - `random_state` makes the split reproducible across kernel restarts.
# - `train_size` is omitted: it defaults to the complement of `test_size`.
split_labels = [int(label["label"]) for _, label in fname_label_pairs]

train, val = train_test_split(
    fname_label_pairs,
    test_size=0.1,
    random_state=42,
    stratify=split_labels,
)

# Training

In [1]:
import torch

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

from models.model import FakeVideoDetector, create_resnet, PretrainedDetector
from utils.datasets import FakeVideoDataModule

Global seed set to 42


In [2]:
import torchvision.models as models

# Backbone: torchvision's r3d_18 video model, requested with pretrained
# weights; progress=False turns off the download progress display.
base_model = models.video.r3d_18(pretrained=True, progress=False)

In [3]:
from pytorchvideo.transforms import Div255, ShortSideScale, ApplyTransformToKey, UniformTemporalSubsample
from torchvision.transforms import Compose
from torchvideo.transforms import NormalizeVideo

# Preprocessing hyper-parameters for the training pipeline.
num_frames = 8            # argument to UniformTemporalSubsample
side_size = 256           # target size passed to ShortSideScale
mean = [0.45] * 3         # per-channel mean for NormalizeVideo
std = [0.225] * 3         # per-channel std for NormalizeVideo
# frames_per_second = 30

# Operations run on the "video" entry of each sample, in this order.
video_pipeline = Compose(
    [
        UniformTemporalSubsample(num_frames),
        Div255(),
        NormalizeVideo(mean, std),
        ShortSideScale(size=side_size),
    ]
)

transforms = ApplyTransformToKey(key="video", transform=video_pipeline)

In [4]:
# Alternative model kept for reference:
# classifier = FakeVideoDetector()

# 1.93 matches the label-0 / label-1 count ratio computed in the data
# exploration section. Presumably it is forwarded to the BCEWithLogitsLoss
# listed in the model summary -- confirm in models/model.py.
pos_weight = torch.tensor([1.93])
classifier = PretrainedDetector(base_model, pos_weight=pos_weight)

datamodule = FakeVideoDataModule(
    num_workers=1,
    batch_size=4,
    transforms=transforms,
)



In [5]:
# Stop training once the logged "AUROC" metric (higher is better) has gone
# 20 checks without improving.
early_stopping = EarlyStopping(monitor="AUROC", mode="max", patience=20)

callbacks = [
    early_stopping,
    # Checkpointing is currently disabled:
    # ModelCheckpoint(
    #     dirpath=f'checkpoints',
    #     filename='{epoch}--{AUROC:.3f}', monitor="AUROC", mode="max",
    # ),
]

# Single-GPU trainer using 16-bit precision, logging every 5 steps, with
# deterministic mode switched on.
trainer = Trainer(
    gpus=1,
    callbacks=callbacks,
    log_every_n_steps=5,
    precision=16,
    deterministic=True,
)


Using native 16bit precision.
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [6]:
# Start training. The recorded output for this cell ends with a
# KeyboardInterrupt warning, i.e. this particular run was stopped manually.
trainer.fit(classifier, datamodule)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type              | Params
------------------------------------------------
0 | model     | VideoResNet       | 33.4 M
1 | metrics   | MetricCollection  | 0     
2 | loss      | BCEWithLogitsLoss | 0     
3 | out_layer | Linear            | 401   
------------------------------------------------
33.4 M    Trainable params
0         Non-trainable params
33.4 M    Total params
133.487   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  rank_zero_warn(
Global seed set to 42
  rank_zero_warn(


Training: -1it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [None]:
import torch

# Alternative backbone: fetch the `slowfast_r50` entry point from the
# facebookresearch/pytorchvideo hub repository with pretrained weights.
# This rebinds `base_model`, replacing the r3d_18 model assigned earlier.
base_model = torch.hub.load(
    "facebookresearch/pytorchvideo",
    model="slowfast_r50",
    pretrained=True,
)