In [2]:
!pip install pytorchvideo

Collecting pytorchvideo
  Downloading pytorchvideo-0.1.5.tar.gz (132 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.7/132.7 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting fvcore (from pytorchvideo)
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting av (from pytorchvideo)
  Downloading av-11.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (32.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.9/32.9 MB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting parameterized (from pytorchvideo)
  Downloading parameterized-0.9.0-py2.py3-none-any.whl (20 kB)
Collecting iopath (from pytorchvideo)
  Downloading iopath-0.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━

In [14]:
import sys
sys.path.append('pytorchvideo')
from pytorchvideo.data import LabeledVideoDataset


In [15]:
from glob import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



In [16]:
nvlc = glob("/content/drive/MyDrive/Bonuxon/Data/Train/TV_Content/*")
vlc = glob("/content/drive/MyDrive/Bonuxon/Data/Train/Advertisement/*")

label = [0]*len(nvlc)+[1]*len(vlc)

df = pd.DataFrame(zip(nvlc+vlc, label), columns = ['file', 'label'])
print('TV Content ', len(nvlc))
print('Advert ', len(vlc))
df.head()




TV Content  12
Advert  12


Unnamed: 0,file,label
0,/content/drive/MyDrive/Bonuxon/Data/Train/TV_C...,0
1,/content/drive/MyDrive/Bonuxon/Data/Train/TV_C...,0
2,/content/drive/MyDrive/Bonuxon/Data/Train/TV_C...,0
3,/content/drive/MyDrive/Bonuxon/Data/Train/TV_C...,0
4,/content/drive/MyDrive/Bonuxon/Data/Train/TV_C...,0


In [17]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [18]:
from sklearn.model_selection import train_test_split
train_df, val_df = train_test_split(df, test_size=0.2, shuffle = True)
len(train_df), len(val_df)

(19, 5)

In [19]:
train_df.head()

Unnamed: 0,file,label
4,/content/drive/MyDrive/Bonuxon/Data/Train/TV_C...,0
6,/content/drive/MyDrive/Bonuxon/Data/Train/TV_C...,0
15,/content/drive/MyDrive/Bonuxon/Data/Train/Adve...,1
11,/content/drive/MyDrive/Bonuxon/Data/Train/TV_C...,0
10,/content/drive/MyDrive/Bonuxon/Data/Train/TV_C...,0


Augmentation


In [20]:
from pytorchvideo.data import LabeledVideoDataset, make_clip_sampler, labeled_video_dataset

from pytorchvideo.transforms import (
    ApplyTransformToKey,
    Normalize,
    RandomShortSideScale,

    UniformTemporalSubsample,
    Permute
)

from  torchvision.transforms import (
    Compose,
    Lambda,
    RandomCrop,
    RandomHorizontalFlip,
    Resize

)

from torchvision.transforms._transforms_video import (
    CenterCropVideo,
    NormalizeVideo
)

In [21]:
video_transform = Compose([
    ApplyTransformToKey(key='video',
    transform=Compose([
        UniformTemporalSubsample(20),
        Lambda(lambda x:x/255),
        Normalize((0.45,0.45,0.45),(0.225,0.225,0.225)),
        RandomShortSideScale(min_size=248, max_size=256),
        CenterCropVideo(224),
        RandomHorizontalFlip(p=0.5)
    ])
    )
])

In [35]:
from torch.utils.data import DataLoader

train_df.to_csv('/content/drive/MyDrive/Bonuxon/Data/Train/train.csv', index=False)

train_dataset = labeled_video_dataset( data_path="/content/drive/MyDrive/Bonuxon/Data/Train/train.csv",
                                      clip_sampler=make_clip_sampler('random',2),
                                      transform=video_transform,decode_audio=False # fro the smart tv project it will be true

                                      )

loader = DataLoader(train_dataset, batch_size= 5, num_workers = 0, pin_memory= True)


In [37]:
batch = next(iter(loader))

RuntimeError: ignored

In [38]:
batch.keys()

dict_keys(['video', 'video_name', 'video_index', 'clip_index', 'aug_index', 'label'])

In [39]:
batch['video'].shape, batch['label'].shape

(torch.Size([5, 3, 20, 224, 224]), torch.Size([5]))

Model Architecture

In [40]:
!pip install pytorch_lightning



In [41]:
import torch.nn as nn
import torch
from pytorch_lightning import LightningModule, seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.metrics import classification_report
import torchmetrics


In [42]:
class OurModel(LightningModule):
    def __init__(self):
        super(OurModel, self).__init__()
        # model architecture
        self.video_model = torch.hub.load('facebookresearch/pytorchvideo', 'efficient_x3d_xs', pretrained=True)
        self.relu = nn.ReLU()
        self.linear = nn.Linear(400, 1)

        self.lr = 0.001
        self.batch_size = 4
        self.numworker = 4

        # evaluation metric
        self.metric = torchmetrics.Accuracy(task="binary")
        # loss function
        self.criterion = nn.BCEWithLogitsLoss()

    def forward(self, x):
        x = self.video_model(x)
        x = self.relu(x)
        x = self.linear(x)
        return x

    def configure_optimizers(self):
        opt = torch.optim.AdamW(params=self.parameters(), lr=self.lr)
        scheduler = CosineAnnealingLR(opt, T_max=10, eta_min=1e-6, last_epoch=-1)
        return {'optimizer': opt, 'lr_scheduler': scheduler}

    def train_dataloader(self):
        dataset = labeled_video_dataset(
            train_df,
            clip_sampler=make_clip_sampler('random', 2),
            transform=video_transform,
            decode_audio=False
        )

        loader = DataLoader(dataset, batch_size=self.batch_size, num_workers=self.numworker, pin_memory=True)
        return loader

    def training_step(self, batch, batch_idx):
        video, label = batch['video'], batch['label']
        out = self(video)
        loss = self.criterion(out, label)
        metric = self.metric(out, label.to(torch.int64))
        return {'loss': loss, 'metric': metric.detach()}

    def train_epoch_end(self, outputs):
        loss = torch.stack([x['loss'] for x in outputs]).mean().cpu().numpy().round(2)
        metric = torch.stack([x['metric'] for x in outputs]).mean().cpu().numpy().round(2)
        self.log('training_loss', loss)
        self.log('training_metric', metric)

    def val_dataloader(self):
        dataset = labeled_video_dataset(
            val_df,
            clip_sampler=make_clip_sampler('random', 2),
            transform=video_transform,
            decode_audio=False
        )

        loader = DataLoader(dataset, batch_size=self.batch_size, num_workers=self.numworker, pin_memory=True)
        return loader

    def validation_step(self, batch, batch_idx):
        video, label = batch['video'], batch['label']
        out = self(video)
        loss = self.criterion(out, label)
        metric = self.metric(out, label.to(torch.int64))
        return {'loss': loss, 'metric': metric.detach()}

    def on_validation_epoch_end(self):
        loss = torch.stack([x['loss'] for x in self.validation_epoch_outputs]).mean().cpu().numpy().round(2)
        metric = torch.stack([x['metric'] for x in self.validation_epoch_outputs]).mean().cpu().numpy().round(2)
        self.log('validation_loss', loss)
        self.log('validation_metric', metric)

    def test_dataloader(self):
        dataset = labeled_video_dataset(
            val_df,
            clip_sampler=make_clip_sampler('random', 2),
            transform=video_transform,
            decode_audio=False
        )

        loader = DataLoader(dataset, batch_size=self.batch_size, num_workers=self.numworker, pin_memory=True)
        return loader

    def test_step(self, batch, batch_idx):
        video, label = batch['video'], batch['label']
        out = self(video)
        return {'label': label.detach(), 'pred': out.detach()}

    def test_epoch_end(self, outputs):
        label = torch.cat([x['label'] for x in outputs]).cpu().numpy()
        pred = torch.cat([x['pred'] for x in outputs]).cpu().numpy()
        pred = np.where(pred > 0.5, 1, 0)
        print(classification_report(label, pred))


In [43]:
chekpoint_callback = ModelCheckpoint(
    monitor = 'val_loss',
    dirpath = 'chekpoints',
    filename = 'file',
    save_last = True
)
lr_monitor = LearningRateMonitor(logging_interval='epoch')

In [44]:
andoza = OurModel()

Using cache found in /root/.cache/torch/hub/facebookresearch_pytorchvideo_main


In [45]:
seed_everything(0)

INFO:lightning_fabric.utilities.seed:Seed set to 0


0

In [46]:
trainer = Trainer(max_epochs = 15,
                accelerator = 'gpu',
                devices = -1,
                precision = 16,
                accumulate_grad_batches = 2,
                enable_progress_bar = False,
                num_sanity_val_steps = 0,
                callbacks = [lr_monitor, chekpoint_callback],
            #    limit_train_batches = 5,
             #   limit_val_batches = 1
                )

/usr/local/lib/python3.10/dist-packages/lightning_fabric/connector.py:558: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [47]:
trainer.fit(andoza)

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name        | Type              | Params
--------------------------------------------------
0 | video_model | EfficientX3d      | 3.8 M 
1 | relu        | ReLU              | 0     
2 | linear      | Linear            | 401   
3 | metric      | BinaryAccuracy    | 0     
4 | criterion   | BCEWithLogitsLoss | 0     
--------------------------------------------------
3.8 M     Trainable params
0         Non-trainable params
3.8 M     Total params
15.179    Total estimated model params size (MB)


TypeError: ignored