In [72]:
import sys
from pytorchvideo.data import LabeledVideoDataset
from glob import glob
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
sys.path.append('pytorchvideo')

In [73]:
non = glob('NonViolence/*')
vio = glob('Violence/*')
label=[0]*len(non)+[1]*len(vio)

In [77]:
df=pd.DataFrame(zip(vio+non, label), columns=['file', 'label'])
print(df)
print(len(non))
print(len(vio))

In [80]:
df.head()

Unnamed: 0,file,labels
0,NonViolence/NV_543.mp4,0
1,NonViolence/NV_557.mp4,0
2,NonViolence/NV_580.mp4,0
3,NonViolence/NV_594.mp4,0
4,NonViolence/NV_769.mp4,0


In [130]:
from sklearn.model_selection import train_test_split
train_df,val_df = train_test_split(df,test_size=0.2,shuffle = True)

In [131]:
len(train_df), len(val_df)

(1211, 303)

#augmentation process

In [106]:
from pytorchvideo.data import LabeledVideoDataset,make_clip_sampler,labeled_video_dataset

from pytorchvideo.transforms import (
    ApplyTransformToKey,
    Normalize,
    RandomShortSideScale,
    UniformTemporalSubsample,
    Permute
)

In [107]:
from torchvision.transforms import (
    Compose,
    #Lambda,
    RandomCrop,
    RandomHorizontalFlip,
    Resize
)

In [108]:
from torchvision.transforms._transforms_video import (
    CenterCropVideo,
    NormalizeVideo
)

In [109]:
video_transform=Compose([
    ApplyTransformToKey(key='video',
    transform = Compose([
        UniformTemporalSubsample(20),
        #Lambda(lambda x:x/255),
        Normalize((0.45, 0.45, 0.45),(0.225, 0.225, 0.225)),
        RandomShortSideScale(min_size=248, max_size=256),
        CenterCropVideo(224),
        RandomHorizontalFlip(p=0.5)
    ]),
    ),
])

In [110]:
print(train_df[0:5])

                        file  labels
1212      Violence/V_204.mp4       1
641   NonViolence/NV_400.mp4       0
494   NonViolence/NV_671.mp4       0
47    NonViolence/NV_542.mp4       0
971       Violence/V_754.mp4       1


In [111]:
import os

notebook_path = os.path.abspath("__file__")
print("Notebook path:", notebook_path)

Notebook path: /Users/zaarr/Desktop/POSTDOC HBKU/CODE/4. Video/__file__


In [112]:
import pandas as pd

train_df.to_csv('/Users/zaarr/Desktop/POSTDOC HBKU/CODE/4. Video/train1_data.csv', index=False)
val_df.to_csv('/Users/zaarr/Desktop/POSTDOC HBKU/CODE/4. Video/val1_data.csv', index=False)


In [113]:
from torch.utils.data import DataLoader

# Call the labeled_video_dataset method with the file path
train_dataset = labeled_video_dataset('/Users/zaarr/Desktop/POSTDOC HBKU/CODE/4. Video/train_data', clip_sampler=make_clip_sampler('random', 2),
                                      transform=video_transform, decode_audio=False)
loader1=DataLoader(train_dataset,batch_size=5,num_workers=0,pin_memory=False)
#uniform sampler takes 2 sec frames in the whole video sequentially

In [114]:
print(train_dataset)
print(loader1)

<pytorchvideo.data.labeled_video_dataset.LabeledVideoDataset object at 0x7fdff301aee0>
<torch.utils.data.dataloader.DataLoader object at 0x7fdff34aff40>


In [115]:
test_dataset = labeled_video_dataset('/Users/zaarr/Desktop/POSTDOC HBKU/CODE/4. Video/test_data', clip_sampler=make_clip_sampler('random', 2),
                                      transform=video_transform, decode_audio=False)
loader2=DataLoader(test_dataset,batch_size=5,num_workers=0,pin_memory=False)

In [116]:
print(test_dataset)
print(loader2)

<pytorchvideo.data.labeled_video_dataset.LabeledVideoDataset object at 0x7fdfe743a340>
<torch.utils.data.dataloader.DataLoader object at 0x7fdfd3fb1700>


In [117]:
import torch.nn as nn
import torch
import torch.utils.data
from pytorch_lightning import LightningModule, seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor
from torch.optim import lr_scheduler 
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.metrics import classification_report
import torchmetrics
import torch.optim as optim
import pytorch_lightning as pl

In [118]:
video_model=torch.hub.load('facebookresearch/pytorchvideo', 'efficient_x3d_xs', pretrained=True)

Using cache found in /Users/zaarr/.cache/torch/hub/facebookresearch_pytorchvideo_main


In [119]:
video_model

EfficientX3d(
  (s1): Sequential(
    (pathway0_stem_conv_xy): Conv3dTemporalKernel1BnAct(
      (kernel): Sequential(
        (conv): Conv3d(3, 24, kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), bias=False)
        (act): Identity(
          (act): Identity()
        )
      )
    )
    (pathway0_stem_conv): Conv3d5x1x1BnAct(
      (kernel): Sequential(
        (conv): Conv3d(24, 24, kernel_size=(5, 1, 1), stride=(1, 1, 1), padding=(2, 0, 0), groups=24, bias=False)
        (bn): BatchNorm3d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): ReLU(
          (act): ReLU(inplace=True)
        )
      )
    )
  )
  (s2): Sequential(
    (pathway0_res0): X3dBottleneckBlock(
      (_residual_add_func): FloatFunctional(
        (activation_post_process): Identity()
      )
      (_res_proj): Conv3dTemporalKernel1BnAct(
        (kernel): Sequential(
          (conv): Conv3d(24, 24, kernel_size=(1, 1, 1), stride=(1, 2, 2), bias=False)
          (bn)

In [141]:
class OurModel(pl.LightningModule):
    def __init__ (self):
        super(OurModel, self).__init__()
        #model architecture
        self.video_model=torch.hub.load('facebookresearch/pytorchvideo', 'efficient_x3d_xs', pretrained=True)
        self.relu=nn.ReLU()
        self.linear=nn.Linear(400,1)
        self.lr=1e-3
        self.batch_size=4
        self.numworker=4
        #6,4,8
        #evaluation metric
        self.metric=torchmetrics.Accuracy(task='binary')
        #loss function
        self.criterion=nn.BCEWithLogitsLoss()
    
    def forward(self, x, target):
        x=self.video_model(x)
        x=self.relu(x)
        x=self.linear(x)
        return x
    
    def training_step(self, batch, batch_idx):
        video,label=batch['video'], batch['label']
        out=self.forward(video)
        loss=self.criterion(out)
        metric=self.metric(out,label.to(torch.int64))
        #out=self(video)
        return {'loss':loss, 'metric':metric.detach()}
    
    def train_dataloader(self):

        dataset=labeled_video_dataset('/Users/zaarr/Desktop/POSTDOC HBKU/CODE/4. Video/train_data',clip_sampler=make_clip_sampler('random', 2),
                                    transform=video_transform, decode_audio=False)
        loader=DataLoader(dataset,batch_size=self.batch_size,num_workers=0,pin_memory=True)
        return loader
    
    
    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.lr)
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-6, last_epoch=-1)
        return [optimizer], [lr_scheduler]
    

    
    def on_train_epoch_end(self, outputs):
        loss=torch.stack([x['loss'] for x in outputs]).mean().cpu().numpy().round(2)
        #stack them, mean and CPU and numpy
        metric=torch.stack([x['metric'] for x in outputs]).mean().cpu().numpy().round(2)
        self.log('training_loss', loss)
        self.log('training_metric', metric)

    def val_dataloader(self):
        dataset=LabeledVideoDataset('/Users/zaarr/Desktop/POSTDOC HBKU/CODE/4. Video/test_data',clip_sampler=make_clip_sampler('random', 2),
                                    transform=video_transform, decode_audio=False)
        loader=DataLoader(dataset,batch_size=self.batch_size,num_workers=0,pin_memory=True)
        return loader
    
    def validation_step(self,batch,batch_idx):
        video,label=batch['video'], batch['label']
        out=self.forward(video)
        loss=self.criterion(out)
        metric=self.metric(out,label.to(torch.int64))
        #out=self(video)
        return {'loss':loss, 'metric':metric.detach()}
    
    def on_validation_epoch_end(self,outputs):
        loss=torch.stack([x['loss'] for x in outputs]).mean().cpu().numpy().round(2)
        #stack them, mean and CPU and numpy
        metric=torch.stack([x['metric'] for x in outputs]).mean().cpu().numpy().round(2)
        self.log('val_loss', loss)
        self.log('val_metric', metric)
        
    def test_dataloader(self):
        dataset=LabeledVideoDataset('/Users/zaarr/Desktop/POSTDOC HBKU/CODE/4. Video/test_data.csv',clip_sampler=make_clip_sampler('random', 2),
                                    transform=video_transform, decode_audio=False)
        loader=DataLoader(dataset,batch_size=self.batch_size,num_workers=self.numworker,pin_memory=True)
        return loader
    
    def test_step(self,batch,batch_idx):
        video,label=batch['video'], batch['label']
        out=self(video)
        #loss=self.criterion(out)
       # metric=self.metric(out,label.to(torch.int64))
        #out=self(video)
        return {'label':label, 'pred':out.detach()}
    
    def on_test_epoch_end(self,outputs):
        label=torch.cat(x['label'] for x in outputs).cpu().numpy()
        pred=torch.cat(x['pred'] for x in outputs).cpu().numpy()
        pred=np.where(pred>0.5,1,0)
        print(classfication_report(label,pred))

       # loss=torch.stack([x['loss'] for x in outputs]).mean().cpu().numpy().round(2)
        #stack them, mean and CPU and numpy
       # metric=torch.stack([x['metric'] for x in outputs]).mean().cpu().numpy().round(2)
       # self.log('test_loss', loss)
       # self.log('test_metric', metric)


In [142]:
m=OurModel()

Using cache found in /Users/zaarr/.cache/torch/hub/facebookresearch_pytorchvideo_main


In [143]:
m.train_dataloader()


<torch.utils.data.dataloader.DataLoader at 0x7fdf17f31dc0>

In [144]:
m.test_dataloader()

<torch.utils.data.dataloader.DataLoader at 0x7fdff66ae0a0>

In [145]:
m.val_dataloader()

<torch.utils.data.dataloader.DataLoader at 0x7fdff66ae5e0>

In [146]:
checkpoint_callback = ModelCheckpoint(monitor='val_loss', dirpath='checkpoints',
                                     filename='file',save_last=True)
lr_monitor = LearningRateMonitor(logging_interval='epoch')

In [147]:
#Total 10 epochs, at 5 model is improving, at 7 model is interrupted, 
#if we want to resume at epoch 7

#If save_last = True means the model will resume at 5

In [148]:
model=OurModel()
seed_everything(0)
trainer = Trainer(max_epochs=1,
                 accelerator='cpu', devices=1,
                 precision=16,
                 accumulate_grad_batches=2,
                 enable_progress_bar = False,
                 num_sanity_val_steps=0,
                 callbacks=[lr_monitor,checkpoint_callback],
                 limit_train_batches=5,
                 limit_val_batches=1,)
                 

Using cache found in /Users/zaarr/.cache/torch/hub/facebookresearch_pytorchvideo_main
Global seed set to 0
Using bfloat16 Automatic Mixed Precision (AMP)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_val_batches=1)` was configured so 1 batch will be used.


In [None]:
trainer.fit(model)