## Import Libraries

In [1]:
%matplotlib inline

import os
import time
import copy
import pandas as pd
import numpy as np

from random import seed
from random import randint
import random
from numpy.random import uniform
from scipy.special import expit
from PIL import Image

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets, transforms, models
import torchvision

from matplotlib import pyplot as plt

import warnings
warnings.filterwarnings("ignore")

from tqdm import tqdm

import math

from sklearn.metrics import roc_auc_score, average_precision_score

import seaborn as sns

## Visualization & RNG setup

In [2]:
sns.set()

# Fixed seed so that repeated runs of the notebook are reproducible.
manualSeed = 999
#manualSeed = random.randint(1, 10000) # use if you want new results
print("Random Seed: ", manualSeed)

# Seed every random number generator used in this notebook with the same value:
# Python's `random`, PyTorch (CPU, current CUDA device, all CUDA devices) and NumPy.
for _seed_fn in (
    random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,  # covers the multi-GPU case
    np.random.seed,              # numpy-based backends, scikit-learn
):
    _seed_fn(manualSeed)

# cuDNN settings: ask for deterministic behaviour and turn off benchmark mode.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

Random Seed:  999
cuda:0


## Model

### Description

The model performs binning: it takes each video in as a sequence of snippets (bins). For every frame it creates a mask that captures spatial importance. It then applies a ConvLSTM to reconstruct an 'average image' per bin, followed by a second LSTM over those averages. If the video contains an anomaly, the weights should differ noticeably between the 'averages' (the anomaly is encoded in the weights); otherwise the difference should be small.

In [3]:
from explainable_anomaly_detection import *

In [4]:
# Root folder of the UBI_FIGHTS data.
# NOTE(review): absolute, machine-specific path — adjust it for your environment.
DATA_PATH = r'D:\GSAI\JUPYTER\AI604\UBI_FIGHTS'

# Train and validation start out as two identically constructed datasets and are
# narrowed down to disjoint index sets below; the test set is built with is_test=True.
train_dataset = ViolenceDataset(DATA_PATH, sampling_freq=5)
valid_dataset = ViolenceDataset(DATA_PATH, sampling_freq=5)
test_dataset = ViolenceDataset(DATA_PATH, sampling_freq=5,is_test=True)

# For the train/valid split, the normal and the fight videos are split separately with
# the same ratio, so both subsets keep the same proportion of normal and abnormal videos.
split_percent = 0.7
n_normal = len(train_dataset.normal_videos)
n_fight = len(train_dataset.fight_videos)
normal_cut = int(split_percent * n_normal)
fight_cut = int(split_percent * n_fight)

# Shuffled index vectors (normal first, then fight, to keep the RNG call order fixed).
normal_idx = np.arange(0, n_normal).astype('int')
fight_idx = np.arange(0, n_fight).astype('int')
np.random.shuffle(normal_idx)
np.random.shuffle(fight_idx)

# First `split_percent` of each shuffled vector goes to train, the remainder to valid.
normal_train, normal_valid = normal_idx[:normal_cut], normal_idx[normal_cut:]
fight_train, fight_valid = fight_idx[:fight_cut], fight_idx[fight_cut:]

# Restrict each dataset to its own indices, then rebuild the combined lists
# (fight entries first, normal entries second).
# NOTE(review): indices are computed from train_dataset but also applied to
# valid_dataset — this assumes both list their videos in the same order; TODO confirm.
for _subset, _normal_keep, _fight_keep in (
    (train_dataset, normal_train, fight_train),
    (valid_dataset, normal_valid, fight_valid),
):
    _subset.normal_videos = [_subset.normal_videos[t] for t in _normal_keep]
    _subset.normal_labels = [_subset.normal_labels[t] for t in _normal_keep]
    _subset.fight_videos = [_subset.fight_videos[t] for t in _fight_keep]
    _subset.fight_labels = [_subset.fight_labels[t] for t in _fight_keep]
    _subset.actual_videos = _subset.fight_videos + _subset.normal_videos
    _subset.actual_labels = _subset.fight_labels + _subset.normal_labels

In [5]:
## Train & check performance
# Optimisation settings
lr = 1e-4        # learning rate handed to the optimizer
batch_size = 1   # number of videos per batch in every DataLoader

# Schedule
n_epoch = 10     # total number of training epochs
valid_iter = 2   # validation runs on every `valid_iter`-th epoch

In [6]:
# Stand-alone spatial (ConvSpaceMask) and temporal (LSTMTimeMask) mask blocks: one
# "abnormal" and one "normal" instance of each, all moved to `device`.
# NOTE(review): none of these four objects is passed to CBAMVideoNetwork or to the
# optimizer in the cells shown here — confirm they are still needed on `device`.
SpatialBlockAbnormal, SpatialBlockNormal = ConvSpaceMask().to(device), ConvSpaceMask().to(device)
TemporalBlockAbnormal, TemporalBlockNormal = LSTMTimeMask().to(device), LSTMTimeMask().to(device)

# The network that is actually trained, its loss, and an Adam optimizer over its parameters.
net = CBAMVideoNetwork(
    SpatialBlockType=2,
    TemporalBlockType=2,
    kernel_size=3,
    n_channel=3,
).to(device)
loss_fn = AttentionMatchLoss()
optimizer = optim.Adam(params=net.parameters(), lr=lr)

In [7]:
# One loader per split, all with the same batch size. Only the training loader
# shuffles; validation and test are iterated without shuffling.
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
valid_dataloader = DataLoader(dataset=valid_dataset, shuffle=False, batch_size=batch_size)
test_dataloader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)

In [8]:
def _run_epoch(model, dataloader, criterion, device, optimizer=None):
    """Run one full pass over `dataloader` and return the summed loss.

    Parameters
    ----------
    model : callable
        Called as `model(video)`; must return the 4-tuple
        (abnormal_score, normal_score, temporal_attention_abnormal,
        temporal_attention_normal).
    dataloader : iterable
        Yields `(video, label)` batches.
    criterion : callable
        Called as `criterion(label, abnormal_score, normal_score,
        temporal_attention_abnormal, temporal_attention_normal)`.
    device : torch.device
        Device the batches are moved to before the forward pass.
    optimizer : torch.optim.Optimizer, optional
        When given, the pass is a training pass: gradients are reset, the loss is
        back-propagated and the optimizer is stepped for every batch. When omitted,
        the pass is evaluation-only and runs with autograd disabled.

    Returns
    -------
    float
        Sum of the per-batch loss values (not averaged).

    Notes
    -----
    The caller is responsible for putting the model into train/eval mode.
    """
    is_training = optimizer is not None
    total_loss = 0.0
    # Autograd is only required when backward() will be called; disabling it for
    # evaluation avoids building a graph for every validation/test batch.
    with torch.set_grad_enabled(is_training):
        for video, label in tqdm(dataloader):
            video = video.to(device)
            label = label.to(device)

            if is_training:
                # Clear gradients left over from the previous batch; without this
                # they accumulate across iterations.
                optimizer.zero_grad()

            abnormal_score, normal_score, temporal_attention_abnormal, temporal_attention_normal = model(video)
            loss = criterion(label, abnormal_score, normal_score, temporal_attention_abnormal, temporal_attention_normal)

            if is_training:
                loss.backward()
                optimizer.step()

            total_loss += loss.item()

            # Drop references to this batch's tensors before the next batch is loaded.
            del video, label, abnormal_score, normal_score, temporal_attention_abnormal, temporal_attention_normal, loss
    return total_loss


best_valid_loss = 1e10
train_loss_list = []
valid_loss_list = []
test_loss_list = []
for n in range(1, n_epoch + 1):
    print(f'Epoch {n}:')
    # Stays 0 on epochs where validation is skipped (kept so that the three loss
    # lists always have one entry per epoch).
    valid_loss = 0

    net.train()
    print('Training...')
    train_loss = _run_epoch(net, train_dataloader, loss_fn, device, optimizer=optimizer)

    net.eval()
    ran_validation = n % valid_iter == 0
    if ran_validation:
        print('Validation...')
        valid_loss = _run_epoch(net, valid_dataloader, loss_fn, device)

    print('Testing...')
    test_loss = _run_epoch(net, test_dataloader, loss_fn, device)

    # Checkpoint only when validation actually ran and improved on the best loss so
    # far; a skipped epoch's placeholder 0 must not count as an improvement.
    if ran_validation and valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        # torch.save takes the object first and the destination second.
        torch.save(net.state_dict(), f'net_epoch_{n}.pt')

    train_loss_list.append(train_loss)
    valid_loss_list.append(valid_loss)
    test_loss_list.append(test_loss)
    

Epoch 1:
Training...


  1%|          | 4/653 [00:18<48:47,  4.51s/it]  


RuntimeError: CUDA out of memory. Tried to allocate 528.00 MiB (GPU 0; 8.00 GiB total capacity; 5.50 GiB already allocated; 122.50 MiB free; 5.51 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF