In [1]:
import torch
from torch import nn

import torchvision
from torchvision.datasets import ImageFolder

from torchvision import transforms

from torch.utils.data import DataLoader
from pathlib import Path
import os
from torchvision.models import vgg16

In [2]:
import sys
sys.path.append("..")

In [3]:
from video_classification.datasets import FolderOfFrameFoldersDataset, FrameWindowDataset

In [4]:
device = "cpu"  # we don't need cuda for this one

In [5]:
# Root of the local data directory. Set the INSIGHT_DATA_ROOT environment
# variable to run the notebook on another machine; when it is unset the
# original author's location is used, so existing behavior is unchanged.
ROOT = Path(os.environ.get("INSIGHT_DATA_ROOT", "/Users/ludovica/Documents/Insight/data"))
# Directory of extracted frames; the dataset cell reads its `train` and
# `validation` subfolders.
DATA_ROOT = ROOT / "frame_data"

In [6]:
DATA_ROOT

PosixPath('/Users/ludovica/Documents/Insight/data/frame_data')

In [7]:
# Channel-wise normalization statistics shared by both pipelines, defined once
# so the training and validation transforms cannot drift apart.
# (These are the ImageNet mean/std values commonly used with torchvision models.)
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# Training pipeline: augmentation, then tensor conversion and normalization.
# NOTE(review): ColorJitter() is called with its default arguments, for which
# torchvision documents all jitter ranges as 0 -- i.e. no colour change is
# applied. Pass explicit brightness/contrast/saturation/hue values if colour
# augmentation is actually intended.
train_transforms = transforms.Compose([
    transforms.ColorJitter(),
    transforms.RandomHorizontalFlip(p=0.25),
    transforms.RandomVerticalFlip(p=0.25),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
])

# Validation pipeline: no augmentation, only tensor conversion and the same
# normalization as training.
valid_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
])

In [8]:
# Both splits are built identically apart from their folder and transform, so
# the shared windowing options live in one place.
# NOTE(review): presumably window_size=2 yields pairs of consecutive frames
# (a sample is later unpacked as `(x_prev, x_cur), label`) -- confirm against
# FrameWindowDataset.
_window_options = dict(
    base_class=FrameWindowDataset,
    window_size=2,
    overlapping=True,
)

train_ds = FolderOfFrameFoldersDataset(
    DATA_ROOT/'train',
    transform=train_transforms,
    **_window_options,
)
valid_ds = FolderOfFrameFoldersDataset(
    DATA_ROOT/'validation',
    transform=valid_transforms,
    **_window_options,
)

In [9]:
(x_prev, x_cur), label = train_ds[3]

In [12]:
import torch.nn.functional as F

class FrameToFrameModel(nn.Module):
    """Parameter-free scorer: mean squared error between two stacked frames.

    The module has no learnable weights; it only measures how much the second
    frame of each window differs from the first.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return one MSE score per batch element.

        Args:
            x: tensor laid out as [B, T, C, H, W]. The time axis is unpacked
                into exactly two frames, so T must be 2 (any other length
                raises ``ValueError`` on unpacking).

        Returns:
            Tensor of shape [B]: the squared difference between the two
            frames averaged over the C, H and W axes. Computed under
            ``torch.no_grad()``, so it carries no gradient.
        """
        # Split along the time axis: each piece is [B, C, H, W].
        x_prev, x_cur = x.unbind(dim=1)
        with torch.no_grad():
            squared_error = F.mse_loss(x_prev, x_cur, reduction='none')
            # Collapse everything except the batch axis.
            per_sample = squared_error.mean(dim=[1, 2, 3])
        return per_sample

In [13]:
scorer = FrameToFrameModel()

In [15]:
from tqdm import tqdm
import numpy as np

# Score every validation window in dataset order (shuffle=False) so each score
# stays aligned with the label collected in the same iteration.
valid_loader = DataLoader(valid_ds, batch_size=48, shuffle=False, num_workers=4)
scores = []
y_true = []
with torch.no_grad():
    for x, y in tqdm(valid_loader):
        scores.extend(scorer(x).tolist())
        # Store plain Python numbers rather than one 0-d tensor per sample,
        # which makes the later np.array() conversion cheap and unambiguous.
        # Assumes the loader collates labels into a tensor -- TODO confirm.
        y_true.extend(y.tolist())

scores = np.array(scores)
y_true = np.array(y_true)


  0%|          | 0/99 [00:00<?, ?it/s][A
  1%|          | 1/99 [00:05<08:49,  5.40s/it][A
  2%|▏         | 2/99 [00:06<06:24,  3.96s/it][A
  3%|▎         | 3/99 [00:06<04:43,  2.96s/it][A
  4%|▍         | 4/99 [00:07<03:36,  2.28s/it][A
  5%|▌         | 5/99 [00:08<03:15,  2.08s/it][A
  6%|▌         | 6/99 [00:09<02:30,  1.61s/it][A
  7%|▋         | 7/99 [00:10<01:58,  1.29s/it][A
  8%|▊         | 8/99 [00:10<01:37,  1.07s/it][A
  9%|▉         | 9/99 [00:11<01:41,  1.13s/it][A
 10%|█         | 10/99 [00:12<01:24,  1.06it/s][A
 11%|█         | 11/99 [00:12<01:12,  1.21it/s][A
 12%|█▏        | 12/99 [00:13<01:05,  1.34it/s][A
 13%|█▎        | 13/99 [00:15<01:26,  1.00s/it][A
 14%|█▍        | 14/99 [00:15<01:13,  1.16it/s][A
 15%|█▌        | 15/99 [00:16<01:04,  1.31it/s][A
 16%|█▌        | 16/99 [00:16<00:57,  1.44it/s][A
 17%|█▋        | 17/99 [00:18<01:19,  1.03it/s][A
 18%|█▊        | 18/99 [00:18<01:07,  1.19it/s][A
 19%|█▉        | 19/99 [00:19<01:00,  1.32it/s]

In [16]:
scores

array([0.00032781, 0.00032003, 0.00034775, ..., 0.00034765, 0.00034415,
       0.00033726])

In [56]:
import json

# Persist the per-window MSE scores as a flat JSON list so they can be reused
# without re-running the scoring loop.
scores_as_list = scores.tolist()
with open("frametoframe_mse_scores.json", 'w') as scores_file:
    json.dump(scores_as_list, scores_file)

In [17]:
mse_mean = scores.mean()
mse_std = scores.std()

In [18]:
class AnomalyDetectionModel(nn.Module):
    """Threshold detector: flags scores far above a reference mean.

    A score is flagged when it is strictly greater than
    ``mean + alpha * std``.
    """

    def __init__(self, mean, std, alpha):
        """Store the reference statistics and the threshold multiplier.

        Args:
            mean: reference mean of the scores.
            std: reference standard deviation of the scores.
            alpha: number of standard deviations above the mean at which a
                score starts to count as anomalous.
        """
        super().__init__()
        self.alpha = alpha
        self.std = std
        self.mean = mean

    def forward(self, x):
        """Return the element-wise result of ``x > mean + alpha * std``."""
        cutoff = self.mean + self.alpha * self.std
        return x > cutoff

In [19]:
model = AnomalyDetectionModel(mse_mean, mse_std, 1.2)  # alpha hand-tuned; the recorded run flags 436 of 4746 windows (about 9%)

In [21]:
y_pred = model(scores)

In [22]:
y_pred[y_pred == True].shape    # Used this to tweak alpha

(436,)

In [51]:
from sklearn.metrics import classification_report
import pandas as pd
pd.DataFrame(classification_report(y_true, y_pred, output_dict=True)).T

Unnamed: 0,f1-score,precision,recall,support
0,0.906333,0.908121,0.904553,4327.0
1,0.053801,0.052752,0.054893,419.0
accuracy,0.829541,0.829541,0.829541,0.829541
macro avg,0.480067,0.480436,0.479723,4746.0
weighted avg,0.831068,0.832605,0.829541,4746.0


Taken at face value, these results are poor: the anomaly class (label 1) reaches an F1-score of only about 0.05.

Actually, they are not that bad once we consider what is being measured: the frame-to-frame detector only responds to a change between consecutive frames, so it should fire only at transitions — when the video switches from normal to anomalous, or back. Let's treat each detected spike as a toggle of the current state and use that to get our real y_pred.

In [52]:
def toggle_predictions(spikes_pred):
    """Turn per-step spike flags into a running on/off state.

    The state starts as ``False`` and flips every time a truthy spike is
    seen; the state after processing each element is recorded.

    Args:
        spikes_pred: iterable of truthy/falsy spike indicators.

    Returns:
        List of ``bool`` with one entry per input element.
    """
    states = []
    active = False
    for spike in spikes_pred:
        # XOR with the spike flag flips the state only when a spike occurs.
        active = active ^ bool(spike)
        states.append(active)
    return states

In [53]:
y_pred_toggled = toggle_predictions(y_pred)

In [54]:
from sklearn.metrics import classification_report
import pandas as pd
pd.DataFrame(classification_report(y_true, y_pred_toggled, output_dict=True)).T

Unnamed: 0,f1-score,precision,recall,support
0,0.876689,0.926183,0.832216,4327.0
1,0.206735,0.153846,0.315036,419.0
accuracy,0.786557,0.786557,0.786557,0.786557
macro avg,0.541712,0.540015,0.573626,4746.0
weighted avg,0.817542,0.857997,0.786557,4746.0


While still not amazing, we do reach an F1-score of about **0.21** on the anomaly class through this purely unsupervised method.