# PRESETS

In [1]:
import numpy as np
import pickle
import random
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import Tensor, nn
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torchsummary import summary
import warnings
import os
import sys
from tqdm import tqdm
import cv2
import re
from collections import defaultdict

from sklearn import metrics
import argparse
import subprocess
from typing import Callable
from typing import Dict, List, Optional, Tuple, Union


# Enumerate GPUs in PCI bus order and expose only device index 7 to CUDA.
# These variables are set before the first CUDA query below.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "7",
    }
)
###
# Use the GPU when one is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device
### test

device(type='cuda')

## PATH

In [2]:
# Root folder that the extraction cells walk for input videos.
path_videos = r"D:\Anomaly-Detection-Dataset"
# Root folder under which extracted feature files (.npy) are written.
path_features = r"D:\testtemp"
# Pretrained C3D weights (not loaded by any cell visible in this notebook section).
path_model_c3d = r"D:\ekosman\pretrained\c3d.pickle"
# Pretrained R3D-101 checkpoint loaded by the extraction cell.
path_model_r3d101 = r"D:\ekosman\pretrained\r3d101_KM_200ep.pth"
# Placeholder for the MIL classifier weights; intentionally left empty here.
path_model_MIL = ""


## PARAMETERS

In [3]:
# NOTE(review): RESOLUTION is not referenced by the cells visible here;
# presumably the spatial input size in pixels - confirm before relying on it.
RESOLUTION: int = 112
# Number of consecutive frames stacked into one clip before it is fed to the
# backbone (see the extraction loops).
block_frame: int = 16
# NOTE(review): frame_interval is not referenced by the cells visible here;
# presumably a frame sampling stride - confirm.
frame_interval: int = 1

# MODELS

## R3D

In [4]:
"""R3D definition"""

from functools import partial

import torch
import torch.nn.functional as F
from torch import nn



def get_inplanes():
    """Return the base channel widths of the four residual stages."""
    return [64 << stage for stage in range(4)]


def conv3x3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3x3 ``nn.Conv3d`` with padding 1 and the given stride."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv3d(in_planes, out_planes, **conv_options)


def conv1x1x1(in_planes, out_planes, stride=1):
    """Build a bias-free 1x1x1 (pointwise) ``nn.Conv3d`` with the given stride."""
    conv_options = dict(kernel_size=1, stride=stride, bias=False)
    return nn.Conv3d(in_planes, out_planes, **conv_options)


class BasicBlock(nn.Module):
    """Residual block made of two 3x3x3 convolutions, each followed by batch norm.

    The first convolution carries ``stride``; ``downsample`` (if given) is
    applied to the block input to produce the shortcut branch.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1, downsample=None):
        super().__init__()

        # Main branch: conv -> bn -> relu -> conv -> bn.
        self.conv1 = conv3x3x3(in_planes, planes, stride)
        self.bn1 = nn.BatchNorm3d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3x3(planes, planes)
        self.bn2 = nn.BatchNorm3d(planes)
        # Optional callable that reshapes the input for the shortcut branch.
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        main = self.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))

        shortcut = x if self.downsample is None else self.downsample(x)

        main += shortcut
        return self.relu(main)


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1x1 -> 3x3x3 -> 1x1x1 convolutions.

    The last convolution widens the channels to ``planes * expansion``; the
    3x3x3 convolution carries ``stride``. ``downsample`` (if given) is applied
    to the block input to produce the shortcut branch.
    """

    expansion = 4

    def __init__(self, in_planes, planes, stride=1, downsample=None):
        super().__init__()

        widened = planes * self.expansion

        self.conv1 = conv1x1x1(in_planes, planes)
        self.bn1 = nn.BatchNorm3d(planes)
        self.conv2 = conv3x3x3(planes, planes, stride)
        self.bn2 = nn.BatchNorm3d(planes)
        self.conv3 = conv1x1x1(planes, widened)
        self.bn3 = nn.BatchNorm3d(widened)
        self.relu = nn.ReLU(inplace=True)
        # Optional callable that reshapes the input for the shortcut branch.
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        main = self.relu(self.bn1(self.conv1(x)))
        main = self.relu(self.bn2(self.conv2(main)))
        main = self.bn3(self.conv3(main))

        shortcut = x if self.downsample is None else self.downsample(x)

        main += shortcut
        return self.relu(main)


class ResNet(nn.Module):
    """3D ResNet backbone used here as an intermediate-feature extractor.

    Args:
        block: Residual block class (``BasicBlock`` or ``Bottleneck``); must
            expose an ``expansion`` class attribute.
        layers: Number of blocks in each of the four residual stages.
        block_inplanes: Base channel width of each of the four stages.
        n_input_channels: Channels of the input clip.
        conv1_t_size: Temporal kernel size of the stem convolution.
        conv1_t_stride: Temporal stride of the stem convolution.
        no_max_pool: Skip the max-pool after the stem when True.
        shortcut_type: ``"A"`` selects the parameter-free zero-padded
            shortcut; any other value selects a 1x1x1 conv + batch norm.
        widen_factor: Multiplier applied to every entry of ``block_inplanes``.
        n_classes: Output size of the final linear layer.

    Note:
        ``forward`` returns the output of ``layer2``, not the logits.
    """

    def __init__(
        self,
        block,
        layers,
        block_inplanes,
        n_input_channels=3,
        conv1_t_size=7,
        conv1_t_stride=1,
        no_max_pool=False,
        shortcut_type="B",
        widen_factor=1.0,
        n_classes=1039,
    ):
        super().__init__()

        block_inplanes = [int(x * widen_factor) for x in block_inplanes]

        # Running input width for the next stage; updated by _make_layer.
        self.in_planes = block_inplanes[0]
        self.no_max_pool = no_max_pool

        # Stem: spatial stride 2, temporal stride configurable.
        self.conv1 = nn.Conv3d(
            n_input_channels,
            self.in_planes,
            kernel_size=(conv1_t_size, 7, 7),
            stride=(conv1_t_stride, 2, 2),
            padding=(conv1_t_size // 2, 3, 3),
            bias=False,
        )
        self.bn1 = nn.BatchNorm3d(self.in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool3d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(
            block, block_inplanes[0], layers[0], shortcut_type
        )
        self.layer2 = self._make_layer(
            block, block_inplanes[1], layers[1], shortcut_type, stride=2
        )
        self.layer3 = self._make_layer(
            block, block_inplanes[2], layers[2], shortcut_type, stride=2
        )
        self.layer4 = self._make_layer(
            block, block_inplanes[3], layers[3], shortcut_type, stride=2
        )

        self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))
        self.fc = nn.Linear(block_inplanes[3] * block.expansion, n_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv3d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            elif isinstance(m, nn.BatchNorm3d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _downsample_basic_block(self, x, planes, stride):
        """Parameter-free shortcut: strided subsampling plus zero channel padding.

        Args:
            x: Input tensor of shape (N, C, D, H, W).
            planes: Channel count the output is padded to.
            stride: Stride of the size-1 average pool used for subsampling.

        Returns:
            Tensor with ``planes`` channels on the same device and with the
            same dtype as ``x``; gradients flow through the pooled part.
        """
        out = F.avg_pool3d(x, kernel_size=1, stride=stride)
        # Allocate the padding directly with the dtype/device of the pooled
        # tensor instead of creating it on the CPU and special-casing CUDA
        # float tensors.
        zero_pads = torch.zeros(
            out.size(0),
            planes - out.size(1),
            out.size(2),
            out.size(3),
            out.size(4),
            dtype=out.dtype,
            device=out.device,
        )

        # Concatenate `out` itself (not `out.data`) so the shortcut stays in
        # the autograd graph.
        return torch.cat([out, zero_pads], dim=1)

    def _make_layer(self, block, planes, blocks, shortcut_type, stride=1):
        """Build one residual stage of ``blocks`` blocks and update ``self.in_planes``."""
        downsample = None
        # A shortcut projection is needed whenever the first block changes the
        # resolution or the channel count.
        if stride != 1 or self.in_planes != planes * block.expansion:
            if shortcut_type == "A":
                downsample = partial(
                    self._downsample_basic_block,
                    planes=planes * block.expansion,
                    stride=stride,
                )
            else:
                downsample = nn.Sequential(
                    conv1x1x1(self.in_planes, planes * block.expansion, stride),
                    nn.BatchNorm3d(planes * block.expansion),
                )

        layers = []
        layers.append(
            block(
                in_planes=self.in_planes,
                planes=planes,
                stride=stride,
                downsample=downsample,
            )
        )
        self.in_planes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_planes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the network and return the ``layer2`` feature map.

        NOTE(review): layer3, layer4, the pooled ``feature`` and the ``fc``
        logits are still computed but discarded; they only cost time. They
        are kept so switching the return value back stays a one-line change.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        if not self.no_max_pool:
            x = self.maxpool(x)

        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)

        x4 = self.avgpool(x4)

        feature = x4.view(x.size(0), -1)
        output = self.fc(feature)

        # return feature
        return x2
        # output with return x2 -> r3d: model output shape: torch.Size([1, 512, 4, 30, 40])


def generate_model(model_depth, **kwargs):
    """Build a 3D ``ResNet`` of the requested depth.

    Args:
        model_depth: One of 10, 18, 34, 50, 101, 152 or 200.
        **kwargs: Forwarded unchanged to the ``ResNet`` constructor.

    Returns:
        The constructed ``ResNet``. Depths 10/18/34 use ``BasicBlock``,
        deeper variants use ``Bottleneck``.
    """
    assert model_depth in [10, 18, 34, 50, 101, 152, 200]

    # Blocks per stage, grouped by the residual block type they use.
    basic_layouts = {
        10: [1, 1, 1, 1],
        18: [2, 2, 2, 2],
        34: [3, 4, 6, 3],
    }
    bottleneck_layouts = {
        50: [3, 4, 6, 3],
        101: [3, 4, 23, 3],
        152: [3, 8, 36, 3],
        200: [3, 24, 36, 3],
    }

    if model_depth in basic_layouts:
        block_cls, stage_sizes = BasicBlock, basic_layouts[model_depth]
    else:
        block_cls, stage_sizes = Bottleneck, bottleneck_layouts[model_depth]

    return ResNet(block_cls, stage_sizes, get_inplanes(), **kwargs)



## C3D

In [5]:
class C3D(nn.Module):
    """
    The C3D network (the original docstring cites "[1]", but no reference list
    is included in this file).

    Eight 3x3x3 convolutions in five pooled groups, followed by three fully
    connected layers. ``fc6`` expects 8192 flattened features per sample.
    """

    def __init__(self):
        super(C3D, self).__init__()

        self.conv1 = nn.Conv3d(3, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        # pool1 keeps the temporal length; later pools halve it.
        self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        self.conv2 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv3a = nn.Conv3d(128, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv3b = nn.Conv3d(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv4a = nn.Conv3d(256, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv4b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool4 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv5a = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv5b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool5 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=(0, 1, 1))

        self.fc6 = nn.Linear(8192, 4096)
        self.fc7 = nn.Linear(4096, 4096)
        self.fc8 = nn.Linear(4096, 487)

        self.dropout = nn.Dropout(p=0.5)

        self.relu = nn.ReLU()
        # Logits are (N, 487); normalise over the class dimension explicitly
        # instead of relying on the deprecated implicit-dim behaviour.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return ``(probs, feature)``.

        ``probs`` are the softmax class probabilities from ``fc8`` and
        ``feature`` is the ReLU-activated ``fc6`` output (before dropout).
        """
        h = self.relu(self.conv1(x))
        h = self.pool1(h)

        h = self.relu(self.conv2(h))
        h = self.pool2(h)

        h = self.relu(self.conv3a(h))
        h = self.relu(self.conv3b(h))
        h = self.pool3(h)

        h = self.relu(self.conv4a(h))
        h = self.relu(self.conv4b(h))
        h = self.pool4(h)

        h = self.relu(self.conv5a(h))
        h = self.relu(self.conv5b(h))
        h = self.pool5(h)

        # Flatten to the 8192 features fc6 expects.
        h = h.view(-1, 8192)
        feature = self.relu(self.fc6(h))
        h = self.dropout(feature)
        h = self.relu(self.fc7(h))
        h = self.dropout(h)

        logits = self.fc8(h)
        probs = self.softmax(logits)

        return probs, feature



## MIL

In [6]:

class MIL_fc(nn.Module):
    """Three-layer fully connected scorer mapping a feature vector to a score in (0, 1).

    ``self.classifier`` only serves as the container that creates and
    initialises the weights; ``forward`` applies them functionally through
    ``self.vars`` so that an external set of weights can be substituted.

    NOTE(review): ``forward`` applies no ReLU after the second linear layer,
    although ``self.classifier`` declares one. This is kept as-is because
    changing it would alter the output of already-trained weights.

    Args:
        input_dim: Size of each input feature vector.
        drop_p: Dropout probability used after the first two linear layers.
    """

    def __init__(self, input_dim=2048, drop_p=0.0):
        super(MIL_fc, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(512, 32),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )
        self.drop_p = drop_p
        self.weight_init()
        # Flat list [w0, b0, w1, b1, w2, b2] sharing storage with the classifier.
        self.vars = nn.ParameterList()

        for param in self.classifier.parameters():
            self.vars.append(param)

    def weight_init(self):
        """Apply Xavier-normal initialisation to every linear layer's weight."""
        for layer in self.classifier:
            if isinstance(layer, nn.Linear):
                nn.init.xavier_normal_(layer.weight)

    def forward(self, x, vars=None):
        """Score ``x`` with ``vars`` (defaults to the module's own weights).

        Args:
            x: Input whose last dimension equals ``input_dim``.
            vars: Optional sequence ``[w0, b0, w1, b1, w2, b2]`` of weights.

        Returns:
            Sigmoid scores with a trailing dimension of size 1.
        """
        if vars is None:
            vars = self.vars
        x = F.linear(x, vars[0], vars[1])
        x = F.relu(x)
        x = F.dropout(x, self.drop_p, training=self.training)
        x = F.linear(x, vars[2], vars[3])
        x = F.dropout(x, self.drop_p, training=self.training)
        x = F.linear(x, vars[4], vars[5])
        return torch.sigmoid(x)

    def parameters(self, recurse=True):
        """Return the flat ``ParameterList`` instead of the default generator.

        ``recurse`` is accepted only for signature compatibility with
        ``nn.Module.parameters`` and is ignored; the returned list always
        holds the six classifier tensors exactly once.
        """
        return self.vars

# LOSS

## MIL loss

In [7]:
def MIL_loss(y_pred, batch_size, is_transformer=0):
    """Multiple-instance ranking loss with sparsity and smoothness terms.

    Each row of the (reshaped) prediction holds the scores of one anomalous
    bag in columns ``[:32]`` followed by one normal bag in columns ``[32:]``.

    Args:
        y_pred: Segment scores. When ``is_transformer == 0`` it is reshaped to
            ``(batch_size, -1)``; otherwise it is passed through a sigmoid and
            must already be two-dimensional.
        batch_size: Number of anomalous/normal bag pairs in ``y_pred``.
        is_transformer: 0 for scores that are already probabilities, any other
            value for raw logits.

    Returns:
        Scalar tensor on the same device as ``y_pred``: the hinge ranking term
        plus the sparsity and smoothness penalties, averaged over the batch.
    """
    # Accumulate on the prediction's device so the loss also runs without CUDA.
    device = y_pred.device
    loss = torch.tensor(0., device=device)
    sparsity = torch.tensor(0., device=device)
    smooth = torch.tensor(0., device=device)
    if is_transformer == 0:
        y_pred = y_pred.view(batch_size, -1)
    else:
        y_pred = torch.sigmoid(y_pred)

    for i in range(batch_size):
        # NOTE(review): randperm(30) only ever selects indices 0..29, so the
        # last two of the 32 segments in each bag never contribute to the max
        # or to the sparsity term. Kept unchanged to preserve training results.
        anomaly_index = torch.randperm(30).to(device)
        normal_index = torch.randperm(30).to(device)

        y_anomaly = y_pred[i, :32][anomaly_index]
        y_normal = y_pred[i, 32:][normal_index]

        # Hinge: the top anomalous score should exceed the top normal score by 1.
        loss += F.relu(1. - torch.max(y_anomaly) + torch.max(y_normal))

        # Few anomalous segments should fire, and neighbouring scores should vary slowly.
        sparsity += torch.sum(y_anomaly) * 0.00008
        smooth += torch.sum((y_pred[i, :31] - y_pred[i, 1:32]) ** 2) * 0.00008

    return (loss + sparsity + smooth) / batch_size

# FEATURE EXTRACTION

In [8]:
# Frame preprocessing for the R3D backbone: convert to a tensor, then
# normalise each channel with fixed mean/std values.
# NOTE(review): frames come from cv2 (BGR channel order) and are passed in
# unchanged - confirm the mean/std channel order matches.
transformer_r3d = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.4345, 0.4051, 0.3775],
            std=[0.2768, 0.2713, 0.2737],
            inplace=False,
        ),
    ]
)

# Frame preprocessing for the C3D backbone: tensor conversion, resize to
# 128x171, centre crop to 112x112, then per-channel normalisation.
transformer_c3d = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize((128, 171)),
        transforms.CenterCrop((112, 112)),
        transforms.Normalize(
            mean=[124 / 255, 117 / 255, 104 / 255],
            std=[1 / (0.0167 * 255)] * 3,
            inplace=False,
        ),
    ]
)


# Preprocessing used by the extraction cells; switch here to change backbone.
transformer = transformer_r3d


## functions

In [57]:
# def of [to_segments, process_videos, process_tensor_noconv]
#use path_features

def to_segments(
    data: Union[Tensor, np.ndarray], n_segments: int = 32
) -> List[np.ndarray]:
    """Average per-clip features into temporal segments and L2-normalise them.

    This code is taken from:

        # https://github.com/rajanjitenpatel/C3D_feature_extraction/blob/b5894fa06d43aa62b3b64e85b07feb0853e7011a/extract_C3D_feature.py#L805

    Args:
        data (Union[Tensor, np.ndarray]): 2-D array of features of a certain
            video, one row per clip.
        n_segments (int, optional): Number of segments.

    Returns:
        List[np.ndarray]: One unit-norm feature list per segment. Segments
        whose averaged feature has zero norm are skipped, so fewer than
        ``n_segments`` entries can be returned.
    """
    data = np.array(data)
    segment_features = []
    # n_segments + 1 evenly spaced row indices delimit the segments.
    boundaries = np.round(np.linspace(0, len(data) - 1, num=n_segments + 1)).astype(
        int
    )
    for start, end in zip(boundaries[:-1], boundaries[1:]):
        if start == end:
            # Fewer rows than segments: reuse the single row at this position.
            segment = data[min(start, data.shape[0] - 1), :]
        else:
            segment = data[start:end, :].mean(axis=0)

        # Check the norm BEFORE dividing: dividing a zero vector yields NaNs,
        # which would slip past a "norm != 0" test made afterwards.
        norm = np.linalg.norm(segment)
        if norm == 0:
            continue

        segment_features.append((segment / norm).tolist())

    return segment_features

def process_videos(video_tensor, output_dir, save_name):
    """Segment a video's clip features and save them as ``<save_name>.npy``.

    Args:
        video_tensor: Tensor of per-clip features; moved to the CPU and passed
            to ``to_segments``.
        output_dir: Sub-folder name, created under the module-level
            ``path_features`` root if it does not exist.
        save_name: File name without the ``.npy`` suffix.
    """
    mean_segments = np.array(to_segments(video_tensor.cpu()))
    # Build paths with os.path.join so the separator is not hard-coded.
    output_dir = os.path.join(path_features, output_dir)
    os.makedirs(output_dir, exist_ok=True)  # create the folder if it is missing
    output_path = os.path.join(output_dir, f"{save_name}.npy")
    # Save to .npy file
    np.save(output_path, mean_segments)
    print(f'Saved features for video to {output_path}.')


def process_tensor_noconv(input_tensor, out_channels, pooled_shape):
    """Shrink a 5-D feature map by adaptive average pooling and flatten it.

    No convolution is applied: the channel count is left unchanged and only
    the last three dimensions are pooled to ``pooled_shape``.

    Args:
        input_tensor (torch.Tensor): Input of shape
            (batch, channels, depth, height, width).
        out_channels (int): Unused; kept so existing callers keep working.
        pooled_shape (tuple): Target (depth, height, width) after pooling.

    Returns:
        torch.Tensor: Tensor of shape
        (batch, channels * prod(pooled_shape)), located on the GPU when CUDA
        is available and on the CPU otherwise.
    """
    # Run the pooling on the GPU when one is available.
    target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    input_tensor = input_tensor.to(target_device)

    # Average-pool the depth/height/width dimensions to the target size.
    pooled_tensor = F.adaptive_avg_pool3d(input_tensor, pooled_shape)

    # Flatten everything except the batch dimension.
    return pooled_tensor.view(pooled_tensor.size(0), -1)

# # 예시 텐서 생성
# tensor_1 = torch.randn(4, 64, 8, 60, 80)
# tensor_2 = torch.randn(8, 256, 8, 60, 80)
# tensor_3 = torch.randn(1, 512, 4, 30, 40)
# tensor_4 = torch.randn(1, 1024, 2, 15, 20)

# # 함수 호출 예시
# output_tensor_1 = process_tensor_noconv(tensor_1, out_channels=4, pooled_shape=(4, 9, 12))
# output_tensor_2 = process_tensor_noconv(tensor_2, out_channels=4, pooled_shape=(4, 9, 12))
# output_tensor_3 = process_tensor_noconv(tensor_3, out_channels=8, pooled_shape=(2, 6, 8))
# output_tensor_4 = process_tensor_noconv(tensor_4, out_channels=16, pooled_shape=(1, 6, 8))

# # 각 출력 텐서의 형태 확인
# print(output_tensor_1.shape)
# print(output_tensor_2.shape)
# print(output_tensor_3.shape)
# print(output_tensor_4.shape)

## extraction

In [53]:
# Model instantiation: the R3D-101 backbone shared by the extraction cells below.

model_res = generate_model(101)
# Map the checkpoint onto the active device so it also loads on machines
# without CUDA.
# NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
checkpoint = torch.load(path_model_r3d101, map_location=device)
model_res.load_state_dict(checkpoint['state_dict'])
model_res.to(device)
# Inference mode: freezes batch-norm statistics and disables dropout.
model_res.eval()

ResNet(
  (conv1): Conv3d(3, 64, kernel_size=(7, 7, 7), stride=(1, 2, 2), padding=(3, 3, 3), bias=False)
  (bn1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool3d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv3d(64, 64, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
      (bn1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv3d(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
      (bn2): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv3d(64, 256, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
      (bn3): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv3d(64, 256, k

### extractor basic

In [None]:
# extract features from path_videos
# parameter : transformer path_model_XXX


for root, dirs, files in os.walk(path_videos):
    for filename in files:
        if not filename.endswith(".mp4"):
            continue

        path = os.path.join(root, filename)
        data = []      # transformed frames of the clip currently being assembled
        features = []  # one pooled feature vector per completed clip
        cap = cv2.VideoCapture(path)
        try:
            success, image = cap.read()
            while success:
                try:
                    data.append(transformer(image))
                    if len(data) == block_frame:
                        # Hand the frames over and reset the buffer first, so a
                        # failing clip cannot leave it stuck above block_frame.
                        clip_frames, data = data, []
                        # Stack frames, then swap the first two axes so that
                        # channels come before time.
                        input_tensor = torch.stack(clip_frames)
                        input_tensor = input_tensor.permute(1, 0, 2, 3).to(device)
                        with torch.no_grad():
                            feature = model_res(input_tensor.unsqueeze(0))
                            feature = process_tensor_noconv(feature, out_channels=1, pooled_shape=(1, 6, 8))
                        features.append(feature.squeeze(0))
                except Exception as exc:
                    # Keep going with the next frame, but say what went wrong.
                    print("Error loading file", path)
                    print("Error details:", str(exc))
                success, image = cap.read()
        finally:
            # Always free the capture handle, even on an unexpected error.
            cap.release()

        if not features:
            # Video shorter than one clip (or every clip failed): nothing to save.
            print(f"No features extracted from {path}; skipping.")
            continue

        final_tensor = torch.stack(features)
        print(final_tensor.shape)
        process_videos(final_tensor, os.path.basename(root), filename)

### extractor - parse ver

In [30]:
# parameters 
# path_videos = ''
# path_features = ''
# Show the input/output roots in effect for the cells below.
print(path_videos, path_features, sep="\n")
# Files of at least this many bytes are split; smaller ones are processed
# directly. 0.2 GiB is roughly 205 MiB.
size_limit = 0.2 * 1024 ** 3
# Only videos whose number is >= this value are handled.
video_num_limit = 0
# Device name used by the extraction cells below.
device = "cuda" if torch.cuda.is_available() else "cpu"

D:\testset
D:\testtemp


In [79]:
import subprocess

# IF THE VIDEO DATASET HAS ALREADY BEEN SPLIT, SKIP THIS CELL!
# Splits every video whose file size is at least `size_limit` into
# 5-minute parts. Only videos whose number is >= `video_num_limit` are
# considered. Requires ffmpeg. The original file is not deleted.

# First run of digits that is directly followed by an underscore.
cond_num = re.compile(r'(\d+)_')

for root, dirs, files in os.walk(path_videos):
    for filename in files:
        if not filename.endswith("mp4"):
            continue

        match = cond_num.search(filename)
        if not match:
            continue

        number = int(match.group(1))
        if number < video_num_limit:
            continue

        path = os.path.join(root, filename)
        file_size = os.path.getsize(path)
        if file_size < size_limit:
            continue

        print(f'Splitting {filename} (size: {file_size} bytes)')

        # Stream-copy into <name>_partNNN.mp4 next to the source file.
        split_command = [
            'ffmpeg', '-i', path, '-c', 'copy', '-map', '0',
            '-segment_time', '00:5:00', '-f', 'segment',
            f'{path[:-4]}_part%03d.mp4'
        ]

        # check=True aborts the cell if ffmpeg exits with an error.
        subprocess.run(split_command, check=True)

# expected outputs
# Splitting Normal_Videos_924_x264.mp4 (size: 588821865 bytes)
# Splitting Normal_Videos_935_x264.mp4 (size: 905743231 bytes)

Splitting Normal_Videos_924_x264.mp4 (size: 588821865 bytes)
Splitting Normal_Videos_935_x264.mp4 (size: 905743231 bytes)
Splitting Normal_Videos_940_x264.mp4 (size: 315134407 bytes)
Splitting Normal_Videos307_x264.mp4 (size: 5476070783 bytes)
Splitting Normal_Videos308_x264.mp4 (size: 8674006528 bytes)


In [58]:

"""
    This cell extracts features from videos whose number is >= 'video_num_limit'
    and whose file size is < 'size_limit'.
    It only processes videos whose features have NOT already been extracted.
    Make sure 'transformer' is defined.
"""


# parameters
path_videos = "D:\\testset"
# path_features, size_limit, video_num_limit and device are taken from the
# parameter cell above.


# Iterate over all files in the directory (assumes splitting is completed)
cond_num = re.compile(r'(\d+)_')
for root, dirs, files in os.walk(path_videos):
    for filename in files:
        output_path = os.path.join(path_features, os.path.basename(root), filename + ".npy")
        if os.path.exists(output_path):
            print(f"path already exists! : {output_path}")
            continue
        print(output_path)

        if not filename.endswith("mp4"):
            continue
        match = cond_num.search(filename)
        if not match:
            continue
        number = int(match.group(1))
        if number < video_num_limit:
            continue

        path = os.path.join(root, filename)
        file_size = os.path.getsize(path)
        if file_size >= size_limit:
            # Too large: expected to be handled via its split parts instead.
            continue

        data = []      # transformed frames of the clip currently being assembled
        features = []  # one pooled feature vector per completed clip
        cap = cv2.VideoCapture(path)
        try:
            success, image = cap.read()
            while success:
                data.append(transformer(image))
                if len(data) == block_frame:
                    # Stack frames, then swap the first two axes so that
                    # channels come before time.
                    input_tensor = torch.stack(data)
                    input_tensor = input_tensor.permute(1, 0, 2, 3).to(device)
                    with torch.no_grad():
                        feature = model_res(input_tensor.unsqueeze(0))
                        feature = process_tensor_noconv(feature, out_channels=1, pooled_shape=(2, 6, 8))
                    features.append(feature.squeeze(0))
                    data = []
                success, image = cap.read()
        finally:
            # Release the capture handle even if a frame or the model fails.
            cap.release()

        # Videos shorter than one clip produce no features and are not saved.
        if features:
            final_tensor = torch.stack(features)
            process_videos(final_tensor, os.path.basename(root), filename)
                                
                                
# expected output
# path already exists! : D:\temp\Training-Normal-Videos-Part-1\Normal_Videos511_x264_part002.mp4.npy
# Saved features for video to D:\temp\Training-Normal-Videos-Part-1/Normal_Videos512_x264_part000.mp4.npy.

D:\testtemp\Arson\Arson001_x264.mp4.npy
Saved features for video to D:\testtemp\Arson\Arson001_x264.mp4.npy.
D:\testtemp\Arson\Arson002_x264.mp4.npy
Saved features for video to D:\testtemp\Arson\Arson002_x264.mp4.npy.
path already exists! : D:\testtemp\training-normal-videos-part-1\Normal_Videos001_x264.mp4.npy
path already exists! : D:\testtemp\training-normal-videos-part-1\Normal_Videos002_x264.mp4.npy
path already exists! : D:\testtemp\training-normal-videos-part-1\Normal_Videos004_x264.mp4.npy
D:\testtemp\training-normal-videos-part-1\Normal_Videos005_x264.mp4.npy
Saved features for video to D:\testtemp\training-normal-videos-part-1\Normal_Videos005_x264.mp4.npy.


In [26]:
"""
    This cell sorts the feature files of split (parted) videos and merges them
    into one file per video.
    After merging, the per-part feature files are removed automatically.
"""


# Parameters
path_features = "D:\\testtemp"

# Matches feature files of split videos: <name><3-digit number>_x264_partNNN.mp4.npy
pattern = re.compile(r'(?P<name>.+?)(?P<number>\d{3})_x264_part\d{3}\.mp4\.npy')

for root, dirs, files in os.walk(path_features):
    # Part files of this folder, keyed by (name, number) of the source video.
    file_groups = defaultdict(list)
    for candidate in files:
        match = pattern.match(candidate) if candidate.endswith('.npy') else None
        if match:
            file_groups[(match.group('name'), match.group('number'))].append(candidate)

    for (video_name, video_number), part_files in file_groups.items():
        # Lexicographic order equals part order because the index is zero-padded.
        part_files.sort()

        # Concatenate the parts along the first dimension.
        integrated_array = np.vstack(
            [np.load(os.path.join(root, part)) for part in part_files]
        )

        # Same naming scheme as an unsplit video's feature file.
        output_file = os.path.join(root, f"{video_name}{video_number}_x264.mp4.npy")
        np.save(output_file, integrated_array)
        print(f'Integrated array saved to {output_file}')

        # The parts are no longer needed once the merged file is written.
        for part in part_files:
            os.remove(os.path.join(root, part))
            print(f'Deleted file {part}')


#expected output
# Integrated array saved to D:\testtemp\abuse\Abuse003_x264.mp4.npy
# Deleted file Abuse003_x264_part000.mp4.npy
# Deleted file Abuse003_x264_part001.mp4.npy
# Deleted file Abuse003_x264_part002.mp4.npy
# Integrated array saved to D:\testtemp\training-normal-videos-part-1\Normal_Videos_004_x264.mp4.npy
# Deleted file Normal_Videos_004_x264_part000.mp4.npy
# Deleted file Normal_Videos_004_x264_part001.mp4.npy
# Deleted file Normal_Videos_004_x264_part002.mp4.npy

Integrated array saved to D:\testtemp\Abuse\Abuse003_x264.mp4.npy
Deleted file Abuse003_x264_part000.mp4.npy
Deleted file Abuse003_x264_part001.mp4.npy
Deleted file Abuse003_x264_part002.mp4.npy
Integrated array saved to D:\testtemp\Training-normal-videos-part-1\Normal_Videos004_x264.mp4.npy
Deleted file Normal_Videos004_x264_part000.mp4.npy
Deleted file Normal_Videos004_x264_part001.mp4.npy
Deleted file Normal_Videos004_x264_part002.mp4.npy


In [28]:
"""
    This cell reduces each merged feature file of shape (32*n, 49152) to
    (32, 49152) by re-segmenting it with 'to_segments'.
    The 'functions' cell must have been run first.
"""

# Parameters
path_features = 'D:\\testtemp'
# Only files larger than this (in KiB) are treated as merged multi-part files.
threshold_size_kb = 13000


# Walk through all subdirectories and files
for root, dirs, files in os.walk(path_features):
    for file in files:
        if not file.endswith('.npy'):
            continue

        file_path = os.path.join(root, file)
        if os.path.getsize(file_path) / 1024 <= threshold_size_kb:
            continue

        array = np.load(file_path)
        if array.shape[1] != 49152:
            print(f"File {file} does not have the expected second dimension of 49152")
            continue
        if array.shape[0] // 32 == 0:
            print(f"File {file} does not have enough data to split into chunks of 32")
            continue

        mean_seg = np.array(to_segments(array))
        print(mean_seg.shape)
        # Overwrite the merged file in place. The previous version saved to
        # this same path and then removed it, which deleted the result.
        np.save(file_path, mean_seg)
        print(f'Averaged array saved to {file_path}')


# LOADERS

In [None]:
class Normal_Loader(Dataset):
    """Dataset of pre-extracted features for normal videos.

    is_train = 1 <- train, 0 <- test

    The list file (``train_normal.txt`` or ``test_normalv2.txt``) is read from
    ``path`` and the feature arrays from ``<path>/all_c3d/<name>.npy``.
    In test mode the list is shuffled and its last 10 entries are dropped.

    Args:
        is_train: 1 for the training split, anything else for the test split.
        path: Dataset root folder (with or without a trailing separator).
        modality: Only ``"C3D"`` is supported.
    """

    def __init__(self, is_train=1, path="F:\\Window\\", modality='C3D'):
        super(Normal_Loader, self).__init__()
        self.is_train = is_train
        self.modality = modality
        self.path = path
        list_name = 'train_normal.txt' if self.is_train == 1 else 'test_normalv2.txt'
        with open(os.path.join(path, list_name), 'r') as f:
            self.data_list = f.readlines()
        if self.is_train != 1:
            random.shuffle(self.data_list)
            self.data_list = self.data_list[:-10]

    def __len__(self):
        return len(self.data_list)

    def _load_features(self, name):
        """Load ``<path>/all_c3d/<name>.npy``."""
        return np.load(os.path.join(self.path, 'all_c3d', name + '.npy'))

    def __getitem__(self, idx):
        """Return the features (train) or ``(features, gts, frames)`` (test).

        Raises:
            ValueError: If ``modality`` is not ``"C3D"``.
        """
        if self.modality != "C3D":
            # Previously this fell through and silently returned None.
            raise ValueError(f"Unsupported modality: {self.modality!r} (only 'C3D' is available)")

        # Strip the line ending explicitly; slicing off the last character
        # would truncate an entry that has no trailing newline.
        line = self.data_list[idx].rstrip('\r\n')

        if self.is_train == 1:
            return self._load_features(line)

        # Test list line format: "<name> <frames> <gts>"
        name, frames, gts = line.split(' ')[:3]
        # List files use "/" as separator; convert to the platform separator.
        name = name.replace("/", os.sep)
        return self._load_features(name), int(gts), int(frames)

class Anomaly_Loader(Dataset):
    """
    Dataset of pre-extracted feature arrays for the anomaly video lists.

    is_train = 1 <- train, 0 <- test

    ``path`` is the dataset root. It must end with a path separator because
    the feature directory is built as ``path + 'all_c3d'``. The root has to
    contain ``train_anomaly.txt`` (one feature name per line) and
    ``test_anomalyv2.txt`` (``name|frames|[v1,v2,...]`` per line).
    """
    def __init__(self, is_train=1, path="F:\\Window\\", modality='C3D'):
        super().__init__()
        self.is_train = is_train
        self.modality = modality
        self.path = path
        list_name = 'train_anomaly.txt' if self.is_train == 1 else 'test_anomalyv2.txt'
        with open(os.path.join(path, list_name), 'r') as f:
            # Keep the raw lines, but drop blank ones so a stray empty line
            # does not turn into an unloadable dataset entry.
            self.data_list = [line for line in f if line.strip()]

    def __len__(self):
        """Return the number of usable entries read from the list file."""
        return len(self.data_list)

    def __getitem__(self, idx):
        """Load the feature array for list entry ``idx``.

        Training mode returns only the array. Test mode returns
        ``(array, gts, frames)``, where ``frames`` is an ``int`` and ``gts``
        is the list of strings found between the brackets of the third
        ``|``-separated field (split on ``,``, not converted to numbers).

        Raises:
            ValueError: if ``self.modality`` is not ``"C3D"``.
        """
        if self.modality != "C3D":
            # Previously this silently returned None for other modalities.
            raise ValueError(
                "Unsupported modality {!r}; only 'C3D' is available".format(self.modality)
            )

        feature_dir = self.path + 'all_c3d'
        # Remove the line terminator explicitly so the last line of a file
        # without a trailing newline is parsed like every other line.
        line = self.data_list[idx].rstrip('\r\n')

        if self.is_train == 1:
            return np.load(os.path.join(feature_dir, line + '.npy'))

        fields = line.split('|')
        name = fields[0].replace("/", "\\")
        frames = int(fields[1])
        # Drop the surrounding brackets, then split the comma-separated values.
        gts = fields[2][1:-1].split(',')
        c3d_npy = np.load(os.path.join(feature_dir, name + '.npy'))
        return c3d_npy, gts, frames

if __name__ == '__main__':
    # Smoke check: read the first test entry of the anomaly set and print its
    # feature array, its annotation values and its frame count.
    loader2 = Anomaly_Loader(modality="C3D", is_train=0)
    a, b, c = loader2[0]
    for shown in (a, b, c):
        print(shown)

# MAIN

In [None]:
# Hyper-parameters are declared through argparse but parsed from an empty
# argument list, so the defaults below always apply in this notebook.
# Only --lr and --modality are consumed in this section; the other options are
# parsed but not used here.
parser = argparse.ArgumentParser(description='PyTorch MIL Training')
parser.add_argument('--lr', default=0.01, type=float, help='learning rate')
parser.add_argument('--w', default=0.000100000474974513, type=float, help='weight_decay')
parser.add_argument('--modality', default='C3D', type=str, help='modality')
parser.add_argument('--input_dim', default=49152, type=int, help='input_dim')
parser.add_argument('--drop', default=0.6, type=float, help='dropout_rate')
parser.add_argument('--FFC', '-r', action='store_true', help='FFC')
args = parser.parse_args(args=[])

# Best evaluation result seen so far; updated by test_abnormal().
best_auc = 0
best_fpr = None
best_tpr = None

normal_train_dataset = Normal_Loader(is_train=1, modality=args.modality)
normal_test_dataset = Normal_Loader(is_train=0, modality=args.modality)

anomaly_train_dataset = Anomaly_Loader(is_train=1, modality=args.modality)
anomaly_test_dataset = Anomaly_Loader(is_train=0, modality=args.modality)

normal_train_loader = DataLoader(normal_train_dataset, batch_size=30, shuffle=True)
# NOTE(review): both test loaders shuffle, so the anomaly/normal pairing made
# by zip() in test_abnormal() changes on every evaluation pass.
normal_test_loader = DataLoader(normal_test_dataset, batch_size=1, shuffle=True)

anomaly_train_loader = DataLoader(anomaly_train_dataset, batch_size=30, shuffle=True)
anomaly_test_loader = DataLoader(anomaly_test_dataset, batch_size=1, shuffle=True)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Move the model to the selected device instead of calling .cuda()
# unconditionally, so the CPU fallback chosen above actually works.
model = MIL_fc().to(device)

optimizer = torch.optim.NAdam(model.parameters(), lr=args.lr) #,weight_decay=args.w)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[25, 50])
criterion = MIL_loss

def train(epoch):
    """Run one training pass over the paired normal/anomaly train loaders.

    Uses the module-level ``model``, ``optimizer``, ``scheduler``,
    ``criterion``, ``device`` and the two train loaders. Each step joins one
    anomaly batch and one normal batch along dimension 1 (anomaly first),
    flattens everything but the last dimension, and optimises ``criterion``.
    Prints the epoch number and the mean loss, then advances the scheduler.

    Args:
        epoch: epoch index, used only for the progress message.
    """
    print('\nEpoch: %d' % epoch)
    model.train()
    train_loss = 0.0
    num_batches = 0
    # zip() stops at the shorter loader, so count the batches actually
    # processed rather than assuming len(normal_train_loader) of them.
    for normal_inputs, anomaly_inputs in zip(normal_train_loader, anomaly_train_loader):
        inputs = torch.cat([anomaly_inputs, normal_inputs], dim=1)
        batch_size = inputs.shape[0]
        inputs = inputs.view(-1, inputs.size(-1)).to(device)
        outputs = model(inputs.float())
        loss = criterion(outputs, batch_size)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        num_batches += 1
    # max(..., 1) keeps the report well-defined when a loader is empty.
    print('loss = {}'.format(train_loss / max(num_batches, 1)))
    scheduler.step()

def _frame_scores(segment_scores, num_frames, num_segments=32, clip_len=16):
    """Spread per-segment scores over an array with one entry per frame.

    The ``num_frames // clip_len`` whole clips are divided into
    ``num_segments`` ranges with rounded ``linspace`` boundaries; every frame
    in range ``j`` receives ``segment_scores[j]``. Frames beyond the last
    whole clip keep a score of 0.
    """
    frame_scores = np.zeros(num_frames)
    bounds = np.round(np.linspace(0, num_frames // clip_len, num_segments + 1))
    for seg in range(num_segments):
        start = int(bounds[seg]) * clip_len
        stop = int(bounds[seg + 1]) * clip_len
        frame_scores[start:stop] = segment_scores[seg]
    return frame_scores


def test_abnormal(epoch):
    """Evaluate the model on paired anomaly/normal test videos.

    For every (anomaly, normal) pair drawn from the two test loaders, the
    per-segment scores are expanded to per-frame scores, concatenated, and a
    ROC AUC is computed against the frame-level ground truth (the normal video
    is all zeros). The mean AUC over the evaluated pairs is printed. When it
    beats the module-level ``best_auc``, the model weights are written to
    ``./checkpoint/ckpt3.pth`` and ``best_auc``/``best_fpr``/``best_tpr`` are
    updated, the latter two from a ROC curve over all frames of all pairs.

    Args:
        epoch: unused; kept so the call signature matches ``train``.
    """
    model.eval()
    global best_auc, best_fpr, best_tpr
    auc = 0
    num_pairs = 0
    all_gts = []
    all_scores = []
    with torch.no_grad():
        for data, data2 in zip(anomaly_test_loader, normal_test_loader):
            # --- anomalous video ---
            inputs, gts, frames = data
            print(gts)
            print(frames)
            num_frames = int(frames[0])
            inputs = inputs.view(-1, inputs.size(-1)).to(device)
            score = model(inputs.float()).cpu().detach().numpy()
            score_list = _frame_scores(score, num_frames)

            # gts is consumed as consecutive (start, end) pairs of frame
            # numbers; the start is treated as 1-based.
            gt_list = np.zeros(num_frames)
            for k in range(len(gts) // 2):
                s = int(gts[k * 2][0])
                e = min(int(gts[k * 2 + 1][0]), num_frames)
                if s < 0 or e < 0:
                    # NOTE(review): negative values are presumably a
                    # "no event" sentinel - confirm against the list file.
                    # Used as-is they would wrap around as negative indices.
                    continue
                gt_list[max(s - 1, 0):e] = 1

            # --- normal video: ground truth is all zeros ---
            inputs2, gts2, frames2 = data2
            num_frames2 = int(frames2[0])
            inputs2 = inputs2.view(-1, inputs2.size(-1)).to(device)
            score2 = model(inputs2.float()).cpu().detach().numpy()
            score_list2 = _frame_scores(score2, num_frames2)
            gt_list2 = np.zeros(num_frames2)

            score_list3 = np.concatenate((score_list, score_list2), axis=0)
            gt_list3 = np.concatenate((gt_list, gt_list2), axis=0)

            fpr, tpr, thresholds = metrics.roc_curve(gt_list3, score_list3, pos_label=1)
            auc += metrics.auc(fpr, tpr)
            num_pairs += 1

            all_gts.append(gt_list3)
            all_scores.append(score_list3)

    # Average over the pairs actually evaluated instead of a hard-coded 140.
    auc /= max(num_pairs, 1)
    print('auc = {}'.format(auc))

    if best_auc < auc:
        print('Saving..')
        state = {
            'net': model.state_dict(),
        }
        os.makedirs('checkpoint', exist_ok=True)
        torch.save(state, './checkpoint/ckpt3.pth')
        best_auc = auc

        # Save the best ROC curve
        best_fpr, best_tpr, _ = metrics.roc_curve(np.concatenate(all_gts), np.concatenate(all_scores), pos_label=1)

# Alternate one training pass and one evaluation pass for ten epochs.
for epoch in range(10):
    train(epoch)
    test_abnormal(epoch)

# To evaluate a stored checkpoint instead of training:
# checkpoint = torch.load('./checkpoint/ckpt.pth')
# model.load_state_dict(checkpoint['net'])
# test_abnormal(1)

# Plot the best ROC curve once training has finished (skipped when no
# evaluation pass ever improved on the initial best_auc).
if not (best_fpr is None or best_tpr is None):
    roc_label = 'ROC curve (area = %0.2f)' % best_auc
    plt.figure()
    plt.plot(best_fpr, best_tpr, color='darkorange', lw=2, label=roc_label)
    # Diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic')
    plt.legend(loc="lower right")
    #plt.savefig('best_roc_curve.png')
    plt.show()