### Import Libraries

In [1]:
import numpy as np
import pandas as pd 
import os
import sys
import cv2 

import os
import datetime
import copy
import re
import yaml
import uuid
import warnings
import time
import inspect

import torch
from torch import nn, optim
from torch import nn
from torch.nn import functional as F
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import DataLoader
from torchvision.models import resnet
from torchvision.transforms import Compose, ToTensor, Normalize, Resize
from torchvision.models.resnet import ResNet, BasicBlock
from torchvision.datasets import MNIST
import tensorflow as tf
from tqdm.autonotebook import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn import metrics as mtx
from sklearn import model_selection as ms

### Setting Path 

In [2]:
# current_dir = 'home/modeling/script/'
BASE_PATH = '/home/modeling/' 
DATA_PATH = os.path.join(BASE_PATH, 'data')
VIDEO_PATH = os.path.join(DATA_PATH, 'video')
ANNOTATION_PATH = os.path.join(DATA_PATH, 'annotation')
FRAME_PATH = os.path.join(DATA_PATH, 'frame')

### Define functions 

In [14]:
def get_data_x(video_name):   # Abuse/Abuse005_.mp4 
    video = os.path.join(VIDEO_PATH, video_name)

    cap = cv2.VideoCapture(video)
    # fps 및 전체 프레임 개수 계산 
    video_w, video_h = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)

    if not cap.isOpened():
        print("[Error] video={} can not be opened.".format(video))
        sys.exit(-6)
    
    # Clip 얻기  (16 images per clip)
    frames_per_clip = 16   
    clip_row = int(total_frames // frames_per_clip)
    clip_col = frames_per_clip

    clips = [[0] * clip_col for _ in range(clip_row)] 
    
    success,image = cap.read()
    count = 0    # 0 ~ total frame 
    success = True 

    while success: 
        success, image = cap.read()
        if success: 
            try:
                image_rs = cv2.resize(image, (128, 171))
                image_rs = image_rs.astype('float32')
                image_rs /= 255
                clips[count // 16][count % 16] = image_rs   # clips[261]  -> index out of range error 
            except:
                break 
        else:
            break 
            
        if cv2.waitKey(10) == 27:
            break
    
        count += 1     
        
    print(len(clips), video_name)
    cap.release()
    
    # clip들을 segment에 배정 
    num_segments = 32 
    seg_row = num_segments
    seg_col = (len(clips) // num_segments) + 1 

    seg_total = seg_row * seg_col 
    data_x = [[0] * seg_col for _ in range(seg_row)]
    
    for idx in range(seg_total):
        try:
            data_x[idx // seg_col][idx % seg_col] = clips[idx]
        except:   # 262번째 clip 부터는 dummy clip을 집어넣어줘야 함   -> 1~261 번 중 random하게 지정 
            # print(video_name)
            data_x[idx // seg_col][idx % seg_col] = clips[np.random.randint(clip_row)]  

    return data_x    # train / test data (x) 

### train, test dataset 구성 

In [15]:
columns = ['video_name']

In [16]:
train_df = pd.read_csv(os.path.join(ANNOTATION_PATH, 'Anomaly_Train.csv'), header=None,names=columns) 
test_df = pd.read_csv(os.path.join(ANNOTATION_PATH, 'Anomaly_Test.csv'), header=None,names=columns) 

In [17]:
train_df.head(5)

Unnamed: 0,video_name
0,Arson/Arson001_x264.mp4
1,Arson/Arson002_x264.mp4
2,Arson/Arson003_x264.mp4
3,Arson/Arson005_x264.mp4
4,Arson/Arson006_x264.mp4


In [18]:
anomal_list = ['Arson', 'Assault', 'Stealing', 'Normal'] 

#### Train Data

In [19]:
x_train_tmp = [0] * len(train_df)
y_train_tmp = [0] * len(train_df) 

In [20]:
for idx, data in enumerate(train_df['video_name']):
    ab_type = data.split('/')[0]
#     print(idx, data)
    x_data = get_data_x(data)
    x_data = np.array(x_data, dtype='object')
    np.save(os.path.join(DATA_PATH, 'data/train', 'train' + str(idx) + '.npy'), x_data)
    y_train_tmp[idx] = anomal_list.index(ab_type)

261 Arson/Arson001_x264.mp4



KeyboardInterrupt



#### Test Data

In [None]:
x_test_tmp = []
y_test_tmp = [] 

In [None]:
for data in test_df['video_name']:
    ab_type = data.split('/')[0]
    x_test_tmp.append(data_x(data))
    y_test_tmp.append(anomal_list.index(ab_type))

x_test = np.array(x_test_tmp)
y_test = np.array(y_test_tmp)

In [None]:
#### 오늘 안되면 train data 10개로 bag feature 추출 부분 보기 

In [5]:
train_df.head(100)

NameError: name 'train_df' is not defined

In [33]:
np.shape(train_x)

(32, 9, 16, 240, 320, 3)

In [38]:
y_train

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2])

In [34]:
instance_index_label = [(i, y_train[i]) for i in range(len(y_train))]
instance_index_label_test = [(i, y_test[i]) for i in range(len(y_test))]

In [39]:
instance_index_label_test

[(0, 0),
 (1, 0),
 (2, 0),
 (3, 0),
 (4, 0),
 (5, 0),
 (6, 0),
 (7, 0),
 (8, 0),
 (9, 1),
 (10, 1),
 (11, 1),
 (12, 2),
 (13, 2),
 (14, 2),
 (15, 2),
 (16, 2)]

### Bag 구성 

In [4]:
len(instance_index_label)

NameError: name 'instance_index_label' is not defined

In [74]:
from typing import List, Dict, Tuple
from collections import Counter 

def data_generation(instance_index_label: List[Tuple]) -> List[Dict]:
    """
    bags: {key1: [ind1, ind2, ind3],
           key2: [ind1, ind2, ind3, ind4, ind5],
           ...}
    bag_lbls:
        {key1: 0,
         key2: 1,
         ...}
    """
    bag_size = np.random.randint(3, 8, size=len(instance_index_label)//5)
    data_cp = copy.copy(instance_index_label)
    np.random.shuffle(data_cp)
    bags = {}
    bags_per_instance_labels = {}
    bags_labels = {}
    for bag_ind, size in enumerate(bag_size):
        bags[bag_ind] = []
        bags_per_instance_labels[bag_ind] = []
        try:
            for _ in range(size):
                inst_ind, lbl = data_cp.pop()
                bags[bag_ind].append(inst_ind)
                # simplfy, just use a temporary variable instead of bags_per_instance_labels
                bags_per_instance_labels[bag_ind].append(lbl)
            print(bags_per_instance_labels[bag_ind])
            bags_labels[bag_ind] = bag_label_from_instance_labels(bags_per_instance_labels[bag_ind])
        except:
            break
    return bags, bags_labels

def bag_label_from_instance_labels(instance_labels):
    count_items = Counter(instance_labels)
    return count_items.most_common(n=1)[0][0]

In [75]:
bag_indices, bag_labels = data_generation(instance_index_label)
# bag_features = {kk: torch.Tensor(feature_array[inds]) for kk, inds in bag_indices.items()}

[2, 2, 0, 1]
[2, 2, 2]
[0, 2, 1, 2]
[0, 2, 0, 2, 2]
[1, 1, 1, 2, 1, 1]
[0, 2, 2, 2, 2, 1, 0]
[0, 1, 2, 1]
[1, 2, 2, 2, 1]
[2, 2, 2, 1]
[2, 2, 0, 2, 2, 1]
[1, 2, 1, 2, 1]
[2, 0, 0, 2, 2, 2]
[0, 2, 2, 2, 0, 0]
[2, 2, 1, 2]
[0, 1, 0, 2, 0, 1, 2]
[0, 2, 0, 2]
[2, 1, 2, 2]
[2, 1, 2, 2, 2]
[0, 2, 0, 2, 2, 2, 2]
[1, 2, 2, 1]
[2, 2, 2, 0, 2, 2, 0]
[2, 2, 0]
[1, 2, 0, 0, 1, 2]
[2, 1, 1]
[2, 1, 2]
[0, 2, 2, 2, 1]
[1, 2, 0, 1, 2]
[0, 2, 2, 0, 0, 2]
[2, 2, 0, 1, 1]
[1, 0, 2, 2]
[2, 1, 1]
[1, 2, 2, 0]
[2, 0, 2]
[2, 2, 0, 1]
[1, 2, 2, 2, 0, 0, 2]
[0, 1, 1, 0]


In [79]:
type(bag_labels)

dict

In [80]:
bag_indices

{0: [148, 99, 6, 63],
 1: [88, 126, 124],
 2: [20, 98, 74, 168],
 3: [3, 120, 19, 107, 170],
 4: [47, 42, 43, 108, 86, 70],
 5: [36, 147, 175, 93, 109, 72, 30],
 6: [25, 81, 136, 87],
 7: [44, 157, 97, 174, 69],
 8: [151, 142, 128, 82],
 9: [112, 156, 27, 154, 155, 45],
 10: [58, 134, 79, 178, 62],
 11: [131, 9, 11, 160, 159, 133],
 12: [8, 96, 144, 114, 40, 0],
 13: [117, 106, 59, 161],
 14: [18, 84, 1, 152, 33, 67, 127],
 15: [12, 113, 23, 176],
 16: [135, 41, 122, 150],
 17: [101, 80, 121, 138, 172],
 18: [35, 100, 32, 130, 105, 165, 125],
 19: [76, 102, 104, 48],
 20: [181, 132, 140, 29, 173, 129, 37],
 21: [94, 177, 28],
 22: [50, 90, 5, 22, 75, 169],
 23: [103, 73, 68],
 24: [110, 65, 141],
 25: [26, 92, 179, 164, 51],
 26: [46, 146, 38, 71, 171],
 27: [14, 143, 95, 2, 31, 163],
 28: [123, 119, 4, 52, 83],
 29: [54, 34, 166, 139],
 30: [182, 55, 56],
 31: [53, 137, 116, 17],
 32: [111, 16, 167],
 33: [115, 158, 21, 64],
 34: [78, 162, 180, 91, 39, 13, 145],
 35: [24, 85, 49, 7]}

In [81]:
bag_labels

{0: 2,
 1: 2,
 2: 2,
 3: 2,
 4: 1,
 5: 2,
 6: 1,
 7: 2,
 8: 2,
 9: 2,
 10: 1,
 11: 2,
 12: 0,
 13: 2,
 14: 0,
 15: 0,
 16: 2,
 17: 2,
 18: 2,
 19: 1,
 20: 2,
 21: 2,
 22: 1,
 23: 1,
 24: 2,
 25: 2,
 26: 1,
 27: 0,
 28: 2,
 29: 2,
 30: 1,
 31: 2,
 32: 2,
 33: 2,
 34: 2,
 35: 0}