In [32]:
import os
import shutil
import cv2
import math
import random
import numpy as np
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import torch
from C3D_model import C3D
from config import model_path, output_dir, video_dir, output_name
from torch.autograd import Variable
from process_feature import resize_feature_to_n_rows
import tensorflow as tf
from model import create_model
from tensorflow.keras.utils import to_categorical

In [2]:
IMAGE_HEIGHT, IMAGE_WIDTH = 112, 112
SEQUENCE_LENGTH = 16 # number of frames to be fed to model
DATASET_DIR = r"C:\Users\PC MY TU\Desktop\Mobi_LSTM\Real Life Violence Dataset"
CLASSES_LIST = ["NonViolence", "Violence"]

In [6]:
def frames_extraction(video_path):
    frames_list = []

    video_reader = cv2.VideoCapture(video_path)
    
    video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

    skip_frames_window = max(int(video_frames_count/SEQUENCE_LENGTH), 1)
 
    for frame_counter in range(SEQUENCE_LENGTH):
        # set frame position
        video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)
  
        success, frame = video_reader.read() 
 
        if not success:
            break
 
        resized_frame = cv2.resize(frame, (IMAGE_HEIGHT, IMAGE_WIDTH))
        
        normalized_frame = resized_frame / 255
        
        frames_list.append(normalized_frame)
 
    video_reader.release()
 
    return frames_list

In [7]:
def create_dataset():
    features = []
    labels = []
    video_files_paths = []

    for class_index, class_name in enumerate(CLASSES_LIST):
        print(f'Extracting Data of Class: {class_name}')
        
        files_list = os.listdir(os.path.join(DATASET_DIR, class_name))
        
        for file_name in files_list:
            video_file_path = os.path.join(DATASET_DIR, class_name, file_name)
 
            frames = frames_extraction(video_file_path)
 
            # check if the extracted frames are equal to the SEQUENCE_LENGTH specified.
            # so ignore the vides having frames less than the SEQUENCE_LENGTH.
            if len(frames) == SEQUENCE_LENGTH:
                features.append(frames)
                labels.append(class_index)
                video_files_paths.append(video_file_path)
 
    features = np.asarray(features)
    labels = np.array(labels)  

    return features, labels, video_files_paths

In [8]:
# create dataset.
features, labels, video_files_paths = create_dataset()

Extracting Data of Class: NonViolence
Extracting Data of Class: Violence


In [9]:
# save the extracted data
np.save("features_112x112.npy", features)

In [27]:
features_np = np.load(r"C:\Users\PC MY TU\Desktop\Mobi_LSTM\features\features_112x112.npy")
labels = np.load(r"C:\Users\PC MY TU\Desktop\Mobi_LSTM\features\labels.npy")
video_files_paths = np.load(r"C:\Users\PC MY TU\Desktop\Mobi_LSTM\features\video_files_paths.npy")

In [6]:
# convert labels into one-hot vectors
encoded_labels = to_categorical(labels)

In [39]:
imgs = features_np[0]
imgs = np.expand_dims(imgs, axis=0)
print(imgs.shape)

(1, 16, 112, 112, 3)


In [33]:
c3d = C3D(487)
c3d.load_state_dict(torch.load(model_path))
classifier = create_model((32, 4096))
classifier.load_weights(r"C:\Users\PC MY TU\Desktop\CS420\trained_2048_512_2.weights.h5")

  c3d.load_state_dict(torch.load(model_path))


In [40]:
imgs = np.array(imgs, dtype="float32")
imgs = torch.from_numpy(np.float32(imgs.transpose(0, 4, 1, 2, 3)))
imgs = Variable(imgs)
_, batch_output = c3d(imgs, 6)
batch_feature  = (batch_output.data).cpu()
features = batch_feature.numpy()
features = resize_feature_to_n_rows(features)
features = tf.convert_to_tensor([features], dtype=tf.float32)
print(features.shape)

(32, 4096)


In [41]:
y_pred = classifier.predict(features)
conf_score = y_pred[0][0]
print(f"Confidence: {conf_score:.4f}")

ValueError: Exception encountered when calling Sequential.call().

[1mCannot take the length of shape with unknown rank.[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=<unknown>, dtype=float32)
  • training=False
  • mask=None

In [15]:
x_train, x_temp, y_train, y_temp = train_test_split(features, 
                                                    encoded_labels,
                                                    stratify=encoded_labels,
                                                    test_size=0.3,
                                                    shuffle=True, 
                                                    random_state=2)

x_test, x_val, y_test, y_val = train_test_split(x_temp, 
                                                y_temp, 
                                                stratify=y_temp,
                                                test_size=0.5,
                                                shuffle=True, 
                                                random_state=2)
print(x_train.shape)
print(y_train.shape)
print(x_val.shape)
print(y_val.shape)
print(x_test.shape)
print(y_test.shape)

(1400, 16, 112, 112, 3)
(1400, 2)
(300, 16, 112, 112, 3)
(300, 2)
(300, 16, 112, 112, 3)
(300, 2)
