In [1]:
# Cell 1: Importing necessary libraries

import cv2
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Bidirectional, BatchNormalization, Dropout, Dense
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from sklearn.metrics import classification_report


2024-03-24 10:03:37.187664: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-24 10:03:37.187796: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-24 10:03:37.363838: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Cell 2: Loading the InceptionV3 model for feature extraction

def load_pretrained_model():
    pretrained_model = InceptionV3()
    # Extract features from the second-to-last layer of the InceptionV3 model
    pretrained_model = Model(inputs=pretrained_model.input, outputs=pretrained_model.layers[-2].output)
    return pretrained_model

pretrained_model = load_pretrained_model()
pretrained_model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels.h5
[1m96112376/96112376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [3]:
# Cell 3: Function for extracting features from frames

def extract_frame_features(frame, pretrained_model):
    # Expand the dimensions of the frame for model compatibility
    img = np.expand_dims(frame, axis=0)
    # Use the pre-trained feature extraction model to obtain the feature vector
    feature_vector = pretrained_model.predict(img, verbose=0)
    # Return the extracted feature vector
    return feature_vector


In [4]:
# Cell 4: Function for extracting frames from videos

def extract_video_frames(video_path, sequence_length=16, image_width=299, image_height=299):
    frames_list = []
    # Open the video file for reading
    video_reader = cv2.VideoCapture(video_path)
    # Get the total number of frames in the video
    video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
    # Calculate the number of frames to skip in order to achieve the desired sequence length
    skip_frames_window = max(int(video_frames_count / sequence_length), 1)

    # Loop through each frame in the sequence
    for frame_counter in range(sequence_length):
        # Set the position of the video reader to the current frame
        video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)
        # Read the frame
        success, frame = video_reader.read()
        # Break if unable to read the frame
        if not success:
            break
        # Convert the frame to RGB and resize it
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resized_frame = cv2.resize(frame_rgb, (image_height, image_width))
        # Append the resized frame to the frames list
        frames_list.append(resized_frame)
    
    # Release the video reader
    video_reader.release()
    # Return the list of frames
    return frames_list


In [5]:
# Cell 5: Function for extracting features from videos

def extract_features_from_videos(video_paths, total_videos, pretrained_model):
    all_video_features = []
    # Loop through each video
    for pos in tqdm(range(total_videos)):
        frames_list = []
        # Extract frames from the current video
        frames = extract_video_frames(video_paths[pos])
        # Extract features from each frame
        for frame in frames:
            features = extract_frame_features(frame, pretrained_model)
            frames_list.append(features)
        all_video_features.append(frames_list)
    return np.array(all_video_features)


In [6]:
# Cell 6: Loading features and preparing data for model training

# Define violence and non-violence directories
violence_dir = '/kaggle/input/real-life-violence-situations-dataset/Real Life Violence Dataset/Violence'
nonviolence_dir = '/kaggle/input/real-life-violence-situations-dataset/Real Life Violence Dataset/NonViolence'

# Create paths to individual videos
violence_path = [os.path.join(violence_dir, name) for name in os.listdir(violence_dir)]
nonviolence_path = [os.path.join(nonviolence_dir, name) for name in os.listdir(nonviolence_dir)]

# Extract features from videos
violence_features = extract_features_from_videos(violence_path[:500], len(violence_path[:500]), pretrained_model)
non_violence_features = extract_features_from_videos(nonviolence_path[:500], len(nonviolence_path[:500]), pretrained_model)

# Save extracted features
np.save('/kaggle/working/violence_features.npy', violence_features)
np.save('/kaggle/working/non_violence_features.npy', non_violence_features)


100%|██████████| 500/500 [37:21<00:00,  4.48s/it]
 99%|█████████▉| 497/500 [32:03<00:15,  5.03s/it][h264 @ 0x56b80d24f080] mb_type 104 in P slice too large at 98 31
[h264 @ 0x56b80d24f080] error while decoding MB 98 31
[h264 @ 0x56b80d24f080] mb_type 104 in P slice too large at 98 31
[h264 @ 0x56b80d24f080] error while decoding MB 98 31
[h264 @ 0x56b80d24f080] mb_type 104 in P slice too large at 98 31
[h264 @ 0x56b80d24f080] error while decoding MB 98 31
[h264 @ 0x56b80d24f080] mb_type 104 in P slice too large at 98 31
[h264 @ 0x56b80d24f080] error while decoding MB 98 31
100%|██████████| 500/500 [32:19<00:00,  3.88s/it]


In [7]:
# Cell 7: Loading features and labels for model training

# Load features and labels
violence_features = np.load('/kaggle/working/violence_features.npy')
non_violence_features = np.load('/kaggle/working/non_violence_features.npy')

# Creating labels
violence_labels = np.zeros(len(violence_features))
non_violence_labels = np.ones(len(non_violence_features))

# Combining features and labels
X = np.concatenate([violence_features, non_violence_features], axis=0)
y = np.concatenate([violence_labels, non_violence_labels], axis=0)

# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=32)

# Reshaping data for LSTM input
X_train_reshaped = X_train.reshape((X_train.shape[0], 16, 2048))
X_test_reshaped = X_test.reshape((X_test.shape[0], 16, 2048))


In [8]:
# Cell 8: Building and training the LSTM model

inputs = Input(shape=(16, 2048))
x = Bidirectional(LSTM(200, return_sequences=True))(inputs)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)
x = Bidirectional(LSTM(100))(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)
x = Dense(200, activation='relu')(x)
outputs = Dense(1, activation='sigmoid')(x)

model = Model(inputs=inputs, outputs=outputs)
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train_reshaped, y_train, validation_data=(X_test_reshaped, y_test), epochs=5, batch_size=32)


Epoch 1/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 258ms/step - accuracy: 0.5484 - loss: 0.7714 - val_accuracy: 0.6100 - val_loss: 0.6761
Epoch 2/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 214ms/step - accuracy: 0.5842 - loss: 0.6937 - val_accuracy: 0.5050 - val_loss: 0.7062
Epoch 3/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 225ms/step - accuracy: 0.6508 - loss: 0.6337 - val_accuracy: 0.5100 - val_loss: 0.7031
Epoch 4/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 216ms/step - accuracy: 0.6726 - loss: 0.6102 - val_accuracy: 0.5100 - val_loss: 0.7582
Epoch 5/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 214ms/step - accuracy: 0.6475 - loss: 0.6388 - val_accuracy: 0.5100 - val_loss: 0.7206


<keras.src.callbacks.history.History at 0x7cd974d19090>

In [9]:
# Cell 9: Evaluating the model

accuracy = model.evaluate(X_test_reshaped, y_test)
print("Test Accuracy:", accuracy[1])


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 71ms/step - accuracy: 0.5255 - loss: 0.6928
Test Accuracy: 0.5099999904632568


In [10]:
# Cell 10: Testing the model on sample videos

# Extract features from sample test videos
violence_features_test = extract_features_from_videos(violence_path[500:510], len(violence_path[500:510]), pretrained_model)
non_violence_features_test = extract_features_from_videos(nonviolence_path[500:510], len(nonviolence_path[500:510]), pretrained_model)

# Reshape the features for LSTM input
test_violence = violence_features_test.reshape((violence_features_test.shape[0], 16, 2048))
test_non_violence = non_violence_features_test.reshape((non_violence_features_test.shape[0], 16, 2048))

# Define class names
class_names = ['violence', 'non_violence']

# Predictions for test videos
predicted_non_violence = [class_names[1] if i > 0.5 else class_names[0] for i in model.predict(test_non_violence)]
predicted_violence = [class_names[1] if i > 0.5 else class_names[0] for i in model.predict(test_violence)]

print("Predicted labels for non-violence videos:", predicted_non_violence)
print("Predicted labels for violence videos:", predicted_violence)


100%|██████████| 10/10 [00:52<00:00,  5.21s/it]
100%|██████████| 10/10 [00:42<00:00,  4.26s/it]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 763ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
Predicted labels for non-violence videos: ['non_violence', 'non_violence', 'non_violence', 'non_violence', 'non_violence', 'non_violence', 'non_violence', 'non_violence', 'non_violence', 'non_violence']
Predicted labels for violence videos: ['non_violence', 'non_violence', 'non_violence', 'non_violence', 'non_violence', 'non_violence', 'non_violence', 'non_violence', 'non_violence', 'non_violence']
