<a href="https://colab.research.google.com/github/Mona1811k/Anomaly-Detection-in-CCTV-Footage-using-Deep-Learning-and-with-Alerting-Sytsem/blob/main/smart_ser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from IPython.display import clear_output,Video
import keras
import numpy as np
import cv2

In [None]:
# Train / Test folders of the SCVD dataset as mounted under /kaggle/input.
scvd_train_dir = (
    '/kaggle/input/smartcity-cctv-violence-detection-dataset-scvd'
    '/SCVD/SCVD_converted_sec_split/Train'
)
scvd_test_dir = (
    '/kaggle/input/smartcity-cctv-violence-detection-dataset-scvd'
    '/SCVD/SCVD_converted_sec_split/Test'
)
# Class names, one per sub-folder.
scvd_classes = [
    'Normal',
    'Violence',
    'Weaponized',
]

In [None]:
# Root folder that is scanned for videos, and the class sub-folders expected inside it.
base_dir = (
    '/kaggle/input/smartcity-cctv-violence-detection-dataset-scvd'
    '/SCVD/SCVD_converted_sec_split/Train'
)
categories = [
    'Normal',
    'Violence',
    'Weaponized',
]

In [None]:
import os

# Function to count videos in each category
def count_videos(base_dir, categories, extensions=('.mp4', '.avi', '.mkv')):
    """Count the video files found directly inside each category folder.

    Args:
        base_dir: Directory that contains one sub-directory per category.
        categories: Iterable of category (sub-directory) names to inspect.
        extensions: File-name suffixes that identify a video. Matching is
            case-sensitive and defaults to the suffixes used by this notebook.

    Returns:
        dict mapping every requested category to its number of matching files.
        A category whose directory is missing (or is not a directory) is
        reported on stdout and counted as 0.
    """
    suffixes = tuple(extensions)  # str.endswith needs a tuple, not a list
    video_count = {}

    for category in categories:
        category_path = os.path.join(base_dir, category)

        # isdir (rather than exists) so that a stray regular file with the
        # category's name is treated as "missing" instead of making
        # os.listdir raise NotADirectoryError.
        if not os.path.isdir(category_path):
            print(f"Directory for category '{category}' not found.")
            video_count[category] = 0
            continue

        # Count the entries whose name carries one of the video suffixes
        video_count[category] = sum(
            1 for f in os.listdir(category_path) if f.endswith(suffixes)
        )

    return video_count

# Tally the videos of every class under base_dir, then report one line per class
video_counts = count_videos(base_dir=base_dir, categories=categories)

print("Video counts per category:")
for category in video_counts:
    count = video_counts[category]
    print(f"{category}: {count} videos")


Video counts per category:
Normal: 872 videos
Violence: 970 videos
Weaponized: 832 videos


In [None]:
# Side length in pixels: the default resize target in load_video and the
# spatial input size given to the InceptionV3 feature extractor.
IMG_SIZE = 224
# NOTE(review): BATCH_SIZE and EPOCHS are not referenced by the visible cells;
# the training call passes literal batch_size=8 and epochs=200. Confirm which
# values are intended.
BATCH_SIZE = 64
EPOCHS = 50

# Number of frame slots kept per video in the feature and mask arrays; videos
# with more frames are truncated to this many.
MAX_SEQ_LENGTH = 20
# Width of the per-frame feature vector stored in each frame slot.
NUM_FEATURES = 2048

In [None]:
# Class name -> integer id, numbered in the order the names are listed here
label_mapping = {
    name: class_id
    for class_id, name in enumerate(("Normal", "Violence", "Weaponized"))
}


In [None]:
def crop_center_square(frame):
    """Return the centred square crop of ``frame``.

    The square's side equals the smaller of the frame's first two dimensions
    (rows, columns); any trailing dimensions are kept as they are.
    """
    height, width = frame.shape[:2]
    side = min(height, width)
    # Offsets that centre the square along each axis
    top = height // 2 - side // 2
    left = width // 2 - side // 2
    rows = slice(top, top + side)
    cols = slice(left, left + side)
    return frame[rows, cols]


def load_video(path, max_frames=0, resize=(IMG_SIZE, IMG_SIZE)):
    """Read a video into an array of centre-cropped, resized frames.

    Args:
        path: Location of the video, passed to ``cv2.VideoCapture``.
        max_frames: Stop once this many frames were collected. With the
            default 0 the count check never matches, so the whole video is read.
        resize: Target size handed to ``cv2.resize`` for every frame.

    Returns:
        ``np.array`` built from the collected frames, with the three colour
        channels reordered as 2, 1, 0 (BGR -> RGB for frames in OpenCV's
        default channel order). No readable frame yields an empty array.
    """
    capture = cv2.VideoCapture(path)
    collected = []
    try:
        keep_reading = True
        while keep_reading:
            ok, raw = capture.read()
            if ok:
                square = crop_center_square(raw)
                resized = cv2.resize(square, resize)
                collected.append(resized[:, :, [2, 1, 0]])
                # Checked after appending, so max_frames=0 never stops the loop
                keep_reading = len(collected) != max_frames
            else:
                # End of stream or unreadable frame
                keep_reading = False
    finally:
        # Always free the capture handle, even if frame processing fails
        capture.release()
    return np.array(collected)

In [None]:
def build_feature_extractor():
    """Build a Keras model that turns one frame into one feature vector.

    ImageNet-pretrained InceptionV3 (classification head removed, global
    average pooling on top) is wrapped behind its own ``preprocess_input``,
    so the returned model takes frames of shape (IMG_SIZE, IMG_SIZE, 3)
    directly.

    Returns:
        ``keras.Model`` named "feature_extractor".
    """
    frame_shape = (IMG_SIZE, IMG_SIZE, 3)
    backbone = keras.applications.InceptionV3(
        input_shape=frame_shape,
        include_top=False,
        pooling="avg",
        weights="imagenet",
    )

    raw_frames = keras.Input(frame_shape)
    # Apply InceptionV3's own preprocessing inside the model graph
    scaled_frames = keras.applications.inception_v3.preprocess_input(raw_frames)
    features = backbone(scaled_frames)

    return keras.Model(raw_frames, features, name="feature_extractor")


# Build the extractor once at module level; prepare_all_videos reads this
# global name when it computes per-frame features.
feature_extractor = build_feature_extractor()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [None]:
def prepare_all_videos(df, root_dir):
    """Compute per-frame features and frame-validity masks for every video in ``df``.

    Args:
        df: DataFrame with an ``original_path`` column (path of each video,
            used as-is) and a ``label`` column whose values are looked up in
            ``label_mapping``.
        root_dir: Unused; kept so existing call sites keep working.

    Returns:
        ``((frame_features, frame_masks), labels)`` where

        * ``frame_features`` is float32 of shape
          (len(df), MAX_SEQ_LENGTH, NUM_FEATURES); slots beyond a video's
          length stay zero,
        * ``frame_masks`` is bool of shape (len(df), MAX_SEQ_LENGTH);
          True marks a slot that holds a real frame,
        * ``labels`` is the array of mapped label ids, in row order.
    """
    num_samples = len(df)
    video_paths = df["original_path"].values
    labels = df["label"].map(label_mapping).values

    # Zero-initialised outputs: zeros / False double as padding for short videos
    frame_masks = np.zeros(shape=(num_samples, MAX_SEQ_LENGTH), dtype="bool")
    frame_features = np.zeros(
        shape=(num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
    )

    for idx, path in enumerate(video_paths):
        # Only the first MAX_SEQ_LENGTH frames are ever used, so stop decoding
        # there instead of loading the whole video into memory.
        frames = load_video(path, max_frames=MAX_SEQ_LENGTH)
        length = min(MAX_SEQ_LENGTH, len(frames))
        if length == 0:
            # Unreadable or empty video: leave its row all-zero and fully masked
            continue

        # One batched forward pass per video instead of one predict() call per
        # frame; verbose=0 keeps the notebook output free of progress bars.
        frame_features[idx, :length] = feature_extractor.predict(
            frames[:length], verbose=0
        )
        frame_masks[idx, :length] = True  # True = real frame, False = padding

    return (frame_features, frame_masks), labels


In [None]:
import os
import pandas as pd

# Base directory for the dataset
base_dir = '/kaggle/input/smartcity-cctv-violence-detection-dataset-scvd/SCVD/SCVD_converted_sec_split/Train'

# Define the class labels
categories = ['Normal', 'Violence', 'Weaponized']

# Initialize a list to store metadata (one record per video)
metadata = []

# Traverse through each category
for category in categories:
    category_path = os.path.join(base_dir, category)
    # isdir rather than exists: a stray file with the category's name would
    # otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(category_path):
        print(f"Category directory '{category}' not found!")
        continue

    # os.listdir returns entries in arbitrary order. Sort them so the row
    # order, and therefore the seeded train/test split below, is reproducible.
    video_files = sorted(
        f for f in os.listdir(category_path) if f.endswith(('.mp4', '.avi', '.mkv'))
    )
    for video_file in video_files:
        # Create full path to the video
        video_path = os.path.join(category_path, video_file)
        # Append metadata
        metadata.append({'original_path': video_path, 'label': category})

# Fail with a clear message instead of a KeyError on the missing 'label' column
if not metadata:
    raise FileNotFoundError(f"No video files found under '{base_dir}'")

# Create a DataFrame from the metadata
train_sample_metadata = pd.DataFrame(metadata)

# Display the first few rows
print("Sample of train_sample_metadata:")
print(train_sample_metadata.head())

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split

# Stratified 90/10 split with a fixed seed; class proportions are preserved
Train_set, Test_set = train_test_split(
    train_sample_metadata,
    test_size=0.1,
    random_state=42,
    stratify=train_sample_metadata['label']
)

print("Train and Test set shapes:")
print(Train_set.shape, Test_set.shape)


Sample of train_sample_metadata:
                                       original_path   label
0  /kaggle/input/smartcity-cctv-violence-detectio...  Normal
1  /kaggle/input/smartcity-cctv-violence-detectio...  Normal
2  /kaggle/input/smartcity-cctv-violence-detectio...  Normal
3  /kaggle/input/smartcity-cctv-violence-detectio...  Normal
4  /kaggle/input/smartcity-cctv-violence-detectio...  Normal
Train and Test set shapes:
(2406, 2) (268, 2)


In [None]:
# Run feature extraction over both splits (the second argument is only a tag)
train_data, train_labels = prepare_all_videos(df=Train_set, root_dir="train")
test_data, test_labels = prepare_all_videos(df=Test_set, root_dir="test")

# train_data is a (features, masks) pair; report the shape of each part
print(
    f"Frame features in train set: {train_data[0].shape}",
    f"Frame masks in train set: {train_data[1].shape}",
    sep="\n",
)

In [None]:
# Shapes of the extracted training arrays: features first, then masks
for report_line in (
    f"Frame features in train set: {train_data[0].shape}",
    f"Frame masks in train set: {train_data[1].shape}",
):
    print(report_line)

Frame features in train set: (2406, 20, 2048)
Frame masks in train set: (2406, 20)


In [None]:
# Two inputs per video: the per-frame feature matrix and the boolean mask that
# marks which frame slots hold real frames.
frame_features_input = keras.Input(shape=(MAX_SEQ_LENGTH, NUM_FEATURES))
mask_input = keras.Input(shape=(MAX_SEQ_LENGTH,), dtype="bool")

# Refer to the following tutorial to understand the significance of using `mask`:
# https://keras.io/api/layers/recurrent_layers/gru/
sequence_states = keras.layers.GRU(16, return_sequences=True)(
    frame_features_input, mask=mask_input
)
# Second GRU reduces the sequence to a single vector per video
video_summary = keras.layers.GRU(8)(sequence_states)
regularised = keras.layers.Dropout(0.4)(video_summary)
x = keras.layers.Dense(8, activation="relu")(regularised)
# Three-way softmax head: one probability per class
output = keras.layers.Dense(3, activation="softmax")(x)

model = keras.Model(inputs=[frame_features_input, mask_input], outputs=output)

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer class ids, as categorical cross-entropy expects
train_labels_one_hot, test_labels_one_hot = (
    to_categorical(label_ids, num_classes=3)
    for label_ids in (train_labels, test_labels)
)

# Re-wrap the existing input/output tensors in a Model and compile it again.
# The layers are the ones created earlier, so this does not reset their weights.
model = keras.Model(inputs=[frame_features_input, mask_input], outputs=output)
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Save weights only, and only when the checkpoint's monitored metric improves
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath='./best_weights.weights.h5',
    save_best_only=True,
    save_weights_only=True,
)

# Train on (features, masks) pairs; the held-out split is used for validation
history = model.fit(
    x=list(train_data),
    y=train_labels_one_hot,
    batch_size=8,
    epochs=200,
    validation_data=(list(test_data), test_labels_one_hot),
    callbacks=[checkpoint],
)


Epoch 1/200
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.5957 - loss: 0.8041 - val_accuracy: 0.5261 - val_loss: 0.9334
Epoch 2/200
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5907 - loss: 0.7907 - val_accuracy: 0.5187 - val_loss: 0.8993
Epoch 3/200
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5883 - loss: 0.7859 - val_accuracy: 0.5299 - val_loss: 0.8955
Epoch 4/200
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5568 - loss: 0.8040 - val_accuracy: 0.5336 - val_loss: 0.8694
Epoch 5/200
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5892 - loss: 0.7793 - val_accuracy: 0.5224 - val_loss: 0.8753
Epoch 6/200
[1m301/301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5968 - loss: 0.7613 - val_accuracy: 0.5261 - val_loss: 0.8852
Epoch 7/200
[1m301/30