In [1]:
!pip install tensorflow opencv-python



In [2]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
import os
import glob


2025-05-09 15:39:21.230308: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746805161.446801      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746805161.506972      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
import os
import cv2
from tqdm import tqdm

# Paths to violence and non-violence video folders
violence_dir = '/kaggle/input/real-life-violence-situations-dataset/Real Life Violence Dataset/Violence'
nonviolence_dir = '/kaggle/input/real-life-violence-situations-dataset/Real Life Violence Dataset/NonViolence'

# Output directories for frames
output_base = '/kaggle/working/frames'
violence_output = os.path.join(output_base, 'Violence')
nonviolence_output = os.path.join(output_base, 'NonViolence')

# Create output directories
os.makedirs(violence_output, exist_ok=True)
os.makedirs(nonviolence_output, exist_ok=True)

# Function to extract N evenly spaced frames from a video
def extract_frames(video_path, output_folder, video_label, num_frames=5, resize_shape=(224, 224)):
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Get frame indices to extract
    if total_frames < num_frames or total_frames == 0:
        return  # Skip videos that are too short or corrupt
    frame_indices = [int(i * total_frames / num_frames) for i in range(num_frames)]

    frame_count = 0
    saved = 0
    success = True

    while success and saved < num_frames:
        success, frame = cap.read()
        if frame_count in frame_indices:
            frame = cv2.resize(frame, resize_shape)
            video_name = os.path.basename(video_path).split('.')[0]
            frame_filename = f"{video_name}_frame_{saved}.jpg"
            frame_path = os.path.join(output_folder, frame_filename)
            cv2.imwrite(frame_path, frame)
            saved += 1
        frame_count += 1

    cap.release()

# Process all videos
print("Extracting frames from Violence videos...")
for video_file in tqdm(os.listdir(violence_dir)):
    video_path = os.path.join(violence_dir, video_file)
    extract_frames(video_path, violence_output, 'Violence')

print("Extracting frames from NonViolence videos...")
for video_file in tqdm(os.listdir(nonviolence_dir)):
    video_path = os.path.join(nonviolence_dir, video_file)
    extract_frames(video_path, nonviolence_output, 'NonViolence')


Extracting frames from Violence videos...


100%|██████████| 1000/1000 [02:14<00:00,  7.44it/s]


Extracting frames from NonViolence videos...


 50%|████▉     | 497/1000 [00:28<00:41, 12.10it/s][h264 @ 0x1d8992c0] mb_type 104 in P slice too large at 98 31
[h264 @ 0x1d8992c0] error while decoding MB 98 31
100%|██████████| 1000/1000 [01:06<00:00, 15.14it/s]


In [6]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Paths
source_dir = '/kaggle/working/frames'
train_dir = '/kaggle/working/data/train'
val_dir = '/kaggle/working/data/val'

# Create the train and validation directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Subdirectories for each class (Violence/NonViolence)
for subdir in ['Violence', 'NonViolence']:
    os.makedirs(os.path.join(train_dir, subdir), exist_ok=True)
    os.makedirs(os.path.join(val_dir, subdir), exist_ok=True)

# Helper function to split files
def split_data(source_class_dir, train_class_dir, val_class_dir, test_size=0.2):
    # Get all the files in the source directory
    all_files = [f for f in os.listdir(source_class_dir) if os.path.isfile(os.path.join(source_class_dir, f))]
    
    # Split the files into train and validation sets
    train_files, val_files = train_test_split(all_files, test_size=test_size, random_state=42)

    # Move files to respective directories
    for file in train_files:
        shutil.copy(os.path.join(source_class_dir, file), os.path.join(train_class_dir, file))

    for file in val_files:
        shutil.copy(os.path.join(source_class_dir, file), os.path.join(val_class_dir, file))

# Split data for each class
split_data(os.path.join(source_dir, 'Violence'), os.path.join(train_dir, 'Violence'), os.path.join(val_dir, 'Violence'))
split_data(os.path.join(source_dir, 'NonViolence'), os.path.join(train_dir, 'NonViolence'), os.path.join(val_dir, 'NonViolence'))

print("Data split into train and validation sets!")


Data split into train and validation sets!


In [9]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Image dimensions expected by InceptionV3
img_height, img_width = 299, 299
batch_size = 32

# Create the training and validation data generators
train_datagen = ImageDataGenerator(rescale=1./255, horizontal_flip=True, rotation_range=20)
val_datagen = ImageDataGenerator(rescale=1./255)

# Load images from directory
train_generator = train_datagen.flow_from_directory(
    '/kaggle/working/data/train',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary'
)

val_generator = val_datagen.flow_from_directory(
    '/kaggle/working/data/val',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary'
)


Found 8000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


In [8]:
# Load the InceptionV3 model without the top layer
base_model = InceptionV3(weights='imagenet', include_top=False)

# Add custom classification layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dense(1, activation='sigmoid')(x)  # Binary classification (violent/non-violent)

# Create the final model
model = Model(inputs=base_model.input, outputs=x)

# Freeze the layers of the base model to prevent retraining them
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [10]:
# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=val_generator,
    validation_steps=len(val_generator),
    epochs=5  # You can increase this to 10–15 if time allows
)


Epoch 1/5


  self._warn_if_super_not_called()
I0000 00:00:1746807345.066186    8082 service.cc:148] XLA service 0x7b8968025d40 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1746807345.068781    8082 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1746807345.068805    8082 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1746807347.066956    8082 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  1/250[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:01:03[0m 29s/step - accuracy: 0.4688 - loss: 0.7750

I0000 00:00:1746807358.185892    8082 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 651ms/step - accuracy: 0.7923 - loss: 0.4223 - val_accuracy: 0.8609 - val_loss: 0.3130
Epoch 2/5


  self.gen.throw(typ, value, traceback)


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 30ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.9375 - val_loss: 0.2028
Epoch 3/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m159s[0m 620ms/step - accuracy: 0.9010 - loss: 0.2380 - val_accuracy: 0.8876 - val_loss: 0.2524
Epoch 4/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 312us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.9375 - val_loss: 0.0954
Epoch 5/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m158s[0m 614ms/step - accuracy: 0.9152 - loss: 0.2052 - val_accuracy: 0.8972 - val_loss: 0.2608


In [11]:
model.save('violence_detection_model.h5')

In [18]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.preprocessing import image

def extract_frames_from_video(video_path, num_frames=16, target_size=(224, 224)):
    frames = []
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    if total_frames < num_frames:
        frame_indices = np.linspace(0, total_frames - 1, total_frames).astype(int)
    else:
        frame_indices = np.linspace(0, total_frames - 1, num_frames).astype(int)

    count = 0
    grabbed = 0
    while True:
        success, frame = cap.read()
        if not success:
            break
        if count in frame_indices:
            frame = cv2.resize(frame, target_size)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)
            grabbed += 1
            if grabbed == len(frame_indices):
                break
        count += 1
    cap.release()
    
    return np.array(frames)

video_path = "/kaggle/input/real-life-violence-situations-dataset/Real Life Violence Dataset/NonViolence/NV_126.mp4"
frames = extract_frames_from_video(video_path, num_frames=16)
frames = frames / 255.0

# Predict on each frame, then average
preds = model.predict(frames)
avg_pred = np.mean(preds)

label = "Violent" if avg_pred > 0.5 else "Non-Violent"
print(f"Prediction: {label} (confidence: {avg_pred:.2f})")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Prediction: Non-Violent (confidence: 0.16)


In [20]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix

# Folder paths
non_violent_folder = "/kaggle/input/real-life-violence-situations-dataset/Real Life Violence Dataset/NonViolence"
violent_folder = "/kaggle/input/real-life-violence-situations-dataset/Real Life Violence Dataset/Violence"

# Limit to 100 total videos (50 from each category)
non_violent_videos = sorted([os.path.join(non_violent_folder, f) for f in os.listdir(non_violent_folder) if f.endswith(".mp4")])[:50]
violent_videos = sorted([os.path.join(violent_folder, f) for f in os.listdir(violent_folder) if f.endswith(".mp4")])[:50]

video_paths = [(path, 0) for path in non_violent_videos] + [(path, 1) for path in violent_videos]

def extract_frames(video_path, num_frames=5):
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    step = max(1, total_frames // num_frames)
    
    frames = []
    for i in range(0, total_frames, step):
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = cap.read()
        if ret:
            frame = cv2.resize(frame, (224, 224))
            frames.append(frame)
        if len(frames) >= num_frames:
            break
    cap.release()
    return np.array(frames)

def predict_video(video_path, model):
    frames = extract_frames(video_path)
    frames = frames.astype('float32') / 255.0
    preds = model.predict(frames, verbose=0)
    avg_pred = np.mean(preds)
    return 1 if avg_pred > 0.5 else 0

# Evaluate
y_true = []
y_pred = []

print("Evaluating on 100 videos...")
for path, label in tqdm(video_paths):
    pred = predict_video(path, model)
    y_true.append(label)
    y_pred.append(pred)

# Metrics
print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=["Non-Violent", "Violent"]))

print("Confusion Matrix:")
print(confusion_matrix(y_true, y_pred))


Evaluating on 100 videos...


100%|██████████| 100/100 [00:26<00:00,  3.82it/s]


Classification Report:
              precision    recall  f1-score   support

 Non-Violent       1.00      0.70      0.82        50
     Violent       0.77      1.00      0.87        50

    accuracy                           0.85       100
   macro avg       0.88      0.85      0.85       100
weighted avg       0.88      0.85      0.85       100

Confusion Matrix:
[[35 15]
 [ 0 50]]



