In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
import tensorflow as tf

In [2]:
data_dir = '/kaggle/input/real-time-anomaly-detection-in-cctv-surveillance/'
video_dir = os.path.join(data_dir, 'data')
train_csv = os.path.join(video_dir, 'train.csv')
test_csv = os.path.join(video_dir, 'test.csv')
print("Dataset root contents:", os.listdir(data_dir))
print("Video dir contents:", os.listdir(video_dir))
print(f"Train CSV path: {train_csv}, Exists: {os.path.exists(train_csv)}")
print(f"Test CSV path: {test_csv}, Exists: {os.path.exists(test_csv)}")
threat_classes = ['abuse', 'arrest', 'arson', 'assault', 'burglary', 'explosion', 
                  'fighting', 'roadaccidents', 'robbery', 'shooting', 'shoplifting', 
                  'stealing', 'vandalism']
normal_class = ['normal']
def extract_frames(video_path, num_frames=5):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Failed to open video: {video_path}")
        return []
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    step = max(1, total_frames // num_frames)
    frames = []
    for i in range(0, total_frames, step):
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = cap.read()
        if ret:
            frames.append(cv2.resize(frame, (224, 224)))
        if len(frames) >= num_frames: break
    cap.release()
    return frames

model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
def extract_cnn_features(frames):
    if not frames:
        return None
    features = []
    for frame in frames:
        frame_array = image.img_to_array(frame)
        frame_array = np.expand_dims(frame_array, axis=0)
        frame_array = tf.keras.applications.resnet50.preprocess_input(frame_array)
        feat = model.predict(frame_array, verbose=0)
        features.append(feat.flatten())
    return np.mean(features, axis=0)

# Load training data from train.csv
train_df = pd.read_csv(train_csv)
print("Train CSV columns:", train_df.columns)
print("Train CSV head:\n", train_df.head())
X_train, y_train = [], []
for index, row in train_df.iterrows():
    # Adjust column names based on actual CSV structure (e.g., 'video_path', 'label')
    relative_path = row['video_name'].replace('\n', '').replace('\\', '/')
    video_path = os.path.join(data_dir, relative_path)
    label = 1 if row['label'] in threat_classes else (0 if row['label'] in normal_class else -1)
    if label == -1: continue
    # print(f"Processing: {video_path}")
    frames = extract_frames(video_path)
    if frames:
        features = extract_cnn_features(frames)
        if features is not None:
            X_train.append(features)
            y_train.append(label)

# Convert training data to numpy arrays
X_train = np.array(X_train)
y_train = np.array(y_train)
print(f"Train data shape: {X_train.shape}, Train labels shape: {y_train.shape}")

if X_train.shape[0] == 0:
    raise ValueError("No training data loaded. Check train.csv and video files.")

Dataset root contents: ['data']
Video dir contents: ['roadaccidents', 'assault', 'vandalism', 'arrest', 'shooting', 'arson', 'explosion', 'normal', 'shoplifting', 'robbery', 'stealing', 'burglary', 'train.csv', 'test.csv', 'abuse', 'fighting']
Train CSV path: /kaggle/input/real-time-anomaly-detection-in-cctv-surveillance/data/train.csv, Exists: True
Test CSV path: /kaggle/input/real-time-anomaly-detection-in-cctv-surveillance/data/test.csv, Exists: True


I0000 00:00:1757242272.636413      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1757242272.637106      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Train CSV columns: Index(['Unnamed: 0', 'label', 'video_name'], dtype='object')
Train CSV head:
    Unnamed: 0          label                                    video_name
0        1229         normal        data\normal\Normal_Videos_196_x264.mp4
1         551         normal         data\normal\Normal_Videos179_x264.mp4
2         715         normal         data\normal\Normal_Videos361_x264.mp4
3        1366  roadaccidents  data\roadaccidents\RoadAccidents017_x264.mp4
4         501         normal         data\normal\Normal_Videos125_x264.mp4


I0000 00:00:1757242278.616307      84 service.cc:148] XLA service 0x7a4b48004100 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1757242278.616952      84 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1757242278.616993      84 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1757242279.224768      84 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1757242281.663131      84 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Train data shape: (1520, 2048), Train labels shape: (1520,)


In [8]:
model.save('resnet50_finetuned.h5')


In [3]:
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_model.fit(X_train, y_train)

# Load test data from test.csv
test_df = pd.read_csv(test_csv)
X_test, y_test = [], []
for index, row in test_df.iterrows():
    relative_path = row['video_name'].replace('\n', '').replace('\\', '/')
    video_path = os.path.join(data_dir, relative_path)
    label = 1 if row['label'] in threat_classes else (0 if row['label'] in normal_class else -1)
    if label == -1: continue
    frames = extract_frames(video_path)
    if frames:
        features = extract_cnn_features(frames)
        if features is not None:
            X_test.append(features)
            y_test.append(label)

# Convert test data to numpy arrays
X_test = np.array(X_test)
y_test = np.array(y_test)
print(f"Test data shape: {X_test.shape}, Test labels shape: {y_test.shape}")

Test data shape: (380, 2048), Test labels shape: (380,)


In [4]:
# Simulate real-time analysis with a test video
test_video = os.path.join(video_dir,'Abuse001_x264.mp4')  # Adjust based on actual file
cap = cv2.VideoCapture(test_video)
frame_buffer = []
while cap.isOpened():
    ret, frame = cap.read()
    if not ret: break
    frame_resized = cv2.resize(frame, (224, 224))
    frame_buffer.append(frame_resized)
    if len(frame_buffer) > 5:
        frame_buffer.pop(0)
    if len(frame_buffer) == 5:
        features = extract_cnn_features(frame_buffer)
        if features is not None:
            prediction = svm_model.predict([features])[0]
            label = "Threat" if prediction == 1 else "Normal"
            print(f"Prediction: {label}")
cap.release()

In [5]:
# Model Evaluation
y_pred = svm_model.predict(X_test)
print("Accuracy of the model in percentage:", accuracy_score(y_test, y_pred)*100)

Accuracy of the model in percentage: 88.68421052631578


In [6]:
test_video_1 = os.path.join(video_dir, 'abuse/Abuse001_x264.mp4')
frames = extract_frames(test_video_1) 
features = extract_cnn_features(frames)
prediction = svm_model.predict([features])[0]
label = "Threat" if prediction == 1 else "Normal"
print(f"Prediction: {label}")

Prediction: Threat


In [7]:
import joblib
joblib.dump(svm_model, '/kaggle/working/svm_model.pkl')
print("Model saved to /kaggle/working/svm_model.pkl")

Model saved to /kaggle/working/svm_model.pkl


In [5]:
import time
import cv2
import numpy as np
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
import joblib
video_path = "/kaggle/input/real-time-anomaly-detection-in-cctv-surveillance/data/normal/Normal_Videos001_x264.mp4"
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

svm_model = joblib.load('/kaggle/input/svmbasedmodel/pytorch/default/1/svm_model.pkl')

# Function to extract frames
def extract_frames(video_path, num_frames=5):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Failed to open video: {video_path}")
        return []
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    step = max(1, total_frames // num_frames)
    frames = []
    for i in range(0, total_frames, step):
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = cap.read()
        if ret:
            frames.append(cv2.resize(frame, (224, 224)))
        if len(frames) >= num_frames: break
    cap.release()
    return frames
def extract_cnn_features(frames):
    features = []
    for frame in frames:
        frame_array = image.img_to_array(frame)
        frame_array = np.expand_dims(frame_array, axis=0)
        frame_array = tf.keras.applications.resnet50.preprocess_input(frame_array)
        feat = model.predict(frame_array, verbose=0)
        features.append(feat.flatten())
    return np.mean(features, axis=0)

total_start = time.time()
t0 = time.time()
frames = extract_frames(video_path)
t1 = time.time()
print(f"Frame extraction took: {t1 - t0:.2f} seconds, {len(frames)} frames extracted")
t2 = time.time()
features = extract_cnn_features(frames)
t3 = time.time()
print(f"ResNet feature extraction took: {t3 - t2:.2f} seconds")
t4 = time.time()
features = features.reshape(1, -1)  # reshape for SVM
y_pred = svm_model.predict(features)
t5 = time.time()
print(f"SVM prediction took: {t5 - t4:.4f} seconds")
print("Predicted label:", y_pred[0])

total_end = time.time()
print(f"Total processing time: {total_end - total_start:.2f} seconds")


Frame extraction took: 0.19 seconds, 5 frames extracted
ResNet feature extraction took: 3.59 seconds
SVM prediction took: 0.0024 seconds
Predicted label: 0
Total processing time: 3.78 seconds


In [1]:
import time
import cv2
import numpy as np
import pandas as pd
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
import joblib
import tensorflow as tf

video_path = "/kaggle/input/real-time-anomaly-detection-in-cctv-surveillance/data/normal/Normal_Videos001_x264.mp4"
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
svm_model = joblib.load('/kaggle/input/svmbasedmodel/pytorch/default/1/svm_model.pkl')
def extract_frames(video_path, num_frames=100):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Failed to open video: {video_path}")
        return []
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    step = max(1, total_frames // num_frames)
    frames = []
    for i in range(0, total_frames, step):
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = cap.read()
        if ret:
            frames.append(cv2.resize(frame, (224, 224)))
        if len(frames) >= num_frames: break
    cap.release()
    return frames

# Function to extract CNN features
def extract_cnn_features(frames):
    features = []
    for frame in frames:
        frame_array = image.img_to_array(frame)
        frame_array = np.expand_dims(frame_array, axis=0)
        frame_array = tf.keras.applications.resnet50.preprocess_input(frame_array)
        feat = model.predict(frame_array, verbose=0)
        features.append(feat.flatten())
    return np.mean(features, axis=0)

total_start = time.time()
cap = cv2.VideoCapture(video_path)
if cap.isOpened():
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    video_length_sec = total_frames / fps if fps > 0 else 0
    print(f"Original video length: {video_length_sec:.2f} seconds, FPS: {fps}, Total frames: {total_frames}")
cap.release()
t0 = time.time()
frames = extract_frames(video_path)
t1 = time.time()
print(f"Frame extraction took: {t1 - t0:.2f} seconds, {len(frames)} frames extracted")

t2 = time.time()
features = extract_cnn_features(frames)
t3 = time.time()
print(f"ResNet feature extraction took: {t3 - t2:.2f} seconds")
t4 = time.time()
features = features.reshape(1, -1)  # reshape for SVM
y_pred = svm_model.predict(features)
t5 = time.time()
print(f"SVM prediction took: {t5 - t4:.4f} seconds")
print("Predicted label:", y_pred[0])

total_end = time.time()
print(f"Total processing time: {total_end - total_start:.2f} seconds")


2025-09-07 13:39:58.300592: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1757252398.654870      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1757252398.760396      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
I0000 00:00:1757252415.391126      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1757252415.391823      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability:

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Original video length: 18.13 seconds, FPS: 30.0, Total frames: 544
Frame extraction took: 2.86 seconds, 100 frames extracted


I0000 00:00:1757252425.207871      97 service.cc:148] XLA service 0x7807f80023a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1757252425.209458      97 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1757252425.209477      97 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1757252425.880700      97 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1757252429.042686      97 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


ResNet feature extraction took: 14.71 seconds
SVM prediction took: 0.0026 seconds
Predicted label: 0
Total processing time: 17.75 seconds


In [11]:
import time
import cv2
import numpy as np
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
import joblib
import tensorflow as tf
from tensorflow.keras import mixed_precision

# --- Enable mixed precision for faster GPU inference ---
mixed_precision.set_global_policy('mixed_float16')

# --- Paths ---
video_path = "/kaggle/input/real-time-anomaly-detection-in-cctv-surveillance/data/normal/Normal_Videos001_x264.mp4"
svm_path = "/kaggle/input/svmbasedmodel/pytorch/default/1/svm_model.pkl"

# --- Load models ---
model = ResNet50(weights='imagenet', include_top=False, pooling='avg', input_shape=(224,224,3))
svm_model = joblib.load(svm_path)

# --- Warm-up GPU ---
dummy = np.zeros((1, 224, 224, 3), dtype=np.float16)
model.predict(dummy)

# --- Functions ---
def extract_frames_fps(video_path, target_fps=5):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Failed to open video: {video_path}")
        return []
    
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    video_fps = cap.get(cv2.CAP_PROP_FPS)
    total_seconds = int(total_frames / video_fps)
    step = int(video_fps / target_fps)
    
    frames = []
    for sec in range(total_seconds):
        for i in range(0, int(video_fps), step):
            frame_idx = sec * int(video_fps) + i
            if frame_idx >= total_frames:
                break
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            ret, frame = cap.read()
            if ret:
                frames.append(cv2.resize(frame, (224, 224)))
    cap.release()
    return frames

def extract_cnn_features_batch(frames):
    batch = np.array([tf.keras.applications.resnet50.preprocess_input(image.img_to_array(f)) for f in frames], dtype=np.float16)
    feats = model.predict(batch, verbose=0)
    return np.mean(feats, axis=0)

# --- Timing starts ---
total_start = time.time()

# Video info
cap = cv2.VideoCapture(video_path)
if cap.isOpened():
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    video_length_sec = total_frames / fps if fps > 0 else 0
    print(f"Original video length: {video_length_sec:.2f} seconds, FPS: {fps}, Total frames: {total_frames}")
cap.release()

t0 = time.time()
frames = extract_frames_fps(video_path, target_fps=5)
t1 = time.time()
print(f"Frame extraction took: {t1 - t0:.2f} seconds, {len(frames)} frames extracted")
t2 = time.time()
features = extract_cnn_features_batch(frames)
t3 = time.time()
print(f"ResNet50 batch feature extraction took: {t3 - t2:.2f} seconds")
t4 = time.time()
features = features.reshape(1, -1)
y_pred = svm_model.predict(features)
t5 = time.time()
print(f"SVM prediction took: {t5 - t4:.4f} seconds")
print("Predicted label:", y_pred[0])

total_end = time.time()
print(f"Total processing time: {total_end - total_start:.2f} seconds")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
Original video length: 18.13 seconds, FPS: 30.0, Total frames: 544
Frame extraction took: 2.50 seconds, 90 frames extracted
ResNet50 batch feature extraction took: 6.28 seconds
SVM prediction took: 0.0025 seconds
Predicted label: 0
Total processing time: 8.79 seconds


In [16]:
import time
import cv2
import numpy as np
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
import joblib
import tensorflow as tf
from tensorflow.keras import mixed_precision
from collections import deque

# --- Mixed precision ---
mixed_precision.set_global_policy('mixed_float16')

# --- Paths ---
video_path = "/kaggle/input/real-time-anomaly-detection-in-cctv-surveillance/data/fighting/Fighting002_x264.mp4"
svm_path = "/kaggle/input/svmbasedmodel/pytorch/default/1/svm_model.pkl"

# --- Load models ---
model = ResNet50(weights='imagenet', include_top=False, pooling='avg', input_shape=(224,224,3))
svm_model = joblib.load(svm_path)

# --- Warm-up GPU ---
dummy = np.zeros((1, 224, 224, 3), dtype=np.float16)
model.predict(dummy)

# --- Functions ---
def process_batch(frames, frame_indices):
    """Run ResNet + SVM on a batch of frames."""
    if not frames:
        return
    batch = np.array([tf.keras.applications.resnet50.preprocess_input(image.img_to_array(f)) for f in frames], dtype=np.float16)
    feats = model.predict(batch, verbose=0)
    preds = svm_model.predict(feats)
    for idx, pred in zip(frame_indices, preds):
        print(f"Frame {idx}: Pred={pred}")

# --- Pipeline ---
target_fps = 5   # only capture 5 frames per second
batch_size = 100
queue = deque()
queue_indices = []

cap = cv2.VideoCapture(video_path)
total_start = time.time()
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
video_fps = cap.get(cv2.CAP_PROP_FPS)
print(f"Original video length: {total_frames/video_fps:.2f}s, FPS: {video_fps}, Total frames: {total_frames}")

frame_idx = 0
step = int(video_fps / target_fps)  # number of frames to skip to achieve target_fps

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # only process frames according to step
    if frame_idx % step == 0:
        frame_resized = cv2.resize(frame, (224, 224))
        queue.append(frame_resized)
        queue_indices.append(frame_idx)

        # Process batch when queue is full
        if len(queue) == batch_size:
            batch_start = time.time()
            process_batch(list(queue), list(queue_indices))
            batch_end = time.time()
            print(f"Processed batch of {batch_size} frames in {batch_end - batch_start:.2f}s")
            queue.clear()
            queue_indices.clear()

    frame_idx += 1

# Process any remaining frames
if queue:
    batch_start = time.time()
    process_batch(list(queue), list(queue_indices))
    batch_end = time.time()
    print(f"Processed final batch of {len(queue)} frames in {batch_end - batch_start:.2f}s")

cap.release()
total_end = time.time()
print(f"Total processing time for video: {total_end - total_start:.2f}s")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
Original video length: 89.60s, FPS: 30.0, Total frames: 2688
Frame 0: Pred=1
Frame 6: Pred=1
Frame 12: Pred=1
Frame 18: Pred=1
Frame 24: Pred=1
Frame 30: Pred=1
Frame 36: Pred=1
Frame 42: Pred=1
Frame 48: Pred=1
Frame 54: Pred=1
Frame 60: Pred=1
Frame 66: Pred=1
Frame 72: Pred=1
Frame 78: Pred=1
Frame 84: Pred=1
Frame 90: Pred=1
Frame 96: Pred=1
Frame 102: Pred=1
Frame 108: Pred=1
Frame 114: Pred=1
Frame 120: Pred=1
Frame 126: Pred=1
Frame 132: Pred=1
Frame 138: Pred=1
Frame 144: Pred=1
Frame 150: Pred=1
Frame 156: Pred=1
Frame 162: Pred=1
Frame 168: Pred=1
Frame 174: Pred=1
Frame 180: Pred=1
Frame 186: Pred=1
Frame 192: Pred=1
Frame 198: Pred=1
Frame 204: Pred=1
Frame 210: Pred=1
Frame 216: Pred=1
Frame 222: Pred=1
Frame 228: Pred=1
Frame 234: Pred=1
Frame 240: Pred=1
Frame 246: Pred=1
Frame 252: Pred=1
Frame 258: Pred=1
Frame 264: Pred=1
Frame 270: Pred=1
Frame 276: Pred=1
Frame 282: Pred=1
Frame 288: Pred=1
Frame 

In [3]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

data_dir = '/kaggle/input/real-time-anomaly-detection-in-cctv-surveillance/'
video_dir = os.path.join(data_dir, 'data')
train_csv = os.path.join(video_dir, 'train.csv')
test_csv = os.path.join(video_dir, 'test.csv')

backbone_name = "mobilenetv2" 
num_frames = 5
threat_classes = ['abuse', 'arrest', 'arson', 'assault', 'burglary', 'explosion', 
                  'fighting', 'roadaccidents', 'robbery', 'shooting', 'shoplifting', 
                  'stealing', 'vandalism']
normal_class = ['normal']

def load_backbone(name):
    if name.lower() == "resnet50":
        from tensorflow.keras.applications import ResNet50
        from tensorflow.keras.applications.resnet50 import preprocess_input
        model = ResNet50(weights="imagenet", include_top=False, pooling="avg")
    elif name.lower() == "mobilenetv2":
        from tensorflow.keras.applications import MobileNetV2
        from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
        model = MobileNetV2(weights="imagenet", include_top=False, pooling="avg")
    elif name.lower() == "efficientnetb0":
        from tensorflow.keras.applications import EfficientNetB0
        from tensorflow.keras.applications.efficientnet import preprocess_input
        model = EfficientNetB0(weights="imagenet", include_top=False, pooling="avg")
    else:
        raise ValueError("Unknown backbone")
    return model, preprocess_input

def extract_frames(video_path, num_frames=num_frames):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Failed to open video: {video_path}")
        return []
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    step = max(1, total_frames // num_frames)
    frames = []
    for i in range(0, total_frames, step):
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = cap.read()
        if ret:
            frames.append(cv2.resize(frame, (224, 224)))
        if len(frames) >= num_frames:
            break
    cap.release()
    return frames

def extract_cnn_features(frames, model, preprocess_input):
    if not frames:
        return None
    features = []
    for frame in frames:
        frame_array = image.img_to_array(frame)
        frame_array = np.expand_dims(frame_array, axis=0)
        frame_array = preprocess_input(frame_array)
        feat = model.predict(frame_array, verbose=0)
        features.append(feat.flatten())
    return np.mean(features, axis=0)

def load_dataset(csv_path, model, preprocess_input):
    df = pd.read_csv(csv_path)
    X, y = [], []
    for _, row in df.iterrows():
        relative_path = row['video_name'].replace('\n', '').replace('\\', '/')
        video_path = os.path.join(data_dir, relative_path)
        label = 1 if row['label'] in threat_classes else (0 if row['label'] in normal_class else -1)
        if label == -1:
            continue
        frames = extract_frames(video_path, num_frames=num_frames)
        if frames:
            features = extract_cnn_features(frames, model, preprocess_input)
            if features is not None:
                X.append(features)
                y.append(label)
    return np.array(X), np.array(y)


print(f"Using backbone: {backbone_name}")
model, preprocess_input = load_backbone(backbone_name)

# Train set
X_train, y_train = load_dataset(train_csv, model, preprocess_input)
print(f"Train shape: {X_train.shape}, Labels: {y_train.shape}")

# Train SVM
svm_model = SVC(kernel="rbf", C=1.0, gamma="scale")
svm_model.fit(X_train, y_train)

# Test set
X_test, y_test = load_dataset(test_csv, model, preprocess_input)
print(f"Test shape: {X_test.shape}, Labels: {y_test.shape}")

# Evaluate
y_pred = svm_model.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred, digits=4))
print("Accuracy:", accuracy_score(y_test, y_pred))


🔎 Using backbone: mobilenetv2


  model = MobileNetV2(weights="imagenet", include_top=False, pooling="avg")
I0000 00:00:1757264073.927747     100 service.cc:148] XLA service 0x794480002500 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1757264073.929418     100 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1757264073.929443     100 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1757264074.492899     100 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1757264078.062333     100 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Train shape: (1520, 1280), Labels: (1520,)
Test shape: (380, 1280), Labels: (380,)
Classification Report:
               precision    recall  f1-score   support

           0     0.8528    0.8842    0.8682       190
           1     0.8798    0.8474    0.8633       190

    accuracy                         0.8658       380
   macro avg     0.8663    0.8658    0.8657       380
weighted avg     0.8663    0.8658    0.8657       380

Accuracy: 0.8657894736842106


In [6]:
import joblib

joblib.dump(svm_model, "svm_efficientnet_model.pkl")


['svm_efficientnet_model.pkl']