In [5]:
import os
import shutil
import cv2
from sklearn.model_selection import train_test_split
import tensorflow as tf
import multiprocessing as mp
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG19
from tensorflow.keras import layers, models
import logging

In [7]:
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Define base directory
base_dir = './Data'

# Define video and directory paths
video_dir = os.path.join(base_dir, 'Video')
temp_frames_dir = os.path.join(base_dir, 'TempFrames')
train_dir = os.path.join(base_dir, 'Frames', 'Training')
validation_dir = os.path.join(base_dir, 'Frames', 'Validation')

# Ensure directories exist
for directory in [train_dir, validation_dir, temp_frames_dir]:
    os.makedirs(os.path.join(directory, 'V'), exist_ok=True)
    os.makedirs(os.path.join(directory, 'NV'), exist_ok=True)

def extract_frames(video_path, output_dir, frame_rate=30):
    """
    Extract frames from video at a specified rate (every 'frame_rate' frames).
    """
    cap = cv2.VideoCapture(video_path)
    count = 0
    success = True
    while success:
        success, frame = cap.read()
        if count % frame_rate == 0 and success:
            frame_filename = os.path.join(output_dir, f"{os.path.basename(video_path).split('.')[0]}_frame{count}.jpg")
            cv2.imwrite(frame_filename, frame)
        count += 1
    cap.release()
    logging.info(f"Extracted frames from {video_path}")

def process_videos(video_files, output_dir, frame_rate=30):
    for video_file in video_files:
        extract_frames(video_file, output_dir, frame_rate)

# Function to split and move files
def split_and_move_files(src_dir, train_dst_dir, val_dst_dir, test_size=0.2):
    # List all files in the source directory
    files = [os.path.join(src_dir, f) for f in os.listdir(src_dir) if os.path.isfile(os.path.join(src_dir, f))]
    
    # Log the number of files found
    logging.info(f"Found {len(files)} files in {src_dir}")
    
    if len(files) == 0:
        raise ValueError(f"No files found in directory: {src_dir}")

    # Split into training and validation
    train_files, val_files = train_test_split(files, test_size=test_size, random_state=42)
    
    # Move training files
    for file in train_files:
        os.rename(file, os.path.join(train_dst_dir, os.path.basename(file)))
    
    # Move validation files
    for file in val_files:
        os.rename(file, os.path.join(val_dst_dir, os.path.basename(file)))

    logging.info(f"Moved files from {src_dir} to {train_dst_dir} and {val_dst_dir}")

# Paths for violent and non-violent videos
violent_src_dir = os.path.join(video_dir, 'Violent')
non_violent_src_dir = os.path.join(video_dir, 'NonViolent')
temp_violent_dir = os.path.join(temp_frames_dir, 'V')
temp_non_violent_dir = os.path.join(temp_frames_dir, 'NV')
train_violent_dir = os.path.join(train_dir, 'V')
train_non_violent_dir = os.path.join(train_dir, 'NV')
validation_violent_dir = os.path.join(validation_dir, 'V')
validation_non_violent_dir = os.path.join(validation_dir, 'NV')

# Get list of video files
violent_files = [os.path.join(violent_src_dir, f) for f in os.listdir(violent_src_dir) if os.path.isfile(os.path.join(violent_src_dir, f))]
non_violent_files = [os.path.join(non_violent_src_dir, f) for f in os.listdir(non_violent_src_dir) if os.path.isfile(os.path.join(non_violent_src_dir, f))]

# Use multiprocessing to extract frames from videos
def parallel_process_videos(video_files, output_dir, frame_rate=30):
    pool = mp.Pool(mp.cpu_count())
    for video_file in video_files:
        pool.apply_async(extract_frames, args=(video_file, output_dir, frame_rate))
    pool.close()
    pool.join()

# Process violent and non-violent videos in parallel
parallel_process_videos(violent_files, temp_violent_dir)
parallel_process_videos(non_violent_files, temp_non_violent_dir)

# Split and move files for violent and non-violent frames
split_and_move_files(temp_violent_dir, train_violent_dir, validation_violent_dir)
split_and_move_files(temp_non_violent_dir, train_non_violent_dir, validation_non_violent_dir)

logging.info("Frames extracted and data split into training and validation directories successfully.")

2024-06-05 12:25:10,472 - INFO - Extracted frames from ./Data/Video/Violent/14.mp4
2024-06-05 12:25:10,709 - INFO - Extracted frames from ./Data/Video/Violent/93.mp4
2024-06-05 12:25:11,180 - INFO - Extracted frames from ./Data/Video/Violent/6.mp4
2024-06-05 12:25:11,301 - INFO - Extracted frames from ./Data/Video/Violent/4.mp4
2024-06-05 12:25:11,885 - INFO - Extracted frames from ./Data/Video/Violent/56.mp4
2024-06-05 12:25:14,864 - INFO - Extracted frames from ./Data/Video/Violent/97.mp4
2024-06-05 12:25:16,509 - INFO - Extracted frames from ./Data/Video/Violent/105.mp4
2024-06-05 12:25:16,987 - INFO - Extracted frames from ./Data/Video/Violent/115.mp4
2024-06-05 12:25:20,828 - INFO - Extracted frames from ./Data/Video/Violent/91.mp4
2024-06-05 12:25:21,398 - INFO - Extracted frames from ./Data/Video/Violent/84.mp4
2024-06-05 12:25:21,697 - INFO - Extracted frames from ./Data/Video/Violent/38.mp4
2024-06-05 12:25:22,193 - INFO - Extracted frames from ./Data/Video/Violent/70.mp4
2024

In [8]:
# Image data generators
FRAME_SIZE = (150, 150)
BATCH_SIZE = 20

In [9]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [10]:
val_datagen = ImageDataGenerator(rescale=1./255)

In [11]:
# Flow training images in batches using train_datagen generator
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=FRAME_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=True
)

Found 19296 images belonging to 2 classes.


In [12]:
validation_generator = val_datagen.flow_from_directory(
    validation_dir,
    target_size=FRAME_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=True
)

Found 4826 images belonging to 2 classes.


In [13]:
# Load pre-trained VGG19 model
vgg19_base = VGG19(weights='imagenet', include_top=False, input_shape=(150, 150, 3))

# Freeze the base model
vgg19_base.trainable = False

# Add custom layers on top
model = models.Sequential([
    vgg19_base,
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

In [14]:
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy']
)
model.summary()

In [15]:
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // BATCH_SIZE,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // BATCH_SIZE
)

Epoch 1/30


2024-06-05 12:29:33.598638: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 16777216 exceeds 10% of free system memory.
2024-06-05 12:29:33.676078: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 16777216 exceeds 10% of free system memory.
2024-06-05 12:29:33.681970: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 16777216 exceeds 10% of free system memory.
2024-06-05 12:29:33.734148: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 16777216 exceeds 10% of free system memory.
2024-06-05 12:29:33.751165: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 16777216 exceeds 10% of free system memory.
  self._warn_if_super_not_called()


[1m 18/964[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:43:42[0m 7s/step - accuracy: 0.5079 - loss: 2.1658

KeyboardInterrupt: 

In [None]:
import time
t = time.time()
export_path_keras = "./vgg19_{}.h5".format(int(t))
model.save(export_path_keras)
print(f"Model saved to {export_path_keras}")

In [None]:
import matplotlib.pyplot as plt

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and Validation Loss')
plt.legend()

plt.show()