## Extract Frames From at Most 100 Videos per Category and Save Each Video's Frames in Its Own Folder

In [1]:
import os
import cv2
import json
import re
import pandas as pd
from collections import defaultdict

# Paths for annotations, videos, output frames, and CSV file
# The annotations JSON maps each video key to a dict that is read below via its
# "timestamps" and "sentences" entries.
ANNOTATIONS_FILE = '/kaggle/input/ucaucf-crime-annotation-dataset/UCFCrime_Test.json'
VIDEOS_DIR = '/kaggle/input/ucaucf-crime-annotation-dataset/UCF_Crimes/UCF_Crimes/Videos'    # Searched recursively; e.g., "./UCF_Crimes/Videos/"
OUTPUT_DIR = 'output_frames'           # Root folder; each video gets its own sub-folder of frames
CSV_FILE = 'image_captions.csv'        # One row per saved frame: path, caption, video key, category, frame index

# Extensions accepted when matching a video key to a file (compared against the
# lower-cased file extension, so list them in lower case)
VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv']

# Maximum number of videos to process per category
MAX_PER_CATEGORY = 100

def find_video_file(video_name, base_dir, exts):
    """
    Locate the video file whose filename stem equals `video_name`.

    Walks `base_dir` and every directory beneath it and returns the full path
    of the first file whose stem matches `video_name` exactly and whose
    lower-cased extension is contained in `exts`. Returns None when no file
    qualifies.
    """
    for folder, _subdirs, filenames in os.walk(base_dir):
        for filename in filenames:
            stem, suffix = os.path.splitext(filename)
            if stem != video_name:
                continue
            if suffix.lower() not in exts:
                continue
            return os.path.join(folder, filename)
    return None

def extract_category_from_key(video_key):
    """
    Derive the category from a video key.

    The category is the run of ASCII letters at the very start of the key,
    e.g. 'Abuse001_x264' -> 'Abuse'. Keys that do not begin with a letter
    yield "Unknown".
    """
    leading_letters = re.match(r'([A-Za-z]+)', video_key)
    return leading_letters.group(1) if leading_letters else "Unknown"

# Create the output directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Load the JSON annotations. JSON text is UTF-8, so do not depend on the
# platform's default encoding.
with open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as f:
    annotations = json.load(f)

# Keep track of how many videos processed per category
category_count = defaultdict(int)

# List to collect dictionaries with image paths and captions
data_entries = []

# Keep one frame out of every FRAME_STRIDE decoded frames
FRAME_STRIDE = 20

# Iterate over each video entry in the JSON
for video_name, data in annotations.items():
    # Identify the category from the video key
    category = extract_category_from_key(video_name)

    # Skip once this category has reached MAX_PER_CATEGORY processed videos
    if category_count[category] >= MAX_PER_CATEGORY:
        continue

    # Attempt to find the actual video file in subfolders
    video_file = find_video_file(video_name, VIDEOS_DIR, VIDEO_EXTENSIONS)
    if not video_file:
        print(f"[WARNING] Could not find file for video key '{video_name}'")
        continue

    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            print(f"[ERROR] Unable to open video file: {video_file}")
            continue

        # A frame rate of 0 (or NaN) would make the frame-time computation
        # below fail, so such videos are skipped and not counted.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            print(f"[ERROR] Invalid frame rate ({fps}) for video file: {video_file}")
            continue
        print(f"Processing '{video_name}' in category '{category}' at {fps} fps.")

        # Create a subdirectory for frames from this video
        video_out_dir = os.path.join(OUTPUT_DIR, video_name)
        os.makedirs(video_out_dir, exist_ok=True)

        # Pair each [start, end] interval with its sentence. zip() stops at the
        # shorter list, so a missing sentence cannot raise IndexError.
        timestamps = data.get("timestamps", [])
        sentences = data.get("sentences", [])
        captioned_intervals = list(zip(timestamps, sentences))

        frame_index = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # End of video

            if frame_index % FRAME_STRIDE == 0:
                current_time = frame_index / fps

                # First interval (in annotation order) covering this frame time
                caption = next(
                    (sentence for (start, end), sentence in captioned_intervals
                     if start <= current_time <= end),
                    None,
                )

                # Only frames that fall inside a captioned interval are kept
                if caption is not None:
                    image_filename = f"{video_name}_frame{frame_index}.jpg"
                    image_path = os.path.join(video_out_dir, image_filename)

                    # imwrite() returns False on failure; don't record a row
                    # that points at an image which was never written.
                    if cv2.imwrite(image_path, frame):
                        data_entries.append({
                            'image_path': image_path,
                            'caption': caption,
                            'video_key': video_name,
                            'category': category,
                            'frame_index': frame_index
                        })
                    else:
                        print(f"[WARNING] Failed to write frame image: {image_path}")

            frame_index += 1
    finally:
        # Always free the decoder, including on `continue` and on exceptions
        cap.release()

    # Increase the count for this category
    category_count[category] += 1

    # If we've now reached the max for this category, print a message
    if category_count[category] == MAX_PER_CATEGORY:
        print(f"[INFO] Reached {MAX_PER_CATEGORY} videos in category '{category}'.")

# Convert the collected data into a DataFrame and save as a CSV file.
# Explicit columns keep the header row even when no frame was extracted.
df = pd.DataFrame(
    data_entries,
    columns=['image_path', 'caption', 'video_key', 'category', 'frame_index'],
)
df.to_csv(CSV_FILE, index=False)
print(f"Data saved to {CSV_FILE}")


Processing 'Abuse037_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse038_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse039_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse040_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse041_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse042_x264' in category 'Abuse' at 30.0 fps.
Processing 'Arrest043_x264' in category 'Arrest' at 30.0 fps.
Processing 'Arrest044_x264' in category 'Arrest' at 30.0 fps.
Processing 'Arrest046_x264' in category 'Arrest' at 30.0 fps.
Processing 'Arrest048_x264' in category 'Arrest' at 30.0 fps.
Processing 'Arrest049_x264' in category 'Arrest' at 30.0 fps.
Processing 'Arrest050_x264' in category 'Arrest' at 29.970029 fps.
Processing 'Arrest051_x264' in category 'Arrest' at 30.0 fps.
Processing 'Arson020_x264' in category 'Arson' at 30.0 fps.
Processing 'Arson021_x264' in category 'Arson' at 30.0 fps.
Processing 'Arson022_x264' in category 'Arson' at 30.0 fps.
Processing 'Arson023_

## Extract Frames From at Most 110 Videos per Category

In [2]:
import os
import cv2
import json
import re
import pandas as pd
from collections import defaultdict

# Paths for annotations, videos, output frames, and CSV file
# NOTE: the PySceneDetect validation cell further down uses the same OUTPUT_DIR
# and CSV_FILE values, so running both cells mixes their outputs.
ANNOTATIONS_FILE = '/kaggle/input/ucaucf-crime-annotation-dataset/UCFCrime_Val.json'
VIDEOS_DIR = '/kaggle/input/ucaucf-crime-annotation-dataset/UCF_Crimes/UCF_Crimes/Videos'    # Searched recursively; e.g., "./UCF_Crimes/Videos/"
OUTPUT_DIR = 'val_output_frames'           # All frames from every video are saved flat in this one folder
CSV_FILE = 'Val_image_captions.csv'        # Intended CSV of image path / caption pairs (this cell never writes it)

# Extensions accepted when matching a video key to a file (compared against the
# lower-cased file extension, so list them in lower case)
VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv']

# Maximum number of videos to process per category
MAX_PER_CATEGORY = 110

def find_video_file(video_name, base_dir, exts):
    """
    Locate the video file whose filename stem equals `video_name`.

    Walks `base_dir` and every directory beneath it and returns the full path
    of the first file whose stem matches `video_name` exactly and whose
    lower-cased extension is contained in `exts`. Returns None when no file
    qualifies.
    """
    for folder, _subdirs, filenames in os.walk(base_dir):
        for filename in filenames:
            stem, suffix = os.path.splitext(filename)
            if stem != video_name:
                continue
            if suffix.lower() not in exts:
                continue
            return os.path.join(folder, filename)
    return None

def extract_category_from_key(video_key):
    """
    Derive the category from a video key.

    The category is the run of ASCII letters at the very start of the key,
    e.g. 'Abuse001_x264' -> 'Abuse'. Keys that do not begin with a letter
    yield "Unknown".
    """
    leading_letters = re.match(r'([A-Za-z]+)', video_key)
    return leading_letters.group(1) if leading_letters else "Unknown"

# Create the output directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Load the JSON annotations. JSON text is UTF-8, so do not depend on the
# platform's default encoding.
with open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as f:
    annotations = json.load(f)

# Keep track of how many videos processed per category
category_count = defaultdict(int)

# List to collect dictionaries with image paths and captions
data_entries = []

# Keep one frame out of every FRAME_STRIDE decoded frames
FRAME_STRIDE = 20

# Iterate over each video entry in the JSON
for video_name, data in annotations.items():
    # Identify the category from the video key
    category = extract_category_from_key(video_name)

    # Skip once this category has reached MAX_PER_CATEGORY processed videos
    if category_count[category] >= MAX_PER_CATEGORY:
        continue

    # Attempt to find the actual video file in subfolders
    video_file = find_video_file(video_name, VIDEOS_DIR, VIDEO_EXTENSIONS)
    if not video_file:
        print(f"[WARNING] Could not find file for video key '{video_name}'")
        continue

    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            print(f"[ERROR] Unable to open video file: {video_file}")
            continue

        # A frame rate of 0 (or NaN) would make the frame-time computation
        # below fail, so such videos are skipped and not counted.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            print(f"[ERROR] Invalid frame rate ({fps}) for video file: {video_file}")
            continue
        print(f"Processing '{video_name}' in category '{category}' at {fps} fps.")

        # Pair each [start, end] interval with its sentence. zip() stops at the
        # shorter list, so a missing sentence cannot raise IndexError.
        timestamps = data.get("timestamps", [])
        sentences = data.get("sentences", [])
        captioned_intervals = list(zip(timestamps, sentences))

        frame_index = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # End of video

            if frame_index % FRAME_STRIDE == 0:
                current_time = frame_index / fps

                # First interval (in annotation order) covering this frame time
                caption = next(
                    (sentence for (start, end), sentence in captioned_intervals
                     if start <= current_time <= end),
                    None,
                )

                # Only frames that fall inside a captioned interval are kept
                if caption is not None:
                    image_filename = f"{video_name}_frame{frame_index}.jpg"
                    image_path = os.path.join(OUTPUT_DIR, image_filename)

                    # imwrite() returns False on failure; don't record a row
                    # that points at an image which was never written.
                    if cv2.imwrite(image_path, frame):
                        data_entries.append({
                            'image_path': image_path,
                            'caption': caption,
                            'video_key': video_name,
                            'category': category,
                            'frame_index': frame_index
                        })
                    else:
                        print(f"[WARNING] Failed to write frame image: {image_path}")

            frame_index += 1
    finally:
        # Always free the decoder, including on `continue` and on exceptions
        cap.release()

    # Increase the count for this category
    category_count[category] += 1

    # If we've now reached the max for this category, print a message
    if category_count[category] == MAX_PER_CATEGORY:
        print(f"[INFO] Reached {MAX_PER_CATEGORY} videos in category '{category}'.")

# Persist the collected rows. The original cell gathered `data_entries` but
# never wrote CSV_FILE, so its image/caption pairs were lost. Explicit columns
# keep the header row even when no frame was extracted.
df = pd.DataFrame(
    data_entries,
    columns=['image_path', 'caption', 'video_key', 'category', 'frame_index'],
)
df.to_csv(CSV_FILE, index=False)
print(f"Data saved to {CSV_FILE}")




Processing 'Abuse043_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse044_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse045_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse046_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse047_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse048_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse049_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse050_x264' in category 'Abuse' at 30.0 fps.
Processing 'Arrest047_x264' in category 'Arrest' at 30.0 fps.
Processing 'Arson040_x264' in category 'Arson' at 30.0 fps.
Processing 'Arson041_x264' in category 'Arson' at 30.0 fps.
Processing 'Arson042_x264' in category 'Arson' at 30.0 fps.
Processing 'Arson044_x264' in category 'Arson' at 30.0 fps.
Processing 'Arson045_x264' in category 'Arson' at 30.0 fps.
Processing 'Arson046_x264' in category 'Arson' at 30.0 fps.
Processing 'Arson047_x264' in category 'Arson' at 30.0 fps.
Processing 'Arson048_x264' in category

## Process with PySceneDetect to get key frames

In [3]:
import os
import cv2
import json
import re
import pandas as pd
from collections import defaultdict

# 1) Import PySceneDetect
!pip install scenedetect --quiet
from scenedetect import VideoManager, SceneManager
from scenedetect.detectors import ContentDetector
from scenedetect.scene_manager import StatsManager

# Paths for annotations, videos, output frames, and CSV file
# NOTE: OUTPUT_DIR and CSV_FILE have the same values as in the previous
# (every-20th-frame) validation cell, so this cell writes into the same folder
# and overwrites that CSV name.
ANNOTATIONS_FILE = '/kaggle/input/ucaucf-crime-annotation-dataset/UCFCrime_Val.json'
VIDEOS_DIR = '/kaggle/input/ucaucf-crime-annotation-dataset/UCF_Crimes/UCF_Crimes/Videos'  # Searched recursively; adjust if needed
OUTPUT_DIR = 'val_output_frames'          # All key frames from every video are saved flat in this one folder
CSV_FILE = 'Val_image_captions.csv'       # One row per saved key frame: path, caption, video key, category, frame index

# Extensions accepted when matching a video key to a file (compared against the
# lower-cased file extension, so list them in lower case)
VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv']

# Max number of videos to process per category
MAX_PER_CATEGORY = 110

def find_video_file(video_name, base_dir, exts):
    """
    Locate the video file whose filename stem equals `video_name`.

    Walks `base_dir` and every directory beneath it and returns the full path
    of the first file whose stem matches `video_name` exactly and whose
    lower-cased extension is contained in `exts`. Returns None when no file
    qualifies.
    """
    for folder, _subdirs, filenames in os.walk(base_dir):
        for filename in filenames:
            stem, suffix = os.path.splitext(filename)
            if stem != video_name:
                continue
            if suffix.lower() not in exts:
                continue
            return os.path.join(folder, filename)
    return None

def extract_category_from_key(video_key):
    """
    Derive the category from a video key.

    The category is the run of ASCII letters at the very start of the key,
    e.g. 'Abuse001_x264' -> 'Abuse'. Keys that do not begin with a letter
    yield "Unknown".
    """
    leading_letters = re.match(r'([A-Za-z]+)', video_key)
    return leading_letters.group(1) if leading_letters else "Unknown"

# Create the output directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Load the JSON annotations. JSON text is UTF-8, so do not depend on the
# platform's default encoding.
with open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as f:
    annotations = json.load(f)

# Track how many videos processed per category
category_count = defaultdict(int)

# List to collect dictionaries with image paths and captions
data_entries = []

# 2) Function to detect scenes and pick key frames
def detect_scenes_and_extract_frames(video_path, video_name, category, timestamps, sentences):
    """
    Detect scenes with PySceneDetect and save one captioned key frame per scene.

    For every detected scene the midpoint frame is taken as its key frame. The
    frame is written to OUTPUT_DIR only if its time (frame index / fps) falls
    inside one of the `timestamps` intervals; the sentence at the same position
    in `sentences` becomes its caption.

    Args:
        video_path: Path of the video file to analyse.
        video_name: Video key, used as the image filename prefix.
        category: Category label copied into every returned record.
        timestamps: Sequence of (start, end) pairs in seconds.
        sentences: Captions aligned by position with `timestamps`.

    Returns:
        A list of dicts with keys 'image_path', 'caption', 'video_key',
        'category' and 'frame_index', one per saved key frame. The list is
        empty when the video cannot be opened or reports no usable frame rate.
    """
    frames_and_captions = []  # one record per saved key frame

    # --- Setup PySceneDetect ---
    video_manager = VideoManager([video_path])
    stats_manager = StatsManager()
    scene_manager = SceneManager(stats_manager)

    # ContentDetector: you can tune the threshold to get more/fewer scenes
    scene_manager.add_detector(ContentDetector(threshold=30.0))

    cap = None
    try:
        # Start the video manager & perform scene detection
        video_manager.start()
        scene_manager.detect_scenes(frame_source=video_manager)
        scene_list = scene_manager.get_scene_list()

        # Re-open video with OpenCV for frame extraction
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"[ERROR] Unable to open video file: {video_path}")
            return frames_and_captions

        # A frame rate of 0 (or NaN) would make the frame-time computation fail
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            print(f"[ERROR] Invalid frame rate ({fps}) for video file: {video_path}")
            return frames_and_captions

        # zip() stops at the shorter list, so an interval without a sentence
        # is treated as uncaptioned instead of raising IndexError.
        captioned_intervals = list(zip(timestamps, sentences))

        for start_tc, end_tc in scene_list:
            # Pick the midpoint frame of the scene
            mid_frame = (start_tc.get_frames() + end_tc.get_frames()) // 2
            mid_time_sec = mid_frame / fps

            # First interval (in annotation order) covering the key frame time
            matched_caption = next(
                (sentence for (start, end), sentence in captioned_intervals
                 if start <= mid_time_sec <= end),
                None,
            )
            # Uncaptioned (or empty-caption) scenes are skipped before seeking,
            # which avoids decoding frames that would be discarded anyway.
            if not matched_caption:
                continue

            cap.set(cv2.CAP_PROP_POS_FRAMES, mid_frame)
            ret, frame = cap.read()
            if not ret:
                continue

            image_filename = f"{video_name}_frame{mid_frame}.jpg"
            image_path = os.path.join(OUTPUT_DIR, image_filename)

            # imwrite() returns False on failure; don't record a row that
            # points at an image which was never written.
            if not cv2.imwrite(image_path, frame):
                print(f"[WARNING] Failed to write frame image: {image_path}")
                continue

            frames_and_captions.append({
                'image_path': image_path,
                'caption': matched_caption,
                'video_key': video_name,
                'category': category,
                'frame_index': mid_frame
            })
    finally:
        # Release both readers even if detection or extraction raised
        if cap is not None:
            cap.release()
        video_manager.release()

    return frames_and_captions

# 4) Main loop: process each video using scene detection
for video_name, data in annotations.items():
    # Identify the category from the video key
    category = extract_category_from_key(video_name)

    # Skip once this category has reached MAX_PER_CATEGORY processed videos
    if category_count[category] >= MAX_PER_CATEGORY:
        continue

    # Find the actual video file
    video_file = find_video_file(video_name, VIDEOS_DIR, VIDEO_EXTENSIONS)
    if not video_file:
        print(f"[WARNING] Could not find file for video key '{video_name}'")
        continue

    # Extract timestamps and sentences
    timestamps = data.get("timestamps", [])  # list of (start, end) in seconds
    sentences = data.get("sentences", [])    # list of caption strings

    print(f"Processing '{video_name}' in category '{category}'...")

    # Use PySceneDetect to get key frames
    frames_captions = detect_scenes_and_extract_frames(
        video_path=video_file,
        video_name=video_name,
        category=category,
        timestamps=timestamps,
        sentences=sentences
    )

    # Append results
    data_entries.extend(frames_captions)

    # Increase the count for this category
    category_count[category] += 1
    if category_count[category] == MAX_PER_CATEGORY:
        print(f"[INFO] Reached {MAX_PER_CATEGORY} videos in category '{category}'.")

# 5) Convert to DataFrame and save to CSV.
# Explicit columns keep the header row even when no frame was extracted;
# otherwise the file is written without a header and cannot be read back.
df = pd.DataFrame(
    data_entries,
    columns=['image_path', 'caption', 'video_key', 'category', 'frame_index'],
)
df.to_csv(CSV_FILE, index=False)
print(f"\n[INFO] Saved {len(df)} extracted frames to '{CSV_FILE}'.")


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.6/131.6 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hProcessing 'Abuse043_x264' in category 'Abuse'...
Processing 'Abuse044_x264' in category 'Abuse'...
Processing 'Abuse045_x264' in category 'Abuse'...
Processing 'Abuse046_x264' in category 'Abuse'...
Processing 'Abuse047_x264' in category 'Abuse'...
Processing 'Abuse048_x264' in category 'Abuse'...
Processing 'Abuse049_x264' in category 'Abuse'...
Processing 'Abuse050_x264' in category 'Abuse'...
Processing 'Arrest047_x264' in category 'Arrest'...
Processing 'Arson040_x264' in category 'Arson'...
Processing 'Arson041_x264' in category 'Arson'...
Processing 'Arson042_x264' in category 'Arson'...
Processing 'Arson044_x264' in category 'Arson'...
Processing 'Arson045_x264' in category 'Arson'...
Processing 'Arson046_x264' in category 'Arson'...
Processing 'Arson047_x264' in category 'Arson'...
Processing 'Arson048_x264' in category 'Arson'...
Processing 'Arson049_

In [4]:
import os
import cv2
import json
import re
import pandas as pd
from collections import defaultdict

# 1) Install & Import PySceneDetect
!pip install scenedetect --quiet
from scenedetect import VideoManager, SceneManager
from scenedetect.detectors import ContentDetector
from scenedetect.scene_manager import StatsManager

# Paths for annotations, videos, output frames, and CSV file
ANNOTATIONS_FILE = '/kaggle/input/ucaucf-crime-annotation-dataset/UCFCrime_Test.json'
# Root folder searched recursively for the video files
VIDEOS_DIR = '/kaggle/input/ucaucf-crime-annotation-dataset/UCF_Crimes/UCF_Crimes/Videos'
# All extracted frames from every video are saved flat in this one folder
OUTPUT_DIR = 'test_output_frames'
# One row per saved frame: path, caption, video key, category, frame index
CSV_FILE = 'test_image_captions.csv'

# Allowed video file extensions (compared against the lower-cased file
# extension, so list them in lower case)
VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv']

# Max videos per category
MAX_PER_CATEGORY = 150

def find_video_file(video_name, base_dir, exts):
    """
    Locate the video file whose filename stem equals `video_name`.

    Walks `base_dir` and every directory beneath it and returns the full path
    of the first file whose stem matches `video_name` exactly and whose
    lower-cased extension is contained in `exts`. Returns None when no file
    qualifies.
    """
    for folder, _subdirs, filenames in os.walk(base_dir):
        for filename in filenames:
            stem, suffix = os.path.splitext(filename)
            if stem != video_name:
                continue
            if suffix.lower() not in exts:
                continue
            return os.path.join(folder, filename)
    return None

def extract_category_from_key(video_key):
    """
    Derive the category from a video key.

    The category is the run of ASCII letters at the very start of the key,
    e.g. 'Abuse001_x264' -> 'Abuse'. Keys that do not begin with a letter
    yield "Unknown".
    """
    leading_letters = re.match(r'([A-Za-z]+)', video_key)
    return leading_letters.group(1) if leading_letters else "Unknown"

# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Load JSON annotations. JSON text is UTF-8, so do not depend on the
# platform's default encoding.
with open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as f:
    annotations = json.load(f)

# Number of videos processed so far, per category
category_count = defaultdict(int)
# One dict per saved frame (image path, caption, video key, category, frame index)
data_entries = []

def detect_scenes_and_extract_frames(
    video_path, video_name, category,
    timestamps, sentences,
    frames_per_scene=3  # <--- We want multiple frames per scene
):
    """
    Detect scenes with PySceneDetect and save several captioned frames per scene.

    Up to `frames_per_scene` frames are sampled evenly between the first and
    the last frame of each detected scene. A frame is written to OUTPUT_DIR
    only if its time (frame index / fps) falls inside one of the `timestamps`
    intervals; the sentence at the same position in `sentences` becomes its
    caption.

    Args:
        video_path: Path of the video file to analyse.
        video_name: Video key, used as the image filename prefix.
        category: Category label copied into every returned record.
        timestamps: Sequence of (start, end) pairs in seconds.
        sentences: Captions aligned by position with `timestamps`.
        frames_per_scene: Number of sample positions per scene. Scenes shorter
            than this yield fewer frames because repeated positions are
            dropped. Values below 1 yield no frames.

    Returns:
        A list of dicts with keys 'image_path', 'caption', 'video_key',
        'category' and 'frame_index', one per saved frame. The list is empty
        when the video cannot be opened or reports no usable frame rate.
    """
    frames_and_captions = []
    if frames_per_scene < 1:
        return frames_and_captions

    # --- Setup PySceneDetect ---
    video_manager = VideoManager([video_path])
    stats_manager = StatsManager()
    scene_manager = SceneManager(stats_manager)

    # Adjust threshold to tune sensitivity
    scene_manager.add_detector(ContentDetector(threshold=30.0))

    cap = None
    try:
        # Start the video manager & perform scene detection
        video_manager.start()
        scene_manager.detect_scenes(frame_source=video_manager)
        scene_list = scene_manager.get_scene_list()

        # Re-open video with OpenCV for frame extraction
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"[ERROR] Unable to open video file: {video_path}")
            return frames_and_captions

        # A frame rate of 0 (or NaN) would make the frame-time computation fail
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            print(f"[ERROR] Invalid frame rate ({fps}) for video file: {video_path}")
            return frames_and_captions

        # zip() stops at the shorter list, so an interval without a sentence
        # is treated as uncaptioned instead of raising IndexError.
        captioned_intervals = list(zip(timestamps, sentences))

        for scene_idx, (start_tc, end_tc) in enumerate(scene_list):
            start_frame = start_tc.get_frames()
            end_frame = end_tc.get_frames()
            scene_length = end_frame - start_frame

            # Nothing to sample from an empty scene
            if scene_length <= 0:
                continue

            # Sample within [start_frame, end_frame - 1]. The scene's end
            # timecode is the start of the following scene, so sampling at
            # end_frame itself would grab a frame that belongs to the next
            # scene (or lies past the end of the video).
            last_offset = scene_length - 1
            if frames_per_scene > 1:
                offsets = [
                    last_offset * i // (frames_per_scene - 1)
                    for i in range(frames_per_scene)
                ]
            else:
                offsets = [0]

            # dict.fromkeys() drops repeated positions (short scenes) while
            # keeping order, so no frame is saved or recorded twice.
            for frame_idx in dict.fromkeys(start_frame + offset for offset in offsets):
                current_time_sec = frame_idx / fps

                # First interval (in annotation order) covering this frame time
                matched_caption = next(
                    (sentence for (start_sec, end_sec), sentence in captioned_intervals
                     if start_sec <= current_time_sec <= end_sec),
                    None,
                )
                # Skip uncaptioned frames before seeking/decoding them
                if not matched_caption:
                    continue

                # Set video position & read the frame
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                ret, frame = cap.read()
                if not ret:
                    continue

                image_filename = f"{video_name}_scene{scene_idx}_frame{frame_idx}.jpg"
                image_path = os.path.join(OUTPUT_DIR, image_filename)

                # imwrite() returns False on failure; don't record a row that
                # points at an image which was never written.
                if not cv2.imwrite(image_path, frame):
                    print(f"[WARNING] Failed to write frame image: {image_path}")
                    continue

                frames_and_captions.append({
                    'image_path': image_path,
                    'caption': matched_caption,
                    'video_key': video_name,
                    'category': category,
                    'frame_index': frame_idx
                })
    finally:
        # Release both readers even if detection or extraction raised
        if cap is not None:
            cap.release()
        video_manager.release()

    return frames_and_captions

# Main loop: run scene-based extraction for each annotated video
for video_name, data in annotations.items():
    category = extract_category_from_key(video_name)

    # Skip once this category has reached MAX_PER_CATEGORY processed videos
    if category_count[category] >= MAX_PER_CATEGORY:
        continue

    video_file = find_video_file(video_name, VIDEOS_DIR, VIDEO_EXTENSIONS)
    if not video_file:
        print(f"[WARNING] Could not find file for video key '{video_name}'")
        continue

    timestamps = data.get("timestamps", [])
    sentences = data.get("sentences", [])

    print(f"Processing '{video_name}' in category '{category}'...")

    frames_captions = detect_scenes_and_extract_frames(
        video_path=video_file,
        video_name=video_name,
        category=category,
        timestamps=timestamps,
        sentences=sentences,
        frames_per_scene=3  # <--- Change this to however many frames you want per scene
    )

    data_entries.extend(frames_captions)
    category_count[category] += 1

    if category_count[category] == MAX_PER_CATEGORY:
        print(f"[INFO] Reached {MAX_PER_CATEGORY} videos in category '{category}'.")

# Convert to DataFrame and save.
# Explicit columns keep the header row even when no frame was extracted;
# otherwise the file is written without a header and cannot be read back.
df = pd.DataFrame(
    data_entries,
    columns=['image_path', 'caption', 'video_key', 'category', 'frame_index'],
)
df.to_csv(CSV_FILE, index=False)
print(f"[INFO] Saved {len(df)} extracted frames to '{CSV_FILE}'.")


Processing 'Abuse037_x264' in category 'Abuse'...
Processing 'Abuse038_x264' in category 'Abuse'...
Processing 'Abuse039_x264' in category 'Abuse'...
Processing 'Abuse040_x264' in category 'Abuse'...
Processing 'Abuse041_x264' in category 'Abuse'...
Processing 'Abuse042_x264' in category 'Abuse'...
Processing 'Arrest043_x264' in category 'Arrest'...
Processing 'Arrest044_x264' in category 'Arrest'...
Processing 'Arrest046_x264' in category 'Arrest'...
Processing 'Arrest048_x264' in category 'Arrest'...
Processing 'Arrest049_x264' in category 'Arrest'...
Processing 'Arrest050_x264' in category 'Arrest'...
Processing 'Arrest051_x264' in category 'Arrest'...
Processing 'Arson020_x264' in category 'Arson'...
Processing 'Arson021_x264' in category 'Arson'...
Processing 'Arson022_x264' in category 'Arson'...
Processing 'Arson023_x264' in category 'Arson'...
Processing 'Arson024_x264' in category 'Arson'...
Processing 'Arson025_x264' in category 'Arson'...
Processing 'Arson026_x264' in catego

## Extract Frames From All Videos and Save Them in a Single Folder

In [5]:
import os
import cv2
import json
import re
import pandas as pd
from collections import defaultdict

# Paths for annotations, videos, output frames, and CSV file
# The annotations JSON maps each video key to a dict that is read below via its
# "timestamps" and "sentences" entries.
ANNOTATIONS_FILE = '/kaggle/input/ucaucf-crime-annotation-dataset/UCFCrime_Train.json'
VIDEOS_DIR = '/kaggle/input/ucaucf-crime-annotation-dataset/UCF_Crimes/UCF_Crimes/Videos'    # Searched recursively; e.g., "./UCF_Crimes/Videos/"
OUTPUT_DIR = 'output-frames'           # All frames from every video are saved flat in this one folder
CSV_FILE = 'train_image_captions.csv'    # One row per saved frame: path, caption, video key, category, frame index

# Extensions accepted when matching a video key to a file (compared against the
# lower-cased file extension, so list them in lower case)
VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv']

def find_video_file(video_name, base_dir, exts):
    """
    Locate the video file whose filename stem equals `video_name`.

    Walks `base_dir` and every directory beneath it and returns the full path
    of the first file whose stem matches `video_name` exactly and whose
    lower-cased extension is contained in `exts`. Returns None when no file
    qualifies.
    """
    for folder, _subdirs, filenames in os.walk(base_dir):
        for filename in filenames:
            stem, suffix = os.path.splitext(filename)
            if stem != video_name:
                continue
            if suffix.lower() not in exts:
                continue
            return os.path.join(folder, filename)
    return None

def extract_category_from_key(video_key):
    """
    Derive the category from a video key.

    The category is the run of ASCII letters at the very start of the key,
    e.g. 'Abuse001_x264' -> 'Abuse'. Keys that do not begin with a letter
    yield "Unknown".
    """
    leading_letters = re.match(r'([A-Za-z]+)', video_key)
    return leading_letters.group(1) if leading_letters else "Unknown"

# Create the output directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Load the JSON annotations. JSON text is UTF-8, so do not depend on the
# platform's default encoding.
with open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as f:
    annotations = json.load(f)

# List to collect dictionaries with image paths and captions
data_entries = []

# Keep one frame out of every FRAME_STRIDE decoded frames
FRAME_STRIDE = 20

# Iterate over each video entry in the JSON
for video_name, data in annotations.items():
    # Identify the category from the video key
    category = extract_category_from_key(video_name)

    # Attempt to find the actual video file in subfolders
    video_file = find_video_file(video_name, VIDEOS_DIR, VIDEO_EXTENSIONS)
    if not video_file:
        print(f"[WARNING] Could not find file for video key '{video_name}'")
        continue

    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            print(f"[ERROR] Unable to open video file: {video_file}")
            continue

        # A frame rate of 0 (or NaN) would make the frame-time computation
        # below fail, so such videos are skipped.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            print(f"[ERROR] Invalid frame rate ({fps}) for video file: {video_file}")
            continue
        print(f"Processing '{video_name}' in category '{category}' at {fps} fps.")

        # Pair each [start, end] interval with its sentence. zip() stops at the
        # shorter list, so a missing sentence cannot raise IndexError.
        timestamps = data.get("timestamps", [])
        sentences = data.get("sentences", [])
        captioned_intervals = list(zip(timestamps, sentences))

        frame_index = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # End of video

            if frame_index % FRAME_STRIDE == 0:
                current_time = frame_index / fps

                # First interval (in annotation order) covering this frame time
                caption = next(
                    (sentence for (start, end), sentence in captioned_intervals
                     if start <= current_time <= end),
                    None,
                )

                # Only frames that fall inside a captioned interval are kept
                if caption is not None:
                    image_filename = f"{video_name}_frame{frame_index}.jpg"
                    image_path = os.path.join(OUTPUT_DIR, image_filename)

                    # imwrite() returns False on failure; don't record a row
                    # that points at an image which was never written.
                    if cv2.imwrite(image_path, frame):
                        data_entries.append({
                            'image_path': image_path,
                            'caption': caption,
                            'video_key': video_name,
                            'category': category,
                            'frame_index': frame_index
                        })
                    else:
                        print(f"[WARNING] Failed to write frame image: {image_path}")

            frame_index += 1
    finally:
        # Always free the decoder, including on `continue` and on exceptions
        cap.release()

# Convert the collected data into a DataFrame and save as a CSV file.
# Explicit columns keep the header row even when no frame was extracted.
df = pd.DataFrame(
    data_entries,
    columns=['image_path', 'caption', 'video_key', 'category', 'frame_index'],
)
df.to_csv(CSV_FILE, index=False)
print(f"Data saved to {CSV_FILE}")


Processing 'Abuse001_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse002_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse003_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse004_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse005_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse006_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse007_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse008_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse009_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse010_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse011_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse012_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse013_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse014_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse015_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse016_x264' in category 'Abuse' at 30.0 fps.
Processing 'Abuse017_x264' in category '

## Save output Data in a zip file

In [6]:
import shutil

# Folder whose contents go into the archive
output_dir = "/kaggle/working/test_output_frames"

# Archive file name, including the ".zip" suffix
zip_filename = "test_ucf_output.zip"

# make_archive() appends the ".zip" suffix itself, so pass the name without it
archive_base = zip_filename.replace('.zip', '')
shutil.make_archive(archive_base, 'zip', output_dir)

print(f"Zipped the output directory to {zip_filename}")

Zipped the output directory to test_ucf_output.zip


## Get Download Link for the zip file to download it in Google Drive

In [7]:
from IPython.display import FileLink
# Display a download link for the archive as this cell's output.
# NOTE(review): no cell visible in this notebook creates 'train_ucf_output.zip'
# (the zip cell above builds 'test_ucf_output.zip') - confirm the archive
# exists before relying on this link.
FileLink(r'train_ucf_output.zip')

In [8]:
from IPython.display import FileLink
# Display a download link for 'test_ucf_output.zip', the archive name used by
# the zip cell above, as this cell's output.
FileLink(r'test_ucf_output.zip')

In [9]:
from IPython.display import FileLink
# Display a download link for the archive as this cell's output.
# NOTE(review): no cell visible in this notebook creates 'Val_ucf_output.zip'
# (the zip cell above builds 'test_ucf_output.zip') - confirm the archive
# exists before relying on this link.
FileLink(r'Val_ucf_output.zip')

In [10]:
import os
import json
import random
import shutil
from collections import defaultdict

# === Paths ===
VIDEO_DIR = "/kaggle/input/ucaucf-crime-annotation-dataset/UCF_Crimes/UCF_Crimes/Videos"
ANNOTATION_JSON_PATH = "/kaggle/input/ucaucf-crime-annotation-dataset/UCFCrime_Train.json"  # Adjust if different
OUTPUT_DIR = "/kaggle/working/sampled_ucf_videos"
OUTPUT_JSON_PATH = os.path.join(OUTPUT_DIR, "sampled_annotations.json")

# === Sampling settings ===
SAMPLES_PER_CATEGORY = 3    # Videos drawn from each category (fewer if the category is smaller)
SAMPLE_SEED = 42            # Fixed seed so a re-run picks the same videos
VIDEO_EXTENSION = ".mp4"    # Change if videos are .avi or another extension

# === Load Annotations ===
# JSON text is UTF-8, so do not depend on the platform's default encoding.
with open(ANNOTATION_JSON_PATH, "r", encoding="utf-8") as f:
    annotations = json.load(f)

# === Organize Videos by Category ===
# List the category folders once instead of once per annotation key. Longest
# names are tried first so a key is never claimed by a shorter folder name that
# merely happens to be a prefix of the right one; ties sort alphabetically so
# the result does not depend on directory listing order.
categories = sorted(
    (entry for entry in os.listdir(VIDEO_DIR)
     if os.path.isdir(os.path.join(VIDEO_DIR, entry))),
    key=lambda name: (-len(name), name),
)

category_videos = defaultdict(list)

for video_name in annotations:
    # Video name example: Abuse001_x264
    for category in categories:
        if video_name.startswith(category):
            category_videos[category].append(video_name)
            break

# === Sample Videos and Copy to Output ===
sampled_data = {}
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Dedicated generator: reproducible and independent of the global random state
rng = random.Random(SAMPLE_SEED)

for category, videos in category_videos.items():
    sampled = rng.sample(videos, min(SAMPLES_PER_CATEGORY, len(videos)))
    category_output_dir = os.path.join(OUTPUT_DIR, category)
    os.makedirs(category_output_dir, exist_ok=True)

    for video_name in sampled:
        video_file = video_name + VIDEO_EXTENSION
        src_video_path = os.path.join(VIDEO_DIR, category, video_file)
        dst_video_path = os.path.join(category_output_dir, video_file)

        # Only videos that were actually copied get an entry in the new JSON
        if os.path.exists(src_video_path):
            shutil.copy2(src_video_path, dst_video_path)
            sampled_data[video_name] = annotations[video_name]
        else:
            print(f"Warning: Video file not found: {src_video_path}")

# === Save Sampled Annotations ===
with open(OUTPUT_JSON_PATH, "w", encoding="utf-8") as out_json:
    json.dump(sampled_data, out_json, indent=4)

print(f"✅ Sampling complete! Videos saved in: {OUTPUT_DIR}")
print(f"✅ New JSON annotations file: {OUTPUT_JSON_PATH}")


✅ Sampling complete! Videos saved in: /kaggle/working/sampled_ucf_videos
✅ New JSON annotations file: /kaggle/working/sampled_ucf_videos/sampled_annotations.json


In [11]:
import shutil

# Folder whose contents go into the archive
output_dir = "/kaggle/working/sampled_ucf_videos"

# Archive file name, including the ".zip" suffix
zip_filename = "sampled_ucf_videos.zip"

# make_archive() appends the ".zip" suffix itself, so pass the name without it
archive_base = zip_filename.replace('.zip', '')
shutil.make_archive(archive_base, 'zip', output_dir)

print(f"Zipped the output directory to {zip_filename}")

Zipped the output directory to sampled_ucf_videos.zip


In [12]:
from IPython.display import FileLink
# Display a download link for 'sampled_ucf_videos.zip', the archive name used
# by the zip cell above, as this cell's output.
FileLink(r'sampled_ucf_videos.zip')