# 📘 VidSense – Frame Extraction & Preprocessing  
This notebook extracts frames from each raw video, resizes them to 224×224, and stores them in structured folders under `data/processed/`.  
These frames will later be passed to the CNN-LSTM model.


In [8]:
# -----------------------------------------------------
# FRAME EXTRACTION – VidSense Project
# Senior Data Scientist Notes:
# - This notebook reads videos from data/raw/
# - Extracts frames and saves them into data/processed/
# - Frame-based datasets are required for CNN+LSTM pipeline
# -----------------------------------------------------

import cv2
import os
from pathlib import Path

# Detect the repo root automatically: when the kernel was started inside the
# "notebooks" folder the root is its parent, otherwise the working directory
# itself is the root. The folder *name* is compared (not a substring of the
# full path) so an unrelated ancestor such as ".../my-notebooks-repo/" cannot
# trigger the parent hop by accident.
repo_root = Path.cwd().parent if Path.cwd().name == "notebooks" else Path.cwd()

# Derive the data folders from the repo root instead of a machine-specific
# absolute path, so the notebook runs unchanged on any checkout location / OS.
raw_dir = repo_root / "data" / "raw"
processed_dir = repo_root / "data" / "processed"

print("Raw directory:", raw_dir)
print("Processed directory:", processed_dir)

# Create processed dir if it does not exist yet (raw_dir is not created here)
processed_dir.mkdir(parents=True, exist_ok=True)


Raw directory: D:\VidSense\data\raw
Processed directory: D:\VidSense\data\processed


In [9]:
# Locate the project folders relative to where the kernel was launched:
# started from "notebooks/" -> the repo root is one level up,
# started anywhere else     -> the working directory is taken as the root.
if Path.cwd().name == "notebooks":
    repo_root = Path.cwd().parent
else:
    repo_root = Path.cwd()

raw_dir = repo_root.joinpath("data", "raw")
processed_dir = repo_root.joinpath("data", "processed")

# Only the output folder is created here; raw_dir is left untouched.
processed_dir.mkdir(parents=True, exist_ok=True)

print("Raw directory:", raw_dir)
print("Processed directory:", processed_dir)


Raw directory: D:\VidSense\data\raw
Processed directory: D:\VidSense\data\processed


In [11]:

# Function: extract frames from a given .mp4 video
# -----------------------------------------------------

def extract_frames(video_path, output_folder, frame_rate=5, target_size=(224, 224)):
    """
    Sample frames from a video and save them as JPEG files.

    Every ``frame_rate``-th frame that is read (frames 0, frame_rate,
    2*frame_rate, ...) is optionally resized and written to
    ``output_folder`` as ``frame_0000.jpg``, ``frame_0001.jpg``, ...
    The file number counts saved frames, not source frame indices.

    Parameters
    ----------
    video_path : str or pathlib.Path
        Video file to read; opened with ``cv2.VideoCapture``.
    output_folder : str or pathlib.Path
        Destination folder. Created (including parents) once the video has
        been opened successfully.
    frame_rate : int, default 5
        Sampling stride (not frames per second): keep 1 frame out of every
        ``frame_rate`` frames. Controls the dataset size.
    target_size : tuple of (width, height) or None, default (224, 224)
        Size each kept frame is resized to before saving. Pass ``None`` to
        keep the native resolution of the video.

    Returns
    -------
    None
        Progress and failures are reported with ``print``.

    Raises
    ------
    ValueError
        If ``frame_rate`` is smaller than 1.
    """
    # Validate before touching the video: a stride of 0 would otherwise fail
    # with ZeroDivisionError in the modulo below, after the capture is open.
    if frame_rate < 1:
        raise ValueError(f"frame_rate must be >= 1, got {frame_rate!r}")

    video_path = Path(video_path)
    output_folder = Path(output_folder)

    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            print("❌ Unable to open:", video_path)
            return

        output_folder.mkdir(parents=True, exist_ok=True)

        saved = 0  # frames written so far; also the next output file number
        count = 0  # frames read so far

        while True:
            success, frame = cap.read()
            if not success:
                break

            # Save every nth frame
            if count % frame_rate == 0:
                if target_size is not None:
                    # INTER_AREA is the interpolation OpenCV recommends for shrinking
                    frame = cv2.resize(frame, tuple(target_size), interpolation=cv2.INTER_AREA)

                frame_file = output_folder / f"frame_{saved:04d}.jpg"
                # imwrite signals failure through its return value, not an exception
                if cv2.imwrite(str(frame_file), frame):
                    saved += 1
                else:
                    print("⚠ Failed to write:", frame_file)

            count += 1
    finally:
        # Release the capture on every path: normal end, early return, or error
        cap.release()

    print(f"✔ Extracted {saved} frames from {video_path.name}")


In [12]:

# Loop over each folder inside data/raw (walking, running, …)
# and process every .mp4 file inside.
# -----------------------------------------------------

# Every sub-folder of raw_dir is one class label. A missing raw_dir is treated
# like an empty one so the error message below is shown instead of a
# FileNotFoundError from iterdir(). Sorting keeps the label order identical
# across operating systems and file systems.
labels = sorted(d.name for d in raw_dir.iterdir() if d.is_dir()) if raw_dir.is_dir() else []
print("Detected labels:", labels)

if not labels:
    print("❌ ERROR: No folders found inside data/raw/.")
else:
    for label in labels:
        label_raw = raw_dir / label
        # Match the extension case-insensitively so ".MP4" files are also
        # picked up on case-sensitive file systems; sort for a stable order.
        videos = sorted(
            p for p in label_raw.iterdir()
            if p.is_file() and p.suffix.lower() == ".mp4"
        )

        print(f"\nProcessing label: {label} | {len(videos)} videos")

        for vid in videos:
            # One output folder per video: <processed_dir>/<label>/<video stem>/
            out_folder = processed_dir / label / vid.stem
            extract_frames(vid, out_folder)


Detected labels: ['falling', 'running', 'sitting', 'walking']

Processing label: falling | 3 videos
‚úî Extracted 12 frames from falling_demo_1.mp4
‚úî Extracted 12 frames from falling_demo_2.mp4
‚úî Extracted 12 frames from falling_demo_3.mp4

Processing label: running | 3 videos
‚úî Extracted 12 frames from running_demo_1.mp4
‚úî Extracted 12 frames from running_demo_2.mp4
‚úî Extracted 12 frames from running_demo_3.mp4

Processing label: sitting | 3 videos
‚úî Extracted 12 frames from sitting_demo_1.mp4
‚úî Extracted 12 frames from sitting_demo_2.mp4
‚úî Extracted 12 frames from sitting_demo_3.mp4

Processing label: walking | 3 videos
‚úî Extracted 12 frames from walking_demo_1.mp4
‚úî Extracted 12 frames from walking_demo_2.mp4
‚úî Extracted 12 frames from walking_demo_3.mp4


In [13]:
# -----------------------------------------------------
# Show summary: frames per video per label
# -----------------------------------------------------

# "\m" in the original banner was an invalid escape sequence (typo for "\n")
print("\n===== SUMMARY ==========\n")

for label in labels:
    label_dir = processed_dir / label
    # Skip labels without an output folder (e.g. no video could be opened);
    # is_dir() also guards against a stray file with the label's name.
    if not label_dir.is_dir():
        continue

    print(f"\nLabel: {label}")

    # Sorted so the report order does not depend on the file system
    for vid_folder in sorted(label_dir.iterdir()):
        if vid_folder.is_dir():
            frames = list(vid_folder.glob("*.jpg"))
            print(f"  {vid_folder.name}: {len(frames)} frames ")




Label: falling
  falling_demo_1: 12 frames 
  falling_demo_2: 12 frames 
  falling_demo_3: 12 frames 

Label: running
  running_demo_1: 12 frames 
  running_demo_2: 12 frames 
  running_demo_3: 12 frames 

Label: sitting
  sitting_demo_1: 12 frames 
  sitting_demo_2: 12 frames 
  sitting_demo_3: 12 frames 

Label: walking
  walking_demo_1: 12 frames 
  walking_demo_2: 12 frames 
  walking_demo_3: 12 frames 
