In [1]:
# === Imports ===
import os
from pathlib import Path
import numpy as np
import cv2
import csv
from datetime import datetime

# === Robust repo_root detection ===
# If you run the notebook from /notebooks, repo_root becomes parent; otherwise it's the current working directory.
cwd = Path.cwd()
if cwd.name.lower() == "notebooks":
    repo_root = cwd.parent
else:
    # also handle being inside ephemeral notebook dirs (like .ipynb_checkpoints)
    if (cwd / "notebooks").exists() or (cwd / "src").exists() or (cwd / "data").exists():
        repo_root = cwd
    else:
        # fallback: assume parent is repo root
        repo_root = cwd.parent

print("Current working directory:", cwd)
print("Detected repository root:", repo_root)

# Paths
data_raw = repo_root / "data" / "raw"
data_processed = repo_root / "data" / "processed"
notebooks_dir = repo_root / "notebooks"
for p in [data_raw, data_processed, notebooks_dir]:
    p.mkdir(parents=True, exist_ok=True)

print("data/raw exists:", data_raw.exists())
print("data/processed exists:", data_processed.exists())


Current working directory: D:\VidSense\notebooks
Detected repository root: D:\VidSense
data/raw exists: True
data/processed exists: True


In [2]:
# === Synthetic video generator config ===
labels = ["walking", "running", "sitting", "falling"]
per_label_videos = 3   # 3 videos per label -> total 12 videos
width, height = 320, 240
frames_per_video = 60  # ~3 seconds @ 20 fps
fps = 20

# Create label subfoldersunder data/raw
for lbl in labels:
    (data_raw / lbl).mkdir(parents=True, exist_ok=True)

def create_synthetic_video(path: Path, label: str, frames=frames_per_video, w=width, h=height, fps=fps):
    """
    Create a small synthetic .mp4 video that simulates motion for the given label.
    path: pathlib.Path for output file
    label: one of labels
    returns: path (string)
    """
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(str(path), fourcc, fps, (w, h))
    for i in range(frames):
        # background (slightly noisy)
        frame = np.full((h, w, 3), 30, dtype=np.uint8)
        cv2.randn(frame, 30, 5)  # add slight texture, looks more realistic

        if label == "walking":
            cx = int((i * 3) % (w + 100)) - 50
            cy = int(h*0.55 + np.sin(i/6.0) * 8)
            cv2.circle(frame, (cx % w, cy), 18, (0,200,0), -1)

        elif label == "running":
            cx = int((i * 6) % (w + 200)) - 100
            cy = int(h*0.5 + np.sin(i/3.0) * 16)
            cv2.circle(frame, (cx % w, cy), 22, (0,0,200), -1)

        elif label == "sitting":
            cx, cy = w//2, int(h*0.6)
            size = 22 + int(np.sin(i/8.0) * 3)
            cv2.rectangle(frame, (cx-size, cy-size), (cx+size, cy+size+8), (200,50,50), -1)

        elif label == "falling":
            t = i / float(frames)
            cy = int(h * (t**1.8))
            cx = w//2 + int(np.sin(i/5.0)*10)
            radius = max(6, 28 - int(i/3))
            cv2.circle(frame, (cx, cy), radius, (255,200,0), -1)

        # helper overlay for debugging (not necessary in final)
        cv2.putText(frame, f"{label} - {i}", (8, 18), cv2.FONT_HERSHEY_SIMPLEX, 0.45, (200,200,200), 1, cv2.LINE_AA)
        writer.write(frame)

    writer.release()
    return str(path)


In [6]:
# === Generate videos and write metadata CSV ===
metadata = []
print("Generating synthetic videos...")

for lbl in labels:
    for i in range(1, per_label_videos + 1):
        filename = f"{lbl}_demo_{i}.mp4"
        out_path = data_raw / lbl / filename
        if out_path.exists():
            print("Skipping existing", out_path.name)
        else:
            create_synthetic_video(out_path, lbl)
            print("Created:", out_path.name)

        # collect metadata for this file
        file_path = out_path
        try:
            # get duration from frame count via cv2 (robust)
            cap = cv2.VideoCapture(str(file_path))
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or frames_per_video
            video_fps = cap.get(cv2.CAP_PROP_FPS) or fps
            duration_sec = round(frame_count / video_fps, 2)
            cap.release()
        except Exception:
            frame_count = frames_per_video
            duration_sec = round(frames_per_video / fps, 2)

        metadata.append({
            "filename": str(file_path.relative_to(repo_root)),
            "label": lbl,
            "frames": frame_count,
            "fps": video_fps,
            "duration_seconds": duration_sec,
            "created_at": datetime.utcnow().isoformat()
        })

# Save metadata CSV under data/raw
csv_path = data_raw / "synthetic_videos.csv"
csv_columns = ["filename", "label", "frames", "fps", "duration_seconds", "created_at"]
with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
    writer.writeheader()
    for data in metadata:
        writer.writerow(data)

print("Metadata CSV saved to:", csv_path)


Generating synthetic videos...
Skipping existing walking_demo_1.mp4
Skipping existing walking_demo_2.mp4
Skipping existing walking_demo_3.mp4
Skipping existing running_demo_1.mp4
Skipping existing running_demo_2.mp4
Skipping existing running_demo_3.mp4
Skipping existing sitting_demo_1.mp4
Skipping existing sitting_demo_2.mp4
Skipping existing sitting_demo_3.mp4
Skipping existing falling_demo_1.mp4
Skipping existing falling_demo_2.mp4
Skipping existing falling_demo_3.mp4
Metadata CSV saved to: D:\VidSense\data\raw\synthetic_videos.csv


In [7]:
# === Verify the filescreated ===
print("\nListing contents of data/raw/:")
for lbl in labels:
    folder = data_raw / lbl
    print(f"\nLabel folder: {folder} -> exists: {folder.exists()}")
    if folder.exists():
        files = list(folder.glob("*.mp4"))
        print("  mp4 count:", len(files))
        for f in files:
            print("   -", f.name)
    else:
        print("  Folder missing:", folder)

# Print CSV head
import pandas as pd
df = pd.read_csv(data_raw / "synthetic_videos.csv")
print("\nMetadata CSV preview:")
display(df.head())



Listing contents of data/raw/:

Label folder: D:\VidSense\data\raw\walking -> exists: True
  mp4 count: 3
   - walking_demo_1.mp4
   - walking_demo_2.mp4
   - walking_demo_3.mp4

Label folder: D:\VidSense\data\raw\running -> exists: True
  mp4 count: 3
   - running_demo_1.mp4
   - running_demo_2.mp4
   - running_demo_3.mp4

Label folder: D:\VidSense\data\raw\sitting -> exists: True
  mp4 count: 3
   - sitting_demo_1.mp4
   - sitting_demo_2.mp4
   - sitting_demo_3.mp4

Label folder: D:\VidSense\data\raw\falling -> exists: True
  mp4 count: 3
   - falling_demo_1.mp4
   - falling_demo_2.mp4
   - falling_demo_3.mp4

Metadata CSV preview:


Unnamed: 0,filename,label,frames,fps,duration_seconds,created_at
0,data\raw\walking\walking_demo_1.mp4,walking,60,20.0,3.0,2025-11-28T07:20:00.180071
1,data\raw\walking\walking_demo_2.mp4,walking,60,20.0,3.0,2025-11-28T07:20:00.186067
2,data\raw\walking\walking_demo_3.mp4,walking,60,20.0,3.0,2025-11-28T07:20:00.206058
3,data\raw\running\running_demo_1.mp4,running,60,20.0,3.0,2025-11-28T07:20:00.217049
4,data\raw\running\running_demo_2.mp4,running,60,20.0,3.0,2025-11-28T07:20:00.223045
