In [3]:
import os
import csv
from pathlib import Path
from collections import defaultdict

# Dataset locations. The defaults are the original machine-specific paths;
# set SMARTEYE_RAW_DIR / SMARTEYE_PROCESSED_DIR to run the notebook on another
# machine without editing this cell.
archive_path = os.environ.get(
    "SMARTEYE_RAW_DIR",
    "/Users/mukuldixit/Desktop/projects/SmartEye/data/raw",
)
data_root = os.path.join(archive_path, "Train")
processed_path = os.environ.get(
    "SMARTEYE_PROCESSED_DIR",
    "/Users/mukuldixit/Desktop/projects/SmartEye/data/processed",
)
output_csv = os.path.join(processed_path, "train.csv")

# Sliding-window parameters: a sequence covers SEQ_LEN frames of one video
# (in sorted frame order) and a new window starts every STRIDE frames.
SEQ_LEN = 16
STRIDE = 8

# Accumulators filled by the sequence-building cell and written out as CSV.
rows = []
sequence_id = 0

In [4]:
# Build one CSV row per sliding window of SEQ_LEN frames for every video found
# under data_root/<class folder>/. Each row is
# [sequence_id, folder_path, video_id, start_frame, label].

# Reset the accumulators here so that re-running this cell does not append a
# second copy of every row on top of the previous run.
rows = []
sequence_id = 0

# os.listdir returns entries in arbitrary order; sorting keeps sequence_id
# assignment and row order reproducible between runs and machines.
for anomaly_type in sorted(os.listdir(data_root)):
    anomaly_path = os.path.join(data_root, anomaly_type)

    if not os.path.isdir(anomaly_path):
        continue

    # Any folder whose name starts with "normal" (case-insensitive) is the
    # negative class; every other folder is an anomaly. A prefix match is used
    # so that names such as "Normal" and "NormalVideos" are both label 0.
    # NOTE(review): confirm against the actual folder names in the dataset.
    label = 0 if anomaly_type.lower().startswith("normal") else 1

    # Group image files by video id, remembering each file's frame number.
    videos = defaultdict(list)
    for f in os.listdir(anomaly_path):
        if not f.lower().endswith((".jpg", ".png")):
            continue

        # Example: Abuse042_x264_180.png -> video "Abuse042", frame 180
        parts = f.split("_x264_")
        try:
            video_id = parts[0]
            frame_number = int(parts[1].split(".")[0])
        except (IndexError, ValueError):
            # File name does not follow <video>_x264_<frame>.<ext>; skip it.
            continue
        videos[video_id].append((frame_number, f))

    # Process each video separately, in a deterministic order.
    for video_id in sorted(videos):
        frame_list = videos[video_id]

        # Order frames by their real frame number, not by file name.
        frame_list.sort(key=lambda item: item[0])

        # Videos shorter than one window contribute no sequences.
        if len(frame_list) < SEQ_LEN:
            continue

        # Slide a SEQ_LEN-frame window over the video in steps of STRIDE; only
        # full windows are emitted.
        for start in range(0, len(frame_list) - SEQ_LEN + 1, STRIDE):
            actual_start_frame = frame_list[start][0]

            rows.append([
                sequence_id,
                anomaly_path,
                video_id,
                actual_start_frame,
                label,
            ])
            sequence_id += 1


In [5]:
# Write the collected sequence rows to output_csv with a header line.

# Create the destination directory first so open() does not fail when the
# processed-data folder has not been created yet.
out_dir = os.path.dirname(output_csv)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)

# newline="" lets the csv module control line endings; the explicit encoding
# keeps the file identical regardless of the platform's default locale.
with open(output_csv, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["sequence_id", "folder_path", "video_id", "start_frame", "label"])
    writer.writerows(rows)

print(f"CSV file created: {output_csv}")
print(f"Total sequences: {len(rows)}")

CSV file created: /Users/mukuldixit/Desktop/projects/SmartEye/data/processed/train.csv
Total sequences: 155986
