In [96]:
# Importing Libraries
import pandas as pd
import numpy as np
import json
import re
import os
from tqdm import tqdm
import yaml
import cv2
import shutil

### Data Prep Helper Functions

In [16]:
# Function to summarize dataset structure
def summarize_dataset_structure(base_dir="."):
    """
    Summarize how many videos exist in each class folder across train/val/test splits.
    Handles nested folder structures (e.g., class -> video -> clips).

    Example layout:
        base_dir/
            train/
                Abuse/
                    Abuse001_x264/
                        Abuse001_x264_0.mp4
                        Abuse001_x264_1.mp4
                Shoplifting/
            val/
            test/
    """
    for split in ["train", "val", "test"]:
        split_path = os.path.join(base_dir, split)
        if not os.path.exists(split_path):
            print(f"‚ö†Ô∏è Split folder not found: {split_path}")
            continue

        print(f"\nüìÇ Stats for split: {split}")
        total_videos = 0

        # Iterate over each class folder (Abuse, Shoplifting, etc.)
        for cls in sorted(os.listdir(split_path)):
            cls_path = os.path.join(split_path, cls)
            if not os.path.isdir(cls_path):
                continue

            video_count = 0

            # Go into subdirectories (e.g., Abuse001_x264)
            for root, _, files in os.walk(cls_path):
                video_files = [
                    f for f in files
                    if f.lower().endswith(('.mp4', '.avi', '.mov', '.mkv', '.npy'))
                ]
                video_count += len(video_files)

            print(f"  üóÇÔ∏è {cls}: {video_count} video clips")
            total_videos += video_count

        print(f"  ‚û§ Total video clips in '{split}': {total_videos}")

In [17]:
# Function to load UCF Crime-style JSON annotations
def load_ucf_json(json_path):
    """
    Loads a UCF Crime-style JSON annotation file and converts it
    into a flattened pandas DataFrame.

    Args:
        json_path (str): Path to the JSON annotation file.
    
    Returns:
        pd.DataFrame: Flattened DataFrame with columns:
            ['video', 'duration', 'start', 'end', 'description']
    """
    with open(json_path, "r") as f:
        data = json.load(f)

    rows = []
    for video, info in data.items():
        for (ts, sentence) in zip(info.get("timestamps", []), info.get("sentences", [])):
            rows.append({
                "video": video,
                "duration": info.get("duration", None),
                "start": ts[0],
                "end": ts[1],
                "description": sentence
            })

    df = pd.DataFrame(rows)
    return df

In [18]:
def add_clip_paths(df):
    """
    Adds a 'clip_path' column based only on DataFrame columns:
    folder/video_basename/video_basename_i.mp4

    Args:
        df (pd.DataFrame): must contain 'folder' and 'video' columns.

    Returns:
        pd.DataFrame: with an extra column 'clip_path'
    """
    df = df.copy()

    # Remove .mp4 extension from video to get folder/video base
    df["video_base"] = df["video"].str.replace(".mp4", "", regex=False)

    # Get index per unique (folder, video) group ‚Äî i = 0, 1, 2, ...
    df["clip_idx"] = df.groupby(["folder", "video"]).cumcount()

    # Construct the relative path
    df["clip_path"] = df.apply(
        lambda row: f"{row['folder']}/{row['video_base']}/{row['video_base']}_{row['clip_idx']}.mp4",
        axis=1
    )

    df.drop(['video_base', 'clip_idx'], axis=1, inplace=True)

    return df

### SwinBERT: Loading UCA Annotations

In [None]:
from moviepy import VideoFileClip

In [None]:
train_file = "../Surveillance-Video-Understanding-main/ucf-annotation/json/UCFCrime_Train.json"
test_file = "../Surveillance-Video-Understanding-main/ucf-annotation/json/UCFCrime_Test.json"
val_file = "../Surveillance-Video-Understanding-main/ucf-annotation/json/UCFCrime_Val.json"

In [None]:
train_df = load_ucf_json(train_file)
test_df = load_ucf_json(test_file)
val_df = load_ucf_json(val_file)

In [None]:
train_df['folder'] = train_df['video'].apply(lambda x: re.match(r'([A-Za-z]+)', x).group(1) if re.match(r'([A-Za-z]+)', x) else None)
test_df['folder'] = test_df['video'].apply(lambda x: re.match(r'([A-Za-z]+)', x).group(1) if re.match(r'([A-Za-z]+)', x) else None)
val_df['folder'] = val_df['video'].apply(lambda x: re.match(r'([A-Za-z]+)', x).group(1) if re.match(r'([A-Za-z]+)', x) else None)

In [None]:
train_df['video'] = train_df['video']+".mp4"
test_df['video'] = test_df['video']+".mp4"
val_df['video'] = val_df['video']+".mp4"

In [None]:
train_df = add_clip_paths(train_df)
train_df[train_df["video"] == "Abuse001_x264.mp4"][["folder", "video", "clip_path"]]

In [None]:
test_df = add_clip_paths(test_df)
val_df = add_clip_paths(val_df)

In [None]:
train_df['split'] = 'train'
test_df['split'] = 'test'
val_df['split'] = 'val'

all_annotations = pd.concat([train_df, test_df, val_df], ignore_index=True)

# Expringting all annotations to CSV
all_annotations.to_csv("../uca-dataset/uca_annotations.csv", index=False)

### SwinBERT: Preparing UCA Dataset with Clipped Videos

In [None]:
def clip_videos_from_df(df, source_dir, output_dir):
    """
    Clips videos using MoviePy 2.x API based on 'start' and 'end' times in the DataFrame.
    Displays a progress bar and only logs failures or invalid clips.
    """
    os.makedirs(output_dir, exist_ok=True)
    total_videos = df['video'].nunique()

    print(f"üé¨ Starting video clipping for {total_videos} unique videos ({len(df)} total clips)...\n")

    # Group videos first
    grouped_videos = list(df.groupby(["folder", "video"]))

    # tqdm progress bar
    for (folder, video_name), group in tqdm(grouped_videos, desc="Processing videos", unit="video"):
        src_path = os.path.join(source_dir, folder, video_name)
        base_name, ext = os.path.splitext(video_name)
        dest_subdir = os.path.join(output_dir, folder, base_name)
        os.makedirs(dest_subdir, exist_ok=True)

        if not os.path.exists(src_path):
            print(f"‚ö†Ô∏è Missing source video: {src_path}")
            continue

        try:
            video = VideoFileClip(src_path)
            for i, row in enumerate(group.itertuples(index=False)):
                start = float(row.start)
                end = min(float(row.end), video.duration)

                # Skip invalid segments
                if end <= start:
                    print(f"‚è© Skipping invalid segment ({start:.2f}-{end:.2f}) in {video_name}")
                    continue

                clip = video.subclipped(start, end)
                dest_path = os.path.join(dest_subdir, f"{base_name}_{i}.mp4")

                # Silent writing (no MoviePy output spam)
                clip.write_videofile(dest_path, audio=False, logger=None)

            video.close()

        except Exception as e:
            print(f"‚ùå Error processing {src_path}: {e}")

    print("\n‚úÖ All videos processed successfully!")

In [None]:
source_dir = '../original-ucf/Videos'
destination_dir = '../uca-dataset'

clip_videos_from_df(
    df=train_df,
    source_dir=source_dir,         
    output_dir=destination_dir+"/train"
)

In [None]:
source_dir = '../original-ucf/Videos'
destination_dir = '../uca-dataset'

clip_videos_from_df(
    df=val_df,
    source_dir=source_dir,         
    output_dir=destination_dir+"/val"
)

In [None]:
source_dir = '../original-ucf/Videos'
destination_dir = '../uca-dataset'

clip_videos_from_df(
    df=test_df,
    source_dir=source_dir,         
    output_dir=destination_dir+"/test"
)

#### Validating Data Prep

In [None]:
def check_for_duplicates(df, subset_cols=["video", "start", "end", "description"]):
    """
    Checks for duplicate video segments based on selected columns.
    Prints how many duplicates exist and which videos have them.
    """
    duplicated_rows = df[df.duplicated(subset=subset_cols, keep=False)]
    total_dupes = len(duplicated_rows)

    if total_dupes == 0:
        print("‚úÖ No duplicates found.")
        return

    print(f"üìä Found {total_dupes} duplicate rows based on {subset_cols}")

    # Count duplicates per video
    dupe_counts = (
        duplicated_rows.groupby(["video", "start", "end"])
        .size()
        .sort_values(ascending=False)
        .reset_index(name="duplicate_count")
    )

    print("\nüéûÔ∏è Videos with duplicate timestamps:")
    display(dupe_counts.head(10))  # show top 10 by default

    return duplicated_rows

In [None]:
check_for_duplicates(test_df)

In [None]:
check_for_duplicates(val_df)

In [None]:
duplicated_rows = check_for_duplicates(train_df)
# These duplicates are present in th original transcripts as well. Leaving them for now

In [None]:
# This aligns perfectly with the UCA paper table 3
destination_dir = "../uca-dataset"
summarize_dataset_structure(base_dir=destination_dir)

### SwinBERT: YAML File Creation - Fine Tune SwinBERT on UCA

In [None]:
def create_yaml_files(df, output_dir="../UCA-Dataset/w-captions/"):
    """
    Converts UCA annotations CSV into SwinBERT-compatible YAML files for train/val/test.

    Args:
        csv_path (str): Path to CSV file with columns ['clip_path', 'description', 'split'].
        output_dir (str): Folder where YAMLs will be saved.
    """

    os.makedirs(output_dir, exist_ok=True)

    # Validate required columns
    required = {"clip_path", "description", "split"}
    if not required.issubset(df.columns):
        raise ValueError(f"CSV must contain columns: {required}")

    # Generate YAML for each split
    for split in df["split"].unique():
        split_df = df[df["split"] == split]
        split_yaml = {
            # This is based on file structure in Cloud GPU Instance
            f"{split}_videos": [os.path.join("../UCA-Dataset/w-captions", p) for p in split_df["clip_path"]],
            "captions": split_df["description"].tolist()
        }

        out_path = os.path.join(output_dir, f"{split}.yaml")
        with open(out_path, "w") as f:
            yaml.dump(split_yaml, f, default_flow_style=False, sort_keys=False, allow_unicode=True)

        print(f"‚úÖ Saved: {out_path} ({len(split_df)} entries)")

In [None]:
all_annotations = pd.read_csv("../UCA-Dataset/uca_annotations.csv")

In [None]:
all_annotations['description'] = all_annotations['description'].str.replace('\n', ' ').str.strip()

In [None]:
create_yaml_files(all_annotations)

###  TEVAD: Aligning data based on UCF to avoid data leakage

In [97]:
# Original UCF
train_ucf = pd.read_csv("../../Anomaly_Train.txt", header=None,
    names=["path"])

train_ucf['video'] = train_ucf['path'].str.split('/').str[1]

test_ucf = pd.read_csv("../../Anomaly_Test.txt", header=None,
    names=["path"])

test_ucf['video'] = test_ucf['path'].str.split('/').str[1]

In [98]:
test_ucf.head(2)

Unnamed: 0,path,video
0,Abuse/Abuse028_x264.mp4,Abuse028_x264.mp4
1,Abuse/Abuse030_x264.mp4,Abuse030_x264.mp4


In [None]:
# Step 1: moving all files to a central location
src_dir = "../TEVAD/save/Crime/UCF_ten_crop_i3d_v1"
dest_dir = "../TEVAD/save/Crime/UCF_all_i3d"

subfolders = ["UCF_Train_ten_crop_i3d", "UCF_Test_ten_crop_i3d"]

for sub in subfolders:
    sub_path = os.path.join(src_dir, sub)
    files = os.listdir(sub_path)

    print(f"\nMoving files from: {sub_path}")

    # tqdm progress bar
    for file in tqdm(files, desc=f"Processing {sub}", unit="file"):
        src_path = os.path.join(sub_path, file)
        dst_path = os.path.join(dest_dir, file)

        if os.path.isfile(src_path):
            shutil.move(src_path, dst_path)

In [116]:
src_dir = "../TEVAD/save/Crime/UCF_all_i3d"
train_dir = "../TEVAD/save/Crime/UCF_ten_crop_i3d_v1/UCF_Train_ten_crop_i3d"
test_dir = "../TEVAD/save/Crime/UCF_ten_crop_i3d_v1/UCF_Test_ten_crop_i3d"

In [117]:
train_ucf['split'] = 'train'
test_ucf['split'] = 'test'

In [118]:
df = pd.concat([train_ucf, test_ucf])
df.drop_duplicates(inplace = True)
df.reset_index(drop = True, inplace = True)

In [119]:
df['split'].value_counts()

split
train    1610
test      290
Name: count, dtype: int64

In [120]:
df['npy_name'] = df['video'].str.replace('.mp4', '_i3d.npy')

In [50]:
for _, row in tqdm(df.iterrows(), total=len(df), desc="Moving files", unit="file"):
    filename = row['npy_name']
    split = row['split']

    src_path = os.path.join(src_dir, filename)

    # Determine destination
    if split in ['train', 'val']:
        dst_path = os.path.join(train_dir, filename)
    else:  # test
        dst_path = os.path.join(test_dir, filename)

    # Move if exists
    if os.path.exists(src_path):
        shutil.move(src_path, dst_path)
    else:
        print(f"Missing file: {filename}")

Moving files: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1900/1900 [00:18<00:00, 105.26file/s]


In [60]:
embeddings_loc = "../TEVAD/save/Crime/sent_emb_n"

In [61]:
df['emb_name'] = df['video'].str.replace('.mp4', '_emb.npy')

In [77]:
# Checking if we missed any embeddings

expected = set(df['emb_name'])
actual = set(os.listdir(embeddings_loc))
missing_files = expected - actual

emb_rem = df[df['emb_name'].isin(missing_files)]
emb_rem.head()

Unnamed: 0,path,video,split,npy_name,emb_name


In [78]:
#emb_rem.to_csv("Emb.csv", index = False)

### TEVAD: Creating UCF Train/Test List and Ground Truth

In [123]:
train_base_dir = "/home/ubuntu/uca-virginia/Multimodal-Anomaly-Detection-Survelliance-Videos/TEVAD/save/Crime/UCF_ten_crop_i3d_v1/UCF_Train_ten_crop_i3d"
test_base_dir = "/home/ubuntu/uca-virginia/Multimodal-Anomaly-Detection-Survelliance-Videos/TEVAD/save/Crime/UCF_ten_crop_i3d_v1/UCF_Test_ten_crop_i3d"

In [127]:
train_ucf.head(2)

Unnamed: 0,path,video,split
0,Abuse/Abuse001_x264.mp4,Abuse001_x264.mp4,train
1,Abuse/Abuse002_x264.mp4,Abuse002_x264.mp4,train


In [148]:
train_ucf['npy_name'] = train_ucf['video'].str.replace('.mp4', '_i3d.npy')
train_ucf['is_normal'] = train_ucf['npy_name'].str.startswith("Normal_Videos")

train_ucf = train_ucf.sort_values(by='is_normal', ascending=True)
train_ucf.reset_index(drop = True, inplace = True)

In [149]:
train_ucf[train_ucf['is_normal']].head(2)

Unnamed: 0,path,video,split,npy_name,is_normal,full_path
810,Training_Normal_Videos_Anomaly/Normal_Videos09...,Normal_Videos097_x264.mp4,train,Normal_Videos097_x264_i3d.npy,True,/home/ubuntu/uca-virginia/Multimodal-Anomaly-D...
811,Training_Normal_Videos_Anomaly/Normal_Videos08...,Normal_Videos086_x264.mp4,train,Normal_Videos086_x264_i3d.npy,True,/home/ubuntu/uca-virginia/Multimodal-Anomaly-D...


In [153]:
train_ucf['full_path'] = train_base_dir + "/" + train_ucf['npy_name']

output_file = "../TEVAD/list/ucf-i3d.list"

with open(output_file, "w") as f:
    for path in train_ucf['full_path']:
        f.write(path + "\n")

In [154]:
train_ucf[train_ucf['is_normal']].head(2)

Unnamed: 0,path,video,split,npy_name,is_normal,full_path
810,Training_Normal_Videos_Anomaly/Normal_Videos09...,Normal_Videos097_x264.mp4,train,Normal_Videos097_x264_i3d.npy,True,/home/ubuntu/uca-virginia/Multimodal-Anomaly-D...
811,Training_Normal_Videos_Anomaly/Normal_Videos08...,Normal_Videos086_x264.mp4,train,Normal_Videos086_x264_i3d.npy,True,/home/ubuntu/uca-virginia/Multimodal-Anomaly-D...


In [155]:
train_ucf[train_ucf['npy_name']== 'Normal_Videos001_x264_i3d.npy']

Unnamed: 0,path,video,split,npy_name,is_normal,full_path
1310,Training_Normal_Videos_Anomaly/Normal_Videos00...,Normal_Videos001_x264.mp4,train,Normal_Videos001_x264_i3d.npy,True,/home/ubuntu/uca-virginia/Multimodal-Anomaly-D...


In [130]:
train_ucf['is_normal'].value_counts()

is_normal
False    810
True     800
Name: count, dtype: int64

In [132]:
# No Sorting for Test
test_ucf['npy_name'] = test_ucf['video'].str.replace('.mp4', '_i3d.npy')

test_ucf['full_path'] = test_base_dir + "/" + test_ucf['npy_name']

output_file = "../TEVAD/list/ucf-i3d-test.list"

with open(output_file, "w") as f:
    for path in train_df['full_path']:
        f.write(path + "\n")

In [44]:
test_df['is_normal'].value_counts()

is_normal
False    206
True     104
Name: count, dtype: int64

### TEVAD: Removing Extra Embeddings

In [45]:
uca_split['emb_name'] = uca_split['video'].str.replace('.mp4', '_emb.npy')

In [46]:
temp= uca_split[['emb_name', 'split']].copy()
temp.drop_duplicates(inplace = True)
temp.reset_index(drop = True, inplace = True)

In [47]:
emb_dir = "../TEVAD/save/Crime/sent_emb_n"

expected = set(temp['emb_name'])
actual = set(os.listdir(emb_dir))

extra_files = actual-expected

In [48]:
for f in extra_files:
    file_path = os.path.join(emb_dir, f)
    
    if os.path.isfile(file_path):
        os.remove(file_path)
        print(f"Deleted: {f}")
    else:
        print(f"Skipped (not a file): {f}")

### TEVAD: Checking how Many Files are missing from Train/Test compared to UCF

In [106]:
train_df = pd.read_csv("../../Anomaly_Train.txt", sep = '/', header = None, names = ['folder', 'file'])
test_df = pd.read_csv("../../Anomaly_Test.txt", sep = '/', header = None, names = ['folder', 'file'])

In [107]:
train_df.head(2)

Unnamed: 0,folder,file
0,Abuse,Abuse001_x264.mp4
1,Abuse,Abuse002_x264.mp4


In [108]:
train_df['npy_name'] = train_df['file'].str.replace('.mp4', '_i3d.npy')
test_df['npy_name'] = test_df['file'].str.replace('.mp4', '_i3d.npy')

In [109]:
train_img_dir = "../TEVAD/save/Crime/UCF_ten_crop_i3d_v1/UCF_Train_ten_crop_i3d"
test_img_dir = "../TEVAD/save/Crime/UCF_ten_crop_i3d_v1/UCF_Test_ten_crop_i3d"

In [110]:
# Checking how many missed files
expected = set(train_df['npy_name'])
actual = set(os.listdir(train_img_dir))
missing_files = expected - actual

train_rem = train_df[train_df['npy_name'].isin(missing_files)]
train_rem
#train_rem.to_csv("Train_Rem.csv",index= False)

Unnamed: 0,folder,file,npy_name


In [94]:
len(actual)

1610

In [95]:
train_df.shape

(1610, 3)

In [87]:
# Checking how many missed files
expected = set(test_df['npy_name'])
actual = set(os.listdir(test_img_dir))
missing_files = expected - actual

test_rem = test_df[test_df['npy_name'].isin(missing_files)]
test_rem
#test_rem.to_csv("Test_Rem.csv",index= False)

Unnamed: 0,folder,file,npy_name


In [89]:
len(actual)

290

In [90]:
test_df.shape

(290, 3)

In [114]:
train_df.head()

Unnamed: 0,folder,file,npy_name
0,Abuse,Abuse001_x264.mp4,Abuse001_x264_i3d.npy
1,Abuse,Abuse002_x264.mp4,Abuse002_x264_i3d.npy
2,Abuse,Abuse003_x264.mp4,Abuse003_x264_i3d.npy
3,Abuse,Abuse004_x264.mp4,Abuse004_x264_i3d.npy
4,Abuse,Abuse005_x264.mp4,Abuse005_x264_i3d.npy


In [115]:
train_df[train_df['npy_name'] == 'Normal_Videos_781_x264_i3d.npy']

Unnamed: 0,folder,file,npy_name


### LAVAD

In [6]:
pwd

'/lambda/nfs/uca-virginia/Multimodal-Anomaly-Detection-Survelliance-Videos/dataprep'

In [11]:
# ---- CONFIG ----
TEMPORAL_ANNOT_FILE = "../../original/Temporal_Anomaly_Annotation_for_Testing_Videos.txt"
VIDEO_ROOT = "../../original"
PIPELINE_OUT = "../../lavad/annotations/lavad_pipeline_annotations.txt"
GT_OUT_DIR = "../../lavad/annotations/gt_masks"

In [12]:
os.makedirs(GT_OUT_DIR, exist_ok=True)

In [13]:
def count_frames(video_path):
    """Count frames using OpenCV VideoCapture."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"[ERROR] Cannot open: {video_path}")
        return None

    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    return total


def find_video_file(video_name):
    """
    Recursively search VIDEO_ROOT for the file video_name + '.mp4'
    Example: Abuse041_x264 -> Abuse041_x264.mp4
    """
    target = video_name + ".mp4"
    for root, dirs, files in os.walk(VIDEO_ROOT):
        if target in files:
            return os.path.join(root, target)
    print(f"[WARN] Video file not found for: {video_name}")
    return None

In [14]:
pipeline_lines = []

with open(TEMPORAL_ANNOT_FILE, "r") as f:
    for line in f:
        if not line.strip():
            continue

        parts = line.split()
        video_mp4 = parts[0]                # e.g., Abuse041_x264.mp4
        video_name = video_mp4.replace(".mp4", "")

        s1, e1 = int(parts[2]), int(parts[3])
        s2, e2 = int(parts[4]), int(parts[5])

        # Label: 1 if abnormal, 0 if normal
        label = 0 if s1 == -1 else 1

        # Find video on disk
        video_path = find_video_file(video_name)
        if video_path is None:
            continue

        # Count frames
        num_frames = count_frames(video_path)
        if num_frames is None:
            continue

        start_frame = 0
        end_frame = num_frames - 1

        # Save pipeline annotation line
        pipeline_lines.append(f"{video_name} {start_frame} {end_frame} {label}")

        # ----------------------------
        # GENERATE FRAME-LEVEL GT MASK
        # ----------------------------
        gt = np.zeros(num_frames, dtype=np.int32)

        if label == 1:
            if s1 != -1:
                gt[s1:e1+1] = 1
            if s2 != -1:
                gt[s2:e2+1] = 1

        np.save(os.path.join(GT_OUT_DIR, f"{video_name}.npy"), gt)
        print(f"[OK] Processed {video_name} ({num_frames} frames)")

# Save pipeline file
with open(PIPELINE_OUT, "w") as f:
    f.write("\n".join(pipeline_lines))

print("\nDONE!")
print(f"Pipeline annotation saved to: {PIPELINE_OUT}")
print(f"GT masks saved in: {GT_OUT_DIR}")

[OK] Processed Abuse028_x264 (1412 frames)
[OK] Processed Abuse030_x264 (1544 frames)
[OK] Processed Arrest001_x264 (2374 frames)
[OK] Processed Arrest007_x264 (3144 frames)
[OK] Processed Arrest024_x264 (3629 frames)
[OK] Processed Arrest030_x264 (8642 frames)
[OK] Processed Arrest039_x264 (15835 frames)
[OK] Processed Arson007_x264 (6252 frames)
[OK] Processed Arson009_x264 (743 frames)
[OK] Processed Arson010_x264 (3159 frames)
[OK] Processed Arson011_x264 (1266 frames)
[OK] Processed Arson016_x264 (1795 frames)
[OK] Processed Arson018_x264 (842 frames)
[OK] Processed Arson022_x264 (8640 frames)
[OK] Processed Arson035_x264 (1437 frames)
[OK] Processed Arson041_x264 (3754 frames)
[OK] Processed Assault006_x264 (8096 frames)
[OK] Processed Assault010_x264 (16177 frames)
[OK] Processed Assault011_x264 (2288 frames)
[OK] Processed Burglary005_x264 (7729 frames)
[OK] Processed Burglary017_x264 (2113 frames)
[OK] Processed Burglary018_x264 (1125 frames)
[OK] Processed Burglary021_x264 (1

### Creating Final Test For Everything

In [156]:
test_df = pd.read_csv("../../Anomaly_Test.txt", sep = '/', header = None, names = ['folder', 'file'])

In [158]:
src_root = "/lambda/nfs/uca-virginia/original"      
dst_root = "/lambda/nfs/uca-virginia/test_videos_raw" 

os.makedirs(dst_root, exist_ok=True)

In [163]:
test_df['folder'] = test_df['folder'].str.replace("Testing_Normal_Videos_Anomaly", "Normal")

In [164]:
test_df[test_df['file'] == 'Normal_Videos_003_x264.mp4']

Unnamed: 0,folder,file
135,Normal,Normal_Videos_003_x264.mp4


In [165]:
for _, row in tqdm(test_df.iterrows(), total=len(test_df), desc="Copying videos"):
    folder = row["folder"]
    filename = row["file"]

    src_path = os.path.join(src_root, folder, filename)
    dst_path = os.path.join(dst_root, filename)

    if os.path.exists(src_path):
        shutil.copy2(src_path, dst_path)
    else:
        print(f"Missing: {src_path}")

Copying videos: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 290/290 [01:22<00:00,  3.50it/s]
