In [16]:
# END2END RESNET 
import os
import pandas as pd 
import numpy as np 
from tqdm import tqdm
import torch
from torch import nn, optim
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import metrics
from matplotlib import pyplot as plt
from sklearn.ensemble import GradientBoostingClassifier
import seaborn as sns
import cv2


In [17]:
# --- Clip / frame configuration ---
# Sampling stride: one frame is kept out of every FRAME_INTERVAL frames.
FRAME_INTERVAL = 5
# How many frames make up a single clip handed to the 3D CNN.
CLIP_LENGTH = 16
# Spatial size of each frame (r3d_18 with input 112x112).
FRAME_HEIGHT = 112
FRAME_WIDTH = 112

In [18]:
# Machine-specific locations; adjust the two roots below when running elsewhere.
_PROJECT_ROOT = r"C:\Users\Keelan.Butler\Desktop\python_projects\Final Project"
_MSAD_ROOT = _PROJECT_ROOT + r"\OneDrive_2025-01-30\MSAD Dataset"

# Directory tree that is walked to find the source videos.
VIDEO_DIR = _MSAD_ROOT + r"\MSAD_blur"
# Destination root for the extracted frames.
SAVE_DIR = _PROJECT_ROOT + r"\Processed_Frames"
# CSV file holding the anomaly annotations.
Anomaly_dir = _MSAD_ROOT + r"\anomaly_annotation.csv"

In [19]:
# Per-frame preprocessing pipeline, applied in order:
#   1. convert the input to a PIL image,
#   2. resize to (FRAME_HEIGHT, FRAME_WIDTH),
#   3. convert to a tensor,
#   4. normalise each channel with the mean/std below.
# NOTE(review): these statistics look like the commonly used ImageNet values;
# confirm they match what the video backbone's pretrained weights expect.
_preprocess_steps = [
    transforms.ToPILImage(),
    transforms.Resize((FRAME_HEIGHT, FRAME_WIDTH)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(_preprocess_steps)

In [None]:
# Load the anomaly annotation table and collect the distinct prefixes that
# precede the first underscore in each entry of its "name" column.
Anomaly_data = pd.read_csv(Anomaly_dir)
anomalies = {anon.split("_")[0] for anon in Anomaly_data["name"].values}
# The original f-string was missing its closing quote, which made this cell a SyntaxError.
print(f'Anomalies: {anomalies}')

In [20]:
# Helper that samples frames from a video and saves them as JPEGs under a per-video folder

def extract_and_save_frames(video_path, save_dir, frame_interval=5):
    """Sample frames from a video and write them as JPEGs into a per-video folder.

    Every ``frame_interval``-th frame is converted from BGR to RGB, resized to
    (FRAME_HEIGHT, FRAME_WIDTH) and saved as ``frame_XXXX.jpg`` inside
    ``<save_dir>/<video file name without extension>/``.

    Args:
        video_path: Path of the video file to read.
        save_dir: Root directory under which the per-video folder is created.
        frame_interval: Keep one frame out of every ``frame_interval`` frames.

    Returns:
        The part of the video file name before the first underscore.
        NOTE(review): the original returned an undefined name ``label``; the
        prefix is used here because the annotation names are split the same
        way elsewhere in this notebook -- confirm this is the intended label.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    if frame_interval < 1:
        raise ValueError("frame_interval must be a positive integer")

    cap = cv2.VideoCapture(video_path)
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    save_folder = os.path.join(save_dir, video_name)
    os.makedirs(save_folder, exist_ok=True)

    # Only resize before saving. Running the full `transform` (which ends in
    # Normalize) and converting back to an image produces negative floats that
    # wrap around when cast to 8-bit, corrupting the stored JPEGs.
    # Normalisation belongs at load time instead.
    to_pil = transforms.ToPILImage()
    resize = transforms.Resize((FRAME_HEIGHT, FRAME_WIDTH))

    frame_count = 0
    saved_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break  # End of video (or unreadable frame)

            if frame_count % frame_interval == 0:
                # OpenCV decodes to BGR; PIL expects RGB.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = resize(to_pil(rgb_frame))

                frame_path = os.path.join(save_folder, f"frame_{saved_count:04d}.jpg")
                image.save(frame_path, "JPEG")
                saved_count += 1

            frame_count += 1
    finally:
        # Always free the capture handle, even if decoding or saving fails.
        cap.release()

    return video_name.split("_")[0]

In [None]:
# Walk the video tree and extract frames from every video file found.
for root, _, files in os.walk(VIDEO_DIR):
    for video_file in tqdm(files, desc=f"Extracting Frames {root}"):
        # Compare extensions case-insensitively so files such as "clip.MP4" are not skipped.
        if video_file.lower().endswith((".mp4", ".avi", ".mov")):
            video_path = os.path.join(root, video_file)
            # Pass the configured sampling stride instead of relying on the function default.
            anon_label = extract_and_save_frames(video_path, SAVE_DIR, frame_interval=FRAME_INTERVAL)

Extracting Frames: 0it [00:00, ?it/s]
Extracting Frames: 0it [00:00, ?it/s]
Extracting Frames: 100%|██████████████████████████████████████████████████████████████████████████████████████| 15/15 [02:16<00:00,  9.13s/it]
Extracting Frames: 100%|██████████████████████████████████████████████████████████████████████████████████████| 15/15 [01:22<00:00,  5.47s/it]
Extracting Frames: 100%|██████████████████████████████████████████████████████████████████████████████████████| 13/13 [02:32<00:00, 11.69s/it]
Extracting Frames: 100%|██████████████████████████████████████████████████████████████████████████████████████| 25/25 [05:13<00:00, 12.52s/it]
Extracting Frames: 100%|██████████████████████████████████████████████████████████████████████████████████████| 22/22 [02:31<00:00,  6.87s/it]
Extracting Frames: 100%|██████████████████████████████████████████████████████████████████████████████████████| 43/43 [03:07<00:00,  4.36s/it]
Extracting Frames: 100%|██████████████████████████████████████████

In [None]:
class FrameDataset(Dataset):
    """Dataset of fixed-length frame clips read from per-video image folders.

    Each item is a clip of ``clip_length`` consecutive frames taken at a random
    offset from one video's folder under ``root_dir``, together with that
    video's integer-encoded label.
    """

    def __init__(self, root_dir, labels_df, transform=None, clip_length=16):
        """Build the (video_name, label) index.

        Args:
            root_dir: Directory containing one sub-folder of frame images per video.
            labels_df: DataFrame with at least a ``name`` column. Expected columns:
                [name, scenario, total frames, starting frame of anomaly,
                ending frame of anomaly].
            transform: Optional callable applied to each RGB frame (H, W, C uint8
                array). When ``None`` the frame is converted to a float tensor
                in [0, 1] with shape (C, H, W).
            clip_length: Number of frames per returned clip.
        """
        self.root_dir = root_dir
        self.transform = transform
        self.clip_length = clip_length

        # NOTE(review): the original left this step unfinished
        # (`all_anomalies = `). The class label is taken as the part of the
        # video name before the first underscore, mirroring how anomaly names
        # are split elsewhere in this notebook -- confirm this is intended.
        video_names = labels_df["name"].astype(str).reset_index(drop=True)
        self.data = pd.DataFrame({
            "video_name": video_names,
            "label": video_names.str.split("_").str[0],
        })

        self.label_encoder = LabelEncoder()
        self.data["label"] = self.label_encoder.fit_transform(self.data["label"])

    def __len__(self):
        """Return the number of videos in the index."""
        return len(self.data)

    def _select_clip(self, frame_files):
        """Pick ``clip_length`` consecutive file names, padding short videos with the last frame."""
        if not frame_files:
            raise ValueError("cannot select a clip from an empty frame list")

        # np.random.randint's upper bound is exclusive, so add 1 to make the
        # last valid start position reachable.
        last_start = max(0, len(frame_files) - self.clip_length)
        start_idx = np.random.randint(0, last_start + 1)
        clip = list(frame_files[start_idx:start_idx + self.clip_length])

        # Videos shorter than clip_length would otherwise yield ragged clips
        # that cannot be stacked into a batch.
        clip.extend([clip[-1]] * (self.clip_length - len(clip)))
        return clip

    def _load_frame(self, frame_path):
        """Read one image from disk as RGB and apply the transform (or a plain tensor conversion)."""
        image = cv2.imread(frame_path)
        if image is None:
            raise FileNotFoundError(f"Could not read frame image: {frame_path}")

        # cv2.imread returns BGR; convert so channel order matches RGB-based transforms.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform is not None:
            return self.transform(image)
        return torch.from_numpy(image).permute(2, 0, 1).float() / 255.0

    def __getitem__(self, idx):
        """Return ``(frames, label)`` for the video at position ``idx``.

        ``frames`` stacks ``clip_length`` per-frame tensors along a new first
        dimension; ``label`` is a scalar long tensor.
        NOTE(review): a 3D CNN may expect channels before time -- confirm
        whether the consumer permutes the clip.
        """
        row = self.data.iloc[idx]
        video_name = row["video_name"]
        label = int(row["label"])

        video_folder = os.path.join(self.root_dir, video_name)
        # Sorted file names give chronological order for zero-padded frame names;
        # non-image files (e.g. OS thumbnails) are ignored.
        frame_files = sorted(
            f for f in os.listdir(video_folder)
            if f.lower().endswith((".jpg", ".jpeg", ".png"))
        )
        if not frame_files:
            raise FileNotFoundError(f"No frame images found in {video_folder}")

        selected_frames = self._select_clip(frame_files)
        frames = torch.stack(
            [self._load_frame(os.path.join(video_folder, f)) for f in selected_frames]
        )

        return frames, torch.tensor(label, dtype=torch.long)