In [20]:
# END2END RESNET 
import os
import pandas as pd 
import numpy as np 
from tqdm import tqdm
import torch
from torch import nn, optim
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import metrics
from matplotlib import pyplot as plt
from sklearn.ensemble import GradientBoostingClassifier
import seaborn as sns
import cv2
from PIL import Image


In [21]:
FRAME_INTERVAL = 5  # Capture every 5th frame
CLIP_LENGTH = 16  # Number of frames per clip for 3D CNN
FRAME_HEIGHT, FRAME_WIDTH = 112, 112  # r3d_18 with input 112x112

In [22]:
VIDEO_DIR = r"C:\Users\Keelan.Butler\Desktop\python_projects\Final Project\OneDrive_2025-01-30\MSAD Dataset\MSAD_blur"
SAVE_DIR = r"C:\Users\Keelan.Butler\Desktop\python_projects\Final Project\Processed_Frames"
Anomaly_dir = r"C:\Users\Keelan.Butler\Desktop\python_projects\Final Project\OneDrive_2025-01-30\MSAD Dataset\anomaly_annotation.csv"

In [23]:
transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((FRAME_HEIGHT, FRAME_WIDTH)), 
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalising the features
    ])

In [24]:
Anomaly_data = pd.read_csv(Anomaly_dir)
anomalies = set([anon.split("_")[0] for anon in Anomaly_data.name.values])
print(f'Anomalies: {anomalies}')

anno_names = Anomaly_data.name.values.tolist()
anno_start = Anomaly_data['starting frame of anomaly'].values.tolist()
anno_end = Anomaly_data['ending frame of anomaly'].values.tolist()

Anomalies: {'Fire', 'Vandalism', 'Shooting', 'Water', 'Assault', 'Object', 'People', 'Fighting', 'Traffic', 'Robbery', 'Explosion'}


In [25]:
# Save directory for extracted frames

def extract_and_save_frames(video_path, save_dir, frame_interval=5):
    cap = cv2.VideoCapture(video_path)
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    save_folder = os.path.join(save_dir, video_name)
    os.makedirs(save_folder, exist_ok=True)

    frame_count = 0
    saved_count = 0
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # End of video

        if frame_count % frame_interval == 0:
            # Convert BGR (OpenCV) to RGB (PIL)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Apply transformations
            frame = transform(frame)  # Now it's a Tensor (C, H, W)

            # Convert back to PIL image to save
            frame = transforms.ToPILImage()(frame)

            # Save frame as JPEG
            frame_path = os.path.join(save_folder, f"frame_{saved_count:04d}.jpg")
            frame.save(frame_path, "JPEG")
            saved_count += 1 

        frame_count += 1
    
    cap.release()
    return label

In [21]:
for root, _, files in os.walk(VIDEO_DIR):
    for video_file in tqdm(files, desc=f"Extracting Frames {root}"):
        if video_file.endswith((".mp4", ".avi", ".mov")):
            video_path = os.path.join(root, video_file)
            extract_and_save_frames(video_path, SAVE_DIR)

Extracting Frames: 0it [00:00, ?it/s]
Extracting Frames: 0it [00:00, ?it/s]
Extracting Frames: 100%|██████████████████████████████████████████████████████████████████████████████████████| 15/15 [02:16<00:00,  9.13s/it]
Extracting Frames: 100%|██████████████████████████████████████████████████████████████████████████████████████| 15/15 [01:22<00:00,  5.47s/it]
Extracting Frames: 100%|██████████████████████████████████████████████████████████████████████████████████████| 13/13 [02:32<00:00, 11.69s/it]
Extracting Frames: 100%|██████████████████████████████████████████████████████████████████████████████████████| 25/25 [05:13<00:00, 12.52s/it]
Extracting Frames: 100%|██████████████████████████████████████████████████████████████████████████████████████| 22/22 [02:31<00:00,  6.87s/it]
Extracting Frames: 100%|██████████████████████████████████████████████████████████████████████████████████████| 43/43 [03:07<00:00,  4.36s/it]
Extracting Frames: 100%|██████████████████████████████████████████

In [26]:
anomaly = []
anonamly_bool = []
frame_paths = []
frames = []
video_names = []
video_path = []
import os
for root, _, files in os.walk(SAVE_DIR):
   for name in files:
      frame_path = os.path.join(root, name)
      components = frame_path.split(os.sep) 
      video_name =  components[-2]
      frame = int(components[-1].split("_")[1].split(".")[0]) * FRAME_INTERVAL
      frames.append(frame)
      video_names.append(video_name)
      #print(video_name) 
      frame_paths.append(frame_path)
      anom = video_name.split("_")[0]
      if anom in anomalies:
          #print(frame,video_name)
          pos = anno_names.index(video_name)
          start = anno_start[pos]
          end = anno_end[pos]

          if start < frame < end: 
              anon_bool = 1 
              anomaly_label = anom
          else:
              anon_bool = 0
              anomaly_label = "Normal"
         
      else:
          anon_bool = 0
          anomaly_label = "Normal"
      anomaly.append(anomaly_label)
      anonamly_bool.append(anon_bool)


metadata = pd.DataFrame({'Video':video_names,
              'Frame':frames,
             'Frames_path':frame_paths, 
             "Anomaly Type": anomaly,
             "Anomaly": anonamly_bool})

metadata["Video"] = metadata["Video"].str.replace("MSAD_normal_", "", regex=False)

In [27]:
metadata

Unnamed: 0,Video,Frame,Frames_path,Anomaly Type,Anomaly
0,Assault_1,0,C:\Users\Keelan.Butler\Desktop\python_projects...,Normal,0
1,Assault_1,5,C:\Users\Keelan.Butler\Desktop\python_projects...,Normal,0
2,Assault_1,10,C:\Users\Keelan.Butler\Desktop\python_projects...,Normal,0
3,Assault_1,15,C:\Users\Keelan.Butler\Desktop\python_projects...,Normal,0
4,Assault_1,20,C:\Users\Keelan.Butler\Desktop\python_projects...,Normal,0
...,...,...,...,...,...
89666,Water_incident_9,405,C:\Users\Keelan.Butler\Desktop\python_projects...,Water,1
89667,Water_incident_9,410,C:\Users\Keelan.Butler\Desktop\python_projects...,Water,1
89668,Water_incident_9,415,C:\Users\Keelan.Butler\Desktop\python_projects...,Water,1
89669,Water_incident_9,420,C:\Users\Keelan.Butler\Desktop\python_projects...,Water,1


In [28]:
with open(r'C:\Users\Keelan.Butler\Desktop\python_projects\Final Project\OneDrive_2025-01-30\MSAD Dataset\MSAD_I3D_WS_Train.list') as train:
    t = train.readlines()
    train_list = [item.split("\n")[0].split("/")[-1].replace("_i3d.npy","") for item in t]
    train_label = ["Train"] * len(train_list)
tr_labels = pd.DataFrame({"Video":train_list,
                        "partition":train_label}) 
with open(r'C:\Users\Keelan.Butler\Desktop\python_projects\Final Project\OneDrive_2025-01-30\MSAD Dataset\MSAD_I3D_WS_Test.list') as test:
    t = test.readlines()
    test_list = [item.split("\n")[0].split("/")[-1].replace("_i3d.npy","") for item in t]
    test_label = ["Test"] * len(test_list)

te_labels = pd.DataFrame({"Video":test_list,
                         "partition":test_label})
label_df = pd.concat([tr_labels,te_labels])
label_df["Video"] = label_df["Video"].str.replace("MSAD_normal_", "", regex=False)
label_df

Unnamed: 0,Video,partition
0,Assault_1,Train
1,Assault_3,Train
2,Assault_5,Train
3,Assault_6,Train
4,Assault_9,Train
...,...,...
235,testing_116,Test
236,testing_117,Test
237,testing_118,Test
238,testing_119,Test


In [29]:
label_df.groupby("partition").count()

Unnamed: 0_level_0,Video
partition,Unnamed: 1_level_1
Test,240
Train,480


In [30]:
df = pd.merge(left= metadata, right = label_df , on= "Video",how= "left")
df_train =  df[df["partition"] == "Train"].drop(columns= "partition")
df_test =  df[df["partition"] == "Test"].drop(columns= "partition")

In [31]:
len(df_train)

57190

In [32]:
class FrameDataset(Dataset):
    def __init__(self, labels_df, transform=None, clip_length=16, step_size=8):
        """
        labels_df: DataFrame with columns ["Video", "Frame", "Frames_path", "Anomaly Type", "Anomaly"]
        transform: Image transformations (for resizing, normalizing, etc.)
        clip_length: Number of frames per sample (default 16 for ResNet3D)
        step_size: How far the window moves per sample (default 8 frames)
        """
        self.labels_df = labels_df.copy()  # Prevent modifying the original DataFrame
        self.transform = transform
        self.clip_length = clip_length
        self.step_size = step_size

        # Ensure data is sorted by video and frame number
        self.labels_df.sort_values(by=["Video", "Frame"], inplace=True)

        # Group frames by video
        self.video_groups = self.labels_df.groupby("Video")

        # Encode labels
        self.label_encoder = LabelEncoder()
        self.labels_df["Encoded_Label"] = self.label_encoder.fit_transform(self.labels_df["Anomaly"])

        # Store unique videos
        self.video_list = list(self.video_groups.groups.keys())

    def __len__(self):
        total_clips = 0
        for video_name in self.video_list:
            num_frames = len(self.video_groups.get_group(video_name))
            num_clips = max(0, (num_frames - self.clip_length) // self.step_size + 1)  # Fix possible overestimation
            total_clips += num_clips
        return total_clips

    def __getitem__(self, idx):
        # Determine video & clip index
        total_clips = 0
        for video_name in self.video_list:
            video_frames = self.video_groups.get_group(video_name)
            num_frames = len(video_frames)
            num_clips = max(0, (num_frames - self.clip_length) // self.step_size + 1)

            if idx < total_clips + num_clips:
                clip_idx = idx - total_clips
                break

            total_clips += num_clips
        else:
            raise IndexError(f"Index {idx} is out of range for dataset of length {self.__len__()}")

        # Get frames & labels
        frame_paths = video_frames["Frames_path"].tolist()
        frame_labels = video_frames["Encoded_Label"].tolist()

        start_idx = clip_idx * self.step_size

        selected_frames = frame_paths[start_idx:start_idx + self.clip_length]
        selected_labels = frame_labels[start_idx:start_idx + self.clip_length]

        if not selected_frames:  # Prevent infinite loop
            raise ValueError(f"No frames found for clip at index {idx}")

        # Pad if needed
        while len(selected_frames) < self.clip_length:
            selected_frames.append(selected_frames[-1])

        while len(selected_labels) < self.clip_length:
            selected_labels.append(selected_labels[-1])

        # Load frames
        frames = []
        for frame_path in selected_frames:
            frame = cv2.imread(frame_path)
            if frame is None:
                raise ValueError(f"Error reading image: {frame_path}")

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            if self.transform:
                frame = self.transform(frame)
            else:
                frame = torch.from_numpy(frame).float()

            frames.append(frame)

        frames = torch.stack(frames)  # Shape: (16, 3, H, W)

        # Assign majority label
        clip_label = torch.tensor(selected_labels).mode()[0]  # Get most frequent label

        return frames.permute(3, 0, 1, 2), torch.tensor(clip_label, dtype=torch.long)


In [42]:
training_transform = transforms.Compose([
    transforms.ToTensor(),  # Converts NumPy array to Tensor
    transforms.Resize((128, 128)),  
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.RandomRotation(degrees=10),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [47]:
training_dataset = FrameDataset(labels_df = df_train)
testing_dataset = FrameDataset(labels_df = df_test)
print("Training dataset size: {}\ntesting dataset size: {}".format(len(training_dataset),len(testing_dataset)))
num_workers = min(6, os.cpu_count() - 1)
training_dataloader = torch.utils.data.DataLoader(training_dataset, batch_size=256, shuffle=False, num_workers=0)##, pin_memory=True)
testing_dataloader = torch.utils.data.DataLoader(testing_dataset, batch_size=256, shuffle=False, num_workers=0)#, pin_memory=True)

Training dataset size: 6505
testing dataset size: 3290


In [None]:
total_anomalies = 0
total_samples = 0

for _, labels in training_dataloader:
    total_anomalies += torch.sum(labels).item()
    total_samples += len(labels)

print(f"Total anomalies in dataloader: {total_anomalies}/{total_samples} ({(total_anomalies/total_samples)*100:.2f}%)")


In [None]:
total_anomalies = 0
total_samples = 0

for _, labels in testing_dataloader:
    total_anomalies += torch.sum(labels).item()
    total_samples += len(labels)

print(f"Total anomalies in dataloader: {total_anomalies}/{total_samples} ({(total_anomalies/total_samples)*100:.2f}%)")


In [1]:
model_binary = models.video.r3d_18(pretrained=True)
model_binary = nn.Sequential(*list(model_binary.children())[:-1], 
             nn.Flatten(),
             nn.Linear(512,256),
             nn.ReLU(),
             nn.Dropout(0.5),
             nn.Linear(256,1),)

NameError: name 'models' is not defined

In [49]:
class SoftF1Loss(torch.nn.Module):
    def __init__(self, epsilon=1e-7):
        super(SoftF1Loss, self).__init__()
        self.epsilon = epsilon

    def forward(self, y_pred, y_true):
        """
        y_pred: Model outputs (logits or probabilities after sigmoid)
        y_true: Ground truth labels (binary: 0 or 1)
        """
        y_pred = torch.sigmoid(y_pred)  # Ensure predictions are between 0 and 1
        
        tp = torch.sum(y_true * y_pred)  # True Positives
        fp = torch.sum((1 - y_true) * y_pred)  # False Positives
        fn = torch.sum(y_true * (1 - y_pred))  # False Negatives

        f1 = (2 * tp + self.epsilon) / (2 * tp + fp + fn + self.epsilon)
        return 1 - f1  # Minimize (1 - F1)

In [None]:
# Training Hyperparameters

epochs = 25
#pos_weight = torch.tensor([5.0]).to(device)
losses = np.zeros((2, epochs))


optimiser = torch.optim.Adam(model_binary.parameters(), lr=1e-4, weight_decay=1e-4) # I've changed this back from AdamW as the data appears to be less imbalanced

loss_function = nn.BCEWithLogitsLoss()# BCEWithLogitsLoss

#loss_function = SoftF1Loss()

# Use GPU if available

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model_binary.to(device)
best_loss = np.inf


threshhold = 0.5 

# Training Loop
for epoch in range(epochs):
    epoch_loss = 0.0
    model_binary.train()

    for frames, labels in tqdm(training_dataloader,desc = "Training pass"):
        frames, labels = frames.to(device), labels.to(device)  # Move data to GPU if available
        pred = model_binary(frames)  # Forward pass
        loss = loss_function(pred.squeeze(), labels.float())

        optimiser.zero_grad()
        loss.backward()
        optimiser.step()

        epoch_loss += loss.item()

    # Store training loss
    losses[0, epoch] = epoch_loss / len(training_dataloader)

    # Validation Loop
    model_binary.eval()
    test_loss = 0.0

    with torch.no_grad():
        for test_frames, test_labels in tqdm(testing_dataloader,desc = "Cycling Testing Dataloader"):
            test_frames, test_labels = test_frames.to(device), test_labels.to(device)  # Move to GPU

            test_preds = model_binary(test_frames)
            t_loss = loss_function(test_preds.squeeze(), test_labels.float())

            test_loss += t_loss.item()
            
    losses[1, epoch] = test_loss / len(testing_dataloader)
    if best_loss > losses[1, epoch]:
        best_loss = losses[1, epoch] 
        print("Saving Optimal model: {} epoch".format(epoch + 1))
        torch.save(model_binary.state_dict(),os.path.join("Best_Models","E2E_3DCNN.pt"))
    print(f"Epoch [{epoch+1}/{epochs}] - Training Loss: {losses[0,epoch]:.4f}, Test Loss: {losses[1,epoch]:.4f}")

Using device: cpu


  return frames.permute(3, 0, 1, 2), torch.tensor(clip_label, dtype=torch.long)
Training pass:  65%|████████████████████████████████████████████████████████▏                             | 17/26 [2:56:36<1:51:31, 743.54s/it]

In [None]:
plt.plot(losses[0], label = 'Training')
plt.plot(losses[1], label = 'Testing')
plt.grid()
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.title("R3D CNN Training Binary")

In [16]:
model_binary.load_state_dict(torch.load(os.path.join("Best_Models", "E2E_3DCNN.pt")))
print("Best model loaded!")

Best model loaded!


In [17]:
def get_predictions(model, dataloader, device):
    model.eval()  # Set model to evaluation mode
    all_preds = []
    all_labels = []

    with torch.no_grad():  # Disable gradient computation for efficiency
        for frames, labels in dataloader:
            frames = frames.to(device)
            labels = labels.to(device)

            outputs = model(frames)  # Forward pass
           # _, preds = torch.max(outputs, 1)  # Get predicted class MULTICLASS APPROACH
            #preds = (outputs >= 0.5).float() # Binalry class 
            preds = (torch.sigmoid(outputs) >= 0.5).float()
            all_preds.extend(preds.cpu().numpy())  # Store predictions
            all_labels.extend(labels.cpu().numpy())  # Store true labels

    return np.array(all_labels), np.array(all_preds)

# Example usage:
device = torch.device(device)  # Change to "cuda" if using GPU
true_labels, pred_labels = get_predictions(model_binary, testing_dataloader, device)
print(f"Total test samples: {len(true_labels)} (Expected: 29270)")
print(f"Total predictions: {len(pred_labels)} (Expected: 29270)")


NameError: name 'device' is not defined

In [18]:
print(f"Batch size: {testing_dataloader.batch_size}")
print(f"Total batches: {len(testing_dataloader)}")
print(f"Total samples processed: {len(testing_dataloader) * testing_dataloader.batch_size}")


NameError: name 'testing_dataloader' is not defined

In [None]:
cm = metrics.confusion_matrix(true_labels, pred_labels)
sns.heatmap(cm , annot = True)
plt.title("Confusion Matrix of 3D ResNet-18 Classifier")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()