In [3]:
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from sklearn.model_selection import train_test_split

# Dataset class for preprocessing
class ViolenceDataset(Dataset):
    def __init__(self, video_paths, labels, transform=None):
        self.video_paths = video_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        video_path = self.video_paths[idx]
        label = self.labels[idx]

        # Read video and extract frames
        cap = cv2.VideoCapture(video_path)
        frames = []
        while len(frames) < 16:  # Fixed number of frames per video
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, (224, 224))
            frames.append(frame)
        cap.release()

        # If not enough frames, pad with black frames
        while len(frames) < 16:
            frames.append(np.zeros((224, 224, 3), dtype=np.uint8))

        # Convert to tensor
        frames = np.stack(frames, axis=0)  # Shape: (16, 224, 224, 3)
        frames = frames.transpose((0, 3, 1, 2))  # Shape: (16, 3, 224, 224)
        frames = torch.tensor(frames, dtype=torch.float32) / 255.0

        if self.transform:
            frames = self.transform(frames)

        return frames, label

# Load dataset paths and labels
def load_dataset(root_dir):
    violence_dir = os.path.join(root_dir, "Violence")
    non_violence_dir = os.path.join(root_dir, "NonViolence")

    video_paths = []
    labels = []

    for file in os.listdir(violence_dir):
        video_paths.append(os.path.join(violence_dir, file))
        labels.append(1)  # Violence = 1

    for file in os.listdir(non_violence_dir):
        video_paths.append(os.path.join(non_violence_dir, file))
        labels.append(0)  # Non-violence = 0

    return video_paths, labels

# Define model
def build_model():
    base_model = models.resnet18(pretrained=True)
    base_model.fc = nn.Linear(base_model.fc.in_features, 2)  # Binary classification
    return base_model

# Training function
def train_model(model, dataloaders, criterion, optimizer, num_epochs=10, device="cuda"):
    model.to(device)

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        print("-" * 30)

        for phase in ["train", "val"]:
            if phase == "train":
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == "train"):
                    outputs = model(inputs[:, 0, :, :, :])  # Use the first frame for simplicity
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == "train":
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

    return model

# Main script
if __name__ == "__main__":
    root_dir = "dataset/RealLifeViolenceDataset"
    video_paths, labels = load_dataset(root_dir)

    # Train-test split
    train_paths, val_paths, train_labels, val_labels = train_test_split(
        video_paths, labels, test_size=0.2, random_state=42
    )

    # Data transformations
    transform = transforms.Compose([
        transforms.Normalize([0.5], [0.5])
    ])

    # Create datasets and dataloaders
    train_dataset = ViolenceDataset(train_paths, train_labels, transform=transform)
    val_dataset = ViolenceDataset(val_paths, val_labels, transform=transform)

    dataloaders = {
        "train": DataLoader(train_dataset, batch_size=8, shuffle=True),
        "val": DataLoader(val_dataset, batch_size=8, shuffle=False),
    }

    # Initialize model, loss, and optimizer
    model = build_model()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train model
    trained_model = train_model(model, dataloaders, criterion, optimizer, num_epochs=10, device="cuda" if torch.cuda.is_available() else "cpu")

    # Save model
    torch.save(trained_model.state_dict(), "violence_model.pth")
    print("Model training complete and saved.")

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\garvi/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:03<00:00, 14.9MB/s]


Epoch 1/10
------------------------------
train Loss: 0.7212 Acc: 0.6375
val Loss: 1.5310 Acc: 0.6375
Epoch 2/10
------------------------------
train Loss: 0.5480 Acc: 0.7406
val Loss: 2.3427 Acc: 0.6725
Epoch 3/10
------------------------------
train Loss: 0.4866 Acc: 0.7812
val Loss: 0.6100 Acc: 0.6825
Epoch 4/10
------------------------------
train Loss: 0.4341 Acc: 0.8150
val Loss: 0.4789 Acc: 0.8275
Epoch 5/10
------------------------------
train Loss: 0.4138 Acc: 0.8300
val Loss: 0.4775 Acc: 0.7625
Epoch 6/10
------------------------------
train Loss: 0.3803 Acc: 0.8400
val Loss: 0.3790 Acc: 0.8550
Epoch 7/10
------------------------------
train Loss: 0.3389 Acc: 0.8725
val Loss: 0.2958 Acc: 0.8575
Epoch 8/10
------------------------------
train Loss: 0.3072 Acc: 0.8762
val Loss: 0.2650 Acc: 0.8850
Epoch 9/10
------------------------------
train Loss: 0.2999 Acc: 0.8862
val Loss: 0.2674 Acc: 0.8900
Epoch 10/10
------------------------------
train Loss: 0.2680 Acc: 0.8975
val Loss

In [4]:
import torch
import torch.nn as nn
from torchvision import models

# Define the model structure (same as the one used for training)
def build_model():
    base_model = models.resnet18(pretrained=False)  # `pretrained=False` to avoid downloading weights again
    base_model.fc = nn.Linear(base_model.fc.in_features, 2)  # Binary classification
    return base_model

# Load the model
model_path = "violence_model.pth"
model = build_model()
model.load_state_dict(torch.load(model_path))
model.eval()  # Set to evaluation mode

# Print model architecture
print(model)

  model.load_state_dict(torch.load(model_path))


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  