# In [5]:
import tkinter as tk
from tkinter import filedialog
import cv2
import torch
import torch.nn as nn
import torchvision.models as models
import numpy as np
import threading

# Define the ConvLSTM model
class ConvLSTM(nn.Module):
    """Sequence encoder that returns the LSTM output of the last time step.

    Despite its name, this module wraps a plain ``nn.LSTM`` (there are no
    convolutional gates). The LSTM is created with ``batch_first=True``, so
    inputs are laid out as ``(batch, seq_len, input_dim)``.

    Args:
        input_dim: Size of each element of the input sequence.
        hidden_dim: Size of the LSTM hidden state (and of the returned vector).
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, num_layers):
        super(ConvLSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        # The attribute name becomes part of the parameter names stored in a
        # state_dict, so renaming it would break loading existing checkpoints.
        self.conv_lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)

    def forward(self, x, hidden=None):
        """Run the LSTM over ``x`` and return the output at the final step.

        Args:
            x: Input tensor of shape ``(batch, seq_len, input_dim)``.
            hidden: Optional ``(h0, c0)`` tuple, each of shape
                ``(num_layers, batch, hidden_dim)``. When omitted, ``nn.LSTM``
                starts from an all-zero state, which matches passing explicit
                zero tensors.

        Returns:
            Tensor of shape ``(batch, hidden_dim)``: the top-layer output at
            the last time step.
        """
        if hidden is None:
            out, _ = self.conv_lstm(x)
        else:
            h0, c0 = hidden
            out, _ = self.conv_lstm(x, (h0, c0))
        # Keep only the last time step of the sequence output.
        return out[:, -1]

# Load pre-trained VGG19 model
# Only the `.features` sub-module of the torchvision network is referenced by
# the visible code; it is handed to ViolenceDetectionModel as the frame encoder.
vgg19 = models.vgg19(pretrained=True)
vgg19_features = vgg19.features


# Freeze the parameters in VGG19
# The feature extractor is used as a fixed encoder, so none of its weights
# should accumulate gradients.
for param in vgg19_features.parameters():
    param.requires_grad_(False)
    
# Combine VGG19 and ConvLSTM
class ViolenceDetectionModel(nn.Module):
    """Frame classifier: CNN feature extractor -> LSTM -> linear head.

    Each element of the input batch is processed independently: its feature
    map is reshaped into a sequence, the LSTM consumes that sequence, and the
    last LSTM output is projected to ``num_classes`` logits.

    Args:
        vgg_features: Feature-extractor module applied to the input images.
        hidden_dim: Hidden size of the LSTM.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output logits per input image.
    """

    def __init__(self, vgg_features, hidden_dim, num_layers, num_classes):
        super().__init__()
        # Attribute names double as state_dict key prefixes; keep them stable.
        self.vgg_features = vgg_features
        # 512 is the hard-coded LSTM input size; it has to equal the size of
        # the last axis produced by the reshape in forward().
        self.conv_lstm = ConvLSTM(512, hidden_dim, num_layers)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return per-image class logits of shape ``(batch, num_classes)``.

        Args:
            x: Batch of images accepted by ``vgg_features``
                (presumably ``(batch, 3, H, W)`` -- TODO confirm).
        """
        feature_maps = self.vgg_features(x)
        batch_size = feature_maps.size(0)
        channels = feature_maps.size(1)

        # NOTE(review): reshape() reinterprets the existing memory layout; it
        # does not move the channel axis last. If feature_maps is
        # (batch, C, H, W), the rows of `sequence` are therefore not
        # per-location channel vectors. The saved checkpoint was presumably
        # trained with this exact layout, so do not change it without
        # retraining.
        sequence = feature_maps.reshape(batch_size, -1, channels)

        # Fresh zero state for every forward pass, on the same device as x.
        state_shape = (self.conv_lstm.num_layers, batch_size, self.conv_lstm.hidden_dim)
        h0 = torch.zeros(*state_shape, device=x.device)
        c0 = torch.zeros(*state_shape, device=x.device)

        last_step = self.conv_lstm(sequence, (h0, c0))
        return self.fc(last_step)

# Set hyperparameters
# NOTE: input_dim is informational only -- it is not referenced elsewhere in
# the visible code (ViolenceDetectionModel hard-codes 512 itself).
input_dim = 512  # Dimensionality of input features from VGG19
hidden_dim = 128  # Hidden dimension of the LSTM
num_layers = 1  # Number of stacked LSTM layers
num_classes = 2  # Number of output classes (violence, non-violence)

# Instantiate the model
model = ViolenceDetectionModel(vgg19_features, hidden_dim, num_layers, num_classes)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Load the trained model
# map_location remaps the stored tensors onto the device chosen above, so a
# checkpoint that was saved from a GPU still loads on a CPU-only machine.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
model.load_state_dict(torch.load('violence_detection_model_v2.pth', map_location=device))
# Inference mode: disables training-time behaviour such as dropout.
model.eval()

# Tkinter GUI Application
class Application(tk.Tk):
    """Main window: pick a video file and show the classifier's verdict.

    The video is decoded and classified on a background thread so the window
    stays responsive. Classification relies on the module-level ``model`` and
    ``device`` objects.
    """

    # Number of frames sent through the network per forward pass. Processing
    # in fixed-size batches keeps peak memory bounded regardless of how long
    # the video is.
    FRAME_BATCH_SIZE = 32

    def __init__(self):
        """Build the static widgets: header, instructions, button, result label."""
        super().__init__()
        self.title("Violence Detection System")
        self.geometry("800x400")

        self.header_label = tk.Label(self, text="Violence Detection System", font=("Arial", 20, "bold"))
        self.header_label.pack(pady=10)

        self.instructions = [
            "Instructions:",
            "1. Select a video file to upload",
            "2. Supported file formats: .avi, .mp4",
            "3. Click the 'Upload' button to start the detection",
            "System will analyze the video and start the process",
            "This may take some time...."
        ]

        for instruction in self.instructions:
            instruction_label = tk.Label(self, text=instruction, font=("Arial", 12))
            instruction_label.pack(pady=5)

        self.upload_button = tk.Button(self, text="Upload", command=self.upload_video)
        self.upload_button.pack(pady=10)

        # Filled in by display_result() once a video has been processed.
        self.result_label = tk.Label(self, text="", font=("Arial", 12))
        self.result_label.pack(pady=10)

    def upload_video(self):
        """Ask the user for a video file and start detection if one is chosen."""
        self.result_label.configure(text="")  # Clear previous results
        # Tk expects the patterns of one file type as a space-separated list;
        # a semicolon-joined pattern is not portable across platforms.
        filetypes = (("Video Files", "*.avi *.mp4"), ("All Files", "*.*"))
        filepath = filedialog.askopenfilename(filetypes=filetypes)
        if filepath:
            self.detect_violence(filepath)

    def detect_violence(self, filepath):
        """Run process_video(filepath) on a daemon thread so the GUI stays live."""
        threading.Thread(target=self.process_video, args=(filepath,), daemon=True).start()

    def process_video(self, filepath):
        """Classify the video at ``filepath`` and display the outcome.

        Runs on the worker thread started by detect_violence(). Any failure is
        reported in the result label and then re-raised so the traceback still
        reaches the thread's exception hook.
        """
        try:
            verdict = self._scan_video(filepath)
        except Exception as exc:
            self.display_result(f"Error while processing video: {exc}")
            raise

        if verdict is None:
            result = "Could not read any frames from the selected video."
        elif verdict:
            result = "Violence detected."
        else:
            result = "No violence detected."
        self.display_result(result)

    def _scan_video(self, filepath):
        """Return True/False for the video, or None if no frame could be read.

        Frames are resized to 224x224, scaled to [0, 1] and converted from
        HWC to CHW layout, then classified in batches of FRAME_BATCH_SIZE.
        Scanning stops at the first batch containing a frame predicted as
        class 1, since a single such frame already decides the verdict.
        """
        # NOTE(review): OpenCV decodes frames in BGR order and no colour
        # conversion or mean/std normalisation is applied here -- presumably
        # this matches how the checkpoint was trained; confirm before changing.
        video = cv2.VideoCapture(filepath)
        batch = []
        frames_seen = 0
        try:
            while True:
                ret, frame = video.read()
                if not ret:
                    break
                frames_seen += 1
                frame = cv2.resize(frame, (224, 224))
                frame = frame.astype(np.float32) / 255.0
                batch.append(torch.from_numpy(frame).permute(2, 0, 1).unsqueeze(0))
                if len(batch) == self.FRAME_BATCH_SIZE:
                    if self._batch_has_violence(batch):
                        return True
                    batch = []
        finally:
            # Always free the capture handle, including on early return/error.
            video.release()

        if frames_seen == 0:
            return None
        # Classify the trailing partial batch, if any.
        return bool(batch) and self._batch_has_violence(batch)

    def _batch_has_violence(self, batch):
        """Return True if any frame tensor in ``batch`` is predicted as class 1."""
        # The model lives on `device`, so its input must be moved there too.
        frames = torch.cat(batch, dim=0).to(device)
        with torch.no_grad():
            outputs = model(frames)
        _, predicted_labels = torch.max(outputs, 1)
        return bool(torch.any(predicted_labels == 1))

    def display_result(self, result):
        """Show ``result`` in the result label."""
        # NOTE(review): this is invoked from the worker thread; consider
        # routing it through self.after(...) if cross-thread widget updates
        # prove unreliable on the target platform.
        self.result_label.configure(text=result)


# Run the application
# Create the main window and enter the Tk event loop; mainloop() blocks until
# the window is closed.
app = Application()
app.mainloop()
