In [None]:
import kagglehub

# Download the latest version of the violence dataset via kagglehub
VIOLENCE_DATASET_HANDLE = "mohamedmustafa/real-life-violence-situations-dataset"
path = kagglehub.dataset_download(VIOLENCE_DATASET_HANDLE)

# Show where the downloaded files were placed
print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/mohamedmustafa/real-life-violence-situations-dataset?dataset_version_number=1...


100%|██████████| 3.58G/3.58G [00:33<00:00, 116MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/mohamedmustafa/real-life-violence-situations-dataset/versions/1


In [None]:
import os

# Reuse the location returned by kagglehub.dataset_download() in the first cell
# rather than a pasted copy of it, so this cell keeps working when the cache
# directory or the dataset version number changes.
dataset_path = path

# List the top-level entries of the downloaded dataset
print("Subfolders:", os.listdir(dataset_path))

Subfolders: ['real life violence situations', 'Real Life Violence Dataset']


In [None]:
# Collect the names of the sub-directories directly under the dataset root,
# then print the entries found inside each one.
subfolders = [
    name
    for name in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, name))
]
for folder in subfolders:
    folder_entries = os.listdir(os.path.join(dataset_path, folder))
    print(f"Contents of '{folder}':", folder_entries)

Contents of 'real life violence situations': ['Real Life Violence Dataset']
Contents of 'Real Life Violence Dataset': ['Violence', 'NonViolence']


In [None]:
import os

# Report how many direct entries each sub-directory of the dataset root holds.
# Non-directory entries are skipped.
for category in os.listdir(dataset_path):
    category_path = os.path.join(dataset_path, category)
    if not os.path.isdir(category_path):
        continue
    entry_count = len(os.listdir(category_path))
    print(f"{category} contains {entry_count} files.")

real life violence situations contains 1 files.
Real Life Violence Dataset contains 2 files.


In [None]:
import os

# Point at the folder that directly contains the two class directories.
# Built from the download location (`path`) so the cell is idempotent and does
# not depend on a hard-coded cache directory.
dataset_path = os.path.join(path, "Real Life Violence Dataset")

print("Subfolders:", os.listdir(dataset_path))  # Should show ['Violence', 'NonViolence']

Subfolders: ['Violence', 'NonViolence']


In [None]:
import os

# Preview the first few file names in each class folder
PREVIEW_COUNT = 10
for category in ("Violence", "NonViolence"):
    listing = os.listdir(os.path.join(dataset_path, category))
    files = listing[:PREVIEW_COUNT]
    print(f"{category}: {files}")

Violence: ['V_614.mp4', 'V_415.mp4', 'V_87.mp4', 'V_659.mp4', 'V_504.mp4', 'V_740.mp4', 'V_3.mp4', 'V_351.mp4', 'V_730.mp4', 'V_600.mp4']
NonViolence: ['NV_877.avi', 'NV_638.mp4', 'NV_430.mp4', 'NV_926.mp4', 'NV_685.mp4', 'NV_543.mp4', 'NV_719.mp4', 'NV_301.mp4', 'NV_275.mp4', 'NV_908.avi']


In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ConvLSTM2D, BatchNormalization, Dense, Flatten, TimeDistributed, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.optimizers import Adam

# Enable the global Keras mixed-precision policy for the layers created below
tf.keras.mixed_precision.set_global_policy('mixed_float16')

# Define dataset path.
# Derived from the kagglehub download location (`path`) instead of a hard-coded
# absolute cache path, so it follows the dataset wherever kagglehub stored it.
dataset_path = os.path.join(path, "Real Life Violence Dataset")
# Order matters: the position in this list is the integer label of the class.
classes = ["Violence", "NonViolence"]

SEQUENCE_LENGTH = 5        # frames per clip fed to the model
FRAME_SIZE = (224, 224)    # size passed to cv2.resize for every frame
BATCH_SIZE = 4             # clips per batch
EPOCHS = 12                # passes over the generator during fit()

# Video Data Generator
class VideoDataGenerator(Sequence):
    """Keras ``Sequence`` yielding batches of short frame clips and one-hot labels.

    Every class directory under ``dataset_path`` is scanned for video files.
    For each video, every ``FRAME_STRIDE``-th decoded frame is resized to
    ``FRAME_SIZE`` until ``sequence_length`` frames have been collected; shorter
    videos are padded by repeating their last frame.

    Args:
        dataset_path: Directory containing one sub-directory per class.
        classes: Class directory names; the list index is the integer label.
        batch_size: Number of clips per batch.
        sequence_length: Number of frames per clip.
        shuffle: Reshuffle the sample order at construction and after each
            epoch. Without this the samples are grouped by class, so every
            batch contains a single class.
        seed: Optional seed for the shuffling RNG.
    """

    # Container formats present in the dataset (both .mp4 and .avi files exist).
    VIDEO_EXTENSIONS = (".mp4", ".avi")
    # Keep one frame out of every FRAME_STRIDE decoded frames.
    FRAME_STRIDE = 3

    def __init__(self, dataset_path, classes, batch_size=BATCH_SIZE, sequence_length=SEQUENCE_LENGTH,
                 shuffle=True, seed=None):
        super().__init__()
        self.dataset_path = dataset_path
        self.classes = classes
        self.batch_size = batch_size
        self.sequence_length = sequence_length
        self.shuffle = shuffle
        self._rng = np.random.default_rng(seed)
        self.video_paths = []
        self.labels = []
        self.prepare_data()
        # Index array that defines the iteration order; the path/label lists
        # themselves stay in scan order.
        self.indices = np.arange(len(self.video_paths))
        self.on_epoch_end()

    def prepare_data(self):
        """Populate ``video_paths`` and ``labels`` from the class directories."""
        for label, category in enumerate(self.classes):
            category_path = os.path.join(self.dataset_path, category)
            # Sorted so the scan order does not depend on the filesystem.
            for video_file in sorted(os.listdir(category_path)):
                if video_file.lower().endswith(self.VIDEO_EXTENSIONS):
                    self.video_paths.append(os.path.join(category_path, video_file))
                    self.labels.append(label)

    def on_epoch_end(self):
        """Reshuffle the sample order (no-op when ``shuffle`` is False)."""
        if self.shuffle:
            self._rng.shuffle(self.indices)

    def __len__(self):
        """Number of full batches; a trailing partial batch is dropped."""
        return len(self.video_paths) // self.batch_size

    def _load_clip(self, video_path):
        """Decode one video into an array of ``sequence_length`` resized frames."""
        cap = cv2.VideoCapture(video_path)
        frames = []
        frame_count = 0
        try:
            success, frame = cap.read()
            while success and len(frames) < self.sequence_length:
                if frame_count % self.FRAME_STRIDE == 0:
                    frames.append(cv2.resize(frame, FRAME_SIZE))
                frame_count += 1
                success, frame = cap.read()
        finally:
            # Always free the decoder, even if resize/read raised.
            cap.release()

        if not frames:
            # Unreadable or empty video: fall back to a black frame instead of
            # failing on frames[-1] below. cv2.resize takes (width, height) and
            # returns (height, width, channels), hence the index order.
            frames.append(np.zeros((FRAME_SIZE[1], FRAME_SIZE[0], 3), dtype=np.uint8))

        # Pad short clips by repeating the last available frame.
        while len(frames) < self.sequence_length:
            frames.append(frames[-1])

        return np.array(frames)

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(clips scaled to [0, 1], one-hot labels)``."""
        batch_indices = self.indices[idx * self.batch_size : (idx + 1) * self.batch_size]

        X = [self._load_clip(self.video_paths[i]) for i in batch_indices]
        y = [self.labels[i] for i in batch_indices]

        # float32 halves the memory of the implicit float64 division result.
        clips = np.array(X, dtype=np.float32) / 255.0
        return clips, tf.keras.utils.to_categorical(y, num_classes=len(self.classes))

# Create generators
train_generator = VideoDataGenerator(dataset_path, classes, batch_size=BATCH_SIZE, sequence_length=SEQUENCE_LENGTH)

# Optimized ConvLSTM Model
# Input is (frames, height, width, channels); height/width come from FRAME_SIZE,
# which is the (width, height) pair given to cv2.resize in the generator.
model = Sequential([
    ConvLSTM2D(64, (3, 3), activation="relu", return_sequences=True,
               input_shape=(SEQUENCE_LENGTH, FRAME_SIZE[1], FRAME_SIZE[0], 3)),
    BatchNormalization(),
    Dropout(0.3),

    # return_sequences=False collapses the time axis to the last step's feature map
    ConvLSTM2D(32, (3, 3), activation="relu", return_sequences=False),
    BatchNormalization(),

    Flatten(),
    Dense(128, activation="relu"),
    Dropout(0.5),
    # The global policy is mixed_float16; keep the softmax output in float32 so
    # the probabilities and the cross-entropy loss are numerically stable.
    Dense(2, activation="softmax", dtype="float32")  # Binary classification
])

# Compile Model
model.compile(optimizer=Adam(learning_rate=0.0001), loss="categorical_crossentropy", metrics=["accuracy"])

# Train Model
model.fit(train_generator, epochs=EPOCHS)

Epoch 1/12
[1m487/487[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 233ms/step - accuracy: 0.5140 - loss: 7.3828
Epoch 2/12
[1m487/487[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 224ms/step - accuracy: 0.5330 - loss: 7.4789
Epoch 3/12
[1m487/487[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 225ms/step - accuracy: 0.5267 - loss: 7.5553
Epoch 4/12
[1m487/487[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 228ms/step - accuracy: 0.5268 - loss: 7.5965
Epoch 5/12
[1m487/487[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 223ms/step - accuracy: 0.5441 - loss: 7.2953
Epoch 6/12
[1m487/487[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 225ms/step - accuracy: 0.5373 - loss: 7.4447
Epoch 7/12
[1m487/487[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 223ms/step - accuracy: 0.5899 - loss: 6.6010
Epoch 8/12
[1m487/487[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 224ms/step - accuracy: 0.5901 - loss: 6.5930
Epoch 9/

<keras.src.callbacks.history.History at 0x79b9fefecc50>

In [None]:
# Score the fitted model on the same generator that was used for training,
# so the number below is a training-set metric.
eval_results = model.evaluate(train_generator)
loss, accuracy = eval_results
accuracy_pct = accuracy * 100
print(f"Training Accuracy: {accuracy_pct:.2f}%")

[1m487/487[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 134ms/step - accuracy: 0.5580 - loss: 7.1032
Training Accuracy: 60.52%


In [None]:
# `model` at this point is the Keras ConvLSTM network: it has no state_dict(),
# and torch is not imported until a later cell, so torch.save() cannot work on
# a fresh Restart & Run All. Use Keras' own serializer and the native .keras format.
model_path = "/content/violence_detection_model.keras"
model.save(model_path)
print(f"Model saved at {model_path}")

Model saved at /content/violence_detection_model.pth


In [None]:
import kagglehub

# Download the latest version of the UTKFace dataset via kagglehub
UTKFACE_DATASET_HANDLE = "jangedoo/utkface-new"
path = kagglehub.dataset_download(UTKFACE_DATASET_HANDLE)

# Show where the downloaded files were placed
print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/jangedoo/utkface-new?dataset_version_number=1...


100%|██████████| 331M/331M [00:06<00:00, 53.2MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/jangedoo/utkface-new/versions/1


In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import glob

In [None]:
class UTKFaceDataset(Dataset):
    """Image dataset that reads the gender label from each file name.

    File names are expected to look like ``age_gender_race...jpg``; the second
    underscore-separated field is the label (0: Male, 1: Female). Files whose
    name does not yield a 0/1 label are skipped when the dataset is built, so a
    malformed name cannot abort training in the middle of an epoch.

    Args:
        root_dir: Directory containing the ``*.jpg`` images.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        # Sorted so that indices (and any seeded split) are reproducible;
        # glob returns entries in arbitrary order.
        for img_path in sorted(glob.glob(os.path.join(root_dir, "*.jpg"))):
            gender = self._parse_gender(img_path)
            if gender is None:
                continue
            self.image_paths.append(img_path)
            self.labels.append(gender)

    @staticmethod
    def _parse_gender(img_path):
        """Return the 0/1 gender encoded in the file name, or None if invalid."""
        parts = os.path.basename(img_path).split("_")
        if len(parts) < 2:
            return None
        try:
            gender = int(parts[1])
        except ValueError:
            return None
        return gender if gender in (0, 1) else None

    def __len__(self):
        """Number of usable images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, gender)`` for sample ``idx``."""
        # The context manager closes the file handle once pixels are converted.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        return image, self.labels[idx]

In [None]:
# Define data transformations
# Images are loaded as RGB, so give Normalize one mean/std value per channel
# explicitly rather than relying on a single value being broadcast.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Set dataset path from your download
# Built from the kagglehub download location (`path`) instead of a hard-coded
# absolute cache path.
dataset_path = os.path.join(path, "utkface_aligned_cropped", "UTKFace")

# Load dataset
dataset = UTKFaceDataset(dataset_path, transform=transform)
if len(dataset) == 0:
    # Fail here with a clear message rather than later inside the DataLoader.
    raise FileNotFoundError(f"No .jpg images found in {dataset_path}")

# Split into train and test sets
SPLIT_SEED = 42  # fixed so the train/test membership is the same on every run
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=split_generator
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
class GenderClassificationModel(nn.Module):
    """Small CNN producing two logits (Male/Female) per image.

    Three Conv(3x3, padding 1) -> ReLU -> MaxPool(2) stages widen the channels
    3 -> 32 -> 64 -> 128 while halving the spatial size each time. The first
    linear layer expects 128 * 16 * 16 features, i.e. a 16x16 map after the
    three poolings (128x128 inputs).
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels) for each convolutional stage, in order
        channel_plan = [(3, 32), (32, 64), (64, 128)]
        conv_stack = []
        for in_ch, out_ch in channel_plan:
            conv_stack.extend([
                nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ])
        self.conv_layers = nn.Sequential(*conv_stack)

        # Classifier head on the flattened feature map
        flat_features = 128 * 16 * 16
        self.fc_layers = nn.Sequential(
            nn.Linear(flat_features, 128),
            nn.ReLU(),
            nn.Linear(128, 2),  # Output: 2 classes (Male/Female)
        )

    def forward(self, x):
        """Map a batch of images to a batch of two-class logits."""
        feature_maps = self.conv_layers(x)
        batch_size = feature_maps.size(0)
        flattened = feature_maps.view(batch_size, -1)  # Flatten
        return self.fc_layers(flattened)

In [None]:
# Set up the device, the network, the loss and the optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GenderClassificationModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Optimise for a fixed number of passes over the training loader
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    batch_losses = []
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Standard step: clear gradients, forward, backward, update
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    # Mean of the per-batch losses for this epoch
    mean_loss = sum(batch_losses) / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}")

print("Training complete!")

Epoch [1/10], Loss: 0.3753
Epoch [2/10], Loss: 0.2671
Epoch [3/10], Loss: 0.2242
Epoch [4/10], Loss: 0.1861
Epoch [5/10], Loss: 0.1541
Epoch [6/10], Loss: 0.1219
Epoch [7/10], Loss: 0.0946
Epoch [8/10], Loss: 0.0658
Epoch [9/10], Loss: 0.0570
Epoch [10/10], Loss: 0.0421
Training complete!


In [None]:
# Measure classification accuracy on the held-out test loader
model.eval()
correct, total = 0, 0
with torch.no_grad():  # no gradients are needed for scoring
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Predicted class is the index of the largest logit in each row
        predicted = model(images).argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

Test Accuracy: 89.12%


In [None]:
# Write only the model's state dict (not the module object) to disk
model_path = "/content/gender_classification.pth"
gender_state = model.state_dict()
torch.save(gender_state, model_path)
print(f"Model saved at {model_path}")


Model saved at /content/gender_classification.pth
