In [1]:
# 🛠️ Install missing packages if any
# !pip install -q torch torchvision tqdm

# 📦 Imports
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
import random
import numpy as np

# 🧹 Set random seeds for reproducibility
def seed_everything(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

seed_everything()

print("✅ Libraries imported and seeds set!")


✅ Libraries imported and seeds set!


In [2]:
# 🎯 Evidence classes
evidence_classes = [
    "Gun", "Knife", "Mask", "Car", "Fire", "Glass", "Crowd",
    "Blood", "Explosion", "Bag", "Money", "Weapon", "Smoke", "Person"
]

# 🔗 Crime-to-evidence mapping
crime_to_evidence = {
    "Arrest": ["Person", "Crowd"],
    "Arson": ["Fire", "Smoke"],
    "Assault": ["Blood", "Person"],
    "Burglary": ["Bag", "Glass"],
    "Explosion": ["Explosion", "Smoke"],
    "Fighting": ["Crowd", "Blood"],
    "RoadAccidents": ["Car", "Person"],
    "Robbery": ["Gun", "Bag", "Mask"],
    "Shooting": ["Gun", "Crowd"],
    "Shoplifting": ["Bag", "Money"],
    "Stealing": ["Bag", "Person"],
    "Vandalism": ["Glass", "Crowd"],
    "Abuse": ["Person"],
    "NormalVideos": []
}

# 🏷️ Encode function
def encode_evidence_labels(crime_label_idx):
    index_to_class = {
        0: "Arrest", 1: "Arson", 2: "Assault", 3: "Burglary", 4: "Explosion",
        5: "Fighting", 6: "RoadAccidents", 7: "Robbery", 8: "Shooting",
        9: "Shoplifting", 10: "Stealing", 11: "Vandalism", 12: "Abuse", 13: "NormalVideos"
    }
    crime_class = index_to_class[crime_label_idx]
    evidences = crime_to_evidence.get(crime_class, [])
    binary = [1 if ev in evidences else 0 for ev in evidence_classes]
    return torch.tensor(binary, dtype=torch.float32)

print("✅ Evidence classes and label encoder ready!")



✅ Evidence classes and label encoder ready!


In [3]:
# 🧠 Vision Transformer (ViT) for Evidence Detection
class VisionTransformer(nn.Module):
    def __init__(self, num_classes=14, dim=256, depth=6, heads=8, mlp_dim=512, patch_size=8, image_size=64):
        super().__init__()
        self.patch_size = patch_size
        self.dim = dim
        self.image_size = image_size

        self.conv = nn.Conv2d(3, dim, kernel_size=patch_size, stride=patch_size)
        self.num_patches = (image_size // patch_size) ** 2
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.pos_embedding = nn.Parameter(torch.randn(1, self.num_patches + 1, dim))

        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=dim, nhead=heads, dim_feedforward=mlp_dim, batch_first=True),
            num_layers=depth
        )

        self.to_evidence = nn.Linear(dim, num_classes)

    def forward(self, x):
        x = self.conv(x)                  # (B, D, 8, 8)
        x = x.flatten(2).transpose(1, 2)   # (B, 64, D)
        cls_tokens = self.cls_token.expand(x.shape[0], -1, -1)  # (B, 1, D)
        x = torch.cat([cls_tokens, x], dim=1)                   # (B, 65, D)
        x = x + self.pos_embedding
        x = self.transformer(x)
        x = x[:, 0]  # CLS token output
        return self.to_evidence(x)

print("✅ Vision Transformer model defined!")


✅ Vision Transformer model defined!


In [4]:
# 📂 Define Transformations
print("🔧 Setting up data transformations...")
train_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])
print("✅ Transformations set!")

# 🔥 Hyperparameters
BATCH_SIZE = 128
NUM_CLASSES = 14
print(f"⚙️ Batch Size: {BATCH_SIZE}, Number of Classes: {NUM_CLASSES}")

# 🚀 Set Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"🔥 Device selected: {device}")

# 🗂️ Load Dataset
print("📂 Loading UCF Crime dataset...")
train_dataset = datasets.ImageFolder(r"C:/Users/adity/Downloads/Train", transform=train_transform)

print(f"📊 Dataset contains {len(train_dataset)} samples across {len(train_dataset.classes)} classes.")
print(f"📚 Classes detected: {train_dataset.classes}")

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,   # Kaggle usually gives 2 CPUs
    pin_memory=True
)
print("✅ DataLoader is ready!")
print("🚀 Everything ready to start training!")


🔧 Setting up data transformations...
✅ Transformations set!
⚙️ Batch Size: 128, Number of Classes: 14
🔥 Device selected: cuda
📂 Loading UCF Crime dataset...
📊 Dataset contains 1266345 samples across 14 classes.
📚 Classes detected: ['Abuse', 'Arrest', 'Arson', 'Assault', 'Burglary', 'Explosion', 'Fighting', 'NormalVideos', 'RoadAccidents', 'Robbery', 'Shooting', 'Shoplifting', 'Stealing', 'Vandalism']
✅ DataLoader is ready!
🚀 Everything ready to start training!


In [5]:
# 🧠 Model
model = VisionTransformer(num_classes=NUM_CLASSES)

if torch.cuda.device_count() > 1:
    print(f"🚀 Using {torch.cuda.device_count()} GPUs with DataParallel!")
    model = nn.DataParallel(model)

model = model.to(device)

# 🎯 Loss and Optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
scaler = torch.cuda.amp.GradScaler()

print("✅ Model, Loss, Optimizer ready!")


✅ Model, Loss, Optimizer ready!


  scaler = torch.cuda.amp.GradScaler()


In [12]:
from sklearn.model_selection import train_test_split
from torch.utils.data import random_split

# 🛠️ Split into train and validation
VAL_SPLIT = 0.1  # 10% for validation

# Calculate lengths
num_samples = len(train_dataset)
num_val = int(VAL_SPLIT * num_samples)
num_train = num_samples - num_val

# Random split
train_dataset, val_dataset = random_split(train_dataset, [num_train, num_val])
print(f"🧩 Split: {num_train} training samples and {num_val} validation samples.")

# Reload loaders
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
    pin_memory=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
    pin_memory=True
)


🧩 Split: 1139711 training samples and 126634 validation samples.


In [15]:
import torch
from tqdm import tqdm

print("🚀 Starting training...\n")

# Ensure optimizer is linked to model parameters
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    total = 0
    correct = 0

    loop = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{EPOCHS}] (Training)", leave=True)

    for images, labels in loop:
        images = images.to(device)
        multi_labels = torch.stack([encode_evidence_labels(lbl.item()) for lbl in labels]).to(device)

        optimizer.zero_grad()

        with torch.amp.autocast(device_type="cuda"):
            outputs = model(images)
            loss = criterion(outputs, multi_labels)

        # Backpropagate gradients with scaled loss
        scaler.scale(loss).backward()

        try:
            scaler.step(optimizer)  # Step optimizer only if gradients are not missing
            scaler.update()
        except AssertionError:
            print("⚠️ Skipping optimizer step due to missing gradients.")
            continue  # Skip this batch if gradients are missing

        running_loss += loss.item()

        # 🎯 Calculate batch accuracy
        preds = torch.sigmoid(outputs) > 0.5
        correct += (preds == multi_labels.bool()).sum().item()
        total += torch.numel(multi_labels)

        loop.set_postfix(loss=loss.item())

    # Average loss and accuracy for the epoch
    epoch_loss = running_loss / len(train_loader)
    epoch_acc = 100 * correct / total

    # 🧪 Validation Phase
    model.eval()
    val_loss = 0.0
    val_total = 0
    val_correct = 0

    with torch.no_grad():
        val_loop = tqdm(val_loader, desc=f"Epoch [{epoch+1}/{EPOCHS}] (Validation)", leave=True)

        for images, labels in val_loop:
            images = images.to(device)
            multi_labels = torch.stack([encode_evidence_labels(lbl.item()) for lbl in labels]).to(device)

            with torch.amp.autocast(device_type="cuda"):
                outputs = model(images)
                loss = criterion(outputs, multi_labels)

            val_loss += loss.item()

            # 🎯 Validation accuracy
            preds = torch.sigmoid(outputs) > 0.5
            val_correct += (preds == multi_labels.bool()).sum().item()
            val_total += torch.numel(multi_labels)

    # Validation loss and accuracy for the epoch
    val_epoch_loss = val_loss / len(val_loader)
    val_epoch_acc = 100 * val_correct / val_total

    print(f"✅ Epoch [{epoch+1}/{EPOCHS}] - Train Loss: {epoch_loss:.4f} | Train Acc: {epoch_acc:.2f}%")
    print(f"🧪 Epoch [{epoch+1}/{EPOCHS}] - Val Loss: {val_epoch_loss:.4f} | Val Acc: {val_epoch_acc:.2f}%")

    # 💾 Save best model based on validation loss
    if val_epoch_loss < best_loss:
        print(f"💾 Validation Loss improved from {best_loss:.4f} to {val_epoch_loss:.4f} - saving model...")
        torch.save(model.state_dict(), MODEL_SAVE_PATH)
        best_loss = val_epoch_loss

print("\n🎯 Training completed!")
print(f"🏆 Best model saved at: {MODEL_SAVE_PATH}")


🚀 Starting training...



Epoch [1/10] (Training): 100%|███████████████████████████████████████| 8904/8904 [15:58<00:00,  9.29it/s, loss=0.00374]
Epoch [1/10] (Validation): 100%|█████████████████████████████████████████████████████| 990/990 [01:52<00:00,  8.83it/s]


✅ Epoch [1/10] - Train Loss: 0.0109 | Train Acc: 99.64%
🧪 Epoch [1/10] - Val Loss: 0.0052 | Val Acc: 99.83%
💾 Validation Loss improved from 0.0352 to 0.0052 - saving model...


Epoch [2/10] (Training): 100%|██████████████████████████████████████| 8904/8904 [15:09<00:00,  9.79it/s, loss=0.000848]
Epoch [2/10] (Validation): 100%|█████████████████████████████████████████████████████| 990/990 [01:46<00:00,  9.30it/s]


✅ Epoch [2/10] - Train Loss: 0.0070 | Train Acc: 99.76%
🧪 Epoch [2/10] - Val Loss: 0.0036 | Val Acc: 99.88%
💾 Validation Loss improved from 0.0052 to 0.0036 - saving model...


Epoch [3/10] (Training): 100%|███████████████████████████████████████| 8904/8904 [15:35<00:00,  9.52it/s, loss=0.00411]
Epoch [3/10] (Validation): 100%|█████████████████████████████████████████████████████| 990/990 [01:59<00:00,  8.27it/s]


✅ Epoch [3/10] - Train Loss: 0.0053 | Train Acc: 99.82%
🧪 Epoch [3/10] - Val Loss: 0.0029 | Val Acc: 99.90%
💾 Validation Loss improved from 0.0036 to 0.0029 - saving model...


Epoch [4/10] (Training): 100%|███████████████████████████████████████| 8904/8904 [14:18<00:00, 10.38it/s, loss=0.00117]
Epoch [4/10] (Validation): 100%|█████████████████████████████████████████████████████| 990/990 [01:36<00:00, 10.22it/s]


✅ Epoch [4/10] - Train Loss: 0.0045 | Train Acc: 99.85%
🧪 Epoch [4/10] - Val Loss: 0.0035 | Val Acc: 99.89%


Epoch [5/10] (Training): 100%|██████████████████████████████████████| 8904/8904 [15:09<00:00,  9.79it/s, loss=0.000604]
Epoch [5/10] (Validation): 100%|█████████████████████████████████████████████████████| 990/990 [01:57<00:00,  8.40it/s]


✅ Epoch [5/10] - Train Loss: 0.0039 | Train Acc: 99.87%
🧪 Epoch [5/10] - Val Loss: 0.0026 | Val Acc: 99.91%
💾 Validation Loss improved from 0.0029 to 0.0026 - saving model...


Epoch [6/10] (Training): 100%|███████████████████████████████████████| 8904/8904 [16:55<00:00,  8.77it/s, loss=0.00019]
Epoch [6/10] (Validation): 100%|█████████████████████████████████████████████████████| 990/990 [02:01<00:00,  8.15it/s]


✅ Epoch [6/10] - Train Loss: 0.0035 | Train Acc: 99.88%
🧪 Epoch [6/10] - Val Loss: 0.0023 | Val Acc: 99.92%
💾 Validation Loss improved from 0.0026 to 0.0023 - saving model...


Epoch [7/10] (Training): 100%|██████████████████████████████████████| 8904/8904 [17:29<00:00,  8.49it/s, loss=0.000701]
Epoch [7/10] (Validation): 100%|█████████████████████████████████████████████████████| 990/990 [01:57<00:00,  8.40it/s]


✅ Epoch [7/10] - Train Loss: 0.0032 | Train Acc: 99.89%
🧪 Epoch [7/10] - Val Loss: 0.0022 | Val Acc: 99.93%
💾 Validation Loss improved from 0.0023 to 0.0022 - saving model...


Epoch [8/10] (Training): 100%|███████████████████████████████████████| 8904/8904 [16:26<00:00,  9.03it/s, loss=0.00135]
Epoch [8/10] (Validation): 100%|█████████████████████████████████████████████████████| 990/990 [01:55<00:00,  8.54it/s]


✅ Epoch [8/10] - Train Loss: 0.0029 | Train Acc: 99.90%
🧪 Epoch [8/10] - Val Loss: 0.0019 | Val Acc: 99.94%
💾 Validation Loss improved from 0.0022 to 0.0019 - saving model...


Epoch [9/10] (Training): 100%|██████████████████████████████████████| 8904/8904 [16:10<00:00,  9.17it/s, loss=0.000128]
Epoch [9/10] (Validation): 100%|█████████████████████████████████████████████████████| 990/990 [01:53<00:00,  8.76it/s]


✅ Epoch [9/10] - Train Loss: 0.0028 | Train Acc: 99.90%
🧪 Epoch [9/10] - Val Loss: 0.0022 | Val Acc: 99.93%


Epoch [10/10] (Training): 100%|██████████████████████████████████████| 8904/8904 [16:34<00:00,  8.95it/s, loss=0.00612]
Epoch [10/10] (Validation): 100%|████████████████████████████████████████████████████| 990/990 [01:57<00:00,  8.42it/s]

✅ Epoch [10/10] - Train Loss: 0.0026 | Train Acc: 99.91%
🧪 Epoch [10/10] - Val Loss: 0.0020 | Val Acc: 99.94%

🎯 Training completed!
🏆 Best model saved at: vit_best_model.pth





In [16]:
# 1️⃣ Initialize the model architecture first
model = VisionTransformer(num_classes=14)  # ⚡ num_classes must match training

# 2️⃣ If you trained using DataParallel, wrap with DataParallel again
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)

# 3️⃣ Move model to device
model = model.to(device)

# 4️⃣ Load saved weights
model.load_state_dict(torch.load("vit_best_model.pth", map_location=device))
model.eval()  # 💤 VERY IMPORTANT for evaluation
print("✅ Model loaded and set to evaluation mode!")


✅ Model loaded and set to evaluation mode!


In [17]:
# ✨ Define transform for test set (no augmentation!)
test_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# 🗂️ Load Test Dataset
test_dataset = datasets.ImageFolder(r"C:/Users/adity/Downloads/Test", transform=test_transform)

test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,  # ✅ IMPORTANT: No shuffle for test
    num_workers=2,
    pin_memory=True
)

print(f"✅ Loaded {len(test_dataset)} test samples across {len(test_dataset.classes)} classes.")


✅ Loaded 111308 test samples across 14 classes.


In [18]:
# 🎯 Testing Loop
correct = 0
total = 0
running_loss = 0.0

criterion = nn.BCEWithLogitsLoss()

loop = tqdm(test_loader, desc="🔎 Testing", leave=True)

with torch.no_grad():
    for images, labels in loop:
        images = images.to(device)
        multi_labels = torch.stack([encode_evidence_labels(lbl.item()) for lbl in labels]).to(device)

        with torch.cuda.amp.autocast():
            outputs = model(images)
            loss = criterion(outputs, multi_labels)

        running_loss += loss.item()

        preds = torch.sigmoid(outputs) > 0.5
        correct += (preds == multi_labels.bool()).sum().item()
        total += torch.numel(multi_labels)

    test_loss = running_loss / len(test_loader)
    test_acc = 100 * correct / total

print(f"🏁 Test Loss: {test_loss:.4f}")
print(f"🎯 Test Accuracy: {test_acc:.2f}%")


  with torch.cuda.amp.autocast():
🔎 Testing: 100%|██████████████████████████████████████████████████████████████████| 1740/1740 [02:07<00:00, 13.65it/s]

🏁 Test Loss: 0.7845
🎯 Test Accuracy: 86.96%





In [None]:
import torch
from torchvision import transforms
from PIL import Image
import tkinter as tk
from tkinter import filedialog
from tqdm import tqdm
import time

# Initialize Tkinter
root = tk.Tk()
root.withdraw()  # Hide the root window

# Open File Dialog
file_path = filedialog.askopenfilename(
    title="Select an image",
    filetypes=[("Image files", "*.jpg;*.jpeg;*.png;*.bmp")]
)

if not file_path:
    print("❌ No file selected.")
    exit()

print(f"\n📂 Selected file: {file_path}")

# Load Model
print("\n🔄 Loading model...")
progress = tqdm(total=3, bar_format='{l_bar}{bar} | {n_fmt}/{total_fmt} [{elapsed}<{remaining}]')

model = VisionTransformer(num_classes=14).to(device)
progress.update(1)
time.sleep(0.3)

model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
progress.update(1)
time.sleep(0.3)

model.eval()
progress.update(1)
progress.close()
print("✅ Model loaded and ready.\n")

# Image Transform
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Load and Transform the Uploaded Image
print("🖼️ Processing image...")
progress = tqdm(total=2, bar_format='{l_bar}{bar} | {n_fmt}/{total_fmt} [{elapsed}<{remaining}]')

image = Image.open(file_path).convert("RGB")
progress.update(1)
time.sleep(0.3)

input_tensor = transform(image).unsqueeze(0).to(device)  # [1, 3, 64, 64]
progress.update(1)
progress.close()
print("✅ Image ready.\n")

# Predict
print("🔍 Predicting...")
with torch.no_grad(), torch.amp.autocast(device_type="cuda"):
    outputs = model(input_tensor)
    preds = torch.sigmoid(outputs) > 0.5  # multi-label threshold

print("🎯 Prediction Complete!\n")
print(f"✅ Prediction Result: {preds}")
