In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
import numpy as np
import cv2
import os

In [None]:
# 1️⃣ Define the same model architecture as the saved checkpoint, so its state dict can be loaded below

class Improved3DCNN(nn.Module):
    """Three-stage 3D convolutional classifier.

    Every stage applies Conv3d -> BatchNorm3d -> ReLU -> pooling:

    * stage 1: 3 -> 32 channels, max-pool with kernel (1, 2, 2)
    * stage 2: 32 -> 64 channels, max-pool with kernel (2, 2, 2)
    * stage 3: 64 -> 128 channels, adaptive average pool down to (1, 1, 1)

    The pooled 128-element vector is mapped to ``num_classes`` logits by a
    single linear layer.

    Args:
        num_classes: Number of logits produced by the final linear layer.
    """

    def __init__(self, num_classes=2):
        super().__init__()

        # Attribute names are part of the checkpoint format (state-dict keys),
        # so they must stay exactly as they are.
        self.conv1 = nn.Conv3d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm3d(num_features=32)
        self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2))

        self.conv2 = nn.Conv3d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm3d(num_features=64)
        self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2))

        self.conv3 = nn.Conv3d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm3d(num_features=128)
        self.pool3 = nn.AdaptiveAvgPool3d(output_size=(1, 1, 1))

        self.fc = nn.Linear(in_features=128, out_features=num_classes)

    def forward(self, x):
        """Return raw class logits for a batch of inputs.

        Args:
            x: Batched tensor with 3 channels in dimension 1, as required by
                ``conv1`` (the caller in this notebook passes
                ``[1, C, T, H, W]``).

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding unnormalised
            scores (no softmax is applied here).
        """
        stages = (
            (self.conv1, self.bn1, self.pool1),
            (self.conv2, self.bn2, self.pool2),
            (self.conv3, self.bn3, self.pool3),
        )
        for conv, norm, pool in stages:
            x = pool(F.relu(norm(conv(x))))

        # After the adaptive pool every sample is (128, 1, 1, 1); collapse it
        # to a flat 128-vector per sample for the linear layer.
        features = x.flatten(start_dim=1)
        return self.fc(features)


In [None]:
# 2️⃣ Load trained model

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = Improved3DCNN(num_classes=2).to(device)

# weights_only=True restricts unpickling to tensors and plain containers.
# The file is consumed as a state dict, so nothing more is needed, and it
# prevents a tampered checkpoint from executing arbitrary code on load.
model.load_state_dict(
    torch.load("model_scratch_weights.pth", map_location=device, weights_only=True)
)

# Switch BatchNorm layers to their stored running statistics for inference.
# As the last expression of the cell this also displays the architecture.
model.eval()

  model.load_state_dict(torch.load("model_scratch_weights.pth", map_location=device))


Improved3DCNN(
  (conv1): Conv3d(3, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (bn1): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv3d(32, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (bn2): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv3d(64, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (bn3): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool3): AdaptiveAvgPool3d(output_size=(1, 1, 1))
  (fc): Linear(in_features=128, out_features=2, bias=True)
)

In [None]:
# 3️⃣ Define transforms

# Per-frame preprocessing applied to each PIL image, in order:
#   1. resize to 224x224,
#   2. convert to a tensor,
#   3. normalise each channel with the mean/std below.
# NOTE(review): these mean/std values match the commonly used ImageNet
# statistics; confirm they are the ones used when the model was trained.
_frame_pipeline_steps = [
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
frame_transforms = transforms.Compose(_frame_pipeline_steps)


In [None]:
# 4️⃣ Function to extract frames from video

def extract_video_frames(video_path, num_frames=16, size=(224, 224)):
    """Sample evenly spaced frames from a video and stack them into a tensor.

    ``num_frames`` indices are spread linearly between the first and last
    frame reported by OpenCV. Each frame that decodes successfully is
    converted from BGR to RGB, wrapped in a PIL image and passed through the
    module-level ``frame_transforms`` pipeline.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        num_frames: Number of evenly spaced frame indices to request.
        size: Currently unused. The output resolution is whatever
            ``frame_transforms`` produces; the parameter is kept so existing
            callers that pass it keep working.

    Returns:
        Tensor of stacked frames, shape ``[T, C, H, W]``. ``T`` can be smaller
        than ``num_frames`` because frames that fail to decode are skipped.

    Raises:
        ValueError: If the video cannot be opened, reports no frames, or
            none of the requested frames could be decoded.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        # A missing or unreadable file would otherwise surface as a
        # misleading "empty video" error.
        if not cap.isOpened():
            raise ValueError(f"Cannot open video: {video_path}")

        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Guard against non-positive counts, which would otherwise yield
        # negative seek positions below.
        if total <= 0:
            raise ValueError(f"Empty video: {video_path}")

        indices = np.linspace(0, total - 1, num=num_frames, dtype=int)
        frames = []

        for idx in indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
            ret, frame = cap.read()
            if not ret:
                # Best effort: skip frames the decoder cannot deliver.
                continue
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(frame)
            frames.append(frame_transforms(img))
    finally:
        # Always free the capture handle, including on the error paths above.
        cap.release()

    if not frames:
        # torch.stack([]) would fail with an opaque RuntimeError.
        raise ValueError(f"No frames could be decoded from video: {video_path}")

    return torch.stack(frames)  # shape: [T, C, H, W]


In [None]:
# 5️⃣ Prediction function

def predict_video(video_path):
    """Classify one video with the module-level ``model`` and report the result.

    Sixteen frames are sampled via ``extract_video_frames``, arranged as a
    single-item batch in channel-first layout and pushed through ``model``
    with gradient tracking disabled. The predicted label and its softmax
    probability are printed and returned.

    Args:
        video_path: Path of the video to classify.

    Returns:
        Tuple ``(label, confidence)`` where ``label`` is ``"no_theft"`` or
        ``"store_theft"`` and ``confidence`` is the softmax probability of
        the predicted class as a Python float.
    """
    class_names = {0: "no_theft", 1: "store_theft"}

    clip = extract_video_frames(video_path, num_frames=16)
    # [T, C, H, W] -> [C, T, H, W] -> [1, C, T, H, W]
    batch = clip.permute(1, 0, 2, 3).unsqueeze(0).to(device)

    with torch.no_grad():
        class_probs = torch.softmax(model(batch), dim=1)
        best_idx = torch.argmax(class_probs, dim=1).item()
        score = class_probs[0, best_idx].item()

    label = class_names[best_idx]
    print(f"🧾 Prediction: {label} (confidence {score:.2f})")
    return label, score

In [None]:
# 6️⃣ Example usage

# Classify one clip from the no_theft folder.
# NOTE(review): absolute, machine-specific path; edit it before running elsewhere.
video_path = r"D:\cellula\task3\dataset\no_theft\shop_lifter_n_42.mp4"  # ⬅️ put your video path here
# predict_video prints a summary line and returns (label, confidence); as the
# last expression of the cell, the returned tuple is displayed as well.
predict_video(video_path)

🧾 Prediction: no_theft (confidence 0.70)


('no_theft', 0.7003263831138611)

In [17]:

# Classify one clip from the store_theft folder.
# NOTE(review): absolute, machine-specific path; edit it before running elsewhere.
video_path = r"D:\cellula\task3\dataset\store_theft\shop_lifter_4.mp4"  # ⬅️ put your video path here
# Prints the prediction and, as the cell's last expression, displays the
# returned (label, confidence) tuple.
predict_video(video_path)

🧾 Prediction: store_theft (confidence 0.62)


('store_theft', 0.6183863282203674)

In [18]:

# Classify a second clip from the store_theft folder.
# NOTE(review): absolute, machine-specific path; edit it before running elsewhere.
video_path = r"D:\cellula\task3\dataset\store_theft\shop_lifter_5.mp4"  # ⬅️ put your video path here
# Prints the prediction and, as the cell's last expression, displays the
# returned (label, confidence) tuple.
predict_video(video_path)

🧾 Prediction: store_theft (confidence 0.62)


('store_theft', 0.6197575330734253)

In [21]:
# Classify a third clip from the store_theft folder.
# NOTE(review): absolute, machine-specific path; edit it before running elsewhere.
video_path = r"D:\cellula\task3\dataset\store_theft\shop_lifter_1.mp4"  # ⬅️ put your video path here
# Prints the prediction and, as the cell's last expression, displays the
# returned (label, confidence) tuple.
predict_video(video_path)

🧾 Prediction: store_theft (confidence 0.62)


('store_theft', 0.621143639087677)