In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models.video import r3d_18, R3D_18_Weights
from PIL import Image
from tqdm import tqdm


In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)


Using device: cpu


In [3]:
class VideoDataset(Dataset):
    def __init__(self, root_dir, num_frames=16, transform=None):
        self.root_dir = root_dir
        self.num_frames = num_frames
        self.transform = transform

        self.videos = []
        self.labels = []
        classes = {"no_theft": 0, "store_theft": 1}

        for cls_name, label in classes.items():
            cls_dir = os.path.join(root_dir, cls_name)
            if not os.path.exists(cls_dir):
                continue
            for video_name in os.listdir(cls_dir):
                video_path = os.path.join(cls_dir, video_name)
                if os.path.isdir(video_path):
                    self.videos.append(video_path)
                    self.labels.append(label)

    def __len__(self):
        return len(self.videos)

    def __getitem__(self, idx):
        video_path = self.videos[idx]
        label = self.labels[idx]

        frame_files = sorted(os.listdir(video_path))[:self.num_frames]
        frames = []
        for f in frame_files:
            img = Image.open(os.path.join(video_path, f)).convert("RGB")
            if self.transform:
                img = self.transform(img)
            frames.append(img)
        video_tensor = torch.stack(frames)  # [T, C, H, W]
        return video_tensor, torch.tensor(label, dtype=torch.long)


In [4]:
frame_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

train_dataset = VideoDataset("split_data/train", num_frames=16, transform=frame_transforms)
val_dataset   = VideoDataset("split_data/val", num_frames=16, transform=frame_transforms)
test_dataset  = VideoDataset("split_data/test", num_frames=16, transform=frame_transforms)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=4)
test_loader  = DataLoader(test_dataset, batch_size=4)


In [13]:
weights = R3D_18_Weights.KINETICS400_V1
model = r3d_18(weights=weights)

# Freeze all layers
for param in model.parameters():
    param.requires_grad = False

# Only train the final fully connected layer
model.fc = nn.Linear(model.fc.in_features, 2)  # 2 classes
model.fc.requires_grad = True

model = model.to(device)



In [14]:
criterion = nn.CrossEntropyLoss()
# Only update parameters that require gradients (fc layer)
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4)
num_epochs = 10



In [15]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0
    correct = 0
    total = 0

    for videos, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        videos = videos.permute(0, 2, 1, 3, 4).float().to(device)  # [B, C, T, H, W]
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(videos)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_acc = 100 * correct / total

    # Validation
    model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for videos, labels in val_loader:
            videos = videos.permute(0, 2, 1, 3, 4).float().to(device)
            labels = labels.to(device)
            outputs = model(videos)
            _, predicted = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    val_acc = 100 * val_correct / val_total

    print(f"Epoch {epoch+1}/{num_epochs} | "
          f"Loss: {running_loss/len(train_loader):.4f} | "
          f"Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}%")


Epoch 1/10: 100%|██████████| 154/154 [02:18<00:00,  1.11it/s]


Epoch 1/10 | Loss: 0.7104 | Train Acc: 47.80% | Val Acc: 47.50%


Epoch 2/10: 100%|██████████| 154/154 [02:19<00:00,  1.10it/s]


Epoch 2/10 | Loss: 0.6981 | Train Acc: 51.22% | Val Acc: 58.33%


Epoch 3/10: 100%|██████████| 154/154 [02:19<00:00,  1.10it/s]


Epoch 3/10 | Loss: 0.6809 | Train Acc: 54.00% | Val Acc: 57.92%


Epoch 4/10: 100%|██████████| 154/154 [02:18<00:00,  1.11it/s]


Epoch 4/10 | Loss: 0.6702 | Train Acc: 61.34% | Val Acc: 72.08%


Epoch 5/10: 100%|██████████| 154/154 [02:19<00:00,  1.11it/s]


Epoch 5/10 | Loss: 0.6603 | Train Acc: 67.37% | Val Acc: 77.50%


Epoch 6/10: 100%|██████████| 154/154 [02:24<00:00,  1.06it/s]


Epoch 6/10 | Loss: 0.6549 | Train Acc: 68.68% | Val Acc: 81.25%


Epoch 7/10: 100%|██████████| 154/154 [02:20<00:00,  1.10it/s]


Epoch 7/10 | Loss: 0.6424 | Train Acc: 69.49% | Val Acc: 63.75%


Epoch 8/10: 100%|██████████| 154/154 [02:20<00:00,  1.10it/s]


Epoch 8/10 | Loss: 0.6355 | Train Acc: 69.98% | Val Acc: 83.33%


Epoch 9/10: 100%|██████████| 154/154 [02:20<00:00,  1.10it/s]


Epoch 9/10 | Loss: 0.6268 | Train Acc: 73.41% | Val Acc: 56.25%


Epoch 10/10: 100%|██████████| 154/154 [02:19<00:00,  1.10it/s]


Epoch 10/10 | Loss: 0.6209 | Train Acc: 71.78% | Val Acc: 87.08%


In [16]:
model.eval()
test_correct = 0
test_total = 0
with torch.no_grad():
    for videos, labels in test_loader:
        videos = videos.permute(0, 2, 1, 3, 4).float().to(device)
        labels = labels.to(device)
        outputs = model(videos)
        _, predicted = torch.max(outputs, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()

print(f"✅ Test Accuracy: {100 * test_correct / test_total:.2f}%")

✅ Test Accuracy: 87.85%


In [17]:
torch.save(model.state_dict(), "pretrained_video_model.pth")
print("Model saved ✅")


Model saved ✅
