In [2]:
import cv2
import torch
from torchvision import transforms, models  # Import the 'models' module
from PIL import Image
import torch.nn as nn
from matplotlib import pyplot as plt

# Define the transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Modify VGG16
class CustomVGG16(nn.Module):
    def __init__(self, num_classes=4, dropout_rate=0.7):
        super(CustomVGG16, self).__init__()

        model = models.vgg16(pretrained=True)

        # Unfreeze more layers for fine-tuning
        for param in model.features[10:].parameters():
            param.requires_grad = True

        self.features = nn.Sequential(
            *list(model.features.children())[:24],  # First 24 layers
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            *list(model.features.children())[24:],  # Remaining layers
        )
        self.avgpool = model.avgpool

        # Add new classifier
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(dropout_rate),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(dropout_rate),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# Create the model instance
model = CustomVGG16(num_classes=4)
model.load_state_dict(torch.load('vgg_new_model.pth'))  # replace with the path to your saved model
model.eval()  # Set the model to evaluation mode

# Define the class names
class_names = ['Entertainment_commercial', 'Food_commercial', 'Healthcare_commercial', 'Technology_Electronics_commercial']

# Open the video capture
cap = cv2.VideoCapture(0)  # replace with correct device id

if not cap.isOpened():
    print("Unable to read capture feed.")

while True:
    # Read a frame
    ret, frame = cap.read()
    if not ret:
        break

    # Save original frame for later display
    orig_frame = frame.copy()

    # Convert the frame to PIL for the transform step
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(frame)

    # Transform the image
    transformed_image = transform(pil_image)

    # Add an extra batch dimension since pytorch treats all inputs as batches
    batch_t = torch.unsqueeze(transformed_image, 0)

    # Feed the image to the model
    with torch.no_grad():
        out = model(batch_t)

    _, predicted = torch.max(out, 1)

    # Convert the predicted label to a meaningful class name based on the new classes
    label = class_names[predicted.item()]

    # Convert the frame back to BGR color space
    orig_frame = cv2.cvtColor(orig_frame, cv2.COLOR_RGB2BGR)

    # Overlay the label on the frame
    cv2.putText(orig_frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    # Display the frame
    plt.imshow(orig_frame)
    plt.show()

    # Press Q on the keyboard to stop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
