# Real-time webcam CAM (class activation map) demo: click 'Run All' to start

In [28]:
# import the libraries
import torch
from PIL import Image, ImageFont, ImageDraw
import cv2      # open CV
import pandas as pd
import numpy as np
from torchcam.utils import overlay_mask

In [29]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Report the chosen device and the installed torch version.
print(f'Using device {device}')
print(torch.__version__)

Using device cuda
2.1.1


In [30]:
import pandas as pd
# Load the class-index table and build two lookup dicts keyed by the 'ID'
# column: one for the 'class' column and one for the 'Chinese' column.
df = pd.read_csv('imagenet_class_index.csv')
idx_to_labels = dict(zip(df['ID'], df['class']))
idx_to_labels_cn = dict(zip(df['ID'], df['Chinese']))

In [31]:
# Define your model
from torchvision.models import googlenet, GoogLeNet_Weights
from torchcam.methods import SmoothGradCAMpp

# `pretrained=True` is deprecated since torchvision 0.13 and only kept as a
# legacy alias; the weights enum names the ImageNet checkpoint explicitly.
# The model is put in eval mode and moved to the selected device.
model = googlenet(weights=GoogLeNet_Weights.IMAGENET1K_V1).eval().to(device)

# No target layer is given, so the choice of layer is left to torchcam.
cam_extractor = SmoothGradCAMpp(model)



In [32]:
from torchvision import transforms
# Preprocessing applied to every frame before it is fed to the model:
# resize -> centre crop -> tensor conversion -> per-channel normalisation.
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]
_preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
]
test_transform = transforms.Compose(_preprocess_steps)

In [33]:
# Class index whose activation map should be displayed. None has the same
# meaning as in process_frame's parameter of the same name: follow the
# model's top-1 prediction.
show_class_id = None
# Label-language switch; mirrors process_frame's `Chinese` parameter.
Chinese = True

In [34]:
# Font used to draw the predicted label on each frame.
font_size = 60
try:
    font = ImageFont.truetype("arial.ttf", font_size)
except OSError:
    # truetype() raises OSError when the font file cannot be found/read.
    # arial.ttf is not installed on every system, so fall back to PIL's
    # built-in font instead of aborting the whole notebook run.
    print("Font 'arial.ttf' not found; falling back to PIL's default font")
    font = ImageFont.load_default()

In [35]:
def process_frame(img, show_class_id=None, Chinese=True):
    """Classify one frame and return it with a CAM overlay and label drawn on.

    Args:
        img: Frame as a BGR array (it is converted with ``cv2.COLOR_BGR2RGB``
            before being handed to PIL and the model).
        show_class_id: Class index whose activation map is visualised.
            ``None`` (default) follows the top-1 prediction. ``0`` is a valid
            class index and is honoured.
        Chinese: Accepted for interface compatibility but currently unused;
            the label text is always looked up in ``idx_to_labels``.

    Returns:
        The annotated frame as a BGR array, suitable for ``cv2.imshow``.
    """
    # video output size
    new_width = 1800
    new_height = 1080
    img = cv2.resize(img, (new_width, new_height))

    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # BGR to RGB
    img_pil = Image.fromarray(img_rgb)              # array to PIL image
    input_tensor = test_transform(img_pil).unsqueeze(0).to(device)

    # Classification. Gradients are intentionally left enabled (no
    # torch.no_grad()): the logits are passed to cam_extractor below, which
    # presumably needs the autograd graph -- see the torchcam docs.
    pred_logits = model(input_tensor)
    pred_id = torch.topk(pred_logits, 1).indices.item()

    # Explicit None check: a plain truthiness test would silently discard
    # class id 0 and fall back to the prediction.
    show_id = pred_id if show_class_id is None else show_class_id

    # CAM for the selected class; take the first returned map and the first
    # (only) item of the batch.
    # NOTE(review): test_transform centre-crops the frame, while the map is
    # overlaid on the full-size frame, so the heat-map may not line up
    # exactly with the image content -- confirm whether this is acceptable.
    activation_map = cam_extractor(show_id, pred_logits)
    activation_map = activation_map[0][0].detach().cpu().numpy()
    result = overlay_mask(img_pil, Image.fromarray(activation_map), alpha=0.7)

    # draw labels
    draw = ImageDraw.Draw(result)
    text_pred = 'Pred Class: {}'.format(idx_to_labels[pred_id])
    draw.text((50, 100), text_pred, font=font, fill=(255, 0, 0, 1))

    # Back to OpenCV's BGR channel order for display.
    return cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR)

In [36]:
import cv2
import time

# Open the default camera (device index 0).
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    # Without this message a missing camera makes the cell finish silently.
    print('Error: could not open camera 0')

try:
    # when camera is open
    while cap.isOpened():

        # get image of camera
        success, frame = cap.read()
        if not success:
            print('Error')
            break

        # classify the frame and draw the CAM overlay, using the options
        # from the configuration cell
        frame = process_frame(frame, show_class_id=show_class_id, Chinese=Chinese)

        # show the annotated frame
        cv2.imshow('my_window', frame)

        # Mask to the low byte: on some platforms waitKey() returns extra
        # high bits, which would make the comparison below never match.
        key = cv2.waitKey(1) & 0xFF
        if key in (ord('q'), 27):  # 'q' or Esc exits
            break
finally:
    # Always release the camera and close the window, even if
    # process_frame raises or the kernel is interrupted.
    cap.release()
    cv2.destroyAllWindows()