In [1]:
import torch
import cv2
from PIL import Image
from transformers import DetrImageProcessor, DetrForObjectDetection
import ipywidgets as widgets
from IPython.display import display
import time

In [2]:
# Checkpoint identifiers selectable for each supported detector family.
# Keys are the model-type names; values are the variant names/ids passed to the loaders.
model_variants = {
    # YOLOv5 entry points (used with torch.hub)
    'YOLOv5': [
        'yolov5s',
        'yolov5m',
        'yolov5l',
        'yolov5x',
    ],
    # DETR checkpoint ids (used with from_pretrained)
    'DETR': [
        'facebook/detr-resnet-50',
        'facebook/detr-resnet-101',
        'facebook/detr-resnet-50-panoptic',
    ],
}

In [3]:
# Fetch a pretrained YOLOv5 model through torch.hub
def load_yolov5_model(variant):
    """Load the pretrained YOLOv5 model named ``variant`` via ``torch.hub``.

    Args:
        variant: Entry-point name in the ``ultralytics/yolov5`` hub repo
            (e.g. ``'yolov5s'``).

    Returns:
        Whatever ``torch.hub.load`` returns for that entry point.
    """
    hub_repo = 'ultralytics/yolov5'
    yolo_model = torch.hub.load(hub_repo, variant, pretrained=True)
    return yolo_model



In [4]:
# Fetch a pretrained DETR model together with its image processor
def load_detr_model(variant):
    """Load a DETR checkpoint and the matching image processor.

    Args:
        variant: Checkpoint id passed to ``from_pretrained``
            (e.g. ``'facebook/detr-resnet-50'``).

    Returns:
        A ``(model, processor)`` tuple; the model has been switched to
        evaluation mode.
    """
    image_processor = DetrImageProcessor.from_pretrained(variant)
    # eval() returns the module itself, so the call can be chained
    detector = DetrForObjectDetection.from_pretrained(variant).eval()
    return detector, image_processor

In [5]:
# Run YOLOv5 on one frame and draw the detections onto it
def yolov5_detection(frame, model, model_name, variant_name):
    """Detect objects in ``frame`` with a YOLOv5 model and annotate the frame.

    Args:
        frame: Image in BGR channel order; it is drawn on directly.
        model: Callable YOLOv5 model whose result offers ``.pandas().xyxy``.
        model_name: Model-type label written in the overlay.
        variant_name: Variant label written in the overlay.

    Returns:
        The same ``frame`` with boxes, labels, the model caption and the
        measured inference time drawn on it.
    """
    box_color = (0, 255, 0)
    font = cv2.FONT_HERSHEY_SIMPLEX

    # The model is fed RGB, while the frame being drawn on stays BGR
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Time only the model call itself
    t0 = time.time()
    results = model(rgb_frame)
    inference_time = time.time() - t0

    detections = results.pandas().xyxy[0]
    for _, det in detections.iterrows():
        left, top, right, bottom = (int(det[col]) for col in ('xmin', 'ymin', 'xmax', 'ymax'))
        caption = f"{det['name']} {det['confidence']:.2f}"
        cv2.rectangle(frame, (left, top), (right, bottom), box_color, 2)
        # Caption sits just above the box's top-left corner
        cv2.putText(frame, caption, (left, top - 10), font, 0.9, box_color, 2)

    # Overlay: which model produced the frame and how long inference took
    cv2.putText(frame, f"{model_name} - {variant_name}", (10, 30), font, 1, (0, 255, 255), 2)
    cv2.putText(frame, f"Inference Time: {inference_time:.2f} sec", (10, 60), font, 1, (255, 0, 0), 2)
    return frame


In [6]:
# Run DETR on one frame and draw the detections onto it
def detr_detection(frame, model, processor, model_name, variant_name, threshold=0.9):
    """Detect objects in ``frame`` with a DETR model and annotate the frame.

    Args:
        frame: Image in BGR channel order; it is drawn on directly.
        model: DETR detection model; ``model.config.id2label`` is used to
            turn class ids into names.
        processor: Image processor used for pre- and post-processing.
        model_name: Model-type label written in the overlay.
        variant_name: Variant label written in the overlay.
        threshold: Minimum score a detection needs to be kept. Defaults to
            0.9, the value that was previously hard-coded.

    Returns:
        The same ``frame`` with boxes, labels, the model caption and the
        measured time (pre-processing plus forward pass) drawn on it.
    """
    box_color = (0, 255, 0)
    font = cv2.FONT_HERSHEY_SIMPLEX

    img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    start_time = time.time()
    inputs = processor(images=img, return_tensors="pt")
    # Pure inference: disable autograd so no graph is built (and kept alive)
    # for every single frame.
    with torch.no_grad():
        outputs = model(**inputs)
    inference_time = time.time() - start_time

    # PIL reports (width, height); post-processing expects (height, width)
    target_sizes = torch.tensor([img.size[::-1]])
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=threshold
    )[0]

    # Post-processing has already applied the score threshold, so every
    # remaining detection is drawn without a second check.
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        x1, y1, x2, y2 = (int(coord) for coord in box.tolist())
        label_name = model.config.id2label[label.item()]
        cv2.rectangle(frame, (x1, y1), (x2, y2), box_color, 2)
        cv2.putText(frame, f"{label_name}: {score.item():.2f}", (x1, y1 - 10),
                    font, 0.9, box_color, 2)

    # Overlay: which model produced the frame and how long inference took
    cv2.putText(frame, f"{model_name} - {variant_name}", (10, 30), font, 1, (0, 255, 255), 2)
    cv2.putText(frame, f"Inference Time: {inference_time:.2f} sec", (10, 60),
                font, 1, (255, 0, 0), 2)
    return frame


In [7]:
# Selector for the detector family
model_type_dropdown = widgets.Dropdown(
    description='Model Type:',
    options=('YOLOv5', 'DETR'),
    value='YOLOv5',
)

# Selector for the variant; starts out listing the YOLOv5 variants
model_variant_dropdown = widgets.Dropdown(
    description='Variant:',
    options=model_variants['YOLOv5'],
    value='yolov5s',
)

In [9]:
# Keep the variant dropdown in sync with the selected model type
def update_variant_options(*args):
    """Repopulate the variant dropdown for the currently selected model type.

    Intended as a widget observer; any arguments passed in are ignored.
    The first variant of the selected type becomes the active value.
    """
    variants = model_variants[model_type_dropdown.value]
    model_variant_dropdown.options = variants
    model_variant_dropdown.value = variants[0]

# Button that launches the live detection loop
start_button = widgets.Button(description="Start Detection")

# Refresh the variant list whenever a different model type is picked
model_type_dropdown.observe(update_variant_options, names='value')

# Render the controls: model type, variant, then the start button
display(model_type_dropdown, model_variant_dropdown, start_button)

def on_button_click(b):
    """Run live webcam detection with the model selected in the dropdowns.

    Loads the chosen model variant, opens camera device 0 and shows the
    annotated frames in an OpenCV window until a frame cannot be read or
    the user presses 'q'. The camera and the window are released even if
    detection raises part-way through.

    Args:
        b: The button instance passed in by the click event; unused.

    Raises:
        ValueError: If the selected model type has no detection routine.
    """
    model_choice = model_type_dropdown.value
    variant_choice = model_variant_dropdown.value

    # Build a single-argument detector bound to the loaded model
    if model_choice == 'YOLOv5':
        model = load_yolov5_model(variant_choice)

        def detect(frame):
            return yolov5_detection(frame, model, model_choice, variant_choice)
    elif model_choice == 'DETR':
        model, processor = load_detr_model(variant_choice)

        def detect(frame):
            return detr_detection(frame, model, processor, model_choice, variant_choice)
    else:
        # Fail with a clear message instead of an unbound-variable error later
        raise ValueError(f"Unsupported model type: {model_choice!r}")

    # Initialize webcam
    cap = cv2.VideoCapture(0)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame delivered (camera unavailable or stream ended)
                break

            # Perform detection based on the selected model
            frame = detect(frame)

            # Display the resulting frame with model name and variant
            cv2.imshow('Real-time Detection', frame)

            # Break the loop on 'q' key press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even when detection
        # raises; otherwise the device stays claimed by this kernel.
        cap.release()
        cv2.destroyAllWindows()

# Attach the button click event
start_button.on_click(on_button_click)


Dropdown(description='Model Type:', options=('YOLOv5', 'DETR'), value='YOLOv5')

Dropdown(description='Variant:', options=('yolov5s', 'yolov5m', 'yolov5l', 'yolov5x'), value='yolov5s')

Button(description='Start Detection', style=ButtonStyle())