In [1]:
pip install opencv-python torchvision torch torchvision matplotlib pytesseract pyzbar

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import os
import shutil

import pytesseract

# Locate the Tesseract executable without hard-wiring one machine's layout:
#   1. an explicit TESSERACT_CMD environment variable wins,
#   2. otherwise whatever `tesseract` is found on PATH,
#   3. otherwise the default Windows install location (adjust if needed).
pytesseract.pytesseract.tesseract_cmd = (
    os.environ.get("TESSERACT_CMD")
    or shutil.which("tesseract")
    or r'C:\Program Files\Tesseract-OCR\tesseract.exe'
)


In [3]:
import cv2
import numpy as np
import pytesseract
import torchvision.transforms as T
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from pyzbar.pyzbar import decode  # Library for QR code and barcode detection

# Load Faster R-CNN with ResNet50
def load_faster_rcnn():
    """Build a pretrained Faster R-CNN (ResNet-50 FPN) detector ready for inference.

    Returns:
        The torchvision detection model, already switched to evaluation mode.
    """
    try:
        # torchvision >= 0.13 deprecates ``pretrained=`` in favour of ``weights=``;
        # "DEFAULT" selects the library's default pretrained weights.
        model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
    except TypeError:
        # Older torchvision releases do not accept ``weights`` and only
        # understand the legacy flag.
        model = fasterrcnn_resnet50_fpn(pretrained=True)
    model.eval()  # Inference mode: disables dropout / batch-norm updates
    return model

# Object Detection using Faster R-CNN
def detect_objects(frame, model, threshold=0.3):
    """Run the detector on one OpenCV frame and return the confident detections.

    Args:
        frame: Image as a NumPy array in OpenCV layout (H x W x 3, BGR channel
            order), e.g. a frame returned by ``cv2.VideoCapture.read``.
            ``uint8`` pixels are rescaled to floats in [0, 1]; other dtypes
            are passed through unscaled.
        model: Callable detection model. It receives a batched tensor of shape
            (1, C, H, W) and must return a list whose first element is a dict
            with ``boxes`` (x1, y1, x2, y2), ``labels`` and ``scores`` tensors.
        threshold: Only detections with a score strictly greater than this
            value are kept. Defaults to 0.3.

    Returns:
        A list of dicts with keys ``class_id``, ``score``, ``x``, ``y``,
        ``width`` and ``height``; coordinates are truncated to ``int``.
    """
    pixels = np.asarray(frame)
    if pixels.ndim == 2:
        # Single-channel image: add the channel axis explicitly.
        pixels = pixels[:, :, None]
    if pixels.shape[2] == 3:
        # OpenCV delivers BGR, but torchvision detection models expect RGB;
        # without this flip the red and blue channels are swapped.
        pixels = pixels[:, :, ::-1]

    # HWC -> CHW; make it contiguous because torch.from_numpy rejects the
    # negative strides produced by the channel flip.
    img_tensor = torch.from_numpy(np.ascontiguousarray(pixels.transpose(2, 0, 1)))
    if img_tensor.dtype == torch.uint8:
        img_tensor = img_tensor.to(torch.get_default_dtype()).div(255)
    img_tensor = img_tensor.unsqueeze(0)  # Add batch dimension

    with torch.no_grad():  # No need to track gradients
        prediction = model(img_tensor)[0]

    # Filter once on the tensor, then move only the survivors to NumPy.
    scores = prediction['scores']
    keep = scores > threshold
    kept_boxes = prediction['boxes'][keep].cpu().numpy()
    kept_labels = prediction['labels'][keep].cpu().numpy()
    kept_scores = scores[keep].cpu().numpy()

    return [
        {
            "class_id": int(label),
            "score": float(score),
            "x": int(box[0]),
            "y": int(box[1]),
            "width": int(box[2] - box[0]),
            "height": int(box[3] - box[1]),
        }
        for box, label, score in zip(kept_boxes, kept_labels, kept_scores)
    ]

# Text Recognition using Tesseract
def extract_text(frame):
    """OCR a BGR frame with Tesseract.

    The frame is converted to grayscale before being handed to
    ``pytesseract.image_to_string``; the recognised text is returned with
    leading and trailing whitespace removed.
    """
    return pytesseract.image_to_string(
        cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    ).strip()

# QR Code and Barcode Detection using Pyzbar
def extract_qr_barcode(frame):
    """Decode every QR code / barcode that pyzbar finds in ``frame``.

    Args:
        frame: Image passed straight to ``pyzbar.pyzbar.decode``.

    Returns:
        A list with one dict per decoded symbol, holding ``data`` (payload as
        text), ``type`` (symbology reported by pyzbar) and ``rect`` (the
        symbol's bounding rectangle as reported by pyzbar).
    """
    return [
        {
            # Payloads are raw bytes and are not guaranteed to be valid UTF-8;
            # replace undecodable bytes instead of raising and aborting the
            # caller's frame loop.
            "data": symbol.data.decode('utf-8', errors='replace'),
            "type": symbol.type,
            "rect": symbol.rect,
        }
        for symbol in decode(frame)
    ]

# Analyze box structure based on size
def analyze_box_structure(box, width_range=(20, 200), height_range=(30, 300)):
    """Classify a detection by whether its size falls inside the accepted ranges.

    Args:
        box: Mapping with integer ``"width"`` and ``"height"`` entries (pixels).
        width_range: Inclusive ``(min, max)`` accepted width in pixels.
            Defaults to the example range ``(20, 200)``.
        height_range: Inclusive ``(min, max)`` accepted height in pixels.
            Defaults to the example range ``(30, 300)``.

    Returns:
        ``"Good Structure"`` when both dimensions are within their ranges,
        otherwise ``"Bad Structure"``.

    Raises:
        KeyError: If ``box`` lacks a ``"width"`` or ``"height"`` entry.
    """
    min_width, max_width = width_range
    min_height, max_height = height_range

    width_ok = min_width <= box["width"] <= max_width
    height_ok = min_height <= box["height"] <= max_height

    return "Good Structure" if (width_ok and height_ok) else "Bad Structure"

def _print_frame_report(results):
    """Print the aggregated detection, OCR and barcode results for one frame."""
    print("\n--- Detection Results ---")

    # Object Detection Results
    print("1. Object Detection:")
    if results["detected_objects"]:
        for idx, obj in enumerate(results["detected_objects"], start=1):
            print(f"   Object {idx}: Class ID: {obj['class_id']}, Score: {obj['score']:.2f}, "
                  f"Position: ({obj['x']}, {obj['y']}), Size: {obj['width']}x{obj['height']}")
    else:
        print("   No objects detected.")

    # Text Recognition Results
    print("\n2. Text Recognition (OCR):")
    if results["text"]:
        print(f"   Extracted Text: {results['text']}")
    else:
        print("   No text detected.")

    # QR/Barcode Detection Results
    print("\n3. QR Code and Barcode Detection:")
    if results["qr_barcodes"]:
        for idx, qr in enumerate(results["qr_barcodes"], start=1):
            print(f"   {idx}. Type: {qr['type']}, Data: {qr['data']}")
    else:
        print("   No QR codes or barcodes detected.")

    print("-------------------------")


def _draw_overlays(frame, detected_objects, qr_barcodes):
    """Draw green boxes for detections and red labelled boxes for barcodes, in place."""
    for obj in detected_objects:
        x, y, w, h = obj["x"], obj["y"], obj["width"], obj["height"]
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

    for qr in qr_barcodes:
        rect = qr['rect']
        x, y, w, h = rect.left, rect.top, rect.width, rect.height
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)  # Red rectangle
        cv2.putText(frame, f"{qr['type']}: {qr['data']}", (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)


# Main function to capture video and process frames
def main(camera_index=0, frame_skip=2, frame_size=(640, 480)):
    """Capture webcam frames and run detection, OCR and barcode scanning on them.

    Every ``frame_skip``-th frame is resized, analysed, reported on stdout and
    shown in an OpenCV window named "Frame". The loop ends when a frame cannot
    be read or when 'q' is pressed in that window.

    Args:
        camera_index: Device index passed to ``cv2.VideoCapture``. Defaults to
            0 (the original hard-coded camera).
        frame_skip: Process one frame out of every ``frame_skip``. Defaults to 2.
        frame_size: ``(width, height)`` the frame is resized to before
            processing. Defaults to ``(640, 480)``.

    Raises:
        ValueError: If ``frame_skip`` is smaller than 1.
    """
    if frame_skip < 1:
        raise ValueError("frame_skip must be at least 1")

    model = load_faster_rcnn()
    cap = cv2.VideoCapture(camera_index)
    frame_count = 0

    # try/finally guarantees the camera and window are released even when the
    # loop is left through an exception or a kernel interrupt.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            # Skip frames to reduce load; decide before resizing so that
            # discarded frames cost nothing beyond the read itself.
            frame_count += 1
            if frame_count % frame_skip != 0:
                continue

            # Resize frame for better performance
            frame = cv2.resize(frame, frame_size)

            # Run object detection
            detected_objects = detect_objects(frame, model)

            # Log size and structure verdict for each detection
            for obj in detected_objects:
                structure_status = analyze_box_structure(obj)
                print(f"Detected Product: Size - Height: {obj['height']}, Width: {obj['width']}")
                print(f"Box Structure: {structure_status}")

            # Run OCR and QR/barcode detection on the same (undecorated) frame
            text = extract_text(frame)
            qr_barcodes = extract_qr_barcode(frame)

            results = {
                "detected_objects": detected_objects,
                "text": text,
                "qr_barcodes": qr_barcodes,
            }
            _print_frame_report(results)

            # Annotate and show the processed frame
            _draw_overlays(frame, detected_objects, qr_barcodes)
            cv2.imshow("Frame", frame)

            # Press 'q' to quit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

# Run the main function: starts the webcam capture loop, which keeps running
# until a frame cannot be read or 'q' is pressed in the "Frame" window.
main()




Detected Product: Size - Height: 463, Width: 618
Box Structure: Bad Structure
Detected Product: Size - Height: 192, Width: 183
Box Structure: Good Structure

--- Detection Results ---
1. Object Detection:
   Object 1: Class ID: 1, Score: 0.99, Position: (21, 1), Size: 618x463
   Object 2: Class ID: 72, Score: 0.65, Position: (448, 25), Size: 183x192

2. Text Recognition (OCR):
   No text detected.

3. QR Code and Barcode Detection:
   No QR codes or barcodes detected.
-------------------------
Detected Product: Size - Height: 446, Width: 568
Box Structure: Bad Structure
Detected Product: Size - Height: 326, Width: 75
Box Structure: Bad Structure

--- Detection Results ---
1. Object Detection:
   Object 1: Class ID: 1, Score: 0.99, Position: (52, 28), Size: 568x446
   Object 2: Class ID: 32, Score: 0.31, Position: (110, 71), Size: 75x326

2. Text Recognition (OCR):
   No text detected.

3. QR Code and Barcode Detection:
   No QR codes or barcodes detected.
-------------------------
Dete

KeyboardInterrupt: 