# Please use one of the following methods for inference with the model.

#### 1. Predict video
#### 2. Predict image
#### 3. Live camera

# Before starting, modify your Ultralytics library to handle grayscale images (see the README.md file).

# 1. Predict video

In [None]:
from ultralytics import YOLO
import cv2
import torch

# Video inference: run the grayscale YOLO model on every frame of a video
# file, draw the detections plus the digits read left-to-right, and write the
# annotated frames to 'result_video_1.mp4'.

# Load the YOLO model
model = YOLO("Model/yolo11s_30k_images_64_epochs_grayscale_only.pt")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # Use GPU if available
model = model.to(device)

# Video path
video_path = "Data/test/test5fps.mp4"  # Replace with your video path

# Open the video file
cap = cv2.VideoCapture(video_path)

# Check if video opened successfully
if not cap.isOpened():
    print("Error opening video file")
    exit()

# Reuse the input's frame size and frame rate for the output file
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for output video

out = cv2.VideoWriter('result_video_1.mp4', fourcc, fps, (width, height))  # Output video writer

# Overlay settings for the recognized digit string (constant for every frame)
position = (10, 100)
font = cv2.FONT_HERSHEY_DUPLEX
font_scale = 3
thickness = 3

# try/finally guarantees the capture and writer are released (and the output
# file finalized) even if inference raises part-way through the video.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of the video (or a read failure): stop processing.
            break

        # OpenCV delivers frames in BGR channel order, so BGR2GRAY is the
        # correct conversion (RGB2GRAY would swap the red/blue weights).
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # ch=1 presumably relies on the modified Ultralytics library
        # mentioned at the top of this file.
        results = model(frame_gray, ch=1)

        # A single frame was passed in, so the first result is the one used.
        result = results[0]

        # Annotate the original color frame with bounding boxes and labels
        annotated_frame = result.plot(img=frame)

        # Order detections by the left edge (x1) of their box so the class
        # names concatenate in reading order; no detections gives ''.
        sorted_boxes = sorted(result.boxes, key=lambda b: b.xyxy[0][0].item())
        text_result = ''.join(
            result.names[int(box.cls[0].item())] for box in sorted_boxes
        )

        annotated_frame = cv2.putText(annotated_frame, text_result, position, font, font_scale, (0, 255, 0), thickness)

        # Optional live preview while writing (press 'q' to exit):
        #cv2.imshow('YOLO Video', annotated_frame)
        #if cv2.waitKey(1) & 0xFF == ord('q'):
            #break
        out.write(annotated_frame)
finally:
    # Release the video capture and writer objects
    cap.release()
    out.release()
    cv2.destroyAllWindows()


0: 544x640 (no detections), 5.5ms
Speed: 0.5ms preprocess, 5.5ms inference, 0.5ms postprocess per image at shape (1, 3, 544, 640)

0: 544x640 (no detections), 5.6ms
Speed: 0.6ms preprocess, 5.6ms inference, 0.4ms postprocess per image at shape (1, 3, 544, 640)

0: 544x640 (no detections), 7.0ms
Speed: 0.5ms preprocess, 7.0ms inference, 0.4ms postprocess per image at shape (1, 3, 544, 640)

0: 544x640 (no detections), 5.4ms
Speed: 0.6ms preprocess, 5.4ms inference, 0.4ms postprocess per image at shape (1, 3, 544, 640)

0: 544x640 (no detections), 5.3ms
Speed: 0.5ms preprocess, 5.3ms inference, 0.5ms postprocess per image at shape (1, 3, 544, 640)

0: 544x640 (no detections), 6.8ms
Speed: 0.6ms preprocess, 6.8ms inference, 0.4ms postprocess per image at shape (1, 3, 544, 640)

0: 544x640 (no detections), 6.2ms
Speed: 0.9ms preprocess, 6.2ms inference, 0.4ms postprocess per image at shape (1, 3, 544, 640)

0: 544x640 (no detections), 5.7ms
Speed: 0.6ms preprocess, 5.7ms inference, 0.4ms 

# 2. Predict image

In [None]:
from ultralytics import YOLO
import cv2
import torch

# Single-image inference: read one image, run the grayscale YOLO model on it,
# draw the detections plus the digits read left-to-right, and save the result.

# Pick the device first, then load the model onto it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # Use GPU if available
model = YOLO("Model/yolo11s_30k_images_64_epochs_grayscale_only.pt")
model = model.to(device)

# Input and output locations
image_path = "Data/test/test_image1.png"  # Replace with your image path
output_path = "result_image_1.jpg"  # Replace with your desired output path

# Text overlay settings
position = (10, 100)
font = cv2.FONT_HERSHEY_DUPLEX
font_scale = 3
thickness = 3

# Read the image and stop if it could not be opened
img = cv2.imread(image_path)
if img is None:
    print("Error opening image file")
    exit()

# The model takes a single-channel image; ch=1 presumably relies on the
# modified Ultralytics library mentioned at the top of this file.
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
results = model(img_gray, ch=1)

# Draw boxes and labels of the first result on the original color image
annotated_frame = results[0].plot(img=img)

# Build the digit string by joining class names in order of each box's
# left edge (x1); the value from the last result is the one kept.
for result in results:
    left_to_right = sorted(result.boxes, key=lambda det: det.xyxy[0][0].item())
    text_result = ''.join(
        [result.names[int(det.cls[0].item())] for det in left_to_right]
    )

annotated_frame = cv2.putText(
    annotated_frame,
    text_result,
    position,
    font,
    font_scale,
    (0, 255, 0),
    thickness,
)

# Optional on-screen preview (a key press closes the window):
#cv2.imshow('YOLO Image', annotated_frame)
#cv2.waitKey(0)
#cv2.destroyAllWindows()

# Save the annotated image
cv2.imwrite(output_path, annotated_frame)


0: 640x640 1 ., 1 0, 1 1, 1 2, 1 3, 1 8, 1 9, 5.0ms
Speed: 2.6ms preprocess, 5.0ms inference, 271.1ms postprocess per image at shape (1, 3, 640, 640)


True

# 3. Live camera using OpenCV / Raspberry Pi

In [None]:
from ultralytics import YOLO
import cv2
import torch

# Live camera inference: grab frames from a camera, run the grayscale YOLO
# model on each one, and show the annotated feed with the digits read
# left-to-right until 'q' is pressed.

# Load the YOLO model
model = YOLO("Model/yolo11s_30k_images_64_epochs_grayscale_only.pt")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # Use GPU if available
model = model.to(device)

# Open the default camera (usually index 0).  Change if needed.
cap = cv2.VideoCapture(0)  # Use 0 for default camera, 1 for external, etc.

# Check if camera opened successfully
if not cap.isOpened():
    print("Error opening camera")
    exit()

# Overlay settings for the recognized digit string (constant for every frame)
position = (10, 100)
font = cv2.FONT_HERSHEY_DUPLEX
font_scale = 3
thickness = 3

# try/finally guarantees the camera is released and the preview window is
# closed even if inference raises or the cell is interrupted.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # The camera stopped delivering frames: stop processing.
            break

        # OpenCV delivers frames in BGR channel order, so BGR2GRAY is the
        # correct conversion (RGB2GRAY would swap the red/blue weights).
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
        # ch=1 presumably relies on the modified Ultralytics library
        # mentioned at the top of this file.
        results = model(frame_gray, ch=1)  # Run inference

        # A single frame was passed in, so the first result is the one used.
        result = results[0]

        # Annotate the original color frame with bounding boxes and labels
        annotated_frame = result.plot(img=frame)

        # Order detections by the left edge (x1) of their box so the class
        # names concatenate in reading order; no detections gives ''.
        sorted_boxes = sorted(result.boxes, key=lambda b: b.xyxy[0][0].item())
        text_result = ''.join(
            result.names[int(box.cls[0].item())] for box in sorted_boxes
        )

        annotated_frame = cv2.putText(annotated_frame, text_result, position, font, font_scale, (0, 255, 0), thickness)

        # Display the annotated frame
        cv2.imshow('YOLO Live', annotated_frame)  # Show the live feed

        # Press 'q' to exit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture object
    cap.release()
    cv2.destroyAllWindows()