In [None]:
# Project based on the RaspRover image UGV_S10_240518.zip -> https://drive.google.com/file/d/1ELeIAsUIQ6ydEsc19Vssc6X0nz1myuJ_/view
# Modified and extended by Johny Roa Müller, 2025

# digit recognition

from picamera2 import Picamera2
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import numpy as np
import ipywidgets as widgets
from logger_configurator import setup_ugv_logger
from time import sleep

logger = setup_ugv_logger()

# MNIST model
class SimpleNN(nn.Module):
    """Three-layer fully connected classifier (784 -> 128 -> 64 -> 10).

    The input is flattened, passed through two ReLU-activated hidden layers
    and a final linear layer that yields one raw score (logit) per class.
    """

    def __init__(self):
        super().__init__()
        # Attribute names and registration order determine the state-dict
        # keys, so they must match the checkpoint the weights are loaded from.
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Return the 10 class logits for a batch of inputs."""
        hidden = self.relu(self.fc1(self.flatten(x)))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Load the model
model = SimpleNN()
# map_location="cpu" remaps any tensors that were saved from a GPU so the
# checkpoint also loads on a machine without CUDA.
model.load_state_dict(torch.load('mnist_model.pth', map_location="cpu"))
model.to("cpu")
# Switch to inference mode; the loop below only runs predictions.
model.eval()

# Preprocessing applied to each 28x28 image before inference:
# convert to a float tensor, then normalize with the given mean / std.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# Initialise Picamera2 with a 640x480 XRGB8888 video stream and start it
picam2 = Picamera2()
video_config = picam2.create_video_configuration(
    main={"format": 'XRGB8888', "size": (640, 480)}
)
picam2.configure(video_config)
picam2.start()

# Notebook image widget that the processing loop updates with JPEG bytes
camera_display_handle = widgets.Image(format='jpeg')
display(camera_display_handle)


# HSV bounds for blue: H 110-130, S >= 50, V >= 50 (S and V up to 255)
lower_blue = np.array((110, 50, 50))
upper_blue = np.array((130, 255, 255))

# HSV bounds for green: H 60-90, S >= 50, V >= 50 (S and V up to 255)
lower_green = np.array((60, 50, 50))
upper_green = np.array((90, 255, 255))




# 3x3 structuring element for the morphological clean-up; it never changes,
# so it is created once instead of on every frame.
kernel = np.ones((3, 3), np.uint8)

# Capture -> segment the blue digit -> classify -> show the mask, until the
# cell is interrupted or an error occurs.
try:
    while True:
        frame = picam2.capture_array()
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

        # Mask of the blue digit: 255 where the pixel lies inside the blue
        # HSV range, 0 elsewhere.
        mask = cv2.inRange(hsv, lower_blue, upper_blue)

        # Morphology: close small holes and thicken the digit strokes,
        # then remove speckle noise.
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
        mask = cv2.dilate(mask, kernel, iterations=1)
        mask = cv2.medianBlur(mask, 3)

        # The mask is already a white digit on a black background (the MNIST
        # convention), so no inversion is needed.

        # Enlarged view of the mask for the notebook widget
        display_img = cv2.resize(mask, (280, 280), interpolation=cv2.INTER_NEAREST)

        if cv2.countNonZero(mask) == 0:
            # No blue pixels at all: there is nothing to classify, and feeding
            # a blank image to the network could still produce a "confident"
            # but meaningless digit.
            logger.info("No confident prediction.")
        else:
            # Downscale to the 28x28 network input size
            resized = cv2.resize(mask, (28, 28), interpolation=cv2.INTER_AREA)

            # Add the batch dimension expected by the model
            tensor = transform(resized).unsqueeze(0)

            with torch.no_grad():
                outputs = model(tensor)
                probs = torch.nn.functional.softmax(outputs, dim=1)
                confidence, pred = torch.max(probs, 1)
                # Only report predictions the model is very sure about
                if confidence.item() > 0.95:
                    logger.info(f"Predicted digit: {pred.item()} (confidence: {confidence.item():.2f})")
                else:
                    logger.info("No confident prediction.")

        _, frame_jpeg = cv2.imencode('.jpeg', display_img)
        camera_display_handle.value = frame_jpeg.tobytes()

finally:
    # Release the camera first: destroyAllWindows() can raise on OpenCV
    # builds without GUI support, and the camera must not stay locked then.
    try:
        picam2.close()
    finally:
        cv2.destroyAllWindows()


[0:01:25.430375740] [1805] [1;32m INFO [1;37mCamera [1;34mcamera_manager.cpp:330 [0mlibcamera v0.5.2+99-bfd68f78
[0:01:25.476702297] [1839] [1;32m INFO [1;37mIPAProxy [1;34mipa_proxy.cpp:180 [0mUsing tuning file /usr/share/libcamera/ipa/rpi/vc4/ov5647.json
[0:01:25.482627508] [1839] [1;32m INFO [1;37mCamera [1;34mcamera_manager.cpp:220 [0mAdding camera '/base/soc/i2c0mux/i2c@1/ov5647@36' for pipeline handler rpi/vc4
[0:01:25.482667118] [1839] [1;32m INFO [1;37mRPI [1;34mvc4.cpp:440 [0mRegistered camera /base/soc/i2c0mux/i2c@1/ov5647@36 to Unicam device /dev/media0 and ISP device /dev/media1
[0:01:25.482702802] [1839] [1;32m INFO [1;37mRPI [1;34mpipeline_base.cpp:1107 [0mUsing configuration file '/usr/share/libcamera/pipeline/rpi/vc4/rpi_apps.yaml'
[0:01:25.491750743] [1805] [1;32m INFO [1;37mCamera [1;34mcamera.cpp:1215 [0mconfiguring streams: (0) 640x480-XRGB8888/SMPTE170M/Rec709/None/Full (1) 640x480-SGBRG10_CSI2P/RAW
[0:01:25.492181747] [1839] [1;32m INFO [

Image(value=b'', format='jpeg')