In [9]:
import torch
import torchvision.transforms as T
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from PIL import Image, ImageDraw, ImageFont

# Draw detections (bounding boxes plus "label: score" captions) onto an image
def draw_boxes(image, boxes, labels, scores, threshold=0.5, box_color="red", text_color="white", font_size=20):
    """
    Draw bounding boxes and their captions onto an image.

    The image is modified in place and is also returned.

    :param image: PIL.Image object to draw on
    :param boxes: iterable of boxes; each box holds four coordinates that are
        passed to ``ImageDraw.rectangle`` as ``[x0, y0, x1, y1]`` (tensor rows
        or plain sequences are both accepted)
    :param labels: iterable of label texts, one per box
    :param scores: iterable of confidence values, one per box
    :param threshold: minimum score a detection needs to be drawn (default=0.5)
    :param box_color: color of the box outline and caption background (default="red")
    :param text_color: color of the caption text (default="white")
    :param font_size: caption font size (default=20)
    :return: the same PIL.Image object with the detections drawn on it
    """
    draw = ImageDraw.Draw(image)

    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        # Arial is not available: fall back to the built-in font and still
        # honor font_size where the installed Pillow supports it.
        try:
            font = ImageFont.load_default(size=font_size)
        except TypeError:
            # Older Pillow: load_default() takes no size argument.
            font = ImageFont.load_default()

    for box, label, score in zip(boxes, labels, scores):
        score = float(score)
        if not score >= threshold:
            continue

        # Convert once to plain floats so PIL never sees tensor scalars.
        coords = box.tolist() if hasattr(box, "tolist") else box
        x0, y0, x1, y1 = (float(value) for value in coords)
        draw.rectangle([x0, y0, x1, y1], outline=box_color, width=3)

        text = f"{label}: {score:.2f}"
        # Measured relative to a (0, 0) anchor; left/top are the glyph offsets.
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        text_width = right - left
        text_height = bottom - top

        # Put the caption above the box; if that would leave the image at the
        # top edge, move it down so it stays visible.
        caption_y = y0 - text_height
        if caption_y < 0:
            caption_y = max(y0, 0.0)

        draw.rectangle([x0, caption_y, x0 + text_width, caption_y + text_height], fill=box_color)
        # Shift by the glyph offsets so the ink sits exactly on the background.
        draw.text((x0 - left, caption_y - top), text, fill=text_color, font=font)

    return image

# Load the pretrained model
def load_model():
    """
    Load the Faster R-CNN model (ResNet-50 FPN) pretrained on COCO data.

    The model is switched to evaluation mode before it is returned.

    :return: pretrained Faster R-CNN model in eval mode
    """
    try:
        # torchvision >= 0.13 selects weights through an enum; the older
        # ``pretrained=True`` flag is deprecated there.
        from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
    except ImportError:
        # Older torchvision without the weights API.
        model = fasterrcnn_resnet50_fpn(pretrained=True)
    else:
        model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
    model.eval()
    return model

# COCO label names; the list position is the label id used to look a name up.
# The 'N/A' entries keep later names at their positions (presumably ids that
# the dataset leaves unused - verify against the model's label mapping).
COCO_INSTANCE_CATEGORY_NAMES = [
    # 0-9
    "__background__", "person", "bicycle", "car", "motorcycle",
    "airplane", "bus", "train", "truck", "boat",
    # 10-19
    "traffic light", "fire hydrant", "N/A", "stop sign", "parking meter",
    "bench", "bird", "cat", "dog", "horse",
    # 20-29
    "sheep", "cow", "elephant", "bear", "zebra",
    "giraffe", "N/A", "backpack", "umbrella", "N/A",
    # 30-39
    "N/A", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    # 40-49
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "N/A", "wine glass", "cup", "fork", "knife",
    # 50-59
    "spoon", "bowl", "banana", "apple", "sandwich",
    "orange", "broccoli", "carrot", "hot dog", "pizza",
    # 60-69
    "donut", "cake", "chair", "couch", "potted plant",
    "bed", "N/A", "dining table", "N/A", "N/A",
    # 70-79
    "toilet", "N/A", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    # 80-89
    "toaster", "sink", "refrigerator", "N/A", "book",
    "clock", "vase", "scissors", "teddy bear", "hair drier",
    # 90
    "toothbrush",
]

# Prepare the image
def prepare_image(image_path):
    """
    Load an image from disk and prepare it for analysis.

    :param image_path: path to the input image
    :return: tuple of (RGB PIL.Image object, tensor produced by ``T.ToTensor``)
    :raises ValueError: if the image cannot be opened or converted; the
        original exception is attached as the cause
    """
    try:
        # The context manager closes the underlying file; convert() returns a
        # separate image, so the result stays usable afterwards.
        with Image.open(image_path) as source:
            image = source.convert("RGB")
    except Exception as e:
        raise ValueError(f"Fehler beim Laden des Bildes: {e}") from e

    to_tensor = T.ToTensor()
    return image, to_tensor(image)

# Analyze an image
def analyze_image(image_path, model, threshold=0.5, font_size=20):
    """
    Run the detection model on an image and draw the detections onto a copy.

    :param image_path: path to the input image
    :param model: detection model; called with a list holding one image
        tensor and expected to return a list whose first item is a dict with
        'boxes', 'scores' and 'labels'
    :param threshold: confidence threshold passed on to ``draw_boxes``
    :param font_size: caption font size passed on to ``draw_boxes``
    :return: PIL.Image object (a copy of the input) with the detections drawn
    """
    image, tensor = prepare_image(image_path)

    # Run the input on the device the model lives on, so a model that was
    # moved to another device does not fail with a device mismatch.
    parameters = model.parameters() if hasattr(model, "parameters") else iter(())
    first_parameter = next(parameters, None)
    if first_parameter is not None:
        tensor = tensor.to(first_parameter.device)

    with torch.no_grad():
        predictions = model([tensor])

    # Bring the results back to the CPU once instead of per element.
    detections = predictions[0]
    boxes = detections['boxes'].cpu()
    scores = detections['scores'].cpu()
    label_ids = detections['labels'].cpu().tolist()

    # Map ids to names; an id outside the table is shown as its number
    # instead of raising IndexError.
    known = len(COCO_INSTANCE_CATEGORY_NAMES)
    label_names = [
        COCO_INSTANCE_CATEGORY_NAMES[label_id] if 0 <= label_id < known else str(label_id)
        for label_id in label_ids
    ]

    # Draw on a copy so the image returned by prepare_image stays untouched.
    result_image = draw_boxes(image.copy(), boxes, label_names, scores, threshold=threshold, font_size=font_size)
    return result_image

# Main program: load the model, analyze one hard-coded image, show the result
if __name__ == "__main__":
    image_path = r"C:\Users\julia\Downloads\kind.webp"  # Path to the input image (machine-specific)
    model = load_model()
    # NOTE: font_size=15 is smaller than analyze_image's default of 20
    output_image = analyze_image(image_path, model, threshold=0.5, font_size=15)
    output_image.show()


In [None]:
import torch
import torchvision.transforms as T
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from PIL import Image, ImageDraw, ImageFont

def draw_boxes(image, boxes, labels, scores, threshold=0.5, box_color="red", text_color="white", font_size=20):
    """
    Draw bounding boxes and their captions onto an image.

    The image is modified in place and is also returned.

    :param image: PIL.Image object to draw on
    :param boxes: iterable of boxes; each box holds four coordinates that are
        passed to ``ImageDraw.rectangle`` as ``[x0, y0, x1, y1]`` (tensor rows
        or plain sequences are both accepted)
    :param labels: iterable of label texts, one per box
    :param scores: iterable of confidence values, one per box
    :param threshold: minimum score a detection needs to be drawn (default=0.5)
    :param box_color: color of the box outline and caption background (default="red")
    :param text_color: color of the caption text (default="white")
    :param font_size: caption font size (default=20)
    :return: the same PIL.Image object with the detections drawn on it
    """
    draw = ImageDraw.Draw(image)

    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        # Arial is not available: fall back to the built-in font and still
        # honor font_size where the installed Pillow supports it.
        try:
            font = ImageFont.load_default(size=font_size)
        except TypeError:
            # Older Pillow: load_default() takes no size argument.
            font = ImageFont.load_default()

    for box, label, score in zip(boxes, labels, scores):
        score = float(score)
        if not score >= threshold:
            continue

        # Convert once to plain floats so PIL never sees tensor scalars.
        coords = box.tolist() if hasattr(box, "tolist") else box
        x0, y0, x1, y1 = (float(value) for value in coords)
        draw.rectangle([x0, y0, x1, y1], outline=box_color, width=3)

        text = f"{label}: {score:.2f}"
        # Measured relative to a (0, 0) anchor; left/top are the glyph offsets.
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        text_width = right - left
        text_height = bottom - top

        # Put the caption above the box; if that would leave the image at the
        # top edge, move it down so it stays visible.
        caption_y = y0 - text_height
        if caption_y < 0:
            caption_y = max(y0, 0.0)

        draw.rectangle([x0, caption_y, x0 + text_width, caption_y + text_height], fill=box_color)
        # Shift by the glyph offsets so the ink sits exactly on the background.
        draw.text((x0 - left, caption_y - top), text, fill=text_color, font=font)

    return image

# Load the pretrained model
def load_model():
    """
    Load the Faster R-CNN model (ResNet-50 FPN) pretrained on COCO data.

    The model is switched to evaluation mode before it is returned.

    :return: pretrained Faster R-CNN model in eval mode
    """
    try:
        # torchvision >= 0.13 selects weights through an enum; the older
        # ``pretrained=True`` flag is deprecated there.
        from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
    except ImportError:
        # Older torchvision without the weights API.
        model = fasterrcnn_resnet50_fpn(pretrained=True)
    else:
        model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
    model.eval()
    return model

COCO_INSTANCE_CATEGORY_NAMES = [
    # The list position is the label id used to look a name up. The 'N/A'
    # entries keep later names at their positions (presumably ids that the
    # dataset leaves unused - verify against the model's label mapping).
    # 0-9
    "__background__", "person", "bicycle", "car", "motorcycle",
    "airplane", "bus", "train", "truck", "boat",
    # 10-19
    "traffic light", "fire hydrant", "N/A", "stop sign", "parking meter",
    "bench", "bird", "cat", "dog", "horse",
    # 20-29
    "sheep", "cow", "elephant", "bear", "zebra",
    "giraffe", "N/A", "backpack", "umbrella", "N/A",
    # 30-39
    "N/A", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    # 40-49
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "N/A", "wine glass", "cup", "fork", "knife",
    # 50-59
    "spoon", "bowl", "banana", "apple", "sandwich",
    "orange", "broccoli", "carrot", "hot dog", "pizza",
    # 60-69
    "donut", "cake", "chair", "couch", "potted plant",
    "bed", "N/A", "dining table", "N/A", "N/A",
    # 70-79
    "toilet", "N/A", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    # 80-89
    "toaster", "sink", "refrigerator", "N/A", "book",
    "clock", "vase", "scissors", "teddy bear", "hair drier",
    # 90
    "toothbrush",
]

def prepare_image(image_path):
    """
    Load an image from disk and prepare it for analysis.

    :param image_path: path to the input image
    :return: tuple of (RGB PIL.Image object, tensor produced by ``T.ToTensor``)
    :raises ValueError: if the image cannot be opened or converted; the
        original exception is attached as the cause
    """
    try:
        # The context manager closes the underlying file; convert() returns a
        # separate image, so the result stays usable afterwards.
        with Image.open(image_path) as source:
            image = source.convert("RGB")
    except Exception as e:
        raise ValueError(f"Fehler beim Laden des Bildes: {e}") from e

    to_tensor = T.ToTensor()
    return image, to_tensor(image)

def analyze_image(image_path, model, threshold=0.5, font_size=20):
    """
    Run the detection model on an image and draw the detections onto a copy.

    :param image_path: path to the input image
    :param model: detection model; called with a list holding one image
        tensor and expected to return a list whose first item is a dict with
        'boxes', 'scores' and 'labels'
    :param threshold: confidence threshold passed on to ``draw_boxes``
    :param font_size: caption font size passed on to ``draw_boxes``
    :return: PIL.Image object (a copy of the input) with the detections drawn
    """
    image, tensor = prepare_image(image_path)

    # Run the input on the device the model lives on, so a model that was
    # moved to another device does not fail with a device mismatch.
    parameters = model.parameters() if hasattr(model, "parameters") else iter(())
    first_parameter = next(parameters, None)
    if first_parameter is not None:
        tensor = tensor.to(first_parameter.device)

    with torch.no_grad():
        predictions = model([tensor])

    # Bring the results back to the CPU once instead of per element.
    detections = predictions[0]
    boxes = detections['boxes'].cpu()
    scores = detections['scores'].cpu()
    label_ids = detections['labels'].cpu().tolist()

    # Map ids to names; an id outside the table is shown as its number
    # instead of raising IndexError.
    known = len(COCO_INSTANCE_CATEGORY_NAMES)
    label_names = [
        COCO_INSTANCE_CATEGORY_NAMES[label_id] if 0 <= label_id < known else str(label_id)
        for label_id in label_ids
    ]

    # Draw on a copy so the image returned by prepare_image stays untouched.
    result_image = draw_boxes(image.copy(), boxes, label_names, scores, threshold=threshold, font_size=font_size)
    return result_image

if __name__ == "__main__":
    # Script entry: load the model, analyze one hard-coded image, show the result.
    image_path = r"C:\Users\julia\Downloads\kind.webp"  # Path to the input image (machine-specific)
    model = load_model()
    # NOTE: font_size=15 is smaller than analyze_image's default of 20
    output_image = analyze_image(image_path, model, threshold=0.5, font_size=15)
    output_image.show()
