In [None]:
# Install the notebook's dependencies into the environment of the running kernel.
# `%pip` is used instead of `!pip` because a shell `pip` may belong to a different
# Python than the kernel. pillow and tqdm are listed explicitly because the
# import cell imports PIL and tqdm directly.
%pip install transformers torchvision torch pycocotools pillow tqdm

In [None]:
import os
import json
import torch
from transformers import AutoProcessor, OwlViTForObjectDetection
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from PIL import Image
from tqdm import tqdm

In [None]:
# Load the OWL-ViT checkpoint: the detection model and its matching processor
model_name = "google/owlvit-large-patch14"
model_owlvit = OwlViTForObjectDetection.from_pretrained(model_name)
processor_owlvit = AutoProcessor.from_pretrained(model_name)

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model_owlvit.to(device)

In [None]:
# Adjust these paths before running; both are left as blank placeholders here
image_folder = "   " # Path to the folder containing the images
annotation_file = "   " # Path to the COCO annotation file

In [None]:
# Load the ground-truth annotations
coco_gt = COCO(annotation_file)

# Collect the image files. The extension check is case-insensitive and also
# accepts ".jpeg", so files such as "IMG_001.JPG" are not silently left out of
# the run. Sorting makes the processing order reproducible, because
# os.listdir returns entries in arbitrary order.
image_extensions = (".jpg", ".jpeg", ".png")
image_paths = sorted(
    os.path.join(image_folder, img)
    for img in os.listdir(image_folder)
    if img.lower().endswith(image_extensions)
)

# Map each annotated file name to its COCO image id
filename_to_image_id = {img['file_name']: img['id'] for img in coco_gt.dataset['images']}

In [None]:
# Category id that is written into every detection by convert_to_coco_format
# NOTE(review): presumably this has to match the animal category id used in the annotation file — confirm
animal_category_id = 1

In [None]:
# Conversion of per-image detections into the COCO results format
def convert_to_coco_format(results, ground_truth_category_id):
    """Flatten per-image detections into a list of COCO result records.

    Args:
        results: Iterable of dicts, one per image. Each dict must provide
            "image_id", "boxes" (a sequence of ``[x_min, y_min, x_max, y_max]``
            corner boxes) and "scores" (one score per box). Any other key,
            such as "labels", is ignored.
        ground_truth_category_id: Category id written into every record,
            independent of what the detector predicted.

    Returns:
        A list of dicts with the keys "image_id", "category_id", "bbox"
        (``[x_min, y_min, width, height]``) and "score", in input order.

    Raises:
        KeyError: If an entry lacks "image_id", "boxes" or "scores".
    """
    coco_results = []
    for result in results:
        image_id = result["image_id"]
        # The predicted labels are deliberately not read: every detection is
        # assigned ground_truth_category_id, so they never reach the output.
        for (x_min, y_min, x_max, y_max), score in zip(result["boxes"], result["scores"]):
            coco_results.append({
                "image_id": image_id,
                "category_id": ground_truth_category_id,
                # COCO stores boxes as top-left corner plus width/height
                "bbox": [x_min, y_min, x_max - x_min, y_max - y_min],
                "score": score,
            })
    return coco_results

In [None]:
# Save and evaluate detections that already exist in the kernel.
# This cell sits above the cells that run inference and build
# `coco_results_owlvit`, so on a fresh kernel that name is not defined yet.
# The guard lets "Restart & Run All" continue past this cell instead of
# stopping with a NameError; the cells further down do the actual work.
if "coco_results_owlvit" not in globals():
    print("coco_results_owlvit ist noch nicht definiert. Überspringe diese Zelle.")
else:
    # NOTE(review): machine-specific absolute path — adjust before re-running this cell
    output_file = r"E:\Bachelor_Final\OWL\owlvit_results.json"
    with open(output_file, "w") as f:
        json.dump(coco_results_owlvit, f)

    # Load the saved detections and run the COCO bounding-box evaluation
    coco_dt_owlvit = coco_gt.loadRes(output_file)
    coco_eval_owlvit = COCOeval(coco_gt, coco_dt_owlvit, iouType='bbox')
    coco_eval_owlvit.evaluate()
    coco_eval_owlvit.accumulate()
    coco_eval_owlvit.summarize()

In [None]:
# Run OWL-ViT over all collected images, with a progress bar
text_queries = ["animal"]  # text prompt(s) passed to the processor for every image
score_threshold = 0.1  # score threshold handed to the post-processing step

results_owlvit = []
for image_path in tqdm(image_paths, desc="Verarbeitung von Bildern"):
    # Resolve the COCO image id first, so images without an annotation entry
    # are skipped before any inference time is spent on them.
    image_filename = os.path.basename(image_path)
    image_id = filename_to_image_id.get(image_filename)

    if image_id is None:
        print(f"Bild-ID für {image_filename} nicht gefunden. Überspringe dieses Bild.")
        continue

    # Open through a context manager so the file handle is closed again;
    # convert() returns a loaded copy that stays usable afterwards.
    with Image.open(image_path) as image_file:
        image = image_file.convert("RGB")

    inputs = processor_owlvit(text=text_queries, images=image, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model_owlvit(**inputs)

    # PIL reports (width, height); reversed here to (height, width) for post-processing
    target_sizes = torch.tensor([image.size[::-1]], device=device)
    results = processor_owlvit.post_process_object_detection(
        outputs=outputs, threshold=score_threshold, target_sizes=target_sizes
    )[0]

    # Store the detections as plain Python lists so they can be serialised later
    results_owlvit.append({
        "image_id": image_id,
        "boxes": results["boxes"].tolist(),
        "scores": results["scores"].tolist(),
        "labels": results["labels"].tolist(),
    })

In [None]:
# Flatten the per-image detections into COCO result records
coco_results_owlvit = convert_to_coco_format(
    results=results_owlvit,
    ground_truth_category_id=animal_category_id,
)

# Write the records to disk as JSON
output_file = "   "  # Path to the output file
with open(output_file, mode="w") as results_handle:
    json.dump(coco_results_owlvit, results_handle)

In [None]:
# Load the saved detections and run the COCO bounding-box evaluation.
# COCO.loadRes inspects the first result entry, so an empty detection list
# would fail with an IndexError; in that case the evaluation is skipped.
if not coco_results_owlvit:
    print("Keine Detektionen vorhanden. Überspringe die Evaluierung.")
else:
    coco_dt_owlvit = coco_gt.loadRes(output_file)
    coco_eval_owlvit = COCOeval(coco_gt, coco_dt_owlvit, iouType='bbox')
    coco_eval_owlvit.evaluate()
    coco_eval_owlvit.accumulate()
    coco_eval_owlvit.summarize()