In [None]:
# Use OWLv2 as the object-detection model to evaluate the performance of different VLMs.

In [None]:
# Switch PyTorch's CUDA caching allocator to expandable segments. This is
# intended to reduce memory fragmentation; it does not add GPU memory.
# Set it before PyTorch initialises CUDA so the allocator picks it up.
import os

os.environ.update({"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"})

In [None]:
import requests
from PIL import Image
import torch

from transformers import Owlv2Processor, Owlv2ForObjectDetection

# The processor and the detection model must come from the same checkpoint,
# so the checkpoint id is named once and reused for both.
OWLV2_CHECKPOINT = "google/owlv2-base-patch16-ensemble"

processor = Owlv2Processor.from_pretrained(OWLV2_CHECKPOINT)
model = Owlv2ForObjectDetection.from_pretrained(OWLV2_CHECKPOINT)
model = model.to("cuda")  # inference below runs on the GPU

In [None]:
import json
def save_labels(labels_list, objects, image_path, output_dir='/home/jovyan/Evaluation/Labels'):
    """Write the evaluation result for one image to a JSON file.

    The file is named after the image with its extension replaced by
    ``.json`` and is written into ``output_dir`` (created if missing).

    Args:
        labels_list: All labels that were proposed for the image.
        objects: The subset of ``labels_list`` that was confirmed.
        image_path: Image file name, e.g. ``"photo.jpg"``.
        output_dir: Directory that receives the JSON file. Defaults to the
            location this function has always written to.
    """
    incorrect = [item for item in labels_list if item not in objects]
    data = {
        "Image Path" : image_path,
        "Original Labels" : labels_list,
        "Correct Labels" :  objects,
        "Incorrect Labels" : incorrect
    }

    if not os.path.isdir(output_dir):
        # exist_ok guards against the directory appearing between the check and the call.
        os.makedirs(output_dir, exist_ok=True)
        print(f"{output_dir} successfully created")

    # str.strip(".jpg") removes any of the characters '.', 'j', 'p', 'g' from
    # BOTH ends ("pig.jpg" -> "i"); splitext drops only the real extension.
    name, _extension = os.path.splitext(image_path)
    json_filename = f'{name}.json'
    file_name = os.path.join(output_dir, json_filename)
    with open(file_name, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4)
    print(f"File : {json_filename} successfully created!")

In [None]:
def load_files(file, data_dir='/home/jovyan/Evaluation/Data'):
    """Load one label JSON file and return its parsed content.

    Args:
        file: File name of the JSON document, relative to ``data_dir``.
        data_dir: Directory containing the label files. Defaults to the
            location this function has always read from.

    Returns:
        The deserialised JSON content.

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    path = os.path.join(data_dir, file)
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)

In [None]:
# Evaluate every label file: run OWLv2 on the matching image and record which
# of the proposed labels the detector actually finds.
success = 0  # running sum of per-image success percentages
directory_path = "/home/jovyan/Evaluation/Data"  # directory holding the label JSON files
images_dir = '/home/jovyan/images'  # directory holding the original images
score_threshold = 0.2  # minimum detection score for a label to count as found

# Keep only regular *.json files; sort them because os.listdir order is arbitrary.
files = sorted(
    file for file in os.listdir(directory_path)
    if file.endswith(".json") and os.path.isfile(os.path.join(directory_path, file))
)

for index, file in enumerate(files):
    torch.cuda.empty_cache()

    # Label files are named "<image file name>.json". Drop only that final
    # extension: str.strip(".json") would remove any of the characters
    # '.', 'j', 's', 'o', 'n' from BOTH ends of the name.
    image_path = os.path.splitext(file)[0]
    print(f"Processing Image {image_path}")

    data = load_files(file)
    labels_list = data['Labels']
    if not labels_list:
        # Nothing to verify; the image stays in the denominator and scores 0.
        print(f"Skipping Image {image_path}: no labels to evaluate")
        continue

    # convert() returns a new, fully loaded image, so the file handle can be closed.
    with Image.open(os.path.join(images_dir, image_path)) as original_image:
        image = original_image.convert("RGB")

    texts = [labels_list]  # one list of text queries per image in the batch
    inputs = processor(text=texts, images=image, return_tensors="pt")
    inputs = {key: value.to(model.device) for key, value in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    # Target image sizes (height, width) to rescale box predictions [batch_size, 2]
    target_sizes = torch.Tensor([image.size[::-1]])
    results = processor.post_process_object_detection(
        outputs=outputs, target_sizes=target_sizes, threshold=score_threshold
    )

    # Only the predicted label indices matter here; boxes and scores are not
    # used. Each detected label is counted once, in first-detection order.
    detected_indices = results[0]["labels"].tolist()
    objects = list(dict.fromkeys(labels_list[label] for label in detected_indices))
    correct = len(objects)
    success += correct / len(labels_list) * 100

    save_labels(labels_list, objects, image_path)

    print(f"Successfully processed Image {file}, {index+1}/{len(files)} Images Processed")

if files:
    rate = (success / len(files))
    print(f"Model has a success rate of {round(rate,1)}% over {index + 1} images.")
else:
    print(f"No label files found in {directory_path}")

In [None]:
# Report the average per-image success rate. The image count comes from
# len(files), the same value used as the divisor, rather than from the loop
# variable left over by the evaluation cell.
if files:
    rate = (success / len(files))
    print(f"Model has a success rate of {round(rate,1)}% over {len(files)} images.")
else:
    print("No images were processed, so no success rate can be reported.")