<!DOCTYPE html>
<html>
<head>
    <title>Problem 5</title>
</head>
<body>
    <h1>Object Detection with YOLO - Documentation</h1>
    <p>Below is the documentation for the Python script using the Ultralytics YOLO library to perform object detection on images in a specified folder:</p>
    
<h2>Code Overview</h2>
<ol>
    <li>Import necessary libraries including <code>os</code>, <code>shutil</code>, <code>itertools</code>, <code>matplotlib.pyplot</code>, <code>YOLO</code> from Ultralytics, and <code>clip</code> from OpenAI.</li>
    <li>Define the function <code>detect_and_save_objects(yolo_model, input_folder, output_folder)</code> to perform object detection on images and save detected object crops.</li>
    <li>Define the function <code>compare_images_using_clip(clip_model, output_folder)</code> to compare images using the CLIP model and save similar object crops.</li>
    <li>Define the main function <code>main()</code> to orchestrate the execution of object detection and image comparison.</li>
    <li>Call the <code>main()</code> function when the script is run as the main module.</li>
</ol>

<h2>Execution Flow</h2>
<ol>
    <li>Create a YOLO model instance using the pretrained model file <code>yolov8m.pt</code>.</li>
    <li>Specify input and output directories for images and results.</li>
    <li>Call the <code>detect_and_save_objects</code> function to perform object detection and save object crops.</li>
    <li>Call the <code>compare_images_using_clip</code> function to compare images and save similar object crops.</li>
</ol>

<h2>Main Execution</h2>
<ul>
    <li>Check if the script is being run as the main module using <code>if __name__ == "__main__":</code></li>
    <li>Call the <code>main()</code> function to initiate the execution of object detection and comparison.</li>
    <li>Handle exceptions and print error messages in case of failures.</li>
</ul>
    
<p>This documentation provides an overview of the code's functionality and execution process.</p>
</body>
</html>


In [2]:
import os
import shutil
import itertools
import matplotlib.pyplot as plt
from ultralytics import YOLO
import clip


# Function to perform YOLO object detection and save cropped images
def detect_and_save_objects(yolo_model, input_folder, output_folder):
    try:
        # Get YOLO detections function from the model
        
        # Get a list of image paths in the input folder
        image_paths = [os.path.join(input_folder, img) for img in os.listdir(input_folder) if img.lower().endswith(('.jpeg', '.jpg'))]

        for img_path in image_paths:
            # Extract image name without extension
            image_name = os.path.splitext(os.path.basename(img_path))[0]
            # Create output folder for the image's detections
            output_img_folder = os.path.join(output_folder, image_name)
            os.makedirs(output_img_folder, exist_ok=True)

            # Perform object detection using YOLO
            detected_objects = yolo_model.predict(img_path, save=False)
            object_counts = {}  # Track counts of different objects in the image

            for idx, detected_obj in enumerate(detected_objects[0].boxes):
                obj_class = detected_obj.cls[0].item()
                object_counts[obj_class] = object_counts.get(obj_class, 0) + 1

                class_label = detected_objects[0].names[obj_class]
                entity_folder = os.path.join(output_img_folder, f"{class_label}_{object_counts[obj_class]}")
                os.makedirs(entity_folder, exist_ok=True)

                # Extract object's bounding box coordinates
                x_min, y_min, x_max, y_max = detected_obj.xyxy[0]
                x_min, y_min, x_max, y_max = int(x_min), int(y_min), int(x_max), int(y_max)

                # Crop the object from the image
                cutout = plt.imread(img_path)[y_min:y_max, x_min:x_max]
                output_filename = f"{class_label}_{object_counts[obj_class]}_crop.jpg"
                plt.imsave(os.path.join(entity_folder, output_filename), cutout)
    
    except Exception as e:
        print("Error occurred during object detection and saving:", e)

# Function to compare images using the CLIP model
def compare_images_using_clip(clip_model, output_folder):
    try:
        # Load CLIP model and preprocessing function
        clip_model, clip_preprocess = clip_model.load("ViT-B/32")
        # Get matched image paths for CLIP comparison
        matched_image_paths = [os.path.join(root, file) for root, _, files in os.walk(output_folder) for file in files if file.endswith('.jpg')]

        for root, _, files in os.walk(output_folder):
            if files:
                # Get the path of the reference image for comparison
                reference_img_path = os.path.join(root, files[0])
                reference_entity = ''.join(filter(lambda z: not z.isdigit(), files[0].split('_')[0]))
                reference_image = clip_preprocess(plt.imread(reference_img_path))[None]
                reference_image_features = clip_model.encode_image(reference_image)
                similarity_scores = {}  # Store similarity scores for images

                for img_path in matched_image_paths:
                    if reference_img_path == img_path:
                        continue
                    target_image = clip_preprocess(plt.imread(img_path))[None]
                    target_image_features = clip_model.encode_image(target_image)
                    # Calculate similarity score using cosine similarity
                    similarity_score = (1 + (target_image_features @ reference_image_features.T) / 2).item()

                    similarity_scores[img_path] = similarity_score

                # Sort the similarity scores and get top similar images
                sorted_scores = dict(sorted(similarity_scores.items(), key=lambda x: x[1], reverse=True))
                top_similar_images = dict(itertools.islice(sorted_scores.items(), 3))

                print(top_similar_images)
                temp_counter = 1

                for image_path in top_similar_images.keys():
                    # Create new filename for copied similar images
                    new_filename = f"top{temp_counter}_crop.jpeg"
                    destination_path = os.path.join(root, new_filename)
                    shutil.copy(image_path, destination_path)
                    temp_counter += 1
    
    except Exception as e:
        print("Error occurred during image comparison:", e)

def main():
    try:
        # Initialize YOLO model
        yolo = YOLO("yolov8m.pt")
        input_directory = "./All_Images"
        output_directory = "output/problem5"

        # Call functions to perform object detection and image comparison
        detect_and_save_objects(yolo, input_directory, output_directory)
        compare_images_using_clip(clip, output_directory)
    
    except Exception as e:
        print("An error occurred during execution:", e)

if __name__ == "__main__":
    main()



image 1/1 d:\Adobe\Aithon\Aithon\All_Images\1.jpg: 384x640 4 persons, 1 couch, 2 potted plants, 1 vase, 1145.2ms
Speed: 8.5ms preprocess, 1145.2ms inference, 15.6ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 d:\Adobe\Aithon\Aithon\All_Images\2.jpg: 448x640 2 persons, 1 tie, 1 bed, 1 cell phone, 1153.7ms
Speed: 7.0ms preprocess, 1153.7ms inference, 31.7ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 d:\Adobe\Aithon\Aithon\All_Images\3.jpg: 448x640 4 persons, 1 sports ball, 1278.9ms
Speed: 7.0ms preprocess, 1278.9ms inference, 5.6ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 d:\Adobe\Aithon\Aithon\All_Images\AdobeStock_112814949.jpeg: 448x640 1 spoon, 1 bowl, 1 sandwich, 1 dining table, 1214.5ms
Speed: 14.6ms preprocess, 1214.5ms inference, 3.5ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 d:\Adobe\Aithon\Aithon\All_Images\AdobeStock_119085612.jpeg: 480x640 1 cup, 1 toothbrush, 1160.3ms
Speed: 6.5ms preprocess, 1160.3ms infere

Error occurred during image comparison: Unexpected type <class 'numpy.ndarray'>
