In [13]:
import numpy as np
import pandas as pd
import time
import cv2
import torch
import torchvision
from torchvision import transforms, models
from PIL import Image
from ultralytics import YOLO
from sklearn.cluster import KMeans


# COCO class names for Faster R-CNN
COCO_CLASSES = COCO_CLASSES = [
    "__background__",
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
    "truck", "boat", "traffic light", "fire hydrant", "stop sign",
    "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
    "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    "sports ball", "kite", "baseball bat", "baseball glove", "skateboard",
    "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork",
    "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
    "couch", "potted plant", "bed", "dining table", "toilet", "tv",
    "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
    "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
    "scissors", "teddy bear", "hair drier", "toothbrush"
]


# Transform for Faster R-CNN
transform = transforms.Compose([
    transforms.ToTensor()
])


################### PART B ##########################
# SHARPNESS 
def image_sharpness(image_path):
    """
    Returns the sharpness score using Laplacian variance.
    Higher = sharper. Lower = blurrier.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return None
    return cv2.Laplacian(img, cv2.CV_64F).var()


# DOMINANT COLORS
def dominant_colors(image_path, k=3):
    """
    Returns the top K dominant colors in RGB format.
    Colors are returned as [(R,G,B), ...].
    """
    img = Image.open(image_path).convert("RGB")
    img_np = np.array(img)
    pixels = img_np.reshape(-1, 3)

    kmeans = KMeans(n_clusters=k, n_init='auto')
    kmeans.fit(pixels)

    centers = kmeans.cluster_centers_.astype(int)
    return [tuple(color) for color in centers]

#####################################################

results = []

# MY 10 IMAGES
images = ['A_tig.png', 
          'A_ryder.png', 
          'A_coachella.png', 
          'A_disney.png', 
          'A_surfer.png', 
          'A_surfers.png', 
          'A_football.png', 
          'A_lambeau.png', 
          'A_snowboarder.png', 
          'A_kayak.png'
         ]

# LOOP FOR EACH IMAGE 
for image_path in images:
    

    # YOLO SECTION 
    model1 = YOLO("yolov8m.pt")
    start_time = time.time()
    yolo_results = model1.predict(image_path)
    end_time = time.time()

    yolo_time_ms = (end_time - start_time) * 1000
    yolo_boxes = yolo_results[0].boxes
    yolo_names = yolo_results[0].names

    yolo_objects = []
    yolo_confidences = []

    for box in yolo_boxes:
        cls_id = int(box.cls[0])
        class_name = yolo_names[cls_id]
        conf = float(box.conf[0])
        yolo_objects.append((class_name, conf))
        yolo_confidences.append(conf)

    yolo_avg_conf = sum(yolo_confidences) / len(yolo_confidences) if yolo_confidences else 0.0


    # RCNN SECTION
    model2 = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    model2.eval()

    img = Image.open(image_path).convert("RGB")
    img_tensor = transform(img)

    start_time = time.time()
    with torch.no_grad():
        preds = model2([img_tensor])
    end_time = time.time()

    rcnn_time_ms = (end_time - start_time) * 1000

    pred_scores = preds[0]["scores"]
    pred_labels = preds[0]["labels"]

    rcnn_objects = []
    rcnn_confidences = []

    for score, label in zip(pred_scores, pred_labels):
        if score >= 0.5:
            class_name = COCO_CLASSES[label.item()]
            conf = float(score)
            rcnn_objects.append((class_name, conf))
            rcnn_confidences.append(conf)

    rcnn_avg_conf = sum(rcnn_confidences) / len(rcnn_confidences) if rcnn_confidences else 0.0

    sharp = image_sharpness(image_path)
    colors = dominant_colors(image_path, k=3)

    # RESULTS
    results.append({
        "image": image_path,
        "YOLO inference time (ms)": round(yolo_time_ms, 2),
        "YOLO objects": yolo_objects,
        "YOLO avg confidence": round(yolo_avg_conf, 3),
        "RCNN inference time (ms)": round(rcnn_time_ms, 2),
        "RCNN objects": rcnn_objects,
        "RCNN avg confidence": round(rcnn_avg_conf, 3),
        "sharpness": round(sharp, 2),
        "dominant colors": colors
    })


# RESULTS CONVERTED INTO A 
df = pd.DataFrame(results)
df.to_excel("image_model_results.xlsx", index=False)
df





image 1/1 C:\Users\heath\A_tig.png: 480x640 1 person, 1 baseball bat, 380.5ms
Speed: 3.3ms preprocess, 380.5ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)





image 1/1 C:\Users\heath\A_ryder.png: 416x640 15 persons, 328.2ms
Speed: 3.2ms preprocess, 328.2ms inference, 1.3ms postprocess per image at shape (1, 3, 416, 640)





image 1/1 C:\Users\heath\A_coachella.png: 416x640 13 persons, 1 traffic light, 2 backpacks, 1 umbrella, 336.2ms
Speed: 2.6ms preprocess, 336.2ms inference, 1.3ms postprocess per image at shape (1, 3, 416, 640)





image 1/1 C:\Users\heath\A_disney.png: 448x640 7 persons, 2 umbrellas, 1 tie, 1 teddy bear, 305.6ms
Speed: 3.1ms preprocess, 305.6ms inference, 1.1ms postprocess per image at shape (1, 3, 448, 640)





image 1/1 C:\Users\heath\A_surfer.png: 448x640 1 person, 1 surfboard, 429.6ms
Speed: 3.5ms preprocess, 429.6ms inference, 2.0ms postprocess per image at shape (1, 3, 448, 640)





image 1/1 C:\Users\heath\A_surfers.png: 448x640 2 persons, 2 surfboards, 335.6ms
Speed: 3.0ms preprocess, 335.6ms inference, 1.5ms postprocess per image at shape (1, 3, 448, 640)





image 1/1 C:\Users\heath\A_football.png: 384x640 13 persons, 2 baseball gloves, 349.5ms
Speed: 3.8ms preprocess, 349.5ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)





image 1/1 C:\Users\heath\A_lambeau.png: 352x640 3 persons, 335.2ms
Speed: 3.3ms preprocess, 335.2ms inference, 1.5ms postprocess per image at shape (1, 3, 352, 640)





image 1/1 C:\Users\heath\A_snowboarder.png: 352x640 4 persons, 1 skis, 2 snowboards, 284.8ms
Speed: 2.5ms preprocess, 284.8ms inference, 1.4ms postprocess per image at shape (1, 3, 352, 640)





image 1/1 C:\Users\heath\A_kayak.png: 416x640 2 persons, 3 boats, 344.0ms
Speed: 2.7ms preprocess, 344.0ms inference, 1.5ms postprocess per image at shape (1, 3, 416, 640)




Unnamed: 0,image,YOLO inference time (ms),YOLO objects,YOLO avg confidence,RCNN inference time (ms),RCNN objects,RCNN avg confidence,sharpness,dominant colors
0,A_tig.png,627.95,"[(person, 0.9373472332954407), (baseball bat, ...",0.847,2360.73,"[(person, 0.9979391694068909), (tennis racket,...",0.877,90.84,"[(205, 39, 44), (40, 43, 33), (175, 162, 164)]"
1,A_ryder.png,514.88,"[(person, 0.9350208640098572), (person, 0.9327...",0.757,2775.58,"[(person, 0.9995341300964355), (person, 0.9993...",0.898,2011.94,"[(198, 209, 168), (24, 31, 63), (119, 122, 98)]"
2,A_coachella.png,555.4,"[(umbrella, 0.9083945751190186), (backpack, 0....",0.437,2673.93,"[(traffic light, 0.9569990634918213), (person,...",0.772,1446.86,"[(154, 107, 93), (36, 20, 17), (207, 201, 209)]"
3,A_disney.png,461.2,"[(person, 0.9295040965080261), (person, 0.6147...",0.483,2538.34,"[(person, 0.9995276927947998), (snowboard, 0.9...",0.764,305.64,"[(27, 37, 39), (112, 111, 122), (165, 156, 150)]"
4,A_surfer.png,689.69,"[(person, 0.9259545207023621), (surfboard, 0.6...",0.767,3634.17,"[(person, 0.9998679161071777), (cup, 0.9825803...",0.991,386.49,"[(209, 225, 233), (23, 141, 160), (125, 188, 2..."
5,A_surfers.png,476.16,"[(surfboard, 0.9503766894340515), (surfboard, ...",0.899,3417.27,"[(cup, 0.9991563558578491), (person, 0.9989651...",0.913,1245.27,"[(148, 186, 206), (43, 43, 36), (94, 114, 121)]"
6,A_football.png,613.19,"[(person, 0.9415538311004639), (person, 0.9279...",0.614,3486.63,"[(person, 0.9895995855331421), (person, 0.9890...",0.887,480.18,"[(34, 54, 53), (188, 194, 199), (144, 136, 79)]"
7,A_lambeau.png,519.89,"[(person, 0.5442403554916382), (person, 0.5089...",0.462,3500.16,"[(person, 0.9413757920265198), (person, 0.9263...",0.648,7322.6,"[(22, 34, 39), (141, 174, 186), (75, 94, 70)]"
8,A_snowboarder.png,448.5,"[(person, 0.9479820728302002), (snowboard, 0.9...",0.762,3287.1,"[(person, 0.999415397644043), (person, 0.99920...",0.89,1857.23,"[(133, 160, 193), (50, 48, 54), (234, 234, 239)]"
9,A_kayak.png,502.81,"[(person, 0.7988861203193665), (person, 0.7976...",0.617,3591.81,"[(person, 0.9958186745643616), (person, 0.9947...",0.963,874.96,"[(111, 160, 190), (170, 214, 242), (45, 73, 82)]"
